/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/pm_runtime.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_plane_helper.h>

#include "display/intel_atomic.h"
#include "display/intel_fbc.h"
#include "display/intel_sprite.h"

#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_pm.h"
#include "intel_sideband.h"
#include "../../../platform/x86/intel_ips.h"

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage.  This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available on Intel GPUs, which differ in
 * the latency required to enter and leave RC6, and in the voltage consumed
 * by the GPU in different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6,
 * and RC6pp is the deepest RC6. Their support by hardware varies according
 * to the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to switch to and wake up.
 */

static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
{
	if (HAS_LLC(dev_priv)) {
		/*
		 * WaCompressedResourceDisplayNewHashMode:skl,kbl
		 * Display WA #0390: skl,kbl
		 *
		 * Must match Sampler, Pixel Back End, and Media. See
		 * WaCompressedResourceSamplerPbeMediaNewHashMode.
		 */
		I915_WRITE(CHICKEN_PAR1_1,
			   I915_READ(CHICKEN_PAR1_1) |
			   SKL_DE_COMPRESSED_HASH_MODE);
	}

	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
	I915_WRITE(CHICKEN_PAR1_1,
		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

	/* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
	I915_WRITE(GEN8_CHICKEN_DCPR_1,
		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

	/* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
	/* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
		   DISP_FBC_WM_DIS |
		   DISP_FBC_MEMORY_WAKE);

	/* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
		   ILK_DPFC_DISABLE_DUMMY0);

	if (IS_SKYLAKE(dev_priv)) {
		/* WaDisableDopClockGating */
		I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
			   & ~GEN7_DOP_CLOCK_GATE_ENABLE);
	}
}

static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
{
	gen9_init_clock_gating(dev_priv);

	/* WaDisableSDEUnitClockGating:bxt */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * FIXME:
	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
	 */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

	/*
	 * Wa: Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
		   PWM1_GATING_DIS | PWM2_GATING_DIS);
}

static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
{
	gen9_init_clock_gating(dev_priv);

	/*
	 * WaDisablePWMClockGating:glk
	 * Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
		   PWM1_GATING_DIS | PWM2_GATING_DIS);

	/* WaDDIIOTimeout:glk */
	if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
		u32 val = I915_READ(CHICKEN_MISC_2);
		val &= ~(GLK_CL0_PWR_DOWN |
			 GLK_CL1_PWR_DOWN |
			 GLK_CL2_PWR_DOWN);
		I915_WRITE(CHICKEN_MISC_2, val);
	}
}

static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
{
	u32 tmp;

	tmp = I915_READ(CLKCFG);

	switch (tmp & CLKCFG_FSB_MASK) {
	case CLKCFG_FSB_533:
		dev_priv->fsb_freq = 533; /* 133*4 */
		break;
	case CLKCFG_FSB_800:
		dev_priv->fsb_freq = 800; /* 200*4 */
		break;
	case CLKCFG_FSB_667:
		dev_priv->fsb_freq = 667; /* 167*4 */
		break;
	case CLKCFG_FSB_400:
		dev_priv->fsb_freq = 400; /* 100*4 */
		break;
	}

	switch (tmp & CLKCFG_MEM_MASK) {
	case CLKCFG_MEM_533:
		dev_priv->mem_freq = 533;
		break;
	case CLKCFG_MEM_667:
		dev_priv->mem_freq = 667;
		break;
	case CLKCFG_MEM_800:
		dev_priv->mem_freq = 800;
		break;
	}

	/* detect pineview DDR3 setting */
	tmp = I915_READ(CSHRDDR3CTL);
	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
{
	u16 ddrpll, csipll;

	ddrpll = intel_uncore_read16(&dev_priv->uncore, DDRMPLL1);
	csipll = intel_uncore_read16(&dev_priv->uncore, CSIPLL0);

	switch (ddrpll & 0xff) {
	case 0xc:
		dev_priv->mem_freq = 800;
		break;
	case 0x10:
		dev_priv->mem_freq = 1066;
		break;
	case 0x14:
		dev_priv->mem_freq = 1333;
		break;
	case 0x18:
		dev_priv->mem_freq = 1600;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
				 ddrpll & 0xff);
		dev_priv->mem_freq = 0;
		break;
	}

	dev_priv->ips.r_t = dev_priv->mem_freq;

	switch (csipll & 0x3ff) {
	case 0x00c:
		dev_priv->fsb_freq = 3200;
		break;
	case 0x00e:
		dev_priv->fsb_freq = 3733;
		break;
	case 0x010:
		dev_priv->fsb_freq = 4266;
		break;
	case 0x012:
		dev_priv->fsb_freq = 4800;
		break;
	case 0x014:
		dev_priv->fsb_freq = 5333;
		break;
	case 0x016:
		dev_priv->fsb_freq = 5866;
		break;
	case 0x018:
		dev_priv->fsb_freq = 6400;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
				 csipll & 0x3ff);
		dev_priv->fsb_freq = 0;
		break;
	}

	if (dev_priv->fsb_freq == 3200) {
		dev_priv->ips.c_m = 0;
	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
		dev_priv->ips.c_m = 1;
	} else {
		dev_priv->ips.c_m = 2;
	}
}

static const struct cxsr_latency cxsr_latency_table[] = {
	{1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
	{1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
	{1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
	{1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
	{1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

	{1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
	{1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
	{1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
	{1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
	{1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

	{1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
	{1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
	{1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
	{1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
	{1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

	{0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
	{0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
	{0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
	{0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
	{0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

	{0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
	{0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
	{0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
	{0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
	{0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

	{0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
	{0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
	{0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
	{0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
	{0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};

static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
							 bool is_ddr3,
							 int fsb,
							 int mem)
{
	const struct cxsr_latency *latency;
	int i;

	if (fsb == 0 || mem == 0)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
		latency = &cxsr_latency_table[i];
		if (is_desktop == latency->is_desktop &&
		    is_ddr3 == latency->is_ddr3 &&
		    fsb == latency->fsb_freq && mem == latency->mem_freq)
			return latency;
	}

	DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

	return NULL;
}

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	vlv_punit_get(dev_priv);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
	if (enable)
		val &= ~FORCE_DDR_HIGH_FREQ;
	else
		val |= FORCE_DDR_HIGH_FREQ;
	val &= ~FORCE_DDR_LOW_FREQ;
	val |= FORCE_DDR_FREQ_REQ_ACK;
	vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

	vlv_punit_put(dev_priv);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	vlv_punit_get(dev_priv);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
	if (enable)
		val |= DSP_MAXFIFO_PM5_ENABLE;
	else
		val &= ~DSP_MAXFIFO_PM5_ENABLE;
	vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val);

	vlv_punit_put(dev_priv);
}

#define FW_WM(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)

static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	bool was_enabled;
	u32 val;

	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
		POSTING_READ(FW_BLC_SELF_VLV);
	} else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_PINEVIEW(dev_priv)) {
		val = I915_READ(DSPFW3);
		was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
		if (enable)
			val |= PINEVIEW_SELF_REFRESH_EN;
		else
			val &= ~PINEVIEW_SELF_REFRESH_EN;
		I915_WRITE(DSPFW3, val);
		POSTING_READ(DSPFW3);
	} else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
		val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
			       _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
		I915_WRITE(FW_BLC_SELF, val);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_I915GM(dev_priv)) {
		/*
		 * FIXME can't find a bit like this for 915G, and
		 * yet it does have the related watermark in
		 * FW_BLC_SELF. What's going on?
		 */
		was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
		val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
			       _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
		I915_WRITE(INSTPM, val);
		POSTING_READ(INSTPM);
	} else {
		return false;
	}

	trace_intel_memory_cxsr(dev_priv, was_enabled, enable);

	DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
		      enableddisabled(enable),
		      enableddisabled(was_enabled));

	return was_enabled;
}

/**
 * intel_set_memory_cxsr - Configure CxSR state
 * @dev_priv: i915 device
 * @enable: Allow vs. disallow CxSR
 *
 * Allow or disallow the system to enter a special CxSR
 * (C-state self refresh) state. What typically happens in CxSR mode
 * is that several display FIFOs may get combined into a single larger
 * FIFO for a particular plane (so called max FIFO mode) to allow the
 * system to defer memory fetches longer, and the memory will enter
 * self refresh.
 *
 * Note that enabling CxSR does not guarantee that the system enters
 * this special mode, nor does it guarantee that the system stays
 * in that mode once entered. So this just allows/disallows the system
 * to autonomously utilize the CxSR mode. Other factors such as core
 * C-states will affect when/if the system actually enters/exits the
 * CxSR mode.
 *
 * Note that on VLV/CHV this actually only controls the max FIFO mode,
 * and the system is free to enter/exit memory self refresh at any time
 * even when the use of CxSR has been disallowed.
 *
 * While the system is actually in the CxSR/max FIFO mode, some plane
 * control registers will not get latched on vblank. Thus in order to
 * guarantee the system will respond to changes in the plane registers
 * we must always disallow CxSR prior to making changes to those registers.
 * Unfortunately the system will re-evaluate the CxSR conditions at
 * frame start which happens after vblank start (which is when the plane
 * registers would get latched), so we can't proceed with the plane update
 * during the same frame where we disallowed CxSR.
 *
 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
 * the hardware w.r.t. HPLL SR when writing to plane registers.
 * Disallowing just CxSR is sufficient.
 */
bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	bool ret;

	mutex_lock(&dev_priv->wm.wm_mutex);
	ret = _intel_set_memory_cxsr(dev_priv, enable);
	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
		dev_priv->wm.vlv.cxsr = enable;
	else if (IS_G4X(dev_priv))
		dev_priv->wm.g4x.cxsr = enable;
	mutex_unlock(&dev_priv->wm.wm_mutex);

	return ret;
}
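
/*
 * Hypothetical usage sketch (not a call site in this file): given the
 * latching rules described above, a flow that must guarantee the plane
 * registers are observed would first disallow CxSR and wait out the
 * current frame before touching them, e.g.:
 *
 *	if (intel_set_memory_cxsr(dev_priv, false))
 *		intel_wait_for_vblank(dev_priv, crtc->pipe);
 *	... write plane registers ...
 *	intel_set_memory_cxsr(dev_priv, true);
 *
 * The vblank wait covers the frame start re-evaluation window noted
 * in the comment above.
 */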

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
	((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
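
/*
 * Illustrative reading of the macro above: for pipe B sprite0 in
 * vlv_get_fifo_size() below, VLV_FIFO_START(dsparb, dsparb2, 16, 8)
 * takes the low 8 bits of the FIFO start point from DSPARB[23:16] and
 * the 9th bit from DSPARB2[8], reassembling the full 9-bit
 * (0-511 cacheline) value.
 */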

static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
	enum pipe pipe = crtc->pipe;
	int sprite0_start, sprite1_start;

	switch (pipe) {
		u32 dsparb, dsparb2, dsparb3;
	case PIPE_A:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
		break;
	case PIPE_B:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
		break;
	case PIPE_C:
		dsparb2 = I915_READ(DSPARB2);
		dsparb3 = I915_READ(DSPARB3);
		sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
		sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
		break;
	default:
		MISSING_CASE(pipe);
		return;
	}

	fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
	fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
	fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
	fifo_state->plane[PLANE_CURSOR] = 63;
}

static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
			      enum i9xx_plane_id i9xx_plane)
{
	u32 dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	if (i9xx_plane == PLANE_B)
		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
		      dsparb, plane_name(i9xx_plane), size);

	return size;
}

static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
			      enum i9xx_plane_id i9xx_plane)
{
	u32 dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x1ff;
	if (i9xx_plane == PLANE_B)
		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
	size >>= 1; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
		      dsparb, plane_name(i9xx_plane), size);

	return size;
}

static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
			      enum i9xx_plane_id i9xx_plane)
{
	u32 dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	size >>= 2; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
		      dsparb, plane_name(i9xx_plane), size);

	return size;
}

/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_HPLLOFF_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
	.fifo_size = I965_CURSOR_FIFO,
	.max_wm = I965_CURSOR_MAX_WM,
	.default_wm = I965_CURSOR_DFT_WM,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
	.fifo_size = I945_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
	.fifo_size = I915_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM/2,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
	.fifo_size = I830_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 1 or "small buffer"
 * formula. The caller may additionally add extra cachelines
 * to account for TLB misses and clock crossings.
 *
 * This method is concerned with the short term drain rate
 * of the FIFO, ie. it does not account for blanking periods
 * which would effectively reduce the average drain rate across
 * a longer period. The name "small" refers to the fact that the
 * FIFO is relatively small compared to the amount of data
 * fetched.
 *
 * The FIFO level vs. time graph might look something like:
 *
 *   |\   |\
 *   | \  | \
 * __---__---__ (- plane active, _ blanking)
 * -> time
 *
 * or perhaps like this:
 *
 *   |\|\  |\|\
 * __----__----__ (- plane active, _ blanking)
 * -> time
 *
 * Returns:
 * The watermark in bytes
 */
static unsigned int intel_wm_method1(unsigned int pixel_rate,
				     unsigned int cpp,
				     unsigned int latency)
{
	u64 ret;

	ret = mul_u32_u32(pixel_rate, cpp * latency);
	ret = DIV_ROUND_UP_ULL(ret, 10000);

	return ret;
}
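
/*
 * Worked example with illustrative numbers (not from bspec): at a
 * 148500 kHz pixel rate, 4 bytes per pixel and a 5 usec wakeup latency
 * (latency = 50 in 0.1us units), method 1 gives
 *
 *	148500 * 4 * 50 / 10000 = 2970 bytes
 *
 * i.e. the FIFO must buffer ~2970 bytes to ride out the memory wakeup.
 */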

/**
 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @htotal: Pipe horizontal total
 * @width: Plane width in pixels
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 2 or "large buffer"
 * formula. The caller may additionally add extra cachelines
 * to account for TLB misses and clock crossings.
 *
 * This method is concerned with the long term drain rate
 * of the FIFO, ie. it does account for blanking periods
 * which effectively reduce the average drain rate across
 * a longer period. The name "large" refers to the fact that the
 * FIFO is relatively large compared to the amount of data
 * fetched.
 *
 * The FIFO level vs. time graph might look something like:
 *
 *    |\___       |\___
 *    |    \___   |    \___
 *    |        \  |        \
 * __ --__--__--__--__--__--__ (- plane active, _ blanking)
 * -> time
 *
 * Returns:
 * The watermark in bytes
 */
static unsigned int intel_wm_method2(unsigned int pixel_rate,
				     unsigned int htotal,
				     unsigned int width,
				     unsigned int cpp,
				     unsigned int latency)
{
	unsigned int ret;

	/*
	 * FIXME remove once all users are computing
	 * watermarks in the correct place.
	 */
	if (WARN_ON_ONCE(htotal == 0))
		htotal = 1;

	ret = (latency * pixel_rate) / (htotal * 10000);
	ret = (ret + 1) * width * cpp;

	return ret;
}
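
/*
 * Worked example with illustrative numbers (not from bspec): a
 * 1920 pixel wide, 4 bytes per pixel plane on a 148500 kHz,
 * htotal = 2200 pipe with a 35 usec latency (350 in 0.1us units)
 * crosses
 *
 *	350 * 148500 / (2200 * 10000) = 2 line boundaries (truncated),
 *
 * so the method 2 watermark is (2 + 1) * 1920 * 4 = 23040 bytes.
 */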

/**
 * intel_calculate_wm - calculate watermark level
 * @pixel_rate: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO buffer
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again).  Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size.  When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills
 * past the watermark point.  If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned int intel_calculate_wm(int pixel_rate,
				       const struct intel_watermark_params *wm,
				       int fifo_size, int cpp,
				       unsigned int latency_ns)
{
	int entries, wm_size;

	/*
	 * Note: we need to make sure we don't overflow for various clock &
	 * latency values.
	 * Clocks go from a few thousand to several hundred thousand.
	 * Latency is usually a few thousand.
	 */
	entries = intel_wm_method1(pixel_rate, cpp,
				   latency_ns / 100);
	entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
		wm->guard_size;
	DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);

	wm_size = fifo_size - entries;
	DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);

	/* Don't promote wm_size to unsigned... */
	if (wm_size > wm->max_wm)
		wm_size = wm->max_wm;
	if (wm_size <= 0)
		wm_size = wm->default_wm;

	/*
	 * Bspec seems to indicate that the value shouldn't be lower than
	 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
	 * Let's go for 8 which is the burst size since certain platforms
	 * already use a hardcoded 8 (which is what the spec says should be
	 * done).
	 */
	if (wm_size <= 8)
		wm_size = 8;

	return wm_size;
}
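
/*
 * Worked example with illustrative numbers (not from bspec):
 * pixel_rate = 100000 kHz, cpp = 4 and latency_ns = 5000 give a
 * method 1 result of 100000 * 4 * 50 / 10000 = 2000 bytes. With a
 * 64 byte cacheline that is DIV_ROUND_UP(2000, 64) + 2 (guard) = 34
 * entries, so a hypothetical 96 entry FIFO would yield a watermark
 * level of 96 - 34 = 62.
 */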

static bool is_disabling(int old, int new, int threshold)
{
	return old >= threshold && new < threshold;
}

static bool is_enabling(int old, int new, int threshold)
{
	return old < threshold && new >= threshold;
}

static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
{
	return dev_priv->wm.max_level + 1;
}

static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
				   const struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);

	/* FIXME check the 'enable' instead */
	if (!crtc_state->base.active)
		return false;

	/*
	 * Treat cursor with fb as always visible since cursor updates
	 * can happen faster than the vrefresh rate, and the current
	 * watermark code doesn't handle that correctly. Cursor updates
	 * which set/clear the fb or change the cursor size are going
	 * to get throttled by intel_legacy_cursor_update() to work
	 * around this problem with the watermark code.
	 */
	if (plane->id == PLANE_CURSOR)
		return plane_state->base.fb != NULL;
	else
		return plane_state->base.visible;
}

static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
{
	struct intel_crtc *crtc, *enabled = NULL;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		if (intel_crtc_active(crtc)) {
			if (enabled)
				return NULL;
			enabled = crtc;
		}
	}

	return enabled;
}

static void pineview_update_wm(struct intel_crtc *unused_crtc)
{
	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
	struct intel_crtc *crtc;
	const struct cxsr_latency *latency;
	u32 reg;
	unsigned int wm;

	latency = intel_get_cxsr_latency(!IS_MOBILE(dev_priv),
					 dev_priv->is_ddr3,
					 dev_priv->fsb_freq,
					 dev_priv->mem_freq);
	if (!latency) {
		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
		intel_set_memory_cxsr(dev_priv, false);
		return;
	}

	crtc = single_enabled_crtc(dev_priv);
	if (crtc) {
		const struct drm_display_mode *adjusted_mode =
			&crtc->config->base.adjusted_mode;
		const struct drm_framebuffer *fb =
			crtc->base.primary->state->fb;
		int cpp = fb->format->cpp[0];
		int clock = adjusted_mode->crtc_clock;

		/* Display SR */
		wm = intel_calculate_wm(clock, &pineview_display_wm,
					pineview_display_wm.fifo_size,
					cpp, latency->display_sr);
		reg = I915_READ(DSPFW1);
		reg &= ~DSPFW_SR_MASK;
		reg |= FW_WM(wm, SR);
		I915_WRITE(DSPFW1, reg);
		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

		/* cursor SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
					pineview_display_wm.fifo_size,
					4, latency->cursor_sr);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_CURSOR_SR_MASK;
		reg |= FW_WM(wm, CURSOR_SR);
		I915_WRITE(DSPFW3, reg);

		/* Display HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					cpp, latency->display_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_SR_MASK;
		reg |= FW_WM(wm, HPLL_SR);
		I915_WRITE(DSPFW3, reg);

		/* cursor HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					4, latency->cursor_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_CURSOR_MASK;
		reg |= FW_WM(wm, HPLL_CURSOR);
		I915_WRITE(DSPFW3, reg);
		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

		intel_set_memory_cxsr(dev_priv, true);
	} else {
		intel_set_memory_cxsr(dev_priv, false);
	}
}

/*
 * Documentation says:
 * "If the line size is small, the TLB fetches can get in the way of the
 *  data fetches, causing some lag in the pixel data return which is not
 *  accounted for in the above formulas. The following adjustment only
 *  needs to be applied if eight whole lines fit in the buffer at once.
 *  The WM is adjusted upwards by the difference between the FIFO size
 *  and the size of 8 whole lines. This adjustment is always performed
 *  in the actual pixel depth regardless of whether FBC is enabled or not."
 */
static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
{
	int tlb_miss = fifo_size * 64 - width * cpp * 8;

	return max(0, tlb_miss);
}
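
/*
 * Worked example with illustrative numbers: the 63 entry g4x cursor
 * FIFO holds 63 * 64 = 4032 bytes, and a 64 pixel wide ARGB cursor
 * line is 64 * 4 = 256 bytes, so eight whole lines (2048 bytes) fit
 * and the watermark is bumped by 4032 - 2048 = 1984 bytes. For a
 * 1920 pixel wide primary plane eight lines never fit, the subtraction
 * goes negative, and max(0, ...) leaves the watermark unadjusted.
 */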

static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
				const struct g4x_wm_values *wm)
{
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

	I915_WRITE(DSPFW1,
		   FW_WM(wm->sr.plane, SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
	I915_WRITE(DSPFW2,
		   (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
		   FW_WM(wm->sr.fbc, FBC_SR) |
		   FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
	I915_WRITE(DSPFW3,
		   (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
		   FW_WM(wm->sr.cursor, CURSOR_SR) |
		   FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
		   FW_WM(wm->hpll.plane, HPLL_SR));

	POSTING_READ(DSPFW1);
}

#define FW_WM_VLV(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
				const struct vlv_wm_values *wm)
{
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe) {
		trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

		I915_WRITE(VLV_DDL(pipe),
			   (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
			   (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
			   (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
			   (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
	}

	/*
	 * Zero the (unused) WM1 watermarks, and also clear all the
	 * high order bits so that there are no out of bounds values
	 * present in the registers during the reprogramming.
	 */
	I915_WRITE(DSPHOWM, 0);
	I915_WRITE(DSPHOWM1, 0);
	I915_WRITE(DSPFW4, 0);
	I915_WRITE(DSPFW5, 0);
	I915_WRITE(DSPFW6, 0);

	I915_WRITE(DSPFW1,
		   FW_WM(wm->sr.plane, SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
		   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
	I915_WRITE(DSPFW2,
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
	I915_WRITE(DSPFW3,
		   FW_WM(wm->sr.cursor, CURSOR_SR));

	if (IS_CHERRYVIEW(dev_priv)) {
		I915_WRITE(DSPFW7_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
		I915_WRITE(DSPFW8_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
		I915_WRITE(DSPFW9_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
	} else {
		I915_WRITE(DSPFW7,
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
	}

	POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
{
	/* all latencies in usec */
	dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
	dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
	dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;

	dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
}

static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
{
	/*
	 * DSPCNTR[13] supposedly controls whether the
	 * primary plane can use the FIFO space otherwise
	 * reserved for the sprite plane. It's not 100% clear
	 * what the actual FIFO size is, but it looks like we
	 * can happily set both primary and sprite watermarks
	 * up to 127 cachelines. So that would seem to mean
	 * that either DSPCNTR[13] doesn't do anything, or that
	 * the total FIFO is >= 256 cachelines in size. Either
	 * way, we don't seem to have to worry about this
	 * repartitioning as the maximum watermark value the
	 * register can hold for each plane is lower than the
	 * minimum FIFO size.
	 */
	switch (plane_id) {
	case PLANE_CURSOR:
		return 63;
	case PLANE_PRIMARY:
		return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
	case PLANE_SPRITE0:
		return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
	default:
		MISSING_CASE(plane_id);
		return 0;
	}
}

static int g4x_fbc_fifo_size(int level)
{
	switch (level) {
	case G4X_WM_LEVEL_SR:
		return 7;
	case G4X_WM_LEVEL_HPLL:
		return 15;
	default:
		MISSING_CASE(level);
		return 0;
	}
}

static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state,
			  const struct intel_plane_state *plane_state,
			  int level)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
	const struct drm_display_mode *adjusted_mode =
		&crtc_state->base.adjusted_mode;
	unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
	unsigned int clock, htotal, cpp, width, wm;

	if (latency == 0)
		return USHRT_MAX;

	if (!intel_wm_plane_visible(crtc_state, plane_state))
		return 0;

	cpp = plane_state->base.fb->format->cpp[0];

	/*
	 * Not 100% sure which way ELK should go here as the
	 * spec only says CL/CTG should assume 32bpp and BW
	 * doesn't need to. But as these things followed the
	 * mobile vs. desktop lines on gen3 as well, let's
	 * assume ELK doesn't need this.
	 *
	 * The spec also fails to list such a restriction for
	 * the HPLL watermark, which seems a little strange.
	 * Let's use 32bpp for the HPLL watermark as well.
	 */
	if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
	    level != G4X_WM_LEVEL_NORMAL)
		cpp = max(cpp, 4u);

	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;

	if (plane->id == PLANE_CURSOR)
		width = plane_state->base.crtc_w;
	else
		width = drm_rect_width(&plane_state->base.dst);

	if (plane->id == PLANE_CURSOR) {
		wm = intel_wm_method2(clock, htotal, width, cpp, latency);
	} else if (plane->id == PLANE_PRIMARY &&
		   level == G4X_WM_LEVEL_NORMAL) {
		wm = intel_wm_method1(clock, cpp, latency);
	} else {
		unsigned int small, large;

		small = intel_wm_method1(clock, cpp, latency);
		large = intel_wm_method2(clock, htotal, width, cpp, latency);

		wm = min(small, large);
	}

	wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
			      width, cpp);

	wm = DIV_ROUND_UP(wm, 64) + 2;

	return min_t(unsigned int, wm, USHRT_MAX);
}

static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
				 int level, enum plane_id plane_id, u16 value)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	bool dirty = false;

	for (; level < intel_wm_num_levels(dev_priv); level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];

		dirty |= raw->plane[plane_id] != value;
		raw->plane[plane_id] = value;
	}

	return dirty;
}

static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
			       int level, u16 value)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	bool dirty = false;

	/* NORMAL level doesn't have an FBC watermark */
	level = max(level, G4X_WM_LEVEL_SR);

	for (; level < intel_wm_num_levels(dev_priv); level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];

		dirty |= raw->fbc != value;
		raw->fbc = value;
	}

	return dirty;
}

static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *crtc_state,
			      const struct intel_plane_state *plane_state,
			      u32 pri_val);

static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
				     const struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
	enum plane_id plane_id = plane->id;
	bool dirty = false;
	int level;

	if (!intel_wm_plane_visible(crtc_state, plane_state)) {
		dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
		if (plane_id == PLANE_PRIMARY)
			dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
		goto out;
	}

	for (level = 0; level < num_levels; level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
		int wm, max_wm;

		wm = g4x_compute_wm(crtc_state, plane_state, level);
		max_wm = g4x_plane_fifo_size(plane_id, level);

		if (wm > max_wm)
			break;

		dirty |= raw->plane[plane_id] != wm;
		raw->plane[plane_id] = wm;

		if (plane_id != PLANE_PRIMARY ||
		    level == G4X_WM_LEVEL_NORMAL)
			continue;

		wm = ilk_compute_fbc_wm(crtc_state, plane_state,
					raw->plane[plane_id]);
		max_wm = g4x_fbc_fifo_size(level);

		/*
		 * FBC wm is not mandatory as we
		 * can always just disable its use.
		 */
		if (wm > max_wm)
			wm = USHRT_MAX;

		dirty |= raw->fbc != wm;
		raw->fbc = wm;
	}

	/* mark watermarks as invalid */
	dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);

	if (plane_id == PLANE_PRIMARY)
		dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);

 out:
	if (dirty) {
		DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
			      plane->base.name,
			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);

		if (plane_id == PLANE_PRIMARY)
			DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
	}

	return dirty;
}

static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
				      enum plane_id plane_id, int level)
{
	const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];

	return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
}

static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
				     int level)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);

	if (level > dev_priv->wm.max_level)
		return false;

	return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
		g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
		g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
}

/* mark all levels starting from 'level' as invalid */
static void g4x_invalidate_wms(struct intel_crtc *crtc,
			       struct g4x_wm_state *wm_state, int level)
{
	if (level <= G4X_WM_LEVEL_NORMAL) {
		enum plane_id plane_id;

		for_each_plane_id_on_crtc(crtc, plane_id)
			wm_state->wm.plane[plane_id] = USHRT_MAX;
	}

	if (level <= G4X_WM_LEVEL_SR) {
		wm_state->cxsr = false;
		wm_state->sr.cursor = USHRT_MAX;
		wm_state->sr.plane = USHRT_MAX;
		wm_state->sr.fbc = USHRT_MAX;
	}

	if (level <= G4X_WM_LEVEL_HPLL) {
		wm_state->hpll_en = false;
		wm_state->hpll.cursor = USHRT_MAX;
		wm_state->hpll.plane = USHRT_MAX;
		wm_state->hpll.fbc = USHRT_MAX;
	}
}
1324 static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1325 {
1326         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1327         struct intel_atomic_state *state =
1328                 to_intel_atomic_state(crtc_state->base.state);
1329         struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1330         int num_active_planes = hweight32(crtc_state->active_planes &
1331                                           ~BIT(PLANE_CURSOR));
1332         const struct g4x_pipe_wm *raw;
1333         const struct intel_plane_state *old_plane_state;
1334         const struct intel_plane_state *new_plane_state;
1335         struct intel_plane *plane;
1336         enum plane_id plane_id;
1337         int i, level;
1338         unsigned int dirty = 0;
1339
1340         for_each_oldnew_intel_plane_in_state(state, plane,
1341                                              old_plane_state,
1342                                              new_plane_state, i) {
1343                 if (new_plane_state->base.crtc != &crtc->base &&
1344                     old_plane_state->base.crtc != &crtc->base)
1345                         continue;
1346
1347                 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
1348                         dirty |= BIT(plane->id);
1349         }
1350
1351         if (!dirty)
1352                 return 0;
1353
1354         level = G4X_WM_LEVEL_NORMAL;
1355         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1356                 goto out;
1357
1358         raw = &crtc_state->wm.g4x.raw[level];
1359         for_each_plane_id_on_crtc(crtc, plane_id)
1360                 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1361
1362         level = G4X_WM_LEVEL_SR;
1363
1364         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1365                 goto out;
1366
1367         raw = &crtc_state->wm.g4x.raw[level];
1368         wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1369         wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1370         wm_state->sr.fbc = raw->fbc;
1371
1372         wm_state->cxsr = num_active_planes == 1; /* exactly one primary/sprite plane */
1373
1374         level = G4X_WM_LEVEL_HPLL;
1375
1376         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1377                 goto out;
1378
1379         raw = &crtc_state->wm.g4x.raw[level];
1380         wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1381         wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1382         wm_state->hpll.fbc = raw->fbc;
1383
1384         wm_state->hpll_en = wm_state->cxsr;
1385
1386         level++;
1387
1388  out:
1389         if (level == G4X_WM_LEVEL_NORMAL)
1390                 return -EINVAL;
1391
1392         /* invalidate the higher levels */
1393         g4x_invalidate_wms(crtc, wm_state, level);
1394
1395         /*
1396          * Determine if the FBC watermark(s) can be used. If
1397          * this isn't the case we prefer to disable the FBC
1398          * watermark(s) rather than disable the SR/HPLL
1399          * level(s) entirely.
1400          */
1401         wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1402
1403         if (level >= G4X_WM_LEVEL_SR &&
1404             wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1405                 wm_state->fbc_en = false;
1406         else if (level >= G4X_WM_LEVEL_HPLL &&
1407                  wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1408                 wm_state->fbc_en = false;
1409
1410         return 0;
1411 }
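/*
 * Worked example of the level ladder above: if the NORMAL and SR raw
 * watermarks fit their FIFOs but the HPLL ones do not, the third
 * g4x_raw_crtc_wm_is_valid() check fails and we reach 'out' with
 * level == G4X_WM_LEVEL_HPLL. g4x_invalidate_wms() then clears hpll_en
 * and poisons the HPLL values with USHRT_MAX, while the NORMAL and SR
 * levels stay usable. fbc_en starts out true (level > G4X_WM_LEVEL_NORMAL)
 * and is only kept if the SR FBC watermark also fits g4x_fbc_fifo_size().
 */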
1412
1413 static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
1414 {
1415         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
1416         struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1417         const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1418         struct intel_atomic_state *intel_state =
1419                 to_intel_atomic_state(new_crtc_state->base.state);
1420         const struct intel_crtc_state *old_crtc_state =
1421                 intel_atomic_get_old_crtc_state(intel_state, crtc);
1422         const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
1423         enum plane_id plane_id;
1424
1425         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
1426                 *intermediate = *optimal;
1427
1428                 intermediate->cxsr = false;
1429                 intermediate->hpll_en = false;
1430                 goto out;
1431         }
1432
1433         intermediate->cxsr = optimal->cxsr && active->cxsr &&
1434                 !new_crtc_state->disable_cxsr;
1435         intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1436                 !new_crtc_state->disable_cxsr;
1437         intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1438
1439         for_each_plane_id_on_crtc(crtc, plane_id) {
1440                 intermediate->wm.plane[plane_id] =
1441                         max(optimal->wm.plane[plane_id],
1442                             active->wm.plane[plane_id]);
1443
1444                 WARN_ON(intermediate->wm.plane[plane_id] >
1445                         g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1446         }
1447
1448         intermediate->sr.plane = max(optimal->sr.plane,
1449                                      active->sr.plane);
1450         intermediate->sr.cursor = max(optimal->sr.cursor,
1451                                       active->sr.cursor);
1452         intermediate->sr.fbc = max(optimal->sr.fbc,
1453                                    active->sr.fbc);
1454
1455         intermediate->hpll.plane = max(optimal->hpll.plane,
1456                                        active->hpll.plane);
1457         intermediate->hpll.cursor = max(optimal->hpll.cursor,
1458                                         active->hpll.cursor);
1459         intermediate->hpll.fbc = max(optimal->hpll.fbc,
1460                                      active->hpll.fbc);
1461
1462         WARN_ON((intermediate->sr.plane >
1463                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1464                  intermediate->sr.cursor >
1465                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1466                 intermediate->cxsr);
1467         WARN_ON((intermediate->sr.plane >
1468                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1469                  intermediate->sr.cursor >
1470                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1471                 intermediate->hpll_en);
1472
1473         WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR) &&
1474                 intermediate->fbc_en && intermediate->cxsr);
1475         WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL) &&
1476                 intermediate->fbc_en && intermediate->hpll_en);
1477
1478 out:
1479         /*
1480          * If our intermediate WM are identical to the final WM, then we can
1481          * omit the post-vblank programming; only update if it's different.
1482          */
1483         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1484                 new_crtc_state->wm.need_postvbl_update = true;
1485
1486         return 0;
1487 }
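/*
 * Example of the max() merging above (made-up values): if the old (active)
 * state needed wm.plane[PLANE_PRIMARY] == 55 and the new (optimal) state
 * needs only 40, the intermediate value is 55, so the allocation is safe
 * both before and after the vblank that latches the new state. The WARN_ONs
 * only fire if even that worst case exceeds the corresponding FIFO, which
 * would indicate an inconsistency between the two states.
 */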
1488
1489 static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1490                          struct g4x_wm_values *wm)
1491 {
1492         struct intel_crtc *crtc;
1493         int num_active_crtcs = 0;
1494
1495         wm->cxsr = true;
1496         wm->hpll_en = true;
1497         wm->fbc_en = true;
1498
1499         for_each_intel_crtc(&dev_priv->drm, crtc) {
1500                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1501
1502                 if (!crtc->active)
1503                         continue;
1504
1505                 if (!wm_state->cxsr)
1506                         wm->cxsr = false;
1507                 if (!wm_state->hpll_en)
1508                         wm->hpll_en = false;
1509                 if (!wm_state->fbc_en)
1510                         wm->fbc_en = false;
1511
1512                 num_active_crtcs++;
1513         }
1514
1515         if (num_active_crtcs != 1) {
1516                 wm->cxsr = false;
1517                 wm->hpll_en = false;
1518                 wm->fbc_en = false;
1519         }
1520
1521         for_each_intel_crtc(&dev_priv->drm, crtc) {
1522                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1523                 enum pipe pipe = crtc->pipe;
1524
1525                 wm->pipe[pipe] = wm_state->wm;
1526                 if (crtc->active && wm->cxsr)
1527                         wm->sr = wm_state->sr;
1528                 if (crtc->active && wm->hpll_en)
1529                         wm->hpll = wm_state->hpll;
1530         }
1531 }
1532
1533 static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1534 {
1535         struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1536         struct g4x_wm_values new_wm = {};
1537
1538         g4x_merge_wm(dev_priv, &new_wm);
1539
1540         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1541                 return;
1542
1543         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1544                 _intel_set_memory_cxsr(dev_priv, false);
1545
1546         g4x_write_wm_values(dev_priv, &new_wm);
1547
1548         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1549                 _intel_set_memory_cxsr(dev_priv, true);
1550
1551         *old_wm = new_wm;
1552 }
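/*
 * Note the bracketing above: cxsr is turned off *before* the new watermark
 * values are written when it is being disabled, and turned back on only
 * *after* the write when it is being enabled, so self-refresh is never
 * active while watermarks that cannot support it are programmed.
 * is_enabling() and is_disabling() are small helpers defined earlier in
 * this file.
 */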
1553
1554 static void g4x_initial_watermarks(struct intel_atomic_state *state,
1555                                    struct intel_crtc_state *crtc_state)
1556 {
1557         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1558         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1559
1560         mutex_lock(&dev_priv->wm.wm_mutex);
1561         crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1562         g4x_program_watermarks(dev_priv);
1563         mutex_unlock(&dev_priv->wm.wm_mutex);
1564 }
1565
1566 static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1567                                     struct intel_crtc_state *crtc_state)
1568 {
1569         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1570         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1571
1572         if (!crtc_state->wm.need_postvbl_update)
1573                 return;
1574
1575         mutex_lock(&dev_priv->wm.wm_mutex);
1576         crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1577         g4x_program_watermarks(dev_priv);
1578         mutex_unlock(&dev_priv->wm.wm_mutex);
1579 }
1580
1581 /* latency must be in 0.1us units. */
1582 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
1583                                    unsigned int htotal,
1584                                    unsigned int width,
1585                                    unsigned int cpp,
1586                                    unsigned int latency)
1587 {
1588         unsigned int ret;
1589
1590         ret = intel_wm_method2(pixel_rate, htotal,
1591                                width, cpp, latency);
1592         ret = DIV_ROUND_UP(ret, 64);
1593
1594         return ret;
1595 }
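/*
 * The DIV_ROUND_UP(ret, 64) above converts the byte-based result of
 * intel_wm_method2() into 64-byte FIFO cachelines. E.g. with an
 * (illustrative) method2 result of 3000 bytes the watermark becomes
 * DIV_ROUND_UP(3000, 64) == 47 cachelines.
 */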
1596
1597 static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1598 {
1599         /* all latencies in usec */
1600         dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1601
1602         dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1603
1604         if (IS_CHERRYVIEW(dev_priv)) {
1605                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1606                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1607
1608                 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1609         }
1610 }
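/*
 * These latencies are stored in whole microseconds; vlv_compute_wm_level()
 * below multiplies them by 10 before calling vlv_wm_method2(), which
 * expects 0.1us units. E.g. the 3 us PM2 latency is passed down as 30.
 */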
1611
1612 static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1613                                 const struct intel_plane_state *plane_state,
1614                                 int level)
1615 {
1616         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1617         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1618         const struct drm_display_mode *adjusted_mode =
1619                 &crtc_state->base.adjusted_mode;
1620         unsigned int clock, htotal, cpp, width, wm;
1621
1622         if (dev_priv->wm.pri_latency[level] == 0)
1623                 return USHRT_MAX;
1624
1625         if (!intel_wm_plane_visible(crtc_state, plane_state))
1626                 return 0;
1627
1628         cpp = plane_state->base.fb->format->cpp[0];
1629         clock = adjusted_mode->crtc_clock;
1630         htotal = adjusted_mode->crtc_htotal;
1631         width = crtc_state->pipe_src_w;
1632
1633         if (plane->id == PLANE_CURSOR) {
1634                 /*
1635                  * FIXME the formula gives values that are
1636                  * too big for the cursor FIFO, and hence we
1637                  * would never be able to use cursors. For
1638                  * now just hardcode the watermark.
1639                  */
1640                 wm = 63;
1641         } else {
1642                 wm = vlv_wm_method2(clock, htotal, width, cpp,
1643                                     dev_priv->wm.pri_latency[level] * 10);
1644         }
1645
1646         return min_t(unsigned int, wm, USHRT_MAX);
1647 }
1648
1649 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1650 {
1651         return (active_planes & (BIT(PLANE_SPRITE0) |
1652                                  BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1653 }
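/*
 * The mask-and-compare above is true only when sprite1 is enabled while
 * sprite0 is not: active_planes containing just BIT(PLANE_SPRITE1) matches,
 * whereas BIT(PLANE_SPRITE0) alone, or both sprite bits together, compare
 * unequal to BIT(PLANE_SPRITE1) and yield false.
 */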
1654
1655 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1656 {
1657         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1658         const struct g4x_pipe_wm *raw =
1659                 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1660         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1661         unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1662         int num_active_planes = hweight32(active_planes);
1663         const int fifo_size = 511;
1664         int fifo_extra, fifo_left = fifo_size;
1665         int sprite0_fifo_extra = 0;
1666         unsigned int total_rate;
1667         enum plane_id plane_id;
1668
1669         /*
1670          * When enabling sprite0 after sprite1 has already been enabled
1671          * we tend to get an underrun unless sprite0 already has some
1672          * FIFO space allocated. Hence we always allocate at least one
1673          * cacheline for sprite0 whenever sprite1 is enabled.
1674          *
1675          * All other plane enable sequences appear immune to this problem.
1676          */
1677         if (vlv_need_sprite0_fifo_workaround(active_planes))
1678                 sprite0_fifo_extra = 1;
1679
1680         total_rate = raw->plane[PLANE_PRIMARY] +
1681                 raw->plane[PLANE_SPRITE0] +
1682                 raw->plane[PLANE_SPRITE1] +
1683                 sprite0_fifo_extra;
1684
1685         if (total_rate > fifo_size)
1686                 return -EINVAL;
1687
1688         if (total_rate == 0)
1689                 total_rate = 1;
1690
1691         for_each_plane_id_on_crtc(crtc, plane_id) {
1692                 unsigned int rate;
1693
1694                 if ((active_planes & BIT(plane_id)) == 0) {
1695                         fifo_state->plane[plane_id] = 0;
1696                         continue;
1697                 }
1698
1699                 rate = raw->plane[plane_id];
1700                 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1701                 fifo_left -= fifo_state->plane[plane_id];
1702         }
1703
1704         fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1705         fifo_left -= sprite0_fifo_extra;
1706
1707         fifo_state->plane[PLANE_CURSOR] = 63;
1708
1709         fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
1710
1711         /* spread the remainder evenly */
1712         for_each_plane_id_on_crtc(crtc, plane_id) {
1713                 int plane_extra;
1714
1715                 if (fifo_left == 0)
1716                         break;
1717
1718                 if ((active_planes & BIT(plane_id)) == 0)
1719                         continue;
1720
1721                 plane_extra = min(fifo_extra, fifo_left);
1722                 fifo_state->plane[plane_id] += plane_extra;
1723                 fifo_left -= plane_extra;
1724         }
1725
1726         WARN_ON(active_planes != 0 && fifo_left != 0);
1727
1728         /* give it all to the first plane if none are active */
1729         if (active_planes == 0) {
1730                 WARN_ON(fifo_left != fifo_size);
1731                 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1732         }
1733
1734         return 0;
1735 }
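/*
 * Worked example of the split above (illustrative rates): with a primary
 * rate of 200, a sprite0 rate of 100 and sprite1 disabled, total_rate is
 * 300 and the proportional pass gives the primary 511 * 200 / 300 == 340
 * entries and sprite0 511 * 100 / 300 == 170, leaving fifo_left == 1.
 * With two active planes fifo_extra == DIV_ROUND_UP(1, 2) == 1, so the
 * spreading loop hands the final cacheline to the primary: 341 + 170 ==
 * 511. The cursor always keeps its own fixed 63-entry FIFO on the side.
 */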
1736
1737 /* mark all levels starting from 'level' as invalid */
1738 static void vlv_invalidate_wms(struct intel_crtc *crtc,
1739                                struct vlv_wm_state *wm_state, int level)
1740 {
1741         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1742
1743         for (; level < intel_wm_num_levels(dev_priv); level++) {
1744                 enum plane_id plane_id;
1745
1746                 for_each_plane_id_on_crtc(crtc, plane_id)
1747                         wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1748
1749                 wm_state->sr[level].cursor = USHRT_MAX;
1750                 wm_state->sr[level].plane = USHRT_MAX;
1751         }
1752 }
1753
1754 static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1755 {
1756         if (wm > fifo_size)
1757                 return USHRT_MAX;
1758         else
1759                 return fifo_size - wm;
1760 }
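/*
 * Example: with fifo_size == 341 a raw watermark of 120 is programmed as
 * 341 - 120 == 221, i.e. the hardware value is the amount of FIFO space
 * left over. A raw watermark that exceeds the plane's FIFO cannot be
 * expressed at all and is returned as USHRT_MAX, the same poison value
 * vlv_invalidate_wms() uses for unusable levels.
 */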
1761
1762 /*
1763  * Starting from 'level' set all higher
1764  * levels to 'value' in the "raw" watermarks.
1765  */
1766 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1767                                  int level, enum plane_id plane_id, u16 value)
1768 {
1769         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1770         int num_levels = intel_wm_num_levels(dev_priv);
1771         bool dirty = false;
1772
1773         for (; level < num_levels; level++) {
1774                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1775
1776                 dirty |= raw->plane[plane_id] != value;
1777                 raw->plane[plane_id] = value;
1778         }
1779
1780         return dirty;
1781 }
1782
1783 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1784                                      const struct intel_plane_state *plane_state)
1785 {
1786         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1787         enum plane_id plane_id = plane->id;
1788         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1789         int level;
1790         bool dirty = false;
1791
1792         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1793                 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1794                 goto out;
1795         }
1796
1797         for (level = 0; level < num_levels; level++) {
1798                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1799                 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1800                 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1801
1802                 if (wm > max_wm)
1803                         break;
1804
1805                 dirty |= raw->plane[plane_id] != wm;
1806                 raw->plane[plane_id] = wm;
1807         }
1808
1809         /* mark all higher levels as invalid */
1810         dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1811
1812 out:
1813         if (dirty)
1814                 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1815                               plane->base.name,
1816                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1817                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1818                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1819
1820         return dirty;
1821 }
1822
1823 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1824                                       enum plane_id plane_id, int level)
1825 {
1826         const struct g4x_pipe_wm *raw =
1827                 &crtc_state->wm.vlv.raw[level];
1828         const struct vlv_fifo_state *fifo_state =
1829                 &crtc_state->wm.vlv.fifo_state;
1830
1831         return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1832 }
1833
1834 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1835 {
1836         return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1837                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1838                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1839                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1840 }
1841
1842 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1843 {
1844         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1845         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1846         struct intel_atomic_state *state =
1847                 to_intel_atomic_state(crtc_state->base.state);
1848         struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1849         const struct vlv_fifo_state *fifo_state =
1850                 &crtc_state->wm.vlv.fifo_state;
1851         int num_active_planes = hweight32(crtc_state->active_planes &
1852                                           ~BIT(PLANE_CURSOR));
1853         bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
1854         const struct intel_plane_state *old_plane_state;
1855         const struct intel_plane_state *new_plane_state;
1856         struct intel_plane *plane;
1857         enum plane_id plane_id;
1858         int level, ret, i;
1859         unsigned int dirty = 0;
1860
1861         for_each_oldnew_intel_plane_in_state(state, plane,
1862                                              old_plane_state,
1863                                              new_plane_state, i) {
1864                 if (new_plane_state->base.crtc != &crtc->base &&
1865                     old_plane_state->base.crtc != &crtc->base)
1866                         continue;
1867
1868                 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
1869                         dirty |= BIT(plane->id);
1870         }
1871
1872         /*
1873          * DSPARB registers may have been reset due to the
1874          * power well being turned off. Make sure we restore
1875          * them to a consistent state even if no primary/sprite
1876          * planes are initially active.
1877          */
1878         if (needs_modeset)
1879                 crtc_state->fifo_changed = true;
1880
1881         if (!dirty)
1882                 return 0;
1883
1884         /* cursor changes don't warrant a FIFO recompute */
1885         if (dirty & ~BIT(PLANE_CURSOR)) {
1886                 const struct intel_crtc_state *old_crtc_state =
1887                         intel_atomic_get_old_crtc_state(state, crtc);
1888                 const struct vlv_fifo_state *old_fifo_state =
1889                         &old_crtc_state->wm.vlv.fifo_state;
1890
1891                 ret = vlv_compute_fifo(crtc_state);
1892                 if (ret)
1893                         return ret;
1894
1895                 if (needs_modeset ||
1896                     memcmp(old_fifo_state, fifo_state,
1897                            sizeof(*fifo_state)) != 0)
1898                         crtc_state->fifo_changed = true;
1899         }
1900
1901         /* initially allow all levels */
1902         wm_state->num_levels = intel_wm_num_levels(dev_priv);
1903         /*
1904          * Note that enabling cxsr with no primary/sprite planes
1905          * enabled can wedge the pipe. Hence we only allow cxsr
1906          * with exactly one enabled primary/sprite plane.
1907          */
1908         wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1909
1910         for (level = 0; level < wm_state->num_levels; level++) {
1911                 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1912                 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1913
1914                 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1915                         break;
1916
1917                 for_each_plane_id_on_crtc(crtc, plane_id) {
1918                         wm_state->wm[level].plane[plane_id] =
1919                                 vlv_invert_wm_value(raw->plane[plane_id],
1920                                                     fifo_state->plane[plane_id]);
1921                 }
1922
1923                 wm_state->sr[level].plane =
1924                         vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1925                                                  raw->plane[PLANE_SPRITE0],
1926                                                  raw->plane[PLANE_SPRITE1]),
1927                                             sr_fifo_size);
1928
1929                 wm_state->sr[level].cursor =
1930                         vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1931                                             63);
1932         }
1933
1934         if (level == 0)
1935                 return -EINVAL;
1936
1937         /* limit to only levels we can actually handle */
1938         wm_state->num_levels = level;
1939
1940         /* invalidate the higher levels */
1941         vlv_invalidate_wms(crtc, wm_state, level);
1942
1943         return 0;
1944 }
1945
1946 #define VLV_FIFO(plane, value) \
1947         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
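/*
 * The macro pastes the plane name into the register field definitions,
 * e.g. VLV_FIFO(SPRITEB, sprite1_start) expands to
 * ((sprite1_start) << DSPARB_SPRITEB_SHIFT_VLV) & DSPARB_SPRITEB_MASK_VLV.
 */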
1948
1949 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1950                                    struct intel_crtc_state *crtc_state)
1951 {
1952         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1953         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1954         struct intel_uncore *uncore = &dev_priv->uncore;
1955         const struct vlv_fifo_state *fifo_state =
1956                 &crtc_state->wm.vlv.fifo_state;
1957         int sprite0_start, sprite1_start, fifo_size;
1958
1959         if (!crtc_state->fifo_changed)
1960                 return;
1961
1962         sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1963         sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1964         fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1965
1966         WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1967         WARN_ON(fifo_size != 511);
1968
1969         trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1970
1971         /*
1972          * uncore.lock serves a double purpose here. It allows us to
1973          * use the less expensive I915_{READ,WRITE}_FW() functions, and
1974          * it protects the DSPARB registers from getting clobbered by
1975          * parallel updates from multiple pipes.
1976          *
1977          * intel_pipe_update_start() has already disabled interrupts
1978          * for us, so a plain spin_lock() is sufficient here.
1979          */
1980         spin_lock(&uncore->lock);
1981
1982         switch (crtc->pipe) {
1983                 u32 dsparb, dsparb2, dsparb3;
1984         case PIPE_A:
1985                 dsparb = intel_uncore_read_fw(uncore, DSPARB);
1986                 dsparb2 = intel_uncore_read_fw(uncore, DSPARB2);
1987
1988                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1989                             VLV_FIFO(SPRITEB, 0xff));
1990                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1991                            VLV_FIFO(SPRITEB, sprite1_start));
1992
1993                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1994                              VLV_FIFO(SPRITEB_HI, 0x1));
1995                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1996                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1997
1998                 intel_uncore_write_fw(uncore, DSPARB, dsparb);
1999                 intel_uncore_write_fw(uncore, DSPARB2, dsparb2);
2000                 break;
2001         case PIPE_B:
2002                 dsparb = intel_uncore_read_fw(uncore, DSPARB);
2003                 dsparb2 = intel_uncore_read_fw(uncore, DSPARB2);
2004
2005                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
2006                             VLV_FIFO(SPRITED, 0xff));
2007                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
2008                            VLV_FIFO(SPRITED, sprite1_start));
2009
2010                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
2011                              VLV_FIFO(SPRITED_HI, 0xff));
2012                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2013                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2014
2015                 intel_uncore_write_fw(uncore, DSPARB, dsparb);
2016                 intel_uncore_write_fw(uncore, DSPARB2, dsparb2);
2017                 break;
2018         case PIPE_C:
2019                 dsparb3 = intel_uncore_read_fw(uncore, DSPARB3);
2020                 dsparb2 = intel_uncore_read_fw(uncore, DSPARB2);
2021
2022                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2023                              VLV_FIFO(SPRITEF, 0xff));
2024                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2025                             VLV_FIFO(SPRITEF, sprite1_start));
2026
2027                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2028                              VLV_FIFO(SPRITEF_HI, 0xff));
2029                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2030                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2031
2032                 intel_uncore_write_fw(uncore, DSPARB3, dsparb3);
2033                 intel_uncore_write_fw(uncore, DSPARB2, dsparb2);
2034                 break;
2035         default:
2036                 break;
2037         }
2038
2039         intel_uncore_posting_read_fw(uncore, DSPARB);
2040
2041         spin_unlock(&uncore->lock);
2042 }
2043
2044 #undef VLV_FIFO
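/*
 * The FIFO split points above are 9-bit values (the shared FIFO is 511
 * entries), so each one is programmed in two pieces: the low 8 bits go
 * into the DSPARB/DSPARB3 field via VLV_FIFO(SPRITEx, start), and the
 * 9th bit into the corresponding *_HI field of DSPARB2 via start >> 8,
 * as seen in the switch in vlv_atomic_update_fifo().
 */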
2045
2046 static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state)
2047 {
2048         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
2049         struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2050         const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2051         struct intel_atomic_state *intel_state =
2052                 to_intel_atomic_state(new_crtc_state->base.state);
2053         const struct intel_crtc_state *old_crtc_state =
2054                 intel_atomic_get_old_crtc_state(intel_state, crtc);
2055         const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
2056         int level;
2057
2058         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
2059                 *intermediate = *optimal;
2060
2061                 intermediate->cxsr = false;
2062                 goto out;
2063         }
2064
2065         intermediate->num_levels = min(optimal->num_levels, active->num_levels);
2066         intermediate->cxsr = optimal->cxsr && active->cxsr &&
2067                 !new_crtc_state->disable_cxsr;
2068
2069         for (level = 0; level < intermediate->num_levels; level++) {
2070                 enum plane_id plane_id;
2071
2072                 for_each_plane_id_on_crtc(crtc, plane_id) {
2073                         intermediate->wm[level].plane[plane_id] =
2074                                 min(optimal->wm[level].plane[plane_id],
2075                                     active->wm[level].plane[plane_id]);
2076                 }
2077
2078                 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2079                                                     active->sr[level].plane);
2080                 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2081                                                      active->sr[level].cursor);
2082         }
2083
2084         vlv_invalidate_wms(crtc, intermediate, level);
2085
2086 out:
2087         /*
2088          * If our intermediate WM are identical to the final WM, then we can
2089          * omit the post-vblank programming; only update if it's different.
2090          */
2091         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
2092                 new_crtc_state->wm.need_postvbl_update = true;
2093
2094         return 0;
2095 }
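/*
 * Unlike the g4x path, the merge above uses min() rather than max(): the
 * vlv watermark values were already inverted by vlv_invert_wm_value()
 * (FIFO size minus the raw watermark), so the smaller inverted value
 * corresponds to the larger raw requirement and is therefore the safe
 * choice for the intermediate state.
 */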
2096
2097 static void vlv_merge_wm(struct drm_i915_private *dev_priv,
2098                          struct vlv_wm_values *wm)
2099 {
2100         struct intel_crtc *crtc;
2101         int num_active_crtcs = 0;
2102
2103         wm->level = dev_priv->wm.max_level;
2104         wm->cxsr = true;
2105
2106         for_each_intel_crtc(&dev_priv->drm, crtc) {
2107                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2108
2109                 if (!crtc->active)
2110                         continue;
2111
2112                 if (!wm_state->cxsr)
2113                         wm->cxsr = false;
2114
2115                 num_active_crtcs++;
2116                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2117         }
2118
2119         if (num_active_crtcs != 1)
2120                 wm->cxsr = false;
2121
2122         if (num_active_crtcs > 1)
2123                 wm->level = VLV_WM_LEVEL_PM2;
2124
2125         for_each_intel_crtc(&dev_priv->drm, crtc) {
2126                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2127                 enum pipe pipe = crtc->pipe;
2128
2129                 wm->pipe[pipe] = wm_state->wm[wm->level];
2130                 if (crtc->active && wm->cxsr)
2131                         wm->sr = wm_state->sr[wm->level];
2132
2133                 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2134                 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2135                 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2136                 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
2137         }
2138 }
2139
2140 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
2141 {
2142         struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2143         struct vlv_wm_values new_wm = {};
2144
2145         vlv_merge_wm(dev_priv, &new_wm);
2146
2147         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
2148                 return;
2149
2150         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2151                 chv_set_memory_dvfs(dev_priv, false);
2152
2153         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2154                 chv_set_memory_pm5(dev_priv, false);
2155
2156         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
2157                 _intel_set_memory_cxsr(dev_priv, false);
2158
2159         vlv_write_wm_values(dev_priv, &new_wm);
2160
2161         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
2162                 _intel_set_memory_cxsr(dev_priv, true);
2163
2164         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2165                 chv_set_memory_pm5(dev_priv, true);
2166
2167         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2168                 chv_set_memory_dvfs(dev_priv, true);
2169
2170         *old_wm = new_wm;
2171 }
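/*
 * As with g4x, note the symmetric bracketing: the deepest states
 * (DDR DVFS, then PM5, then cxsr) are dropped before the new watermarks
 * are written, and re-enabled in the reverse order afterwards, so a deep
 * memory power state is never active with watermarks that do not allow it.
 */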
2172
2173 static void vlv_initial_watermarks(struct intel_atomic_state *state,
2174                                    struct intel_crtc_state *crtc_state)
2175 {
2176         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2177         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2178
2179         mutex_lock(&dev_priv->wm.wm_mutex);
2180         crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2181         vlv_program_watermarks(dev_priv);
2182         mutex_unlock(&dev_priv->wm.wm_mutex);
2183 }
2184
2185 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2186                                     struct intel_crtc_state *crtc_state)
2187 {
2188         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2189         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2190
2191         if (!crtc_state->wm.need_postvbl_update)
2192                 return;
2193
2194         mutex_lock(&dev_priv->wm.wm_mutex);
2195         crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
2196         vlv_program_watermarks(dev_priv);
2197         mutex_unlock(&dev_priv->wm.wm_mutex);
2198 }
2199
2200 static void i965_update_wm(struct intel_crtc *unused_crtc)
2201 {
2202         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2203         struct intel_crtc *crtc;
2204         int srwm = 1;
2205         int cursor_sr = 16;
2206         bool cxsr_enabled;
2207
2208         /* Calc SR entries for single-plane configs */
2209         crtc = single_enabled_crtc(dev_priv);
2210         if (crtc) {
2211                 /* self-refresh has much higher latency */
2212                 static const int sr_latency_ns = 12000;
2213                 const struct drm_display_mode *adjusted_mode =
2214                         &crtc->config->base.adjusted_mode;
2215                 const struct drm_framebuffer *fb =
2216                         crtc->base.primary->state->fb;
2217                 int clock = adjusted_mode->crtc_clock;
2218                 int htotal = adjusted_mode->crtc_htotal;
2219                 int hdisplay = crtc->config->pipe_src_w;
2220                 int cpp = fb->format->cpp[0];
2221                 int entries;
2222
2223                 entries = intel_wm_method2(clock, htotal,
2224                                            hdisplay, cpp, sr_latency_ns / 100);
2225                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2226                 srwm = I965_FIFO_SIZE - entries;
2227                 if (srwm < 0)
2228                         srwm = 1;
2229                 srwm &= 0x1ff;
2230                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2231                               entries, srwm);
2232
2233                 entries = intel_wm_method2(clock, htotal,
2234                                            crtc->base.cursor->state->crtc_w, 4,
2235                                            sr_latency_ns / 100);
2236                 entries = DIV_ROUND_UP(entries,
2237                                        i965_cursor_wm_info.cacheline_size) +
2238                         i965_cursor_wm_info.guard_size;
2239
2240                 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
2241                 if (cursor_sr > i965_cursor_wm_info.max_wm)
2242                         cursor_sr = i965_cursor_wm_info.max_wm;
2243
2244                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2245                               "cursor %d\n", srwm, cursor_sr);
2246
2247                 cxsr_enabled = true;
2248         } else {
2249                 cxsr_enabled = false;
2250                 /* Turn off self refresh if both pipes are enabled */
2251                 intel_set_memory_cxsr(dev_priv, false);
2252         }
2253
2254         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2255                       srwm);
2256
2257         /* 965 has limitations... */
2258         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2259                    FW_WM(8, CURSORB) |
2260                    FW_WM(8, PLANEB) |
2261                    FW_WM(8, PLANEA));
2262         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2263                    FW_WM(8, PLANEC_OLD));
2264         /* update cursor SR watermark */
2265         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
2266
2267         if (cxsr_enabled)
2268                 intel_set_memory_cxsr(dev_priv, true);
2269 }
2270
2271 #undef FW_WM
2272
2273 static void i9xx_update_wm(struct intel_crtc *unused_crtc)
2274 {
2275         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2276         const struct intel_watermark_params *wm_info;
2277         u32 fwater_lo;
2278         u32 fwater_hi;
2279         int cwm, srwm = 1;
2280         int fifo_size;
2281         int planea_wm, planeb_wm;
2282         struct intel_crtc *crtc, *enabled = NULL;
2283
2284         if (IS_I945GM(dev_priv))
2285                 wm_info = &i945_wm_info;
2286         else if (!IS_GEN(dev_priv, 2))
2287                 wm_info = &i915_wm_info;
2288         else
2289                 wm_info = &i830_a_wm_info;
2290
2291         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2292         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
2293         if (intel_crtc_active(crtc)) {
2294                 const struct drm_display_mode *adjusted_mode =
2295                         &crtc->config->base.adjusted_mode;
2296                 const struct drm_framebuffer *fb =
2297                         crtc->base.primary->state->fb;
2298                 int cpp;
2299
2300                 if (IS_GEN(dev_priv, 2))
2301                         cpp = 4;
2302                 else
2303                         cpp = fb->format->cpp[0];
2304
2305                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2306                                                wm_info, fifo_size, cpp,
2307                                                pessimal_latency_ns);
2308                 enabled = crtc;
2309         } else {
2310                 planea_wm = fifo_size - wm_info->guard_size;
2311                 if (planea_wm > (long)wm_info->max_wm)
2312                         planea_wm = wm_info->max_wm;
2313         }
2314
2315         if (IS_GEN(dev_priv, 2))
2316                 wm_info = &i830_bc_wm_info;
2317
2318         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2319         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
2320         if (intel_crtc_active(crtc)) {
2321                 const struct drm_display_mode *adjusted_mode =
2322                         &crtc->config->base.adjusted_mode;
2323                 const struct drm_framebuffer *fb =
2324                         crtc->base.primary->state->fb;
2325                 int cpp;
2326
2327                 if (IS_GEN(dev_priv, 2))
2328                         cpp = 4;
2329                 else
2330                         cpp = fb->format->cpp[0];
2331
2332                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2333                                                wm_info, fifo_size, cpp,
2334                                                pessimal_latency_ns);
2335                 if (enabled == NULL)
2336                         enabled = crtc;
2337                 else
2338                         enabled = NULL;
2339         } else {
2340                 planeb_wm = fifo_size - wm_info->guard_size;
2341                 if (planeb_wm > (long)wm_info->max_wm)
2342                         planeb_wm = wm_info->max_wm;
2343         }
2344
2345         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2346
2347         if (IS_I915GM(dev_priv) && enabled) {
2348                 struct drm_i915_gem_object *obj;
2349
2350                 obj = intel_fb_obj(enabled->base.primary->state->fb);
2351
2352                 /* self-refresh seems busted with untiled */
2353                 if (!i915_gem_object_is_tiled(obj))
2354                         enabled = NULL;
2355         }
2356
2357         /*
2358          * Overlay gets an aggressive default since video jitter is bad.
2359          */
2360         cwm = 2;
2361
2362         /* Play safe and disable self-refresh before adjusting watermarks. */
2363         intel_set_memory_cxsr(dev_priv, false);
2364
2365         /* Calc SR entries for single-plane configs */
2366         if (HAS_FW_BLC(dev_priv) && enabled) {
2367                 /* self-refresh has much higher latency */
2368                 static const int sr_latency_ns = 6000;
2369                 const struct drm_display_mode *adjusted_mode =
2370                         &enabled->config->base.adjusted_mode;
2371                 const struct drm_framebuffer *fb =
2372                         enabled->base.primary->state->fb;
2373                 int clock = adjusted_mode->crtc_clock;
2374                 int htotal = adjusted_mode->crtc_htotal;
2375                 int hdisplay = enabled->config->pipe_src_w;
2376                 int cpp;
2377                 int entries;
2378
2379                 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2380                         cpp = 4;
2381                 else
2382                         cpp = fb->format->cpp[0];
2383
2384                 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2385                                            sr_latency_ns / 100);
2386                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2387                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2388                 srwm = wm_info->fifo_size - entries;
2389                 if (srwm < 0)
2390                         srwm = 1;
2391
2392                 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
2393                         I915_WRITE(FW_BLC_SELF,
2394                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2395                 else
2396                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2397         }
2398
2399         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2400                       planea_wm, planeb_wm, cwm, srwm);
2401
2402         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2403         fwater_hi = (cwm & 0x1f);
2404
2405         /* Set request length to 8 cachelines per fetch */
2406         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2407         fwater_hi = fwater_hi | (1 << 8);
2408
2409         I915_WRITE(FW_BLC, fwater_lo);
2410         I915_WRITE(FW_BLC2, fwater_hi);
2411
2412         if (enabled)
2413                 intel_set_memory_cxsr(dev_priv, true);
2414 }
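/*
 * Reading of the register packing above: FW_BLC holds plane B's watermark
 * in bits 21:16 and plane A's in bits 5:0 (hence the & 0x3f masks), FW_BLC2
 * holds the 5-bit overlay watermark (& 0x1f), and the (1 << 24) | (1 << 8)
 * bits select the 8-cacheline fetch length mentioned in the comment.
 */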
2415
2416 static void i845_update_wm(struct intel_crtc *unused_crtc)
2417 {
2418         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2419         struct intel_crtc *crtc;
2420         const struct drm_display_mode *adjusted_mode;
2421         u32 fwater_lo;
2422         int planea_wm;
2423
2424         crtc = single_enabled_crtc(dev_priv);
2425         if (crtc == NULL)
2426                 return;
2427
2428         adjusted_mode = &crtc->config->base.adjusted_mode;
2429         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2430                                        &i845_wm_info,
2431                                        dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
2432                                        4, pessimal_latency_ns);
2433         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2434         fwater_lo |= (3<<8) | planea_wm;
2435
2436         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2437
2438         I915_WRITE(FW_BLC, fwater_lo);
2439 }
2440
2441 /* latency must be in 0.1us units. */
2442 static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2443                                    unsigned int cpp,
2444                                    unsigned int latency)
2445 {
2446         unsigned int ret;
2447
2448         ret = intel_wm_method1(pixel_rate, cpp, latency);
2449         ret = DIV_ROUND_UP(ret, 64) + 2;
2450
2451         return ret;
2452 }
2453
2454 /* latency must be in 0.1us units. */
2455 static unsigned int ilk_wm_method2(unsigned int pixel_rate,
2456                                    unsigned int htotal,
2457                                    unsigned int width,
2458                                    unsigned int cpp,
2459                                    unsigned int latency)
2460 {
2461         unsigned int ret;
2462
2463         ret = intel_wm_method2(pixel_rate, htotal,
2464                                width, cpp, latency);
2465         ret = DIV_ROUND_UP(ret, 64) + 2;
2466
2467         return ret;
2468 }
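/*
 * Both helpers above convert the byte result into 64-byte cachelines like
 * the vlv variant, but additionally add a 2-cacheline guard. E.g. an
 * (illustrative) method result of 3000 bytes yields
 * DIV_ROUND_UP(3000, 64) + 2 == 49.
 */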
2469
2470 static u32 ilk_wm_fbc(u32 pri_val, u32 horiz_pixels, u8 cpp)
2471 {
2472         /*
2473          * Neither of these should be possible since this function shouldn't be
2474          * called if the CRTC is off or the plane is invisible.  But let's be
2475          * extra paranoid to avoid a potential divide-by-zero if we screw up
2476          * elsewhere in the driver.
2477          */
2478         if (WARN_ON(!cpp))
2479                 return 0;
2480         if (WARN_ON(!horiz_pixels))
2481                 return 0;
2482
2483         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
2484 }
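/*
 * Worked example (illustrative numbers): with pri_val == 100 cachelines,
 * horiz_pixels == 1920 and cpp == 4, one scanline is 7680 bytes, so
 * DIV_ROUND_UP(100 * 64, 7680) + 2 == 1 + 2 == 3, i.e. the primary
 * watermark re-expressed as whole scanlines plus a 2-line guard.
 */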
2485
2486 struct ilk_wm_maximums {
2487         u16 pri;
2488         u16 spr;
2489         u16 cur;
2490         u16 fbc;
2491 };
2492
2493 /*
2494  * For both WM_PIPE and WM_LP.
2495  * mem_value must be in 0.1us units.
2496  */
2497 static u32 ilk_compute_pri_wm(const struct intel_crtc_state *crtc_state,
2498                               const struct intel_plane_state *plane_state,
2499                               u32 mem_value, bool is_lp)
2500 {
2501         u32 method1, method2;
2502         int cpp;
2503
2504         if (mem_value == 0)
2505                 return U32_MAX;
2506
2507         if (!intel_wm_plane_visible(crtc_state, plane_state))
2508                 return 0;
2509
2510         cpp = plane_state->base.fb->format->cpp[0];
2511
2512         method1 = ilk_wm_method1(crtc_state->pixel_rate, cpp, mem_value);
2513
2514         if (!is_lp)
2515                 return method1;
2516
2517         method2 = ilk_wm_method2(crtc_state->pixel_rate,
2518                                  crtc_state->base.adjusted_mode.crtc_htotal,
2519                                  drm_rect_width(&plane_state->base.dst),
2520                                  cpp, mem_value);
2521
2522         return min(method1, method2);
2523 }
2524
2525 /*
2526  * For both WM_PIPE and WM_LP.
2527  * mem_value must be in 0.1us units.
2528  */
2529 static u32 ilk_compute_spr_wm(const struct intel_crtc_state *crtc_state,
2530                               const struct intel_plane_state *plane_state,
2531                               u32 mem_value)
2532 {
2533         u32 method1, method2;
2534         int cpp;
2535
2536         if (mem_value == 0)
2537                 return U32_MAX;
2538
2539         if (!intel_wm_plane_visible(crtc_state, plane_state))
2540                 return 0;
2541
2542         cpp = plane_state->base.fb->format->cpp[0];
2543
2544         method1 = ilk_wm_method1(crtc_state->pixel_rate, cpp, mem_value);
2545         method2 = ilk_wm_method2(crtc_state->pixel_rate,
2546                                  crtc_state->base.adjusted_mode.crtc_htotal,
2547                                  drm_rect_width(&plane_state->base.dst),
2548                                  cpp, mem_value);
2549         return min(method1, method2);
2550 }
2551
2552 /*
2553  * For both WM_PIPE and WM_LP.
2554  * mem_value must be in 0.1us units.
2555  */
2556 static u32 ilk_compute_cur_wm(const struct intel_crtc_state *crtc_state,
2557                               const struct intel_plane_state *plane_state,
2558                               u32 mem_value)
2559 {
2560         int cpp;
2561
2562         if (mem_value == 0)
2563                 return U32_MAX;
2564
2565         if (!intel_wm_plane_visible(crtc_state, plane_state))
2566                 return 0;
2567
2568         cpp = plane_state->base.fb->format->cpp[0];
2569
2570         return ilk_wm_method2(crtc_state->pixel_rate,
2571                               crtc_state->base.adjusted_mode.crtc_htotal,
2572                               plane_state->base.crtc_w, cpp, mem_value);
2573 }
2574
2575 /* Only for WM_LP. */
2576 static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *crtc_state,
2577                               const struct intel_plane_state *plane_state,
2578                               u32 pri_val)
2579 {
2580         int cpp;
2581
2582         if (!intel_wm_plane_visible(crtc_state, plane_state))
2583                 return 0;
2584
2585         cpp = plane_state->base.fb->format->cpp[0];
2586
2587         return ilk_wm_fbc(pri_val, drm_rect_width(&plane_state->base.dst), cpp);
2588 }
2589
2590 static unsigned int
2591 ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
2592 {
2593         if (INTEL_GEN(dev_priv) >= 8)
2594                 return 3072;
2595         else if (INTEL_GEN(dev_priv) >= 7)
2596                 return 768;
2597         else
2598                 return 512;
2599 }
2600
2601 static unsigned int
2602 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2603                      int level, bool is_sprite)
2604 {
2605         if (INTEL_GEN(dev_priv) >= 8)
2606                 /* BDW primary/sprite plane watermarks */
2607                 return level == 0 ? 255 : 2047;
2608         else if (INTEL_GEN(dev_priv) >= 7)
2609                 /* IVB/HSW primary/sprite plane watermarks */
2610                 return level == 0 ? 127 : 1023;
2611         else if (!is_sprite)
2612                 /* ILK/SNB primary plane watermarks */
2613                 return level == 0 ? 127 : 511;
2614         else
2615                 /* ILK/SNB sprite plane watermarks */
2616                 return level == 0 ? 63 : 255;
2617 }
2618
2619 static unsigned int
2620 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
2621 {
2622         if (INTEL_GEN(dev_priv) >= 7)
2623                 return level == 0 ? 63 : 255;
2624         else
2625                 return level == 0 ? 31 : 63;
2626 }
2627
2628 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
2629 {
2630         if (INTEL_GEN(dev_priv) >= 8)
2631                 return 31;
2632         else
2633                 return 15;
2634 }
2635
2636 /* Calculate the maximum primary/sprite plane watermark */
2637 static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
2638                                      int level,
2639                                      const struct intel_wm_config *config,
2640                                      enum intel_ddb_partitioning ddb_partitioning,
2641                                      bool is_sprite)
2642 {
2643         unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2644
2645         /* if sprites aren't enabled, sprites get nothing */
2646         if (is_sprite && !config->sprites_enabled)
2647                 return 0;
2648
2649         /* HSW allows LP1+ watermarks even with multiple pipes */
2650         if (level == 0 || config->num_pipes_active > 1) {
2651                 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
2652
2653                 /*
2654                  * For some reason the non self refresh
2655                  * FIFO size is only half of the self
2656                  * refresh FIFO size on ILK/SNB.
2657                  */
2658                 if (INTEL_GEN(dev_priv) <= 6)
2659                         fifo_size /= 2;
2660         }
2661
2662         if (config->sprites_enabled) {
2663                 /* level 0 is always calculated with 1:1 split */
2664                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2665                         if (is_sprite)
2666                                 fifo_size *= 5;
2667                         fifo_size /= 6;
2668                 } else {
2669                         fifo_size /= 2;
2670                 }
2671         }
2672
2673         /* clamp to max that the registers can hold */
2674         return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2675 }
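/*
 * Worked example (IVB-style numbers): with a 768-entry FIFO, one active
 * pipe, level > 0 and sprites enabled, the 5:6 partitioning gives the
 * sprite 768 * 5 / 6 == 640 entries and the primary 768 / 6 == 128, while
 * the default 1:1 split would give each 768 / 2 == 384. Both fit the
 * LP1+ register maximum of 1023 on IVB/HSW, so the clamp is a no-op here.
 */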
2676
2677 /* Calculate the maximum cursor plane watermark */
2678 static unsigned int ilk_cursor_wm_max(const struct drm_i915_private *dev_priv,
2679                                       int level,
2680                                       const struct intel_wm_config *config)
2681 {
2682         /* HSW LP1+ watermarks w/ multiple pipes */
2683         if (level > 0 && config->num_pipes_active > 1)
2684                 return 64;
2685
2686         /* otherwise just report max that registers can hold */
2687         return ilk_cursor_wm_reg_max(dev_priv, level);
2688 }
2689
2690 static void ilk_compute_wm_maximums(const struct drm_i915_private *dev_priv,
2691                                     int level,
2692                                     const struct intel_wm_config *config,
2693                                     enum intel_ddb_partitioning ddb_partitioning,
2694                                     struct ilk_wm_maximums *max)
2695 {
2696         max->pri = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, false);
2697         max->spr = ilk_plane_wm_max(dev_priv, level, config, ddb_partitioning, true);
2698         max->cur = ilk_cursor_wm_max(dev_priv, level, config);
2699         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2700 }
2701
2702 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2703                                         int level,
2704                                         struct ilk_wm_maximums *max)
2705 {
2706         max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2707         max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2708         max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2709         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2710 }
2711
2712 static bool ilk_validate_wm_level(int level,
2713                                   const struct ilk_wm_maximums *max,
2714                                   struct intel_wm_level *result)
2715 {
2716         bool ret;
2717
2718         /* already determined to be invalid? */
2719         if (!result->enable)
2720                 return false;
2721
2722         result->enable = result->pri_val <= max->pri &&
2723                          result->spr_val <= max->spr &&
2724                          result->cur_val <= max->cur;
2725
2726         ret = result->enable;
2727
2728         /*
2729          * HACK until we can pre-compute everything,
2730          * and thus fail gracefully if LP0 watermarks
2731          * are exceeded...
2732          */
2733         if (level == 0 && !result->enable) {
2734                 if (result->pri_val > max->pri)
2735                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2736                                       level, result->pri_val, max->pri);
2737                 if (result->spr_val > max->spr)
2738                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2739                                       level, result->spr_val, max->spr);
2740                 if (result->cur_val > max->cur)
2741                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2742                                       level, result->cur_val, max->cur);
2743
2744                 result->pri_val = min_t(u32, result->pri_val, max->pri);
2745                 result->spr_val = min_t(u32, result->spr_val, max->spr);
2746                 result->cur_val = min_t(u32, result->cur_val, max->cur);
2747                 result->enable = true;
2748         }
2749
2750         return ret;
2751 }
2752
2753 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2754                                  const struct intel_crtc *intel_crtc,
2755                                  int level,
2756                                  struct intel_crtc_state *crtc_state,
2757                                  const struct intel_plane_state *pristate,
2758                                  const struct intel_plane_state *sprstate,
2759                                  const struct intel_plane_state *curstate,
2760                                  struct intel_wm_level *result)
2761 {
2762         u16 pri_latency = dev_priv->wm.pri_latency[level];
2763         u16 spr_latency = dev_priv->wm.spr_latency[level];
2764         u16 cur_latency = dev_priv->wm.cur_latency[level];
2765
2766         /* WM1+ latency values stored in 0.5us units */
2767         if (level > 0) {
2768                 pri_latency *= 5;
2769                 spr_latency *= 5;
2770                 cur_latency *= 5;
2771         }
2772
2773         if (pristate) {
2774                 result->pri_val = ilk_compute_pri_wm(crtc_state, pristate,
2775                                                      pri_latency, level);
2776                 result->fbc_val = ilk_compute_fbc_wm(crtc_state, pristate, result->pri_val);
2777         }
2778
2779         if (sprstate)
2780                 result->spr_val = ilk_compute_spr_wm(crtc_state, sprstate, spr_latency);
2781
2782         if (curstate)
2783                 result->cur_val = ilk_compute_cur_wm(crtc_state, curstate, cur_latency);
2784
2785         result->enable = true;
2786 }
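/*
 * Unit check (illustrative numbers): WM0 latencies are kept in 0.1us
 * units while WM1+ values are stored in 0.5us units, so a stored
 * spr_latency[2] of 4 becomes 4 * 5 = 20 here, i.e. 2.0us on the same
 * 0.1us scale as WM0.
 */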
2787
2788 static u32
2789 hsw_compute_linetime_wm(const struct intel_crtc_state *crtc_state)
2790 {
2791         const struct intel_atomic_state *intel_state =
2792                 to_intel_atomic_state(crtc_state->base.state);
2793         const struct drm_display_mode *adjusted_mode =
2794                 &crtc_state->base.adjusted_mode;
2795         u32 linetime, ips_linetime;
2796
2797         if (!crtc_state->base.active)
2798                 return 0;
2799         if (WARN_ON(adjusted_mode->crtc_clock == 0))
2800                 return 0;
2801         if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
2802                 return 0;
2803
2804         /* The WMs are computed based on how long it takes to fill a single
2805          * row at the given clock rate, multiplied by 8.
2806          */
2807         linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2808                                      adjusted_mode->crtc_clock);
2809         ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2810                                          intel_state->cdclk.logical.cdclk);
2811
2812         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2813                PIPE_WM_LINETIME_TIME(linetime);
2814 }
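/*
 * Worked example (hypothetical mode): crtc_htotal = 2200 and
 * crtc_clock = 148500 (kHz) give
 * linetime = DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119,
 * i.e. ~14.8us per line in 1/8 us units; ips_linetime is the same
 * formula evaluated against the logical cdclk instead.
 */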
2815
2816 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2817                                   u16 wm[8])
2818 {
2819         struct intel_uncore *uncore = &dev_priv->uncore;
2820
2821         if (INTEL_GEN(dev_priv) >= 9) {
2822                 u32 val;
2823                 int ret, i;
2824                 int level, max_level = ilk_wm_max_level(dev_priv);
2825
2826                 /* read the first set of memory latencies[0:3] */
2827                 val = 0; /* data0 to be programmed to 0 for first set */
2828                 ret = sandybridge_pcode_read(dev_priv,
2829                                              GEN9_PCODE_READ_MEM_LATENCY,
2830                                              &val, NULL);
2831
2832                 if (ret) {
2833                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2834                         return;
2835                 }
2836
2837                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2838                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2839                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2840                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2841                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2842                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2843                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2844
2845                 /* read the second set of memory latencies[4:7] */
2846                 val = 1; /* data0 to be programmed to 1 for second set */
2847                 ret = sandybridge_pcode_read(dev_priv,
2848                                              GEN9_PCODE_READ_MEM_LATENCY,
2849                                              &val, NULL);
2850                 if (ret) {
2851                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2852                         return;
2853                 }
2854
2855                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2856                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2857                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2858                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2859                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2860                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2861                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2862
2863                 /*
2864                  * If a level n (n >= 1) has a 0us latency, all levels m (m >= n)
2865                  * need to be disabled. We make sure to sanitize the values out
2866                  * of the punit to satisfy this requirement.
2867                  */
2868                 for (level = 1; level <= max_level; level++) {
2869                         if (wm[level] == 0) {
2870                                 for (i = level + 1; i <= max_level; i++)
2871                                         wm[i] = 0;
2872                                 break;
2873                         }
2874                 }
2875
2876                 /*
2877                  * WaWmMemoryReadLatency:skl+,glk
2878                  *
2879                  * punit doesn't take into account the read latency so we need
2880                  * to add 2us to the various latency levels we retrieve from the
2881                  * punit when level 0 response data us 0us.
2882                  * punit when level 0 response data is 0us.
2883                 if (wm[0] == 0) {
2884                         wm[0] += 2;
2885                         for (level = 1; level <= max_level; level++) {
2886                                 if (wm[level] == 0)
2887                                         break;
2888                                 wm[level] += 2;
2889                         }
2890                 }
2891
2892                 /*
2893                  * WA Level-0 adjustment for 16GB DIMMs: SKL+
2894                  * Enable this WA for 16GB DIMMs to prevent underruns. If we
2895                  * were not able to get the DIMM info, a 16GB DIMM is assumed,
2896                  * again to avoid any underrun.
2897                  */
2898                 if (dev_priv->dram_info.is_16gb_dimm)
2899                         wm[0] += 1;
2900
2901         } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2902                 u64 sskpd = intel_uncore_read64(uncore, MCH_SSKPD);
2903
2904                 wm[0] = (sskpd >> 56) & 0xFF;
2905                 if (wm[0] == 0)
2906                         wm[0] = sskpd & 0xF;
2907                 wm[1] = (sskpd >> 4) & 0xFF;
2908                 wm[2] = (sskpd >> 12) & 0xFF;
2909                 wm[3] = (sskpd >> 20) & 0x1FF;
2910                 wm[4] = (sskpd >> 32) & 0x1FF;
2911         } else if (INTEL_GEN(dev_priv) >= 6) {
2912                 u32 sskpd = intel_uncore_read(uncore, MCH_SSKPD);
2913
2914                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2915                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2916                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2917                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2918         } else if (INTEL_GEN(dev_priv) >= 5) {
2919                 u32 mltr = intel_uncore_read(uncore, MLTR_ILK);
2920
2921                 /* ILK primary LP0 latency is 700 ns */
2922                 wm[0] = 7;
2923                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2924                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2925         } else {
2926                 MISSING_CASE(INTEL_DEVID(dev_priv));
2927         }
2928 }
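/*
 * Illustrative GEN9 mailbox decode (made-up value): with
 * val = 0x23190f05 the four 8-bit fields at shifts 0/8/16/24 yield
 * wm[0] = 5, wm[1] = 15, wm[2] = 25, wm[3] = 35, all in microseconds
 * on gen9+.
 */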
2929
2930 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2931                                        u16 wm[5])
2932 {
2933         /* ILK sprite LP0 latency is 1300 ns */
2934         if (IS_GEN(dev_priv, 5))
2935                 wm[0] = 13;
2936 }
2937
2938 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2939                                        u16 wm[5])
2940 {
2941         /* ILK cursor LP0 latency is 1300 ns */
2942         if (IS_GEN(dev_priv, 5))
2943                 wm[0] = 13;
2944 }
2945
2946 int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
2947 {
2948         /* how many WM levels are we expecting */
2949         if (INTEL_GEN(dev_priv) >= 9)
2950                 return 7;
2951         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2952                 return 4;
2953         else if (INTEL_GEN(dev_priv) >= 6)
2954                 return 3;
2955         else
2956                 return 2;
2957 }
2958
2959 static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2960                                    const char *name,
2961                                    const u16 wm[8])
2962 {
2963         int level, max_level = ilk_wm_max_level(dev_priv);
2964
2965         for (level = 0; level <= max_level; level++) {
2966                 unsigned int latency = wm[level];
2967
2968                 if (latency == 0) {
2969                         DRM_DEBUG_KMS("%s WM%d latency not provided\n",
2970                                       name, level);
2971                         continue;
2972                 }
2973
2974                 /*
2975                  * - latencies are in us on gen9.
2976                  * - before then, WM1+ latency values are in 0.5us units
2977                  */
2978                 if (INTEL_GEN(dev_priv) >= 9)
2979                         latency *= 10;
2980                 else if (level > 0)
2981                         latency *= 5;
2982
2983                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2984                               name, level, wm[level],
2985                               latency / 10, latency % 10);
2986         }
2987 }
2988
2989 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2990                                     u16 wm[5], u16 min)
2991 {
2992         int level, max_level = ilk_wm_max_level(dev_priv);
2993
2994         if (wm[0] >= min)
2995                 return false;
2996
2997         wm[0] = max(wm[0], min);
2998         for (level = 1; level <= max_level; level++)
2999                 wm[level] = max_t(u16, wm[level], DIV_ROUND_UP(min, 5));
3000
3001         return true;
3002 }
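/*
 * Example with min = 12, i.e. 1.2us, as used by the SNB quirk below:
 * a BIOS-provided wm[0] of 5 is raised to 12, and every WM1+ value is
 * raised to at least DIV_ROUND_UP(12, 5) = 3, which is 1.5us in the
 * 0.5us units those levels use.
 */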
3003
3004 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
3005 {
3006         bool changed;
3007
3008         /*
3009          * The BIOS provided WM memory latency values are often
3010          * inadequate for high resolution displays. Adjust them.
3011          */
3012         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
3013                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
3014                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
3015
3016         if (!changed)
3017                 return;
3018
3019         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
3020         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3021         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3022         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3023 }
3024
3025 static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3026 {
3027         /*
3028          * On some SNB machines (Thinkpad X220 Tablet at least)
3029          * LP3 usage can cause vblank interrupts to be lost.
3030          * The DEIIR bit will go high but it looks like the CPU
3031          * never gets interrupted.
3032          *
3033          * It's not clear whether other interrupt sources could
3034          * be affected or if this is somehow limited to vblank
3035          * interrupts only. To play it safe we disable LP3
3036          * watermarks entirely.
3037          */
3038         if (dev_priv->wm.pri_latency[3] == 0 &&
3039             dev_priv->wm.spr_latency[3] == 0 &&
3040             dev_priv->wm.cur_latency[3] == 0)
3041                 return;
3042
3043         dev_priv->wm.pri_latency[3] = 0;
3044         dev_priv->wm.spr_latency[3] = 0;
3045         dev_priv->wm.cur_latency[3] = 0;
3046
3047         DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3048         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3049         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3050         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3051 }
3052
3053 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
3054 {
3055         intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
3056
3057         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3058                sizeof(dev_priv->wm.pri_latency));
3059         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3060                sizeof(dev_priv->wm.pri_latency));
3061
3062         intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
3063         intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
3064
3065         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3066         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3067         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3068
3069         if (IS_GEN(dev_priv, 6)) {
3070                 snb_wm_latency_quirk(dev_priv);
3071                 snb_wm_lp3_irq_quirk(dev_priv);
3072         }
3073 }
3074
3075 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
3076 {
3077         intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
3078         intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
3079 }
3080
3081 static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv,
3082                                  struct intel_pipe_wm *pipe_wm)
3083 {
3084         /* LP0 watermark maximums depend on this pipe alone */
3085         const struct intel_wm_config config = {
3086                 .num_pipes_active = 1,
3087                 .sprites_enabled = pipe_wm->sprites_enabled,
3088                 .sprites_scaled = pipe_wm->sprites_scaled,
3089         };
3090         struct ilk_wm_maximums max;
3091
3092         /* LP0 watermarks always use 1/2 DDB partitioning */
3093         ilk_compute_wm_maximums(dev_priv, 0, &config, INTEL_DDB_PART_1_2, &max);
3094
3095         /* At least LP0 must be valid */
3096         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3097                 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3098                 return false;
3099         }
3100
3101         return true;
3102 }
3103
3104 /* Compute new watermarks for the pipe */
3105 static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state)
3106 {
3107         struct drm_atomic_state *state = crtc_state->base.state;
3108         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
3109         struct intel_pipe_wm *pipe_wm;
3110         struct drm_device *dev = state->dev;
3111         const struct drm_i915_private *dev_priv = to_i915(dev);
3112         struct drm_plane *plane;
3113         const struct drm_plane_state *plane_state;
3114         const struct intel_plane_state *pristate = NULL;
3115         const struct intel_plane_state *sprstate = NULL;
3116         const struct intel_plane_state *curstate = NULL;
3117         int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
3118         struct ilk_wm_maximums max;
3119
3120         pipe_wm = &crtc_state->wm.ilk.optimal;
3121
3122         drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &crtc_state->base) {
3123                 const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
3124
3125                 if (plane->type == DRM_PLANE_TYPE_PRIMARY)
3126                         pristate = ps;
3127                 else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
3128                         sprstate = ps;
3129                 else if (plane->type == DRM_PLANE_TYPE_CURSOR)
3130                         curstate = ps;
3131         }
3132
3133         pipe_wm->pipe_enabled = crtc_state->base.active;
3134         if (sprstate) {
3135                 pipe_wm->sprites_enabled = sprstate->base.visible;
3136                 pipe_wm->sprites_scaled = sprstate->base.visible &&
3137                         (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3138                          drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
3139         }
3140
3141         usable_level = max_level;
3142
3143         /* ILK/SNB: LP2+ watermarks only w/o sprites */
3144         if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
3145                 usable_level = 1;
3146
3147         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3148         if (pipe_wm->sprites_scaled)
3149                 usable_level = 0;
3150
3151         memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
3152         ilk_compute_wm_level(dev_priv, intel_crtc, 0, crtc_state,
3153                              pristate, sprstate, curstate, &pipe_wm->wm[0]);
3154
3155         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3156                 pipe_wm->linetime = hsw_compute_linetime_wm(crtc_state);
3157
3158         if (!ilk_validate_pipe_wm(dev_priv, pipe_wm))
3159                 return -EINVAL;
3160
3161         ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
3162
3163         for (level = 1; level <= usable_level; level++) {
3164                 struct intel_wm_level *wm = &pipe_wm->wm[level];
3165
3166                 ilk_compute_wm_level(dev_priv, intel_crtc, level, crtc_state,
3167                                      pristate, sprstate, curstate, wm);
3168
3169                 /*
3170                  * Disable any watermark level that exceeds the
3171                  * register maximums since such watermarks are
3172                  * always invalid.
3173                  */
3174                 if (!ilk_validate_wm_level(level, &max, wm)) {
3175                         memset(wm, 0, sizeof(*wm));
3176                         break;
3177                 }
3178         }
3179
3180         return 0;
3181 }
3182
3183 /*
3184  * Build a set of 'intermediate' watermark values that satisfy both the old
3185  * state and the new state.  These can be programmed to the hardware
3186  * immediately.
3187  */
3188 static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate)
3189 {
3190         struct intel_crtc *intel_crtc = to_intel_crtc(newstate->base.crtc);
3191         struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
3192         struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
3193         struct intel_atomic_state *intel_state =
3194                 to_intel_atomic_state(newstate->base.state);
3195         const struct intel_crtc_state *oldstate =
3196                 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3197         const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
3198         int level, max_level = ilk_wm_max_level(dev_priv);
3199
3200         /*
3201          * Start with the final, target watermarks, then combine with the
3202          * currently active watermarks to get values that are safe both before
3203          * and after the vblank.
3204          */
3205         *a = newstate->wm.ilk.optimal;
3206         if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base) ||
3207             intel_state->skip_intermediate_wm)
3208                 return 0;
3209
3210         a->pipe_enabled |= b->pipe_enabled;
3211         a->sprites_enabled |= b->sprites_enabled;
3212         a->sprites_scaled |= b->sprites_scaled;
3213
3214         for (level = 0; level <= max_level; level++) {
3215                 struct intel_wm_level *a_wm = &a->wm[level];
3216                 const struct intel_wm_level *b_wm = &b->wm[level];
3217
3218                 a_wm->enable &= b_wm->enable;
3219                 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3220                 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3221                 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3222                 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3223         }
3224
3225         /*
3226          * We need to make sure that these merged watermark values are
3227          * actually a valid configuration themselves.  If they're not,
3228          * there's no safe way to transition from the old state to
3229          * the new state, so we need to fail the atomic transaction.
3230          */
3231         if (!ilk_validate_pipe_wm(dev_priv, a))
3232                 return -EINVAL;
3233
3234         /*
3235          * If our intermediate WMs are identical to the final WMs, then we can
3236          * omit the post-vblank programming; only update if it's different.
3237          */
3238         if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3239                 newstate->wm.need_postvbl_update = true;
3240
3241         return 0;
3242 }
3243
3244 /*
3245  * Merge the watermarks from all active pipes for a specific level.
3246  */
3247 static void ilk_merge_wm_level(struct drm_i915_private *dev_priv,
3248                                int level,
3249                                struct intel_wm_level *ret_wm)
3250 {
3251         const struct intel_crtc *intel_crtc;
3252
3253         ret_wm->enable = true;
3254
3255         for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
3256                 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
3257                 const struct intel_wm_level *wm = &active->wm[level];
3258
3259                 if (!active->pipe_enabled)
3260                         continue;
3261
3262                 /*
3263                  * The watermark values may have been used in the past,
3264                  * so we must maintain them in the registers for some
3265                  * time even if the level is now disabled.
3266                  */
3267                 if (!wm->enable)
3268                         ret_wm->enable = false;
3269
3270                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3271                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3272                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3273                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3274         }
3275 }
3276
3277 /*
3278  * Merge all low power watermarks for all active pipes.
3279  */
3280 static void ilk_wm_merge(struct drm_i915_private *dev_priv,
3281                          const struct intel_wm_config *config,
3282                          const struct ilk_wm_maximums *max,
3283                          struct intel_pipe_wm *merged)
3284 {
3285         int level, max_level = ilk_wm_max_level(dev_priv);
3286         int last_enabled_level = max_level;
3287
3288         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3289         if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
3290             config->num_pipes_active > 1)
3291                 last_enabled_level = 0;
3292
3293         /* ILK: FBC WM must be disabled always */
3294         merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
3295
3296         /* merge each WM1+ level */
3297         for (level = 1; level <= max_level; level++) {
3298                 struct intel_wm_level *wm = &merged->wm[level];
3299
3300                 ilk_merge_wm_level(dev_priv, level, wm);
3301
3302                 if (level > last_enabled_level)
3303                         wm->enable = false;
3304                 else if (!ilk_validate_wm_level(level, max, wm))
3305                         /* make sure all following levels get disabled */
3306                         last_enabled_level = level - 1;
3307
3308                 /*
3309                  * The spec says it is preferred to disable
3310                  * FBC WMs instead of disabling a WM level.
3311                  */
3312                 if (wm->fbc_val > max->fbc) {
3313                         if (wm->enable)
3314                                 merged->fbc_wm_enabled = false;
3315                         wm->fbc_val = 0;
3316                 }
3317         }
3318
3319         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3320         /*
3321          * FIXME this is racy. FBC might get enabled later.
3322          * What we should check here is whether FBC can be
3323          * enabled sometime later.
3324          */
3325         if (IS_GEN(dev_priv, 5) && !merged->fbc_wm_enabled &&
3326             intel_fbc_is_active(dev_priv)) {
3327                 for (level = 2; level <= max_level; level++) {
3328                         struct intel_wm_level *wm = &merged->wm[level];
3329
3330                         wm->enable = false;
3331                 }
3332         }
3333 }
3334
3335 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3336 {
3337         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3338         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3339 }
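/*
 * Mapping example: when wm[4] is enabled, the LP2/LP3 registers are
 * fed from levels 3/4, so LP1,LP2,LP3 -> 1,3,4; otherwise the mapping
 * is simply LP1,LP2,LP3 -> 1,2,3.
 */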
3340
3341 /* The value we need to program into the WM_LPx latency field */
3342 static unsigned int ilk_wm_lp_latency(struct drm_i915_private *dev_priv,
3343                                       int level)
3344 {
3345         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3346                 return 2 * level;
3347         else
3348                 return dev_priv->wm.pri_latency[level];
3349 }
3350
3351 static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
3352                                    const struct intel_pipe_wm *merged,
3353                                    enum intel_ddb_partitioning partitioning,
3354                                    struct ilk_wm_values *results)
3355 {
3356         struct intel_crtc *intel_crtc;
3357         int level, wm_lp;
3358
3359         results->enable_fbc_wm = merged->fbc_wm_enabled;
3360         results->partitioning = partitioning;
3361
3362         /* LP1+ register values */
3363         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3364                 const struct intel_wm_level *r;
3365
3366                 level = ilk_wm_lp_to_level(wm_lp, merged);
3367
3368                 r = &merged->wm[level];
3369
3370                 /*
3371                  * Maintain the watermark values even if the level is
3372                  * disabled. Doing otherwise could cause underruns.
3373                  */
3374                 results->wm_lp[wm_lp - 1] =
3375                         (ilk_wm_lp_latency(dev_priv, level) << WM1_LP_LATENCY_SHIFT) |
3376                         (r->pri_val << WM1_LP_SR_SHIFT) |
3377                         r->cur_val;
3378
3379                 if (r->enable)
3380                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3381
3382                 if (INTEL_GEN(dev_priv) >= 8)
3383                         results->wm_lp[wm_lp - 1] |=
3384                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3385                 else
3386                         results->wm_lp[wm_lp - 1] |=
3387                                 r->fbc_val << WM1_LP_FBC_SHIFT;
3388
3389                 /*
3390                  * Always set WM1S_LP_EN when spr_val != 0, even if the
3391                  * level is disabled. Doing otherwise could cause underruns.
3392                  */
3393                 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
3394                         WARN_ON(wm_lp != 1);
3395                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3396                 } else
3397                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
3398         }
3399
3400         /* LP0 register values */
3401         for_each_intel_crtc(&dev_priv->drm, intel_crtc) {
3402                 enum pipe pipe = intel_crtc->pipe;
3403                 const struct intel_wm_level *r =
3404                         &intel_crtc->wm.active.ilk.wm[0];
3405
3406                 if (WARN_ON(!r->enable))
3407                         continue;
3408
3409                 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
3410
3411                 results->wm_pipe[pipe] =
3412                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3413                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3414                         r->cur_val;
3415         }
3416 }
3417
3418 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
3419  * case both are at the same level. Prefer r1 in case they're the same. */
3420 static struct intel_pipe_wm *
3421 ilk_find_best_result(struct drm_i915_private *dev_priv,
3422                      struct intel_pipe_wm *r1,
3423                      struct intel_pipe_wm *r2)
3424 {
3425         int level, max_level = ilk_wm_max_level(dev_priv);
3426         int level1 = 0, level2 = 0;
3427
3428         for (level = 1; level <= max_level; level++) {
3429                 if (r1->wm[level].enable)
3430                         level1 = level;
3431                 if (r2->wm[level].enable)
3432                         level2 = level;
3433         }
3434
3435         if (level1 == level2) {
3436                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3437                         return r2;
3438                 else
3439                         return r1;
3440         } else if (level1 > level2) {
3441                 return r1;
3442         } else {
3443                 return r2;
3444         }
3445 }
3446
3447 /* dirty bits used to track which watermarks need changes */
3448 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3449 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
3450 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3451 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3452 #define WM_DIRTY_FBC (1 << 24)
3453 #define WM_DIRTY_DDB (1 << 25)
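/*
 * Resulting bit layout, derived from the macros above: bits 0-7 carry
 * per-pipe WM0 dirtiness, bits 8-15 per-pipe linetime, bits 16-18 the
 * LP1-LP3 watermarks, bit 24 FBC and bit 25 DDB partitioning.
 */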
3454
3455 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3456                                          const struct ilk_wm_values *old,
3457                                          const struct ilk_wm_values *new)
3458 {
3459         unsigned int dirty = 0;
3460         enum pipe pipe;
3461         int wm_lp;
3462
3463         for_each_pipe(dev_priv, pipe) {
3464                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3465                         dirty |= WM_DIRTY_LINETIME(pipe);
3466                         /* Must disable LP1+ watermarks too */
3467                         dirty |= WM_DIRTY_LP_ALL;
3468                 }
3469
3470                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3471                         dirty |= WM_DIRTY_PIPE(pipe);
3472                         /* Must disable LP1+ watermarks too */
3473                         dirty |= WM_DIRTY_LP_ALL;
3474                 }
3475         }
3476
3477         if (old->enable_fbc_wm != new->enable_fbc_wm) {
3478                 dirty |= WM_DIRTY_FBC;
3479                 /* Must disable LP1+ watermarks too */
3480                 dirty |= WM_DIRTY_LP_ALL;
3481         }
3482
3483         if (old->partitioning != new->partitioning) {
3484                 dirty |= WM_DIRTY_DDB;
3485                 /* Must disable LP1+ watermarks too */
3486                 dirty |= WM_DIRTY_LP_ALL;
3487         }
3488
3489         /* LP1+ watermarks already deemed dirty, no need to continue */
3490         if (dirty & WM_DIRTY_LP_ALL)
3491                 return dirty;
3492
3493         /* Find the lowest numbered LP1+ watermark in need of an update... */
3494         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3495                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3496                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3497                         break;
3498         }
3499
3500         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3501         for (; wm_lp <= 3; wm_lp++)
3502                 dirty |= WM_DIRTY_LP(wm_lp);
3503
3504         return dirty;
3505 }
3506
3507 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3508                                unsigned int dirty)
3509 {
3510         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3511         bool changed = false;
3512
3513         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3514                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3515                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3516                 changed = true;
3517         }
3518         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3519                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3520                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3521                 changed = true;
3522         }
3523         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3524                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3525                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3526                 changed = true;
3527         }
3528
3529         /*
3530          * Don't touch WM1S_LP_EN here.
3531          * Doing so could cause underruns.
3532          */
3533
3534         return changed;
3535 }
3536
3537 /*
3538  * The spec says we shouldn't write when we don't need to, because every write
3539  * causes WMs to be re-evaluated, expending some power.
3540  */
3541 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3542                                 struct ilk_wm_values *results)
3543 {
3544         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3545         unsigned int dirty;
3546         u32 val;
3547
3548         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3549         if (!dirty)
3550                 return;
3551
3552         _ilk_disable_lp_wm(dev_priv, dirty);
3553
3554         if (dirty & WM_DIRTY_PIPE(PIPE_A))
3555                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3556         if (dirty & WM_DIRTY_PIPE(PIPE_B))
3557                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3558         if (dirty & WM_DIRTY_PIPE(PIPE_C))
3559                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3560
3561         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
3562                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
3563         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
3564                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
3565         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
3566                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3567
3568         if (dirty & WM_DIRTY_DDB) {
3569                 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3570                         val = I915_READ(WM_MISC);
3571                         if (results->partitioning == INTEL_DDB_PART_1_2)
3572                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
3573                         else
3574                                 val |= WM_MISC_DATA_PARTITION_5_6;
3575                         I915_WRITE(WM_MISC, val);
3576                 } else {
3577                         val = I915_READ(DISP_ARB_CTL2);
3578                         if (results->partitioning == INTEL_DDB_PART_1_2)
3579                                 val &= ~DISP_DATA_PARTITION_5_6;
3580                         else
3581                                 val |= DISP_DATA_PARTITION_5_6;
3582                         I915_WRITE(DISP_ARB_CTL2, val);
3583                 }
3584         }
3585
3586         if (dirty & WM_DIRTY_FBC) {
3587                 val = I915_READ(DISP_ARB_CTL);
3588                 if (results->enable_fbc_wm)
3589                         val &= ~DISP_FBC_WM_DIS;
3590                 else
3591                         val |= DISP_FBC_WM_DIS;
3592                 I915_WRITE(DISP_ARB_CTL, val);
3593         }
3594
3595         if (dirty & WM_DIRTY_LP(1) &&
3596             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3597                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3598
3599         if (INTEL_GEN(dev_priv) >= 7) {
3600                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3601                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3602                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3603                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3604         }
3605
3606         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3607                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3608         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3609                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3610         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3611                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3612
3613         dev_priv->wm.hw = *results;
3614 }
3615
3616 bool ilk_disable_lp_wm(struct drm_device *dev)
3617 {
3618         struct drm_i915_private *dev_priv = to_i915(dev);
3619
3620         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3621 }
3622
3623 static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
3624 {
3625         u8 enabled_slices;
3626
3627         /* Slice 1 will always be enabled */
3628         enabled_slices = 1;
3629
3630         /* Gens prior to GEN11 have only one DBuf slice */
3631         if (INTEL_GEN(dev_priv) < 11)
3632                 return enabled_slices;
3633
3634         /*
3635          * FIXME: for now we'll only ever use 1 slice; pretend that we have
3636          * only that 1 slice enabled until we have a proper way for on-demand
3637          * toggling of the second slice.
3638          */
3639         if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
3640                 enabled_slices++;
3641
3642         return enabled_slices;
3643 }
3644
3645 /*
3646  * FIXME: We still don't have the proper code to detect if we need to apply the WA,
3647  * so assume we'll always need it in order to avoid underruns.
3648  */
3649 static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
3650 {
3651         return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv);
3652 }
3653
3654 static bool
3655 intel_has_sagv(struct drm_i915_private *dev_priv)
3656 {
3657         return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
3658                 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
3659 }
3660
3661 /*
3662  * SAGV dynamically adjusts the system agent voltage and clock frequencies
3663  * depending on power and performance requirements. The display engine access
3664  * to system memory is blocked during the adjustment time. Because of the
3665  * blocking time, having this enabled can cause full system hangs and/or pipe
3666  * underruns if we don't meet all of the following requirements:
3667  *
3668  *  - <= 1 pipe enabled
3669  *  - All planes can enable watermarks for latencies >= SAGV engine block time
3670  *  - We're not using an interlaced display configuration
3671  */
3672 int
3673 intel_enable_sagv(struct drm_i915_private *dev_priv)
3674 {
3675         int ret;
3676
3677         if (!intel_has_sagv(dev_priv))
3678                 return 0;
3679
3680         if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3681                 return 0;
3682
3683         DRM_DEBUG_KMS("Enabling SAGV\n");
3684         ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3685                                       GEN9_SAGV_ENABLE);
3686
3687         /* We don't need to wait for SAGV when enabling */
3688
3689         /*
3690          * Some skl systems, pre-release machines in particular,
3691          * don't actually have SAGV.
3692          */
3693         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3694                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3695                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3696                 return 0;
3697         } else if (ret < 0) {
3698                 DRM_ERROR("Failed to enable SAGV\n");
3699                 return ret;
3700         }
3701
3702         dev_priv->sagv_status = I915_SAGV_ENABLED;
3703         return 0;
3704 }
3705
3706 int
3707 intel_disable_sagv(struct drm_i915_private *dev_priv)
3708 {
3709         int ret;
3710
3711         if (!intel_has_sagv(dev_priv))
3712                 return 0;
3713
3714         if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3715                 return 0;
3716
3717         DRM_DEBUG_KMS("Disabling SAGV\n");
3718         /* bspec says to keep retrying for at least 1 ms */
3719         ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3720                                 GEN9_SAGV_DISABLE,
3721                                 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3722                                 1);
3723         /*
3724          * Some skl systems, pre-release machines in particular,
3725          * don't actually have SAGV.
3726          */
3727         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3728                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3729                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3730                 return 0;
3731         } else if (ret < 0) {
3732                 DRM_ERROR("Failed to disable SAGV (%d)\n", ret);
3733                 return ret;
3734         }
3735
3736         dev_priv->sagv_status = I915_SAGV_DISABLED;
3737         return 0;
3738 }
3739
3740 bool intel_can_enable_sagv(struct intel_atomic_state *state)
3741 {
3742         struct drm_device *dev = state->base.dev;
3743         struct drm_i915_private *dev_priv = to_i915(dev);
3744         struct intel_crtc *crtc;
3745         struct intel_plane *plane;
3746         struct intel_crtc_state *crtc_state;
3747         enum pipe pipe;
3748         int level, latency;
3749         int sagv_block_time_us;
3750
3751         if (!intel_has_sagv(dev_priv))
3752                 return false;
3753
3754         if (IS_GEN(dev_priv, 9))
3755                 sagv_block_time_us = 30;
3756         else if (IS_GEN(dev_priv, 10))
3757                 sagv_block_time_us = 20;
3758         else
3759                 sagv_block_time_us = 10;
3760
3761         /*
3762          * If there are no active CRTCs, no additional checks need be performed
3763          */
3764         if (hweight32(state->active_crtcs) == 0)
3765                 return true;
3766
3767         /*
3768          * SKL+ workaround: bspec recommends we disable SAGV when we have
3769          * more than one pipe enabled
3770          */
3771         if (hweight32(state->active_crtcs) > 1)
3772                 return false;
3773
3774         /* Since we're now guaranteed to only have one active CRTC... */
3775         pipe = ffs(state->active_crtcs) - 1;
3776         crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3777         crtc_state = to_intel_crtc_state(crtc->base.state);
3778
3779         if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3780                 return false;
3781
3782         for_each_intel_plane_on_crtc(dev, crtc, plane) {
3783                 struct skl_plane_wm *wm =
3784                         &crtc_state->wm.skl.optimal.planes[plane->id];
3785
3786                 /* Skip this plane if it's not enabled */
3787                 if (!wm->wm[0].plane_en)
3788                         continue;
3789
3790                 /* Find the highest enabled wm level for this plane */
3791                 for (level = ilk_wm_max_level(dev_priv);
3792                      !wm->wm[level].plane_en; --level)
3793                      { }
3794
3795                 latency = dev_priv->wm.skl_latency[level];
3796
3797                 if (skl_needs_memory_bw_wa(dev_priv) &&
3798                     plane->base.state->fb->modifier ==
3799                     I915_FORMAT_MOD_X_TILED)
3800                         latency += 15;
3801
3802                 /*
3803                  * If any of the planes on this pipe don't enable wm levels
3804                  * that cover memory latencies of at least sagv_block_time_us, we
3805                  * can't enable SAGV.
3806                  */
3807                 if (latency < sagv_block_time_us)
3808                         return false;
3809         }
3810
3811         return true;
3812 }
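/*
 * Example (hypothetical plane): on gen9 the SAGV block time is 30us,
 * so a plane whose highest enabled WM level has 20us latency blocks
 * SAGV, while the X-tiled bandwidth WA bumping that to 20 + 15 = 35us
 * would permit it.
 */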
3813
3814 static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
3815                               const struct intel_crtc_state *crtc_state,
3816                               const u64 total_data_rate,
3817                               const int num_active,
3818                               struct skl_ddb_allocation *ddb)
3819 {
3820         const struct drm_display_mode *adjusted_mode;
3821         u64 total_data_bw;
3822         u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3823
3824         WARN_ON(ddb_size == 0);
3825
3826         if (INTEL_GEN(dev_priv) < 11)
3827                 return ddb_size - 4; /* 4 blocks for bypass path allocation */
3828
3829         adjusted_mode = &crtc_state->base.adjusted_mode;
3830         total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
3831
3832         /*
3833          * 12GB/s is maximum BW supported by single DBuf slice.
3834          * 12GB/s is the maximum BW supported by a single DBuf slice.
3835          * FIXME dbuf slice code is broken:
3836          * - must wait for planes to stop using the slice before powering it off
3837          * - plane straddling both slices is illegal in multi-pipe scenarios
3838          * - should validate we stay within the hw bandwidth limits
3839          */
3840         if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
3841                 ddb->enabled_slices = 2;
3842         } else {
3843                 ddb->enabled_slices = 1;
3844                 ddb_size /= 2;
3845         }
3846
3847         return ddb_size;
3848 }
3849
3850 static void
3851 skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
3852                                    const struct intel_crtc_state *crtc_state,
3853                                    const u64 total_data_rate,
3854                                    struct skl_ddb_allocation *ddb,
3855                                    struct skl_ddb_entry *alloc, /* out */
3856                                    int *num_active /* out */)
3857 {
3858         struct drm_atomic_state *state = crtc_state->base.state;
3859         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3860         struct drm_crtc *for_crtc = crtc_state->base.crtc;
3861         const struct intel_crtc *crtc;
3862         u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
3863         enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
3864         u16 ddb_size;
3865         u32 i;
3866
3867         if (WARN_ON(!state) || !crtc_state->base.active) {
3868                 alloc->start = 0;
3869                 alloc->end = 0;
3870                 *num_active = hweight32(dev_priv->active_crtcs);
3871                 return;
3872         }
3873
3874         if (intel_state->active_pipe_changes)
3875                 *num_active = hweight32(intel_state->active_crtcs);
3876         else
3877                 *num_active = hweight32(dev_priv->active_crtcs);
3878
3879         ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate,
3880                                       *num_active, ddb);
3881
3882         /*
3883          * If the state doesn't change the active CRTC's or there is no
3884          * If the state doesn't change the active CRTCs or there is no
3885          * modeset request, then there's no need to recalculate;
3886          * the existing pipe allocation limits should remain unchanged.
3887          * Note that we're safe from racing commits since any racing commit
3888          * that changes the active CRTC list or does a modeset would need to
3889          */
3890         if (!intel_state->active_pipe_changes && !intel_state->modeset) {
3891                 /*
3892                  * alloc may be cleared by clear_intel_crtc_state,
3893                  * copy from old state to be sure
3894                  */
3895                 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3896                 return;
3897         }
3898
3899         /*
3900          * The watermark/DDB requirement depends heavily on the width of the
3901          * framebuffer, so instead of allocating the DDB equally among pipes,
3902          * distribute it based on the resolution/width of each display.
3903          */
3904         for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
3905                 const struct drm_display_mode *adjusted_mode =
3906                         &crtc_state->base.adjusted_mode;
3907                 enum pipe pipe = crtc->pipe;
3908                 int hdisplay, vdisplay;
3909
3910                 if (!crtc_state->base.enable)
3911                         continue;
3912
3913                 drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
3914                 total_width += hdisplay;
3915
3916                 if (pipe < for_pipe)
3917                         width_before_pipe += hdisplay;
3918                 else if (pipe == for_pipe)
3919                         pipe_width = hdisplay;
3920         }
3921
3922         alloc->start = ddb_size * width_before_pipe / total_width;
3923         alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
3924 }
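/*
 * Proportional split example (hypothetical pair of pipes): with
 * ddb_size = 2048 shared by a 1920 pixel wide pipe A and a 2560 pixel
 * wide pipe B, pipe A gets [0, 2048 * 1920 / 4480) = [0, 877) and
 * pipe B gets [877, 2048).
 */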
3925
3926 static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
3927                                  int width, const struct drm_format_info *format,
3928                                  u64 modifier, unsigned int rotation,
3929                                  u32 plane_pixel_rate, struct skl_wm_params *wp,
3930                                  int color_plane);
3931 static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
3932                                  int level,
3933                                  const struct skl_wm_params *wp,
3934                                  const struct skl_wm_level *result_prev,
3935                                  struct skl_wm_level *result /* out */);
3936
3937 static unsigned int
3938 skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
3939                       int num_active)
3940 {
3941         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
3942         int level, max_level = ilk_wm_max_level(dev_priv);
3943         struct skl_wm_level wm = {};
3944         int ret, min_ddb_alloc = 0;
3945         struct skl_wm_params wp;
3946
3947         ret = skl_compute_wm_params(crtc_state, 256,
3948                                     drm_format_info(DRM_FORMAT_ARGB8888),
3949                                     DRM_FORMAT_MOD_LINEAR,
3950                                     DRM_MODE_ROTATE_0,
3951                                     crtc_state->pixel_rate, &wp, 0);
3952         WARN_ON(ret);
3953
3954         for (level = 0; level <= max_level; level++) {
3955                 skl_compute_plane_wm(crtc_state, level, &wp, &wm, &wm);
3956                 if (wm.min_ddb_alloc == U16_MAX)
3957                         break;
3958
3959                 min_ddb_alloc = wm.min_ddb_alloc;
3960         }
3961
3962         return max(num_active == 1 ? 32 : 8, min_ddb_alloc);
3963 }
3964
3965 static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
3966                                        struct skl_ddb_entry *entry, u32 reg)
3967 {
3968
3969         entry->start = reg & DDB_ENTRY_MASK;
3970         entry->end = (reg >> DDB_ENTRY_END_SHIFT) & DDB_ENTRY_MASK;
3971
3972         if (entry->end)
3973                 entry->end += 1;
3974 }
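/*
 * Decode example (made-up register value): a start field of 256 in the
 * low bits and an end field of 511 at DDB_ENTRY_END_SHIFT yield
 * entry->start = 256 and entry->end = 511 + 1 = 512, i.e. the hardware
 * stores an inclusive end which we convert to exclusive.
 */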
3975
3976 static void
3977 skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
3978                            const enum pipe pipe,
3979                            const enum plane_id plane_id,
3980                            struct skl_ddb_entry *ddb_y,
3981                            struct skl_ddb_entry *ddb_uv)
3982 {
3983         u32 val, val2;
3984         u32 fourcc = 0;
3985
3986         /* Cursor doesn't support NV12/planar, so no extra calculation needed */
3987         if (plane_id == PLANE_CURSOR) {
3988                 val = I915_READ(CUR_BUF_CFG(pipe));
3989                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
3990                 return;
3991         }
3992
3993         val = I915_READ(PLANE_CTL(pipe, plane_id));
3994
3995         /* No DDB allocated for disabled planes */
3996         if (val & PLANE_CTL_ENABLE)
3997                 fourcc = skl_format_to_fourcc(val & PLANE_CTL_FORMAT_MASK,
3998                                               val & PLANE_CTL_ORDER_RGBX,
3999                                               val & PLANE_CTL_ALPHA_MASK);
4000
4001         if (INTEL_GEN(dev_priv) >= 11) {
4002                 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
4003                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
4004         } else {
4005                 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
4006                 val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
4007
4008                 if (is_planar_yuv_format(fourcc))
4009                         swap(val, val2);
4010
4011                 skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
4012                 skl_ddb_entry_init_from_hw(dev_priv, ddb_uv, val2);
4013         }
4014 }
4015
4016 void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
4017                                struct skl_ddb_entry *ddb_y,
4018                                struct skl_ddb_entry *ddb_uv)
4019 {
4020         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4021         enum intel_display_power_domain power_domain;
4022         enum pipe pipe = crtc->pipe;
4023         intel_wakeref_t wakeref;
4024         enum plane_id plane_id;
4025
4026         power_domain = POWER_DOMAIN_PIPE(pipe);
4027         wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain);
4028         if (!wakeref)
4029                 return;
4030
4031         for_each_plane_id_on_crtc(crtc, plane_id)
4032                 skl_ddb_get_hw_plane_state(dev_priv, pipe,
4033                                            plane_id,
4034                                            &ddb_y[plane_id],
4035                                            &ddb_uv[plane_id]);
4036
4037         intel_display_power_put(dev_priv, power_domain, wakeref);
4038 }
4039
4040 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
4041                           struct skl_ddb_allocation *ddb /* out */)
4042 {
4043         ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
4044 }
4045
4046 /*
4047  * Determines the downscale amount of a plane for the purposes of watermark calculations.
4048  * The bspec defines downscale amount as:
4049  *
4050  * """
4051  * Horizontal down scale amount = maximum[1, Horizontal source size /
4052  *                                           Horizontal destination size]
4053  * Vertical down scale amount = maximum[1, Vertical source size /
4054  *                                         Vertical destination size]
4055  * Total down scale amount = Horizontal down scale amount *
4056  *                           Vertical down scale amount
4057  * """
4058  *
4059  * The return value is provided in 16.16 fixed-point form to retain the
4060  * fractional part; the caller is responsible for dividing and rounding it off.
4061  */
4062 static uint_fixed_16_16_t
4063 skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state,
4064                            const struct intel_plane_state *plane_state)
4065 {
4066         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
4067         u32 src_w, src_h, dst_w, dst_h;
4068         uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4069         uint_fixed_16_16_t downscale_h, downscale_w;
4070
4071         if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)))
4072                 return u32_to_fixed16(0);
4073
4074         /* n.b., src is 16.16 fixed point, dst is whole integer */
4075         if (plane->id == PLANE_CURSOR) {
4076                 /*
4077                  * Cursors only support 0/180 degree rotation,
4078                  * hence no need to account for rotation here.
4079                  */
4080                 src_w = plane_state->base.src_w >> 16;
4081                 src_h = plane_state->base.src_h >> 16;
4082                 dst_w = plane_state->base.crtc_w;
4083                 dst_h = plane_state->base.crtc_h;
4084         } else {
4085                 /*
4086                  * Src coordinates are already rotated by 270 degrees for
4087                  * the 90/270 degree plane rotation cases (to match the
4088                  * GTT mapping), hence no need to account for rotation here.
4089                  */
4090                 src_w = drm_rect_width(&plane_state->base.src) >> 16;
4091                 src_h = drm_rect_height(&plane_state->base.src) >> 16;
4092                 dst_w = drm_rect_width(&plane_state->base.dst);
4093                 dst_h = drm_rect_height(&plane_state->base.dst);
4094         }
4095
4096         fp_w_ratio = div_fixed16(src_w, dst_w);
4097         fp_h_ratio = div_fixed16(src_h, dst_h);
4098         downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4099         downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4100
4101         return mul_fixed16(downscale_w, downscale_h);
4102 }
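
/*
 * A worked example of the fixed-point math above, with assumed numbers:
 * a 3840x2160 source scanned out at 1920x1080. Each ratio is 2.0 in
 * 16.16 form (0x20000), so the total downscale amount is 4.0 (0x40000);
 * an unscaled plane comes out as exactly 1.0 (0x10000) thanks to the
 * max_fixed16() clamps. Illustrative helper, not part of i915.
 */
static inline uint_fixed_16_16_t skl_downscale_sketch(void)
{
        uint_fixed_16_16_t w = div_fixed16(3840, 1920); /* 2.0 */
        uint_fixed_16_16_t h = div_fixed16(2160, 1080); /* 2.0 */

        return mul_fixed16(w, h); /* 4.0, i.e. 0x40000 */
}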
4103
4104 static uint_fixed_16_16_t
4105 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
4106 {
4107         uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
4108
4109         if (!crtc_state->base.enable)
4110                 return pipe_downscale;
4111
4112         if (crtc_state->pch_pfit.enabled) {
4113                 u32 src_w, src_h, dst_w, dst_h;
4114                 u32 pfit_size = crtc_state->pch_pfit.size;
4115                 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4116                 uint_fixed_16_16_t downscale_h, downscale_w;
4117
4118                 src_w = crtc_state->pipe_src_w;
4119                 src_h = crtc_state->pipe_src_h;
4120                 dst_w = pfit_size >> 16;
4121                 dst_h = pfit_size & 0xffff;
4122
4123                 if (!dst_w || !dst_h)
4124                         return pipe_downscale;
4125
4126                 fp_w_ratio = div_fixed16(src_w, dst_w);
4127                 fp_h_ratio = div_fixed16(src_h, dst_h);
4128                 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4129                 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4130
4131                 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
4132         }
4133
4134         return pipe_downscale;
4135 }
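
/*
 * A small sketch of the pch_pfit.size packing assumed above: the panel
 * fitter destination width sits in the high 16 bits and the height in
 * the low 16 bits, hence the ">> 16" and "& 0xffff" unpacking.
 * Illustrative helper, not part of i915.
 */
static inline void skl_pfit_size_sketch(void)
{
        u32 pfit_size = (1920 << 16) | 1080; /* assumed 1920x1080 window */
        u32 dst_w = pfit_size >> 16;    /* 1920 */
        u32 dst_h = pfit_size & 0xffff; /* 1080 */

        (void)dst_w;
        (void)dst_h;
}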
4136
4137 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
4138                                   struct intel_crtc_state *crtc_state)
4139 {
4140         struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
4141         struct drm_atomic_state *state = crtc_state->base.state;
4142         struct drm_plane *plane;
4143         const struct drm_plane_state *drm_plane_state;
4144         int crtc_clock, dotclk;
4145         u32 pipe_max_pixel_rate;
4146         uint_fixed_16_16_t pipe_downscale;
4147         uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
4148
4149         if (!crtc_state->base.enable)
4150                 return 0;
4151
4152         drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
4153                 uint_fixed_16_16_t plane_downscale;
4154                 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
4155                 int bpp;
4156                 const struct intel_plane_state *plane_state =
4157                         to_intel_plane_state(drm_plane_state);
4158
4159                 if (!intel_wm_plane_visible(crtc_state, plane_state))
4160                         continue;
4161
4162                 if (WARN_ON(!plane_state->base.fb))
4163                         return -EINVAL;
4164
4165                 plane_downscale = skl_plane_downscale_amount(crtc_state, plane_state);
4166                 bpp = plane_state->base.fb->format->cpp[0] * 8;
4167                 if (bpp == 64)
4168                         plane_downscale = mul_fixed16(plane_downscale,
4169                                                       fp_9_div_8);
4170
4171                 max_downscale = max_fixed16(plane_downscale, max_downscale);
4172         }
4173         pipe_downscale = skl_pipe_downscale_amount(crtc_state);
4174
4175         pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4176
4177         crtc_clock = crtc_state->base.adjusted_mode.crtc_clock;
4178         dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4179
4180         if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
4181                 dotclk *= 2;
4182
4183         pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
4184
4185         if (pipe_max_pixel_rate < crtc_clock) {
4186                 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4187                 return -EINVAL;
4188         }
4189
4190         return 0;
4191 }
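
/*
 * A worked example of the limit above, with assumed numbers: a logical
 * cdclk of 316800 kHz on GLK is doubled to 633600, and a combined pipe
 * downscale of 1.5 (0x18000 in 16.16 form) caps the supported crtc
 * clock at 633600 / 1.5 = 422400 kHz; faster modes are rejected with
 * -EINVAL. Illustrative helper, not part of i915.
 */
static inline u32 skl_max_pixel_rate_sketch(void)
{
        u32 dotclk = 316800 * 2;
        uint_fixed_16_16_t pipe_downscale = div_fixed16(3, 2); /* 1.5 */

        return div_round_up_u32_fixed16(dotclk, pipe_downscale); /* 422400 */
}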
4192
4193 static u64
4194 skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state,
4195                              const struct intel_plane_state *plane_state,
4196                              const int plane)
4197 {
4198         struct intel_plane *intel_plane = to_intel_plane(plane_state->base.plane);
4199         u32 data_rate;
4200         u32 width = 0, height = 0;
4201         struct drm_framebuffer *fb;
4202         u32 format;
4203         uint_fixed_16_16_t down_scale_amount;
4204         u64 rate;
4205
4206         if (!plane_state->base.visible)
4207                 return 0;
4208
4209         fb = plane_state->base.fb;
4210         format = fb->format->format;
4211
4212         if (intel_plane->id == PLANE_CURSOR)
4213                 return 0;
4214         if (plane == 1 && !is_planar_yuv_format(format))
4215                 return 0;
4216
4217         /*
4218          * Src coordinates are already rotated by 270 degrees for
4219          * the 90/270 degree plane rotation cases (to match the
4220          * GTT mapping), hence no need to account for rotation here.
4221          */
4222         width = drm_rect_width(&plane_state->base.src) >> 16;
4223         height = drm_rect_height(&plane_state->base.src) >> 16;
4224
4225         /* UV plane does 1/2 pixel sub-sampling */
4226         if (plane == 1 && is_planar_yuv_format(format)) {
4227                 width /= 2;
4228                 height /= 2;
4229         }
4230
4231         data_rate = width * height;
4232
4233         down_scale_amount = skl_plane_downscale_amount(crtc_state, plane_state);
4234
4235         rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4236
4237         rate *= fb->format->cpp[plane];
4238         return rate;
4239 }
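
/*
 * A worked example of the relative data rate above, with assumed
 * numbers: the UV color plane (plane == 1) of an unscaled 1920x1080
 * NV12 framebuffer. Sub-sampling halves both dimensions and the UV
 * cpp is 2, giving 960 * 540 * 2 = 1036800. Illustrative helper, not
 * part of i915.
 */
static inline u64 skl_data_rate_sketch(void)
{
        u32 width = 1920 / 2, height = 1080 / 2; /* 1/2 UV sub-sampling */
        u64 rate = (u64)width * height;          /* downscale is 1.0 */

        return rate * 2; /* cpp[1] == 2 for NV12 */
}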
4240
4241 static u64
4242 skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
4243                                  u64 *plane_data_rate,
4244                                  u64 *uv_plane_data_rate)
4245 {
4246         struct drm_atomic_state *state = crtc_state->base.state;
4247         struct drm_plane *plane;
4248         const struct drm_plane_state *drm_plane_state;
4249         u64 total_data_rate = 0;
4250
4251         if (WARN_ON(!state))
4252                 return 0;
4253
4254         /* Calculate and cache data rate for each plane */
4255         drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
4256                 enum plane_id plane_id = to_intel_plane(plane)->id;
4257                 const struct intel_plane_state *plane_state =
4258                         to_intel_plane_state(drm_plane_state);
4259                 u64 rate;
4260
4261                 /* packed/y */
4262                 rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0);
4263                 plane_data_rate[plane_id] = rate;
4264                 total_data_rate += rate;
4265
4266                 /* uv-plane */
4267                 rate = skl_plane_relative_data_rate(crtc_state, plane_state, 1);
4268                 uv_plane_data_rate[plane_id] = rate;
4269                 total_data_rate += rate;
4270         }
4271
4272         return total_data_rate;
4273 }
4274
4275 static u64
4276 icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
4277                                  u64 *plane_data_rate)
4278 {
4279         struct drm_plane *plane;
4280         const struct drm_plane_state *drm_plane_state;
4281         u64 total_data_rate = 0;
4282
4283         if (WARN_ON(!crtc_state->base.state))
4284                 return 0;
4285
4286         /* Calculate and cache data rate for each plane */
4287         drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
4288                 const struct intel_plane_state *plane_state =
4289                         to_intel_plane_state(drm_plane_state);
4290                 enum plane_id plane_id = to_intel_plane(plane)->id;
4291                 u64 rate;
4292
4293                 if (!plane_state->linked_plane) {
4294                         rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0);
4295                         plane_data_rate[plane_id] = rate;
4296                         total_data_rate += rate;
4297                 } else {
4298                         enum plane_id y_plane_id;
4299
4300                         /*
4301                          * The slave plane might not be iterated by
4302                          * drm_atomic_crtc_state_for_each_plane_state(),
4303                          * and it needs the master plane state, which may
4304                          * be NULL if we try get_new_plane_state(), so we
4305                          * always calculate from the master.
4306                          */
4307                         if (plane_state->slave)
4308                                 continue;
4309
4310                         /* Y plane rate is calculated on the slave */
4311                         rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0);
4312                         y_plane_id = plane_state->linked_plane->id;
4313                         plane_data_rate[y_plane_id] = rate;
4314                         total_data_rate += rate;
4315
4316                         rate = skl_plane_relative_data_rate(crtc_state, plane_state, 1);
4317                         plane_data_rate[plane_id] = rate;
4318                         total_data_rate += rate;
4319                 }
4320         }
4321
4322         return total_data_rate;
4323 }
4324
4325 static int
4326 skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state,
4327                       struct skl_ddb_allocation *ddb /* out */)
4328 {
4329         struct drm_atomic_state *state = crtc_state->base.state;
4330         struct drm_crtc *crtc = crtc_state->base.crtc;
4331         struct drm_i915_private *dev_priv = to_i915(crtc->dev);
4332         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4333         struct skl_ddb_entry *alloc = &crtc_state->wm.skl.ddb;
4334         u16 alloc_size, start = 0;
4335         u16 total[I915_MAX_PLANES] = {};
4336         u16 uv_total[I915_MAX_PLANES] = {};
4337         u64 total_data_rate;
4338         enum plane_id plane_id;
4339         int num_active;
4340         u64 plane_data_rate[I915_MAX_PLANES] = {};
4341         u64 uv_plane_data_rate[I915_MAX_PLANES] = {};
4342         u32 blocks;
4343         int level;
4344
4345         /* Clear the partitioning for disabled planes. */
4346         memset(crtc_state->wm.skl.plane_ddb_y, 0, sizeof(crtc_state->wm.skl.plane_ddb_y));
4347         memset(crtc_state->wm.skl.plane_ddb_uv, 0, sizeof(crtc_state->wm.skl.plane_ddb_uv));
4348
4349         if (WARN_ON(!state))
4350                 return 0;
4351
4352         if (!crtc_state->base.active) {
4353                 alloc->start = alloc->end = 0;
4354                 return 0;
4355         }
4356
4357         if (INTEL_GEN(dev_priv) >= 11)
4358                 total_data_rate =
4359                         icl_get_total_relative_data_rate(crtc_state,
4360                                                          plane_data_rate);
4361         else
4362                 total_data_rate =
4363                         skl_get_total_relative_data_rate(crtc_state,
4364                                                          plane_data_rate,
4365                                                          uv_plane_data_rate);
4366
4368         skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, total_data_rate,
4369                                            ddb, alloc, &num_active);
4370         alloc_size = skl_ddb_entry_size(alloc);
4371         if (alloc_size == 0)
4372                 return 0;
4373
4374         /* Allocate fixed number of blocks for cursor. */
4375         total[PLANE_CURSOR] = skl_cursor_allocation(crtc_state, num_active);
4376         alloc_size -= total[PLANE_CURSOR];
4377         crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].start =
4378                 alloc->end - total[PLANE_CURSOR];
4379         crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
4380
4381         if (total_data_rate == 0)
4382                 return 0;
4383
4384         /*
4385          * Find the highest watermark level for which we can satisfy the block
4386          * requirement of active planes.
4387          */
4388         for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) {
4389                 blocks = 0;
4390                 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4391                         const struct skl_plane_wm *wm =
4392                                 &crtc_state->wm.skl.optimal.planes[plane_id];
4393
4394                         if (plane_id == PLANE_CURSOR) {
4395                                 if (WARN_ON(wm->wm[level].min_ddb_alloc >
4396                                             total[PLANE_CURSOR])) {
4397                                         blocks = U32_MAX;
4398                                         break;
4399                                 }
4400                                 continue;
4401                         }
4402
4403                         blocks += wm->wm[level].min_ddb_alloc;
4404                         blocks += wm->uv_wm[level].min_ddb_alloc;
4405                 }
4406
4407                 if (blocks <= alloc_size) {
4408                         alloc_size -= blocks;
4409                         break;
4410                 }
4411         }
4412
4413         if (level < 0) {
4414                 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations\n");
4415                 DRM_DEBUG_KMS("minimum required %d/%d\n", blocks,
4416                               alloc_size);
4417                 return -EINVAL;
4418         }
4419
4420         /*
4421          * Grant each plane the blocks it requires at the highest achievable
4422          * watermark level, plus an extra share of the leftover blocks
4423          * proportional to its relative data rate.
4424          */
4425         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4426                 const struct skl_plane_wm *wm =
4427                         &crtc_state->wm.skl.optimal.planes[plane_id];
4428                 u64 rate;
4429                 u16 extra;
4430
4431                 if (plane_id == PLANE_CURSOR)
4432                         continue;
4433
4434                 /*
4435                  * We've accounted for all active planes; remaining planes are
4436                  * all disabled.
4437                  */
4438                 if (total_data_rate == 0)
4439                         break;
4440
4441                 rate = plane_data_rate[plane_id];
4442                 extra = min_t(u16, alloc_size,
4443                               DIV64_U64_ROUND_UP(alloc_size * rate,
4444                                                  total_data_rate));
4445                 total[plane_id] = wm->wm[level].min_ddb_alloc + extra;
4446                 alloc_size -= extra;
4447                 total_data_rate -= rate;
4448
4449                 if (total_data_rate == 0)
4450                         break;
4451
4452                 rate = uv_plane_data_rate[plane_id];
4453                 extra = min_t(u16, alloc_size,
4454                               DIV64_U64_ROUND_UP(alloc_size * rate,
4455                                                  total_data_rate));
4456                 uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;
4457                 alloc_size -= extra;
4458                 total_data_rate -= rate;
4459         }
4460         WARN_ON(alloc_size != 0 || total_data_rate != 0);
4461
4462         /* Set the actual DDB start/end points for each plane */
4463         start = alloc->start;
4464         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4465                 struct skl_ddb_entry *plane_alloc =
4466                         &crtc_state->wm.skl.plane_ddb_y[plane_id];
4467                 struct skl_ddb_entry *uv_plane_alloc =
4468                         &crtc_state->wm.skl.plane_ddb_uv[plane_id];
4469
4470                 if (plane_id == PLANE_CURSOR)
4471                         continue;
4472
4473                 /* Gen11+ uses a separate plane for UV watermarks */
4474                 WARN_ON(INTEL_GEN(dev_priv) >= 11 && uv_total[plane_id]);
4475
4476                 /* Leave disabled planes at (0,0) */
4477                 if (total[plane_id]) {
4478                         plane_alloc->start = start;
4479                         start += total[plane_id];
4480                         plane_alloc->end = start;
4481                 }
4482
4483                 if (uv_total[plane_id]) {
4484                         uv_plane_alloc->start = start;
4485                         start += uv_total[plane_id];
4486                         uv_plane_alloc->end = start;
4487                 }
4488         }
4489
4490         /*
4491          * When we calculated watermark values we didn't know how high
4492          * of a level we'd actually be able to hit, so we just marked
4493          * all levels as "enabled."  Go back now and disable the ones
4494          * that aren't actually possible.
4495          */
4496         for (level++; level <= ilk_wm_max_level(dev_priv); level++) {
4497                 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4498                         struct skl_plane_wm *wm =
4499                                 &crtc_state->wm.skl.optimal.planes[plane_id];
4500
4501                         /*
4502                          * We only disable the watermarks for each plane if
4503                          * they exceed the ddb allocation of said plane. This
4504                          * is done so that we don't end up touching cursor
4505                          * watermarks needlessly when some other plane reduces
4506                          * our max possible watermark level.
4507                          *
4508                          * Bspec has this to say about the PLANE_WM enable bit:
4509                          * "All the watermarks at this level for all enabled
4510                          *  planes must be enabled before the level will be used."
4511                          * So this is actually safe to do.
4512                          */
4513                         if (wm->wm[level].min_ddb_alloc > total[plane_id] ||
4514                             wm->uv_wm[level].min_ddb_alloc > uv_total[plane_id])
4515                                 memset(&wm->wm[level], 0, sizeof(wm->wm[level]));
4516
4517                         /*
4518                          * Wa_1408961008:icl,ehl
4519                          * Underruns with WM1+ disabled
4520                          */
4521                         if (IS_GEN(dev_priv, 11) &&
4522                             level == 1 && wm->wm[0].plane_en) {
4523                                 wm->wm[level].plane_res_b = wm->wm[0].plane_res_b;
4524                                 wm->wm[level].plane_res_l = wm->wm[0].plane_res_l;
4525                                 wm->wm[level].ignore_lines = wm->wm[0].ignore_lines;
4526                         }
4527                 }
4528         }
4529
4530         /*
4531          * Go back and disable the transition watermark if it turns out we
4532          * don't have enough DDB blocks for it.
4533          */
4534         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4535                 struct skl_plane_wm *wm =
4536                         &crtc_state->wm.skl.optimal.planes[plane_id];
4537
4538                 if (wm->trans_wm.plane_res_b >= total[plane_id])
4539                         memset(&wm->trans_wm, 0, sizeof(wm->trans_wm));
4540         }
4541
4542         return 0;
4543 }
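
/*
 * A minimal sketch of the proportional leftover split performed above,
 * with assumed numbers: 100 spare blocks shared by two planes whose
 * data rates are 300 and 100. Rounding up and then recomputing against
 * the shrinking remainder is what lets the WARN_ON() above demand that
 * both counters hit exactly zero. Illustrative helper, not part of i915.
 */
static inline void skl_ddb_split_sketch(void)
{
        u16 alloc_size = 100;
        u64 total_data_rate = 400;
        u16 extra;

        extra = min_t(u16, alloc_size,
                      DIV64_U64_ROUND_UP(alloc_size * 300ULL, total_data_rate));
        alloc_size -= extra;            /* extra == 75, 25 blocks left */
        total_data_rate -= 300;

        extra = min_t(u16, alloc_size,
                      DIV64_U64_ROUND_UP(alloc_size * 100ULL, total_data_rate));
        alloc_size -= extra;            /* extra == 25, nothing left */
        total_data_rate -= 100;

        (void)alloc_size;
        (void)total_data_rate;
}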
4544
4545 /*
4546  * The max latency should be 257 (the max the punit can code is 255 and we
4547  * add 2us for the read latency) and cpp should always be <= 8, which allows
4548  * a pixel_rate of up to ~2 GHz. That seems sufficient, since the max
4549  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4550  */
4551 static uint_fixed_16_16_t
4552 skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate,
4553                u8 cpp, u32 latency, u32 dbuf_block_size)
4554 {
4555         u32 wm_intermediate_val;
4556         uint_fixed_16_16_t ret;
4557
4558         if (latency == 0)
4559                 return FP_16_16_MAX;
4560
4561         wm_intermediate_val = latency * pixel_rate * cpp;
4562         ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
4563
4564         if (INTEL_GEN(dev_priv) >= 10)
4565                 ret = add_fixed16_u32(ret, 1);
4566
4567         return ret;
4568 }
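
/*
 * A worked method1 example with assumed numbers: 10us latency, a
 * 300000 kHz plane pixel rate, cpp of 4 and 512-byte blocks gives
 * 10 * 300000 * 4 / (1000 * 512) = 23.4375 blocks in 16.16 form.
 * Illustrative helper, not part of i915.
 */
static inline uint_fixed_16_16_t skl_wm_method1_sketch(void)
{
        u32 wm_intermediate_val = 10 * 300000 * 4;

        return div_fixed16(wm_intermediate_val, 1000 * 512);
}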
4569
4570 static uint_fixed_16_16_t
4571 skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency,
4572                uint_fixed_16_16_t plane_blocks_per_line)
4573 {
4574         u32 wm_intermediate_val;
4575         uint_fixed_16_16_t ret;
4576
4577         if (latency == 0)
4578                 return FP_16_16_MAX;
4579
4580         wm_intermediate_val = latency * pixel_rate;
4581         wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4582                                            pipe_htotal * 1000);
4583         ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
4584         return ret;
4585 }
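
/*
 * A worked method2 example with assumed numbers: 10us latency at a
 * 300000 kHz pixel rate and an htotal of 2200 covers
 * DIV_ROUND_UP(3000000, 2200000) = 2 lines; at an assumed 15.5 blocks
 * per line that costs 2 * 15.5 = 31 blocks. Illustrative helper, not
 * part of i915.
 */
static inline uint_fixed_16_16_t skl_wm_method2_sketch(void)
{
        u32 lines = DIV_ROUND_UP(10 * 300000, 2200 * 1000); /* 2 */

        return mul_u32_fixed16(lines, div_fixed16(31, 2)); /* 31.0 */
}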
4586
4587 static uint_fixed_16_16_t
4588 intel_get_linetime_us(const struct intel_crtc_state *crtc_state)
4589 {
4590         u32 pixel_rate;
4591         u32 crtc_htotal;
4592         uint_fixed_16_16_t linetime_us;
4593
4594         if (!crtc_state->base.active)
4595                 return u32_to_fixed16(0);
4596
4597         pixel_rate = crtc_state->pixel_rate;
4598
4599         if (WARN_ON(pixel_rate == 0))
4600                 return u32_to_fixed16(0);
4601
4602         crtc_htotal = crtc_state->base.adjusted_mode.crtc_htotal;
4603         linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
4604
4605         return linetime_us;
4606 }
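
/*
 * A worked linetime example with assumed 1080p60-like timings: an
 * htotal of 2200 at a 148500 kHz pixel rate takes
 * 2200 * 1000 / 148500 ~= 14.81us per line, in 16.16 form.
 * Illustrative helper, not part of i915.
 */
static inline uint_fixed_16_16_t skl_linetime_sketch(void)
{
        return div_fixed16(2200 * 1000, 148500);
}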
4607
4608 static u32
4609 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *crtc_state,
4610                               const struct intel_plane_state *plane_state)
4611 {
4612         u64 adjusted_pixel_rate;
4613         uint_fixed_16_16_t downscale_amount;
4614
4615         /* Shouldn't reach here on disabled planes... */
4616         if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)))
4617                 return 0;
4618
4619         /*
4620          * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4621          * with additional adjustments for plane-specific scaling.
4622          */
4623         adjusted_pixel_rate = crtc_state->pixel_rate;
4624         downscale_amount = skl_plane_downscale_amount(crtc_state, plane_state);
4625
4626         return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4627                                             downscale_amount);
4628 }
4629
4630 static int
4631 skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
4632                       int width, const struct drm_format_info *format,
4633                       u64 modifier, unsigned int rotation,
4634                       u32 plane_pixel_rate, struct skl_wm_params *wp,
4635                       int color_plane)
4636 {
4637         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
4638         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
4639         u32 interm_pbpl;
4640
4641         /* only planar formats have two planes */
4642         if (color_plane == 1 && !is_planar_yuv_format(format->format)) {
4643                 DRM_DEBUG_KMS("Non-planar formats have a single plane\n");
4644                 return -EINVAL;
4645         }
4646
4647         wp->y_tiled = modifier == I915_FORMAT_MOD_Y_TILED ||
4648                       modifier == I915_FORMAT_MOD_Yf_TILED ||
4649                       modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4650                       modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4651         wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
4652         wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4653                          modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4654         wp->is_planar = is_planar_yuv_format(format->format);
4655
4656         wp->width = width;
4657         if (color_plane == 1 && wp->is_planar)
4658                 wp->width /= 2;
4659
4660         wp->cpp = format->cpp[color_plane];
4661         wp->plane_pixel_rate = plane_pixel_rate;
4662
4663         if (INTEL_GEN(dev_priv) >= 11 &&
4664             modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1)
4665                 wp->dbuf_block_size = 256;
4666         else
4667                 wp->dbuf_block_size = 512;
4668
4669         if (drm_rotation_90_or_270(rotation)) {
4670                 switch (wp->cpp) {
4671                 case 1:
4672                         wp->y_min_scanlines = 16;
4673                         break;
4674                 case 2:
4675                         wp->y_min_scanlines = 8;
4676                         break;
4677                 case 4:
4678                         wp->y_min_scanlines = 4;
4679                         break;
4680                 default:
4681                         MISSING_CASE(wp->cpp);
4682                         return -EINVAL;
4683                 }
4684         } else {
4685                 wp->y_min_scanlines = 4;
4686         }
4687
4688         if (skl_needs_memory_bw_wa(dev_priv))
4689                 wp->y_min_scanlines *= 2;
4690
4691         wp->plane_bytes_per_line = wp->width * wp->cpp;
4692         if (wp->y_tiled) {
4693                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4694                                            wp->y_min_scanlines,
4695                                            wp->dbuf_block_size);
4696
4697                 if (INTEL_GEN(dev_priv) >= 10)
4698                         interm_pbpl++;
4699
4700                 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4701                                                         wp->y_min_scanlines);
4702         } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) {
4703                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4704                                            wp->dbuf_block_size);
4705                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4706         } else {
4707                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4708                                            wp->dbuf_block_size) + 1;
4709                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4710         }
4711
4712         wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4713                                              wp->plane_blocks_per_line);
4714
4715         wp->linetime_us = fixed16_to_u32_round_up(
4716                                         intel_get_linetime_us(crtc_state));
4717
4718         return 0;
4719 }
4720
4721 static int
4722 skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state,
4723                             const struct intel_plane_state *plane_state,
4724                             struct skl_wm_params *wp, int color_plane)
4725 {
4726         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
4727         const struct drm_framebuffer *fb = plane_state->base.fb;
4728         int width;
4729
4730         if (plane->id == PLANE_CURSOR) {
4731                 width = plane_state->base.crtc_w;
4732         } else {
4733                 /*
4734                  * Src coordinates are already rotated by 270 degrees for
4735                  * the 90/270 degree plane rotation cases (to match the
4736                  * GTT mapping), hence no need to account for rotation here.
4737                  */
4738                 width = drm_rect_width(&plane_state->base.src) >> 16;
4739         }
4740
4741         return skl_compute_wm_params(crtc_state, width,
4742                                      fb->format, fb->modifier,
4743                                      plane_state->base.rotation,
4744                                      skl_adjusted_plane_pixel_rate(crtc_state, plane_state),
4745                                      wp, color_plane);
4746 }
4747
4748 static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
4749 {
4750         if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
4751                 return true;
4752
4753         /* The number of lines is ignored for the level 0 watermark. */
4754         return level > 0;
4755 }
4756
4757 static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
4758                                  int level,
4759                                  const struct skl_wm_params *wp,
4760                                  const struct skl_wm_level *result_prev,
4761                                  struct skl_wm_level *result /* out */)
4762 {
4763         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
4764         u32 latency = dev_priv->wm.skl_latency[level];
4765         uint_fixed_16_16_t method1, method2;
4766         uint_fixed_16_16_t selected_result;
4767         u32 res_blocks, res_lines, min_ddb_alloc = 0;
4768
4769         if (latency == 0) {
4770                 /* reject it */
4771                 result->min_ddb_alloc = U16_MAX;
4772                 return;
4773         }
4774
4775         /*
4776          * WaIncreaseLatencyIPCEnabled: kbl,cfl
4777          * Display WA #1141: kbl,cfl
4778          */
4779         if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
4780             dev_priv->ipc_enabled)
4781                 latency += 4;
4782
4783         if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled)
4784                 latency += 15;
4785
4786         method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4787                                  wp->cpp, latency, wp->dbuf_block_size);
4788         method2 = skl_wm_method2(wp->plane_pixel_rate,
4789                                  crtc_state->base.adjusted_mode.crtc_htotal,
4790                                  latency,
4791                                  wp->plane_blocks_per_line);
4792
4793         if (wp->y_tiled) {
4794                 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4795         } else {
4796                 if ((wp->cpp * crtc_state->base.adjusted_mode.crtc_htotal /
4797                      wp->dbuf_block_size < 1) &&
4798                      (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) {
4799                         selected_result = method2;
4800                 } else if (latency >= wp->linetime_us) {
4801                         if (IS_GEN(dev_priv, 9) &&
4802                             !IS_GEMINILAKE(dev_priv))
4803                                 selected_result = min_fixed16(method1, method2);
4804                         else
4805                                 selected_result = method2;
4806                 } else {
4807                         selected_result = method1;
4808                 }
4809         }
4810
4811         res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4812         res_lines = div_round_up_fixed16(selected_result,
4813                                          wp->plane_blocks_per_line);
4814
4815         if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) {
4816                 /* Display WA #1125: skl,bxt,kbl */
4817                 if (level == 0 && wp->rc_surface)
4818                         res_blocks +=
4819                                 fixed16_to_u32_round_up(wp->y_tile_minimum);
4820
4821                 /* Display WA #1126: skl,bxt,kbl */
4822                 if (level >= 1 && level <= 7) {
4823                         if (wp->y_tiled) {
4824                                 res_blocks +=
4825                                     fixed16_to_u32_round_up(wp->y_tile_minimum);
4826                                 res_lines += wp->y_min_scanlines;
4827                         } else {
4828                                 res_blocks++;
4829                         }
4830
4831                         /*
4832                          * Make sure result blocks for higher latency levels are
4833                          * at least as high as the level below the current one.
4834                          * The DDB algorithm optimization for special cases
4835                          * assumes this. It also covers Display WA #1125 for RC.
4836                          */
4837                         if (result_prev->plane_res_b > res_blocks)
4838                                 res_blocks = result_prev->plane_res_b;
4839                 }
4840         }
4841
4842         if (INTEL_GEN(dev_priv) >= 11) {
4843                 if (wp->y_tiled) {
4844                         int extra_lines;
4845
4846                         if (res_lines % wp->y_min_scanlines == 0)
4847                                 extra_lines = wp->y_min_scanlines;
4848                         else
4849                                 extra_lines = wp->y_min_scanlines * 2 -
4850                                         res_lines % wp->y_min_scanlines;
4851
4852                         min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines,
4853                                                                  wp->plane_blocks_per_line);
4854                 } else {
4855                         min_ddb_alloc = res_blocks +
4856                                 DIV_ROUND_UP(res_blocks, 10);
4857                 }
4858         }
4859
4860         if (!skl_wm_has_lines(dev_priv, level))
4861                 res_lines = 0;
4862
4863         if (res_lines > 31) {
4864                 /* reject it */
4865                 result->min_ddb_alloc = U16_MAX;
4866                 return;
4867         }
4868
4869         /*
4870          * If res_lines is valid, assume we can use this watermark level
4871          * for now.  We'll come back and disable it after we calculate the
4872          * DDB allocation if it turns out we don't actually have enough
4873          * blocks to satisfy it.
4874          */
4875         result->plane_res_b = res_blocks;
4876         result->plane_res_l = res_lines;
4877         /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */
4878         result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1;
4879         result->plane_en = true;
4880 }
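
/*
 * A worked example of the gen11+ min_ddb_alloc padding above for the
 * non-y-tiled case, with an assumed 40 result blocks: the minimum DDB
 * allocation becomes 40 + DIV_ROUND_UP(40, 10) = 44 blocks (before the
 * final +1 applied when the level is accepted). Illustrative helper,
 * not part of i915.
 */
static inline u32 skl_min_ddb_alloc_sketch(void)
{
        u32 res_blocks = 40;

        return res_blocks + DIV_ROUND_UP(res_blocks, 10); /* 44 */
}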
4881
4882 static void
4883 skl_compute_wm_levels(const struct intel_crtc_state *crtc_state,
4884                       const struct skl_wm_params *wm_params,
4885                       struct skl_wm_level *levels)
4886 {
4887         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
4888         int level, max_level = ilk_wm_max_level(dev_priv);
4889         struct skl_wm_level *result_prev = &levels[0];
4890
4891         for (level = 0; level <= max_level; level++) {
4892                 struct skl_wm_level *result = &levels[level];
4893
4894                 skl_compute_plane_wm(crtc_state, level, wm_params,
4895                                      result_prev, result);
4896
4897                 result_prev = result;
4898         }
4899 }
4900
4901 static u32
4902 skl_compute_linetime_wm(const struct intel_crtc_state *crtc_state)
4903 {
4904         struct drm_atomic_state *state = crtc_state->base.state;
4905         struct drm_i915_private *dev_priv = to_i915(state->dev);
4906         uint_fixed_16_16_t linetime_us;
4907         u32 linetime_wm;
4908
4909         linetime_us = intel_get_linetime_us(crtc_state);
4910         linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
4911
4912         /* Display WA #1135: BXT:ALL GLK:ALL */
4913         if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled)
4914                 linetime_wm /= 2;
4915
4916         return linetime_wm;
4917 }
4918
4919 static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state,
4920                                       const struct skl_wm_params *wp,
4921                                       struct skl_plane_wm *wm)
4922 {
4923         struct drm_device *dev = crtc_state->base.crtc->dev;
4924         const struct drm_i915_private *dev_priv = to_i915(dev);
4925         u16 trans_min, trans_y_tile_min;
4926         const u16 trans_amount = 10; /* This is a configurable amount */
4927         u16 wm0_sel_res_b, trans_offset_b, res_blocks;
4928
4929         /* Transition WMs are not recommended by the HW team for GEN9 */
4930         if (INTEL_GEN(dev_priv) <= 9)
4931                 return;
4932
4933         /* Transition WMs don't make any sense if IPC is disabled */
4934         if (!dev_priv->ipc_enabled)
4935                 return;
4936
4937         trans_min = 14;
4938         if (INTEL_GEN(dev_priv) >= 11)
4939                 trans_min = 4;
4940
4941         trans_offset_b = trans_min + trans_amount;
4942
4943         /*
4944          * The spec asks for Selected Result Blocks for wm0 (the real value),
4945          * not Result Blocks (the integer value). Pay attention to the capital
4946          * letters. The value wm_l0->plane_res_b is actually Result Blocks, but
4947          * since Result Blocks is the ceiling of Selected Result Blocks plus 1,
4948  * and since we will later have to take the ceiling of the sum in the
4949          * transition watermarks calculation, we can just pretend Selected
4950          * Result Blocks is Result Blocks minus 1 and it should work for the
4951          * current platforms.
4952          */
4953         wm0_sel_res_b = wm->wm[0].plane_res_b - 1;
4954
4955         if (wp->y_tiled) {
4956                 trans_y_tile_min =
4957                         (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum);
4958                 res_blocks = max(wm0_sel_res_b, trans_y_tile_min) +
4959                                 trans_offset_b;
4960         } else {
4961                 res_blocks = wm0_sel_res_b + trans_offset_b;
4962
4963                 /* WA BUG:1938466 add one block for non y-tile planes */
4964                 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4965                         res_blocks += 1;
4967         }
4968
4969         /*
4970          * Just assume we can enable the transition watermark.  After
4971          * computing the DDB we'll come back and disable it if that
4972          * assumption turns out to be false.
4973          */
4974         wm->trans_wm.plane_res_b = res_blocks + 1;
4975         wm->trans_wm.plane_en = true;
4976 }
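
/*
 * A worked transition watermark example with assumed gen11 numbers:
 * trans_min = 4 plus trans_amount = 10 gives a 14 block offset; with
 * wm0 at 24 result blocks (so 23 selected result blocks) on a linear
 * surface, 23 + 14 + 1 = 38 blocks are programmed. Illustrative
 * helper, not part of i915.
 */
static inline u16 skl_trans_wm_sketch(void)
{
        u16 wm0_sel_res_b = 24 - 1;
        u16 res_blocks = wm0_sel_res_b + 4 + 10;

        return res_blocks + 1; /* 38 */
}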
4977
4978 static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
4979                                      const struct intel_plane_state *plane_state,
4980                                      enum plane_id plane_id, int color_plane)
4981 {
4982         struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
4983         struct skl_wm_params wm_params;
4984         int ret;
4985
4986         ret = skl_compute_plane_wm_params(crtc_state, plane_state,
4987                                           &wm_params, color_plane);
4988         if (ret)
4989                 return ret;
4990
4991         skl_compute_wm_levels(crtc_state, &wm_params, wm->wm);
4992         skl_compute_transition_wm(crtc_state, &wm_params, wm);
4993
4994         return 0;
4995 }
4996
4997 static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
4998                                  const struct intel_plane_state *plane_state,
4999                                  enum plane_id plane_id)
5000 {
5001         struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
5002         struct skl_wm_params wm_params;
5003         int ret;
5004
5005         wm->is_planar = true;
5006
5007         /* uv plane watermarks must also be validated for NV12/Planar */
5008         ret = skl_compute_plane_wm_params(crtc_state, plane_state,
5009                                           &wm_params, 1);
5010         if (ret)
5011                 return ret;
5012
5013         skl_compute_wm_levels(crtc_state, &wm_params, wm->uv_wm);
5014
5015         return 0;
5016 }
5017
5018 static int skl_build_plane_wm(struct intel_crtc_state *crtc_state,
5019                               const struct intel_plane_state *plane_state)
5020 {
5021         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
5022         const struct drm_framebuffer *fb = plane_state->base.fb;
5023         enum plane_id plane_id = plane->id;
5024         int ret;
5025
5026         if (!intel_wm_plane_visible(crtc_state, plane_state))
5027                 return 0;
5028
5029         ret = skl_build_plane_wm_single(crtc_state, plane_state,
5030                                         plane_id, 0);
5031         if (ret)
5032                 return ret;
5033
5034         if (fb->format->is_yuv && fb->format->num_planes > 1) {
5035                 ret = skl_build_plane_wm_uv(crtc_state, plane_state,
5036                                             plane_id);
5037                 if (ret)
5038                         return ret;
5039         }
5040
5041         return 0;
5042 }
5043
5044 static int icl_build_plane_wm(struct intel_crtc_state *crtc_state,
5045                               const struct intel_plane_state *plane_state)
5046 {
5047         enum plane_id plane_id = to_intel_plane(plane_state->base.plane)->id;
5048         int ret;
5049
5050         /* Watermarks calculated in master */
5051         if (plane_state->slave)
5052                 return 0;
5053
5054         if (plane_state->linked_plane) {
5055                 const struct drm_framebuffer *fb = plane_state->base.fb;
5056                 enum plane_id y_plane_id = plane_state->linked_plane->id;
5057
5058                 WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state));
5059                 WARN_ON(!fb->format->is_yuv ||
5060                         fb->format->num_planes == 1);
5061
5062                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5063                                                 y_plane_id, 0);
5064                 if (ret)
5065                         return ret;
5066
5067                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5068                                                 plane_id, 1);
5069                 if (ret)
5070                         return ret;
5071         } else if (intel_wm_plane_visible(crtc_state, plane_state)) {
5072                 ret = skl_build_plane_wm_single(crtc_state, plane_state,
5073                                                 plane_id, 0);
5074                 if (ret)
5075                         return ret;
5076         }
5077
5078         return 0;
5079 }
5080
5081 static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state)
5082 {
5083         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
5084         struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
5085         struct drm_plane *plane;
5086         const struct drm_plane_state *drm_plane_state;
5087         int ret;
5088
5089         /*
5090          * We'll only calculate watermarks for planes that are actually
5091          * enabled, so make sure all other planes are set as disabled.
5092          */
5093         memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
5094
5095         drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state,
5096                                                    &crtc_state->base) {
5097                 const struct intel_plane_state *plane_state =
5098                         to_intel_plane_state(drm_plane_state);
5099
5100                 if (INTEL_GEN(dev_priv) >= 11)
5101                         ret = icl_build_plane_wm(crtc_state, plane_state);
5102                 else
5103                         ret = skl_build_plane_wm(crtc_state, plane_state);
5104                 if (ret)
5105                         return ret;
5106         }
5107
5108         pipe_wm->linetime = skl_compute_linetime_wm(crtc_state);
5109
5110         return 0;
5111 }
5112
5113 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
5114                                 i915_reg_t reg,
5115                                 const struct skl_ddb_entry *entry)
5116 {
5117         if (entry->end)
5118                 I915_WRITE_FW(reg, (entry->end - 1) << 16 | entry->start);
5119         else
5120                 I915_WRITE_FW(reg, 0);
5121 }
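
/*
 * The write above is the inverse of skl_ddb_entry_init_from_hw(): the
 * inclusive end goes into the high half, the start into the low half,
 * and an empty entry is written as all zeroes. A sketch with an
 * assumed [160, 320) software entry; illustrative helper, not part
 * of i915.
 */
static inline u32 skl_ddb_entry_encode_sketch(void)
{
        u16 start = 160, end = 320;     /* exclusive end, as tracked in SW */

        return (u32)(end - 1) << 16 | start; /* 0x013f00a0 */
}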
5122
5123 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
5124                                i915_reg_t reg,
5125                                const struct skl_wm_level *level)
5126 {
5127         u32 val = 0;
5128
5129         if (level->plane_en)
5130                 val |= PLANE_WM_EN;
5131         if (level->ignore_lines)
5132                 val |= PLANE_WM_IGNORE_LINES;
5133         val |= level->plane_res_b;
5134         val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
5135
5136         I915_WRITE_FW(reg, val);
5137 }
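
/*
 * A sketch of the PLANE_WM encoding written above, with assumed
 * values: an enabled level of 38 blocks and 2 lines, the block count
 * in the low bits and the line count above PLANE_WM_LINES_SHIFT.
 * Illustrative helper, not part of i915.
 */
static inline u32 skl_wm_level_encode_sketch(void)
{
        return PLANE_WM_EN | (2 << PLANE_WM_LINES_SHIFT) | 38;
}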
5138
5139 void skl_write_plane_wm(struct intel_plane *plane,
5140                         const struct intel_crtc_state *crtc_state)
5141 {
5142         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5143         int level, max_level = ilk_wm_max_level(dev_priv);
5144         enum plane_id plane_id = plane->id;
5145         enum pipe pipe = plane->pipe;
5146         const struct skl_plane_wm *wm =
5147                 &crtc_state->wm.skl.optimal.planes[plane_id];
5148         const struct skl_ddb_entry *ddb_y =
5149                 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5150         const struct skl_ddb_entry *ddb_uv =
5151                 &crtc_state->wm.skl.plane_ddb_uv[plane_id];
5152
5153         for (level = 0; level <= max_level; level++) {
5154                 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
5155                                    &wm->wm[level]);
5156         }
5157         skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
5158                            &wm->trans_wm);
5159
5160         if (INTEL_GEN(dev_priv) >= 11) {
5161                 skl_ddb_entry_write(dev_priv,
5162                                     PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5163                 return;
5164         }
5165
5166         if (wm->is_planar)
5167                 swap(ddb_y, ddb_uv);
5168
5169         skl_ddb_entry_write(dev_priv,
5170                             PLANE_BUF_CFG(pipe, plane_id), ddb_y);
5171         skl_ddb_entry_write(dev_priv,
5172                             PLANE_NV12_BUF_CFG(pipe, plane_id), ddb_uv);
5173 }
5174
5175 void skl_write_cursor_wm(struct intel_plane *plane,
5176                          const struct intel_crtc_state *crtc_state)
5177 {
5178         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
5179         int level, max_level = ilk_wm_max_level(dev_priv);
5180         enum plane_id plane_id = plane->id;
5181         enum pipe pipe = plane->pipe;
5182         const struct skl_plane_wm *wm =
5183                 &crtc_state->wm.skl.optimal.planes[plane_id];
5184         const struct skl_ddb_entry *ddb =
5185                 &crtc_state->wm.skl.plane_ddb_y[plane_id];
5186
5187         for (level = 0; level <= max_level; level++) {
5188                 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5189                                    &wm->wm[level]);
5190         }
5191         skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5192
5193         skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), ddb);
5194 }
5195
5196 bool skl_wm_level_equals(const struct skl_wm_level *l1,
5197                          const struct skl_wm_level *l2)
5198 {
5199         return l1->plane_en == l2->plane_en &&
5200                 l1->ignore_lines == l2->ignore_lines &&
5201                 l1->plane_res_l == l2->plane_res_l &&
5202                 l1->plane_res_b == l2->plane_res_b;
5203 }
5204
5205 static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
5206                                 const struct skl_plane_wm *wm1,
5207                                 const struct skl_plane_wm *wm2)
5208 {
5209         int level, max_level = ilk_wm_max_level(dev_priv);
5210
5211         for (level = 0; level <= max_level; level++) {
5212                 if (!skl_wm_level_equals(&wm1->wm[level], &wm2->wm[level]) ||
5213                     !skl_wm_level_equals(&wm1->uv_wm[level], &wm2->uv_wm[level]))
5214                         return false;
5215         }
5216
5217         return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
5218 }
5219
5220 static bool skl_pipe_wm_equals(struct intel_crtc *crtc,
5221                                const struct skl_pipe_wm *wm1,
5222                                const struct skl_pipe_wm *wm2)
5223 {
5224         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5225         enum plane_id plane_id;
5226
5227         for_each_plane_id_on_crtc(crtc, plane_id) {
5228                 if (!skl_plane_wm_equals(dev_priv,
5229                                          &wm1->planes[plane_id],
5230                                          &wm2->planes[plane_id]))
5231                         return false;
5232         }
5233
5234         return wm1->linetime == wm2->linetime;
5235 }
5236
5237 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5238                                            const struct skl_ddb_entry *b)
5239 {
5240         return a->start < b->end && b->start < a->end;
5241 }
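
/*
 * The entries are half-open intervals, so [a.start, a.end) and
 * [b.start, b.end) overlap iff each starts before the other ends:
 * [0, 160) and [160, 320) merely touch and do not overlap, while the
 * assumed entries below do. Illustrative helper, not part of i915.
 */
static inline bool skl_ddb_overlap_sketch(void)
{
        const struct skl_ddb_entry a = { .start = 0, .end = 200 };
        const struct skl_ddb_entry b = { .start = 160, .end = 320 };

        return skl_ddb_entries_overlap(&a, &b); /* true */
}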
5242
5243 bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
5244                                  const struct skl_ddb_entry *entries,
5245                                  int num_entries, int ignore_idx)
5246 {
5247         int i;
5248
5249         for (i = 0; i < num_entries; i++) {
5250                 if (i != ignore_idx &&
5251                     skl_ddb_entries_overlap(ddb, &entries[i]))
5252                         return true;
5253         }
5254
5255         return false;
5256 }
5257
5258 static u32
5259 pipes_modified(struct intel_atomic_state *state)
5260 {
5261         struct intel_crtc *crtc;
5262         struct intel_crtc_state *crtc_state;
5263         u32 i, ret = 0;
5264
5265         for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
5266                 ret |= drm_crtc_mask(&crtc->base);
5267
5268         return ret;
5269 }
5270
5271 static int
5272 skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
5273                             struct intel_crtc_state *new_crtc_state)
5274 {
5275         struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->base.state);
5276         struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
5277         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5278         struct intel_plane *plane;
5279
5280         for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5281                 struct intel_plane_state *plane_state;
5282                 enum plane_id plane_id = plane->id;
5283
5284                 if (skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_y[plane_id],
5285                                         &new_crtc_state->wm.skl.plane_ddb_y[plane_id]) &&
5286                     skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_uv[plane_id],
5287                                         &new_crtc_state->wm.skl.plane_ddb_uv[plane_id]))
5288                         continue;
5289
5290                 plane_state = intel_atomic_get_plane_state(state, plane);
5291                 if (IS_ERR(plane_state))
5292                         return PTR_ERR(plane_state);
5293
5294                 new_crtc_state->update_planes |= BIT(plane_id);
5295         }
5296
5297         return 0;
5298 }
5299
5300 static int
5301 skl_compute_ddb(struct intel_atomic_state *state)
5302 {
5303         const struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5304         struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
5305         struct intel_crtc_state *old_crtc_state;
5306         struct intel_crtc_state *new_crtc_state;
5307         struct intel_crtc *crtc;
5308         int ret, i;
5309
5310         memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5311
5312         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5313                                             new_crtc_state, i) {
5314                 ret = skl_allocate_pipe_ddb(new_crtc_state, ddb);
5315                 if (ret)
5316                         return ret;
5317
5318                 ret = skl_ddb_add_affected_planes(old_crtc_state,
5319                                                   new_crtc_state);
5320                 if (ret)
5321                         return ret;
5322         }
5323
5324         return 0;
5325 }
5326
5327 static char enast(bool enable)
5328 {
5329         return enable ? '*' : ' ';
5330 }
5331
5332 static void
5333 skl_print_wm_changes(struct intel_atomic_state *state)
5334 {
5335         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5336         const struct intel_crtc_state *old_crtc_state;
5337         const struct intel_crtc_state *new_crtc_state;
5338         struct intel_plane *plane;
5339         struct intel_crtc *crtc;
5340         int i;
5341
5342         if ((drm_debug & DRM_UT_KMS) == 0)
5343                 return;
5344
5345         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5346                                             new_crtc_state, i) {
5347                 const struct skl_pipe_wm *old_pipe_wm, *new_pipe_wm;
5348
5349                 old_pipe_wm = &old_crtc_state->wm.skl.optimal;
5350                 new_pipe_wm = &new_crtc_state->wm.skl.optimal;
5351
5352                 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5353                         enum plane_id plane_id = plane->id;
5354                         const struct skl_ddb_entry *old, *new;
5355
5356                         old = &old_crtc_state->wm.skl.plane_ddb_y[plane_id];
5357                         new = &new_crtc_state->wm.skl.plane_ddb_y[plane_id];
5358
5359                         if (skl_ddb_entry_equal(old, new))
5360                                 continue;
5361
5362                         DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n",
5363                                       plane->base.base.id, plane->base.name,
5364                                       old->start, old->end, new->start, new->end,
5365                                       skl_ddb_entry_size(old), skl_ddb_entry_size(new));
5366                 }
5367
5368                 for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5369                         enum plane_id plane_id = plane->id;
5370                         const struct skl_plane_wm *old_wm, *new_wm;
5371
5372                         old_wm = &old_pipe_wm->planes[plane_id];
5373                         new_wm = &new_pipe_wm->planes[plane_id];
5374
5375                         if (skl_plane_wm_equals(dev_priv, old_wm, new_wm))
5376                                 continue;
5377
5378                         DRM_DEBUG_KMS("[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
5379                                       " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
5380                                       plane->base.base.id, plane->base.name,
5381                                       enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
5382                                       enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
5383                                       enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
5384                                       enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
5385                                       enast(old_wm->trans_wm.plane_en),
5386                                       enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
5387                                       enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
5388                                       enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
5389                                       enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
5390                                       enast(new_wm->trans_wm.plane_en));
5391
5392                         DRM_DEBUG_KMS("[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
5393                                       " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
5394                                       plane->base.base.id, plane->base.name,
5395                                       enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
5396                                       enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
5397                                       enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l,
5398                                       enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l,
5399                                       enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l,
5400                                       enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l,
5401                                       enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
5402                                       enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
5403                                       enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
5404
5405                                       enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
5406                                       enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
5407                                       enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l,
5408                                       enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l,
5409                                       enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l,
5410                                       enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
5411                                       enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
5412                                       enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
5413                                       enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
5414
5415                         DRM_DEBUG_KMS("[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5416                                       " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5417                                       plane->base.base.id, plane->base.name,
5418                                       old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
5419                                       old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
5420                                       old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
5421                                       old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
5422                                       old_wm->trans_wm.plane_res_b,
5423                                       new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
5424                                       new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
5425                                       new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
5426                                       new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
5427                                       new_wm->trans_wm.plane_res_b);
5428
5429                         DRM_DEBUG_KMS("[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
5430                                       " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
5431                                       plane->base.base.id, plane->base.name,
5432                                       old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
5433                                       old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
5434                                       old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
5435                                       old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
5436                                       old_wm->trans_wm.min_ddb_alloc,
5437                                       new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
5438                                       new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
5439                                       new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
5440                                       new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
5441                                       new_wm->trans_wm.min_ddb_alloc);
5442                 }
5443         }
5444 }
5445
5446 static int
5447 skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed)
5448 {
5449         struct drm_device *dev = state->base.dev;
5450         const struct drm_i915_private *dev_priv = to_i915(dev);
5451         struct intel_crtc *crtc;
5452         struct intel_crtc_state *crtc_state;
5453         u32 realloc_pipes = pipes_modified(state);
5454         int ret, i;
5455
5456         /*
5457          * When we distrust bios wm we always need to recompute to set the
5458          * expected DDB allocations for each CRTC.
5459          */
5460         if (dev_priv->wm.distrust_bios_wm)
5461                 (*changed) = true;
5462
5463         /*
5464          * If this transaction isn't actually touching any CRTC's, don't
5465          * bother with watermark calculation.  Note that if we pass this
5466          * test, we're guaranteed to hold at least one CRTC state mutex,
5467          * which means we can safely use values like dev_priv->active_crtcs
5468          * since any racing commits that want to update them would need to
5469          * hold _all_ CRTC state mutexes.
5470          */
5471         for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i)
5472                 (*changed) = true;
5473
5474         if (!*changed)
5475                 return 0;
5476
5477         /*
5478          * If this is our first atomic update following hardware readout,
5479          * we can't trust the DDB that the BIOS programmed for us.  Let's
5480          * pretend that all pipes switched active status so that we'll
5481          * ensure a full DDB recompute.
5482          */
5483         if (dev_priv->wm.distrust_bios_wm) {
5484                 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
5485                                        state->base.acquire_ctx);
5486                 if (ret)
5487                         return ret;
5488
5489                 state->active_pipe_changes = ~0;
5490
5491                 /*
5492                  * We usually only initialize state->active_crtcs if we're
5493                  * doing a modeset; make sure this field is always
5494                  * initialized during the sanitization process that happens
5495                  * on the first commit too.
5496                  */
5497                 if (!state->modeset)
5498                         state->active_crtcs = dev_priv->active_crtcs;
5499         }
5500
5501         /*
5502          * If the modeset changes which CRTC's are active, we need to
5503          * recompute the DDB allocation for *all* active pipes, even
5504          * those that weren't otherwise being modified in any way by this
5505          * atomic commit.  Due to the shrinking of the per-pipe allocations
5506          * when new active CRTC's are added, it's possible for a pipe that
5507          * we were already using and aren't changing at all here to suddenly
5508          * become invalid if its DDB needs exceed its new allocation.
5509          *
5510          * Note that if we wind up doing a full DDB recompute, we can't let
5511          * any other display updates race with this transaction, so we need
5512          * to grab the lock on *all* CRTC's.
5513          */
5514         if (state->active_pipe_changes || state->modeset) {
5515                 realloc_pipes = ~0;
5516                 state->wm_results.dirty_pipes = ~0;
5517         }
5518
5519         /*
5520          * We're not recomputing for the pipes not included in the commit, so
5521          * make sure we start with the current state.
5522          */
5523         for_each_intel_crtc_mask(dev, crtc, realloc_pipes) {
5524                 crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
5525                 if (IS_ERR(crtc_state))
5526                         return PTR_ERR(crtc_state);
5527         }
5528
5529         return 0;
5530 }
5531
5532 /*
5533  * To make sure the cursor watermark registers are always consistent
5534  * with our computed state the following scenario needs special
5535  * treatment:
5536  *
5537  * 1. enable cursor
5538  * 2. move cursor entirely offscreen
5539  * 3. disable cursor
5540  *
5541  * Step 2. does call .disable_plane() but does not zero the watermarks
5542  * (since we consider an offscreen cursor still active for the purposes
5543  * of watermarks). Step 3. would not normally call .disable_plane()
5544  * because the actual plane visibility isn't changing, and we don't
5545  * deallocate the cursor ddb until the pipe gets disabled. So we must
5546  * force step 3. to call .disable_plane() to update the watermark
5547  * registers properly.
5548  *
5549  * Other planes do not suffer from this issue as their watermarks are
5550  * calculated based on the actual plane visibility. The only time this
5551  * can trigger for the other planes is during the initial readout as the
5552  * default value of the watermark registers is not zero.
5553  */
5554 static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
5555                                       struct intel_crtc *crtc)
5556 {
5557         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5558         const struct intel_crtc_state *old_crtc_state =
5559                 intel_atomic_get_old_crtc_state(state, crtc);
5560         struct intel_crtc_state *new_crtc_state =
5561                 intel_atomic_get_new_crtc_state(state, crtc);
5562         struct intel_plane *plane;
5563
5564         for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
5565                 struct intel_plane_state *plane_state;
5566                 enum plane_id plane_id = plane->id;
5567
5568                 /*
5569                  * Force a full wm update for every plane on modeset.
5570                  * Required because the reset value of the wm registers
5571                  * is non-zero, whereas we want all disabled planes to
5572                  * have zero watermarks. So if we turn off the relevant
5573                  * power well the hardware state will go out of sync
5574                  * with the software state.
5575                  */
5576                 if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->base) &&
5577                     skl_plane_wm_equals(dev_priv,
5578                                         &old_crtc_state->wm.skl.optimal.planes[plane_id],
5579                                         &new_crtc_state->wm.skl.optimal.planes[plane_id]))
5580                         continue;
5581
5582                 plane_state = intel_atomic_get_plane_state(state, plane);
5583                 if (IS_ERR(plane_state))
5584                         return PTR_ERR(plane_state);
5585
5586                 new_crtc_state->update_planes |= BIT(plane_id);
5587         }
5588
5589         return 0;
5590 }
5591
5592 static int
5593 skl_compute_wm(struct intel_atomic_state *state)
5594 {
5595         struct intel_crtc *crtc;
5596         struct intel_crtc_state *new_crtc_state;
5597         struct intel_crtc_state *old_crtc_state;
5598         struct skl_ddb_values *results = &state->wm_results;
5599         bool changed = false;
5600         int ret, i;
5601
5602         /* Clear all dirty flags */
5603         results->dirty_pipes = 0;
5604
5605         ret = skl_ddb_add_affected_pipes(state, &changed);
5606         if (ret || !changed)
5607                 return ret;
5608
5609         /*
5610          * Calculate WM's for all pipes that are part of this transaction.
5611          * Note that skl_ddb_add_affected_pipes may have added more CRTC's that
5612          * weren't otherwise being modified (and set bits in dirty_pipes) if
5613          * pipe allocations had to change.
5614          */
5615         for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
5616                                             new_crtc_state, i) {
5617                 ret = skl_build_pipe_wm(new_crtc_state);
5618                 if (ret)
5619                         return ret;
5620
5621                 ret = skl_wm_add_affected_planes(state, crtc);
5622                 if (ret)
5623                         return ret;
5624
5625                 if (!skl_pipe_wm_equals(crtc,
5626                                         &old_crtc_state->wm.skl.optimal,
5627                                         &new_crtc_state->wm.skl.optimal))
5628                         results->dirty_pipes |= drm_crtc_mask(&crtc->base);
5629         }
5630
5631         ret = skl_compute_ddb(state);
5632         if (ret)
5633                 return ret;
5634
5635         skl_print_wm_changes(state);
5636
5637         return 0;
5638 }
5639
5640 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5641                                       struct intel_crtc_state *crtc_state)
5642 {
5643         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
5644         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5645         struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
5646         enum pipe pipe = crtc->pipe;
5647
5648         if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5649                 return;
5650
5651         I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5652 }
5653
5654 static void skl_initial_wm(struct intel_atomic_state *state,
5655                            struct intel_crtc_state *crtc_state)
5656 {
5657         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
5658         struct drm_device *dev = intel_crtc->base.dev;
5659         struct drm_i915_private *dev_priv = to_i915(dev);
5660         struct skl_ddb_values *results = &state->wm_results;
5661
5662         if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
5663                 return;
5664
5665         mutex_lock(&dev_priv->wm.wm_mutex);
5666
5667         if (crtc_state->base.active_changed)
5668                 skl_atomic_update_crtc_wm(state, crtc_state);
5669
5670         mutex_unlock(&dev_priv->wm.wm_mutex);
5671 }
5672
5673 static void ilk_compute_wm_config(struct drm_i915_private *dev_priv,
5674                                   struct intel_wm_config *config)
5675 {
5676         struct intel_crtc *crtc;
5677
5678         /* Compute the currently _active_ config */
5679         for_each_intel_crtc(&dev_priv->drm, crtc) {
5680                 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5681
5682                 if (!wm->pipe_enabled)
5683                         continue;
5684
5685                 config->sprites_enabled |= wm->sprites_enabled;
5686                 config->sprites_scaled |= wm->sprites_scaled;
5687                 config->num_pipes_active++;
5688         }
5689 }
5690
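/*
 * Merge the active pipes' watermarks into LP watermark results for
 * the 1/2 DDB partitioning, also try the 5/6 split where it is
 * allowed (single pipe with sprites on IVB+), and program whichever
 * result is better.
 */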
5691 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5692 {
5693         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5694         struct ilk_wm_maximums max;
5695         struct intel_wm_config config = {};
5696         struct ilk_wm_values results = {};
5697         enum intel_ddb_partitioning partitioning;
5698
5699         ilk_compute_wm_config(dev_priv, &config);
5700
5701         ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_1_2, &max);
5702         ilk_wm_merge(dev_priv, &config, &max, &lp_wm_1_2);
5703
5704         /* 5/6 split only in single pipe config on IVB+ */
5705         if (INTEL_GEN(dev_priv) >= 7 &&
5706             config.num_pipes_active == 1 && config.sprites_enabled) {
5707                 ilk_compute_wm_maximums(dev_priv, 1, &config, INTEL_DDB_PART_5_6, &max);
5708                 ilk_wm_merge(dev_priv, &config, &max, &lp_wm_5_6);
5709
5710                 best_lp_wm = ilk_find_best_result(dev_priv, &lp_wm_1_2, &lp_wm_5_6);
5711         } else {
5712                 best_lp_wm = &lp_wm_1_2;
5713         }
5714
5715         partitioning = (best_lp_wm == &lp_wm_1_2) ?
5716                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5717
5718         ilk_compute_wm_results(dev_priv, best_lp_wm, partitioning, &results);
5719
5720         ilk_write_wm_values(dev_priv, &results);
5721 }
5722
5723 static void ilk_initial_watermarks(struct intel_atomic_state *state,
5724                                    struct intel_crtc_state *crtc_state)
5725 {
5726         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
5727         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
5728
5729         mutex_lock(&dev_priv->wm.wm_mutex);
5730         crtc->wm.active.ilk = crtc_state->wm.ilk.intermediate;
5731         ilk_program_watermarks(dev_priv);
5732         mutex_unlock(&dev_priv->wm.wm_mutex);
5733 }
5734
5735 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5736                                     struct intel_crtc_state *crtc_state)
5737 {
5738         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
5739         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
5740
5741         if (!crtc_state->wm.need_postvbl_update)
5742                 return;
5743
5744         mutex_lock(&dev_priv->wm.wm_mutex);
5745         crtc->wm.active.ilk = crtc_state->wm.ilk.optimal;
5746         ilk_program_watermarks(dev_priv);
5747         mutex_unlock(&dev_priv->wm.wm_mutex);
5748 }
5749
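/*
 * Unpack a PLANE_WM/CUR_WM register value into a struct skl_wm_level:
 * the enable and ignore-lines flags plus the result blocks and result
 * lines fields.
 */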
5750 static inline void skl_wm_level_from_reg_val(u32 val,
5751                                              struct skl_wm_level *level)
5752 {
5753         level->plane_en = val & PLANE_WM_EN;
5754         level->ignore_lines = val & PLANE_WM_IGNORE_LINES;
5755         level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5756         level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5757                 PLANE_WM_LINES_MASK;
5758 }
5759
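/*
 * Read back into @out the watermark levels and the transition
 * watermark currently programmed for every plane on @crtc, plus the
 * pipe linetime if the pipe is active.
 */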
5760 void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
5761                               struct skl_pipe_wm *out)
5762 {
5763         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5764         enum pipe pipe = crtc->pipe;
5765         int level, max_level;
5766         enum plane_id plane_id;
5767         u32 val;
5768
5769         max_level = ilk_wm_max_level(dev_priv);
5770
5771         for_each_plane_id_on_crtc(crtc, plane_id) {
5772                 struct skl_plane_wm *wm = &out->planes[plane_id];
5773
5774                 for (level = 0; level <= max_level; level++) {
5775                         if (plane_id != PLANE_CURSOR)
5776                                 val = I915_READ(PLANE_WM(pipe, plane_id, level));
5777                         else
5778                                 val = I915_READ(CUR_WM(pipe, level));
5779
5780                         skl_wm_level_from_reg_val(val, &wm->wm[level]);
5781                 }
5782
5783                 if (plane_id != PLANE_CURSOR)
5784                         val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5785                 else
5786                         val = I915_READ(CUR_WM_TRANS(pipe));
5787
5788                 skl_wm_level_from_reg_val(val, &wm->trans_wm);
5789         }
5790
5791         if (!crtc->active)
5792                 return;
5793
5794         out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
5795 }
5796
5797 void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
5798 {
5799         struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
5800         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
5801         struct intel_crtc *crtc;
5802         struct intel_crtc_state *crtc_state;
5803
5804         skl_ddb_get_hw_state(dev_priv, ddb);
5805         for_each_intel_crtc(&dev_priv->drm, crtc) {
5806                 crtc_state = to_intel_crtc_state(crtc->base.state);
5807
5808                 skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal);
5809
5810                 if (crtc->active)
5811                         hw->dirty_pipes |= drm_crtc_mask(&crtc->base);
5812         }
5813
5814         if (dev_priv->active_crtcs) {
5815                 /* Fully recompute DDB on first atomic commit */
5816                 dev_priv->wm.distrust_bios_wm = true;
5817         }
5818 }
5819
5820 static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
5821 {
5822         struct drm_device *dev = crtc->base.dev;
5823         struct drm_i915_private *dev_priv = to_i915(dev);
5824         struct ilk_wm_values *hw = &dev_priv->wm.hw;
5825         struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state);
5826         struct intel_pipe_wm *active = &crtc_state->wm.ilk.optimal;
5827         enum pipe pipe = crtc->pipe;
5828         static const i915_reg_t wm0_pipe_reg[] = {
5829                 [PIPE_A] = WM0_PIPEA_ILK,
5830                 [PIPE_B] = WM0_PIPEB_ILK,
5831                 [PIPE_C] = WM0_PIPEC_IVB,
5832         };
5833
5834         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5835         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5836                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
5837
5838         memset(active, 0, sizeof(*active));
5839
5840         active->pipe_enabled = crtc->active;
5841
5842         if (active->pipe_enabled) {
5843                 u32 tmp = hw->wm_pipe[pipe];
5844
5845                 /*
5846                  * For active pipes LP0 watermark is marked as
5847                  * enabled, and LP1+ watermarks as disabled since
5848                  * we can't really reverse compute them in case
5849                  * multiple pipes are active.
5850                  */
5851                 active->wm[0].enable = true;
5852                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5853                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5854                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5855                 active->linetime = hw->wm_linetime[pipe];
5856         } else {
5857                 int level, max_level = ilk_wm_max_level(dev_priv);
5858
5859                 /*
5860                  * For inactive pipes, all watermark levels
5861                  * should be marked as enabled but zeroed,
5862                  * which is what we'd compute them to.
5863                  */
5864                 for (level = 0; level <= max_level; level++)
5865                         active->wm[level].enable = true;
5866         }
5867
5868         crtc->wm.active.ilk = *active;
5869 }
5870
5871 #define _FW_WM(value, plane) \
5872         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5873 #define _FW_WM_VLV(value, plane) \
5874         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
5875
5876 static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5877                                struct g4x_wm_values *wm)
5878 {
5879         u32 tmp;
5880
5881         tmp = I915_READ(DSPFW1);
5882         wm->sr.plane = _FW_WM(tmp, SR);
5883         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5884         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5885         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5886
5887         tmp = I915_READ(DSPFW2);
5888         wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5889         wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5890         wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5891         wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5892         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5893         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5894
5895         tmp = I915_READ(DSPFW3);
5896         wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5897         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5898         wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5899         wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5900 }
5901
5902 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5903                                struct vlv_wm_values *wm)
5904 {
5905         enum pipe pipe;
5906         u32 tmp;
5907
5908         for_each_pipe(dev_priv, pipe) {
5909                 tmp = I915_READ(VLV_DDL(pipe));
5910
5911                 wm->ddl[pipe].plane[PLANE_PRIMARY] =
5912                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5913                 wm->ddl[pipe].plane[PLANE_CURSOR] =
5914                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5915                 wm->ddl[pipe].plane[PLANE_SPRITE0] =
5916                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5917                 wm->ddl[pipe].plane[PLANE_SPRITE1] =
5918                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5919         }
5920
5921         tmp = I915_READ(DSPFW1);
5922         wm->sr.plane = _FW_WM(tmp, SR);
5923         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5924         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5925         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
5926
5927         tmp = I915_READ(DSPFW2);
5928         wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5929         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5930         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
5931
5932         tmp = I915_READ(DSPFW3);
5933         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5934
5935         if (IS_CHERRYVIEW(dev_priv)) {
5936                 tmp = I915_READ(DSPFW7_CHV);
5937                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5938                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5939
5940                 tmp = I915_READ(DSPFW8_CHV);
5941                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5942                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
5943
5944                 tmp = I915_READ(DSPFW9_CHV);
5945                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5946                 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
5947
5948                 tmp = I915_READ(DSPHOWM);
5949                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5950                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5951                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5952                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5953                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5954                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5955                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5956                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5957                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5958                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5959         } else {
5960                 tmp = I915_READ(DSPFW7);
5961                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5962                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5963
5964                 tmp = I915_READ(DSPHOWM);
5965                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5966                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5967                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5968                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5969                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5970                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5971                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5972         }
5973 }
5974
5975 #undef _FW_WM
5976 #undef _FW_WM_VLV
5977
5978 void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv)
5979 {
5980         struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5981         struct intel_crtc *crtc;
5982
5983         g4x_read_wm_values(dev_priv, wm);
5984
5985         wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5986
5987         for_each_intel_crtc(&dev_priv->drm, crtc) {
5988                 struct intel_crtc_state *crtc_state =
5989                         to_intel_crtc_state(crtc->base.state);
5990                 struct g4x_wm_state *active = &crtc->wm.active.g4x;
5991                 struct g4x_pipe_wm *raw;
5992                 enum pipe pipe = crtc->pipe;
5993                 enum plane_id plane_id;
5994                 int level, max_level;
5995
5996                 active->cxsr = wm->cxsr;
5997                 active->hpll_en = wm->hpll_en;
5998                 active->fbc_en = wm->fbc_en;
5999
6000                 active->sr = wm->sr;
6001                 active->hpll = wm->hpll;
6002
6003                 for_each_plane_id_on_crtc(crtc, plane_id) {
6004                         active->wm.plane[plane_id] =
6005                                 wm->pipe[pipe].plane[plane_id];
6006                 }
6007
6008                 if (wm->cxsr && wm->hpll_en)
6009                         max_level = G4X_WM_LEVEL_HPLL;
6010                 else if (wm->cxsr)
6011                         max_level = G4X_WM_LEVEL_SR;
6012                 else
6013                         max_level = G4X_WM_LEVEL_NORMAL;
6014
6015                 level = G4X_WM_LEVEL_NORMAL;
6016                 raw = &crtc_state->wm.g4x.raw[level];
6017                 for_each_plane_id_on_crtc(crtc, plane_id)
6018                         raw->plane[plane_id] = active->wm.plane[plane_id];
6019
6020                 if (++level > max_level)
6021                         goto out;
6022
6023                 raw = &crtc_state->wm.g4x.raw[level];
6024                 raw->plane[PLANE_PRIMARY] = active->sr.plane;
6025                 raw->plane[PLANE_CURSOR] = active->sr.cursor;
6026                 raw->plane[PLANE_SPRITE0] = 0;
6027                 raw->fbc = active->sr.fbc;
6028
6029                 if (++level > max_level)
6030                         goto out;
6031
6032                 raw = &crtc_state->wm.g4x.raw[level];
6033                 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
6034                 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
6035                 raw->plane[PLANE_SPRITE0] = 0;
6036                 raw->fbc = active->hpll.fbc;
6037
6038         out:
6039                 for_each_plane_id_on_crtc(crtc, plane_id)
6040                         g4x_raw_plane_wm_set(crtc_state, level,
6041                                              plane_id, USHRT_MAX);
6042                 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
6043
6044                 crtc_state->wm.g4x.optimal = *active;
6045                 crtc_state->wm.g4x.intermediate = *active;
6046
6047                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
6048                               pipe_name(pipe),
6049                               wm->pipe[pipe].plane[PLANE_PRIMARY],
6050                               wm->pipe[pipe].plane[PLANE_CURSOR],
6051                               wm->pipe[pipe].plane[PLANE_SPRITE0]);
6052         }
6053
6054         DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
6055                       wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
6056         DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
6057                       wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
6058         DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
6059                       yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
6060 }
6061
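/*
 * Zero out the watermarks of all invisible planes, since the values
 * read back from hardware (or left behind by the BIOS) need not match
 * what we would have computed for them, then reprogram the hardware
 * from the sanitized state.
 */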
6062 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
6063 {
6064         struct intel_plane *plane;
6065         struct intel_crtc *crtc;
6066
6067         mutex_lock(&dev_priv->wm.wm_mutex);
6068
6069         for_each_intel_plane(&dev_priv->drm, plane) {
6070                 struct intel_crtc *crtc =
6071                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6072                 struct intel_crtc_state *crtc_state =
6073                         to_intel_crtc_state(crtc->base.state);
6074                 struct intel_plane_state *plane_state =
6075                         to_intel_plane_state(plane->base.state);
6076                 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
6077                 enum plane_id plane_id = plane->id;
6078                 int level;
6079
6080                 if (plane_state->base.visible)
6081                         continue;
6082
6083                 for (level = 0; level < 3; level++) {
6084                         struct g4x_pipe_wm *raw =
6085                                 &crtc_state->wm.g4x.raw[level];
6086
6087                         raw->plane[plane_id] = 0;
6088                         wm_state->wm.plane[plane_id] = 0;
6089                 }
6090
6091                 if (plane_id == PLANE_PRIMARY) {
6092                         for (level = 0; level < 3; level++) {
6093                                 struct g4x_pipe_wm *raw =
6094                                         &crtc_state->wm.g4x.raw[level];
6095                                 raw->fbc = 0;
6096                         }
6097
6098                         wm_state->sr.fbc = 0;
6099                         wm_state->hpll.fbc = 0;
6100                         wm_state->fbc_en = false;
6101                 }
6102         }
6103
6104         for_each_intel_crtc(&dev_priv->drm, crtc) {
6105                 struct intel_crtc_state *crtc_state =
6106                         to_intel_crtc_state(crtc->base.state);
6107
6108                 crtc_state->wm.g4x.intermediate =
6109                         crtc_state->wm.g4x.optimal;
6110                 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
6111         }
6112
6113         g4x_program_watermarks(dev_priv);
6114
6115         mutex_unlock(&dev_priv->wm.wm_mutex);
6116 }
6117
6118 void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
6119 {
6120         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
6121         struct intel_crtc *crtc;
6122         u32 val;
6123
6124         vlv_read_wm_values(dev_priv, wm);
6125
6126         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
6127         wm->level = VLV_WM_LEVEL_PM2;
6128
6129         if (IS_CHERRYVIEW(dev_priv)) {
6130                 vlv_punit_get(dev_priv);
6131
6132                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
6133                 if (val & DSP_MAXFIFO_PM5_ENABLE)
6134                         wm->level = VLV_WM_LEVEL_PM5;
6135
6136                 /*
6137                  * If DDR DVFS is disabled in the BIOS, Punit
6138                  * will never ack the request. So if that happens
6139                  * assume we don't have to enable/disable DDR DVFS
6140                  * dynamically. To test that just set the REQ_ACK
6141                  * bit to poke the Punit, but don't change the
6142                  * HIGH/LOW bits so that we don't actually change
6143                  * the current state.
6144                  */
6145                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6146                 val |= FORCE_DDR_FREQ_REQ_ACK;
6147                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
6148
6149                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
6150                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
6151                         DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
6152                                       "assuming DDR DVFS is disabled\n");
6153                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
6154                 } else {
6155                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
6156                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
6157                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
6158                 }
6159
6160                 vlv_punit_put(dev_priv);
6161         }
6162
6163         for_each_intel_crtc(&dev_priv->drm, crtc) {
6164                 struct intel_crtc_state *crtc_state =
6165                         to_intel_crtc_state(crtc->base.state);
6166                 struct vlv_wm_state *active = &crtc->wm.active.vlv;
6167                 const struct vlv_fifo_state *fifo_state =
6168                         &crtc_state->wm.vlv.fifo_state;
6169                 enum pipe pipe = crtc->pipe;
6170                 enum plane_id plane_id;
6171                 int level;
6172
6173                 vlv_get_fifo_size(crtc_state);
6174
6175                 active->num_levels = wm->level + 1;
6176                 active->cxsr = wm->cxsr;
6177
6178                 for (level = 0; level < active->num_levels; level++) {
6179                         struct g4x_pipe_wm *raw =
6180                                 &crtc_state->wm.vlv.raw[level];
6181
6182                         active->sr[level].plane = wm->sr.plane;
6183                         active->sr[level].cursor = wm->sr.cursor;
6184
6185                         for_each_plane_id_on_crtc(crtc, plane_id) {
6186                                 active->wm[level].plane[plane_id] =
6187                                         wm->pipe[pipe].plane[plane_id];
6188
6189                                 raw->plane[plane_id] =
6190                                         vlv_invert_wm_value(active->wm[level].plane[plane_id],
6191                                                             fifo_state->plane[plane_id]);
6192                         }
6193                 }
6194
6195                 for_each_plane_id_on_crtc(crtc, plane_id)
6196                         vlv_raw_plane_wm_set(crtc_state, level,
6197                                              plane_id, USHRT_MAX);
6198                 vlv_invalidate_wms(crtc, active, level);
6199
6200                 crtc_state->wm.vlv.optimal = *active;
6201                 crtc_state->wm.vlv.intermediate = *active;
6202
6203                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
6204                               pipe_name(pipe),
6205                               wm->pipe[pipe].plane[PLANE_PRIMARY],
6206                               wm->pipe[pipe].plane[PLANE_CURSOR],
6207                               wm->pipe[pipe].plane[PLANE_SPRITE0],
6208                               wm->pipe[pipe].plane[PLANE_SPRITE1]);
6209         }
6210
6211         DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6212                       wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
6213 }
6214
6215 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
6216 {
6217         struct intel_plane *plane;
6218         struct intel_crtc *crtc;
6219
6220         mutex_lock(&dev_priv->wm.wm_mutex);
6221
6222         for_each_intel_plane(&dev_priv->drm, plane) {
6223                 struct intel_crtc *crtc =
6224                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6225                 struct intel_crtc_state *crtc_state =
6226                         to_intel_crtc_state(crtc->base.state);
6227                 struct intel_plane_state *plane_state =
6228                         to_intel_plane_state(plane->base.state);
6229                 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6230                 const struct vlv_fifo_state *fifo_state =
6231                         &crtc_state->wm.vlv.fifo_state;
6232                 enum plane_id plane_id = plane->id;
6233                 int level;
6234
6235                 if (plane_state->base.visible)
6236                         continue;
6237
6238                 for (level = 0; level < wm_state->num_levels; level++) {
6239                         struct g4x_pipe_wm *raw =
6240                                 &crtc_state->wm.vlv.raw[level];
6241
6242                         raw->plane[plane_id] = 0;
6243
6244                         wm_state->wm[level].plane[plane_id] =
6245                                 vlv_invert_wm_value(raw->plane[plane_id],
6246                                                     fifo_state->plane[plane_id]);
6247                 }
6248         }
6249
6250         for_each_intel_crtc(&dev_priv->drm, crtc) {
6251                 struct intel_crtc_state *crtc_state =
6252                         to_intel_crtc_state(crtc->base.state);
6253
6254                 crtc_state->wm.vlv.intermediate =
6255                         crtc_state->wm.vlv.optimal;
6256                 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6257         }
6258
6259         vlv_program_watermarks(dev_priv);
6260
6261         mutex_unlock(&dev_priv->wm.wm_mutex);
6262 }
6263
6264 /*
6265  * FIXME should probably kill this and improve
6266  * the real watermark readout/sanitization instead
6267  */
6268 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6269 {
6270         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6271         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6272         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6273
6274         /*
6275          * Don't touch WM1S_LP_EN here.
6276          * Doing so could cause underruns.
6277          */
6278 }
6279
6280 void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv)
6281 {
6282         struct ilk_wm_values *hw = &dev_priv->wm.hw;
6283         struct intel_crtc *crtc;
6284
6285         ilk_init_lp_watermarks(dev_priv);
6286
6287         for_each_intel_crtc(&dev_priv->drm, crtc)
6288                 ilk_pipe_wm_get_hw_state(crtc);
6289
6290         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6291         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6292         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6293
6294         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
6295         if (INTEL_GEN(dev_priv) >= 7) {
6296                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6297                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6298         }
6299
6300         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6301                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6302                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6303         else if (IS_IVYBRIDGE(dev_priv))
6304                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6305                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6306
6307         hw->enable_fbc_wm =
6308                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6309 }
6310
6311 /**
6312  * intel_update_watermarks - update FIFO watermark values based on current modes
6313  * @crtc: the #intel_crtc on which to compute the WM
6314  *
6315  * Calculate watermark values for the various WM regs based on current mode
6316  * and plane configuration.
6317  *
6318  * There are several cases to deal with here:
6319  *   - normal (i.e. non-self-refresh)
6320  *   - self-refresh (SR) mode
6321  *   - lines are large relative to FIFO size (buffer can hold up to 2)
6322  *   - lines are small relative to FIFO size (buffer can hold more than 2
6323  *     lines), so need to account for TLB latency
6324  *
6325  *   The normal calculation is:
6326  *     watermark = dotclock * bytes per pixel * latency
6327  *   where latency is platform & configuration dependent (we assume pessimal
6328  *   values here).
6329  *
6330  *   The SR calculation is:
6331  *     watermark = (trunc(latency/line time)+1) * surface width *
6332  *       bytes per pixel
6333  *   where
6334  *     line time = htotal / dotclock
6335  *     surface width = hdisplay for normal plane and 64 for cursor
6336  *   and latency is assumed to be high, as above.
6337  *
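 *   A worked example with illustrative (assumed) numbers: a 100 MHz
 *   dotclock at 4 bytes per pixel with 10 us of latency needs
 *   100e6 * 4 * 10e-6 = 4000 bytes of FIFO; with an assumed 64-byte
 *   FIFO entry size that is 62.5, i.e. 63 entries once rounded up,
 *   plus the 2 extra entries described below.
 *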
6338  * The final value programmed to the register should always be rounded up,
6339  * and include an extra 2 entries to account for clock crossings.
6340  *
6341  * We don't use the sprite, so we can ignore that.  And on Crestline we have
6342  * to set the non-SR watermarks to 8.
6343  */
6344 void intel_update_watermarks(struct intel_crtc *crtc)
6345 {
6346         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
6347
6348         if (dev_priv->display.update_wm)
6349                 dev_priv->display.update_wm(crtc);
6350 }
6351
6352 void intel_enable_ipc(struct drm_i915_private *dev_priv)
6353 {
6354         u32 val;
6355
6356         if (!HAS_IPC(dev_priv))
6357                 return;
6358
6359         val = I915_READ(DISP_ARB_CTL2);
6360
6361         if (dev_priv->ipc_enabled)
6362                 val |= DISP_IPC_ENABLE;
6363         else
6364                 val &= ~DISP_IPC_ENABLE;
6365
6366         I915_WRITE(DISP_ARB_CTL2, val);
6367 }
6368
6369 static bool intel_can_enable_ipc(struct drm_i915_private *dev_priv)
6370 {
6371         /* Display WA #0477 WaDisableIPC: skl */
6372         if (IS_SKYLAKE(dev_priv))
6373                 return false;
6374
6375         /* Display WA #1141: SKL:all KBL:all CFL */
6376         if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
6377                 return dev_priv->dram_info.symmetric_memory;
6378
6379         return true;
6380 }
6381
6382 void intel_init_ipc(struct drm_i915_private *dev_priv)
6383 {
6384         if (!HAS_IPC(dev_priv))
6385                 return;
6386
6387         dev_priv->ipc_enabled = intel_can_enable_ipc(dev_priv);
6388
6389         intel_enable_ipc(dev_priv);
6390 }
6391
6392 /*
6393  * Lock protecting IPS related data structures
6394  */
6395 DEFINE_SPINLOCK(mchdev_lock);
6396
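/*
 * Request a new memory frequency point (DRPS). Returns false if a
 * previous command is still pending in MEMSWCTL, in which case the
 * caller may retry later.
 */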
6397 bool ironlake_set_drps(struct drm_i915_private *i915, u8 val)
6398 {
6399         struct intel_uncore *uncore = &i915->uncore;
6400         u16 rgvswctl;
6401
6402         lockdep_assert_held(&mchdev_lock);
6403
6404         rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
6405         if (rgvswctl & MEMCTL_CMD_STS) {
6406                 DRM_DEBUG("gpu busy, RCS change rejected\n");
6407                 return false; /* still busy with another command */
6408         }
6409
6410         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
6411                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
6412         intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
6413         intel_uncore_posting_read16(uncore, MEMSWCTL);
6414
6415         rgvswctl |= MEMCTL_CMD_STS;
6416         intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
6417
6418         return true;
6419 }
6420
6421 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
6422 {
6423         struct intel_uncore *uncore = &dev_priv->uncore;
6424         u32 rgvmodectl;
6425         u8 fmax, fmin, fstart, vstart;
6426
6427         spin_lock_irq(&mchdev_lock);
6428
6429         rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
6430
6431         /* Enable temp reporting */
6432         intel_uncore_write16(uncore, PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
6433         intel_uncore_write16(uncore, TSC1, I915_READ(TSC1) | TSE);
6434
6435         /* 100ms RC evaluation intervals */
6436         intel_uncore_write(uncore, RCUPEI, 100000);
6437         intel_uncore_write(uncore, RCDNEI, 100000);
6438
6439         /* Set max/min thresholds to 90ms and 80ms respectively */
6440         intel_uncore_write(uncore, RCBMAXAVG, 90000);
6441         intel_uncore_write(uncore, RCBMINAVG, 80000);
6442
6443         intel_uncore_write(uncore, MEMIHYST, 1);
6444
6445         /* Set up min, max, and cur for interrupt handling */
6446         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
6447         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
6448         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
6449                 MEMMODE_FSTART_SHIFT;
6450
6451         vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
6452                   PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
6453
6454         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6455         dev_priv->ips.fstart = fstart;
6456
6457         dev_priv->ips.max_delay = fstart;
6458         dev_priv->ips.min_delay = fmin;
6459         dev_priv->ips.cur_delay = fstart;
6460
6461         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6462                          fmax, fmin, fstart);
6463
6464         intel_uncore_write(uncore,
6465                            MEMINTREN,
6466                            MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6467
6468         /*
6469          * Interrupts will be enabled in ironlake_irq_postinstall
6470          */
6471
6472         intel_uncore_write(uncore, VIDSTART, vstart);
6473         intel_uncore_posting_read(uncore, VIDSTART);
6474
6475         rgvmodectl |= MEMMODE_SWMODE_EN;
6476         intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
6477
6478         if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
6479                              MEMCTL_CMD_STS) == 0, 10))
6480                 DRM_ERROR("stuck trying to change perf mode\n");
6481         mdelay(1);
6482
6483         ironlake_set_drps(dev_priv, fstart);
6484
6485         dev_priv->ips.last_count1 =
6486                 intel_uncore_read(uncore, DMIEC) +
6487                 intel_uncore_read(uncore, DDREC) +
6488                 intel_uncore_read(uncore, CSIEC);
6489         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
6490         dev_priv->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
6491         dev_priv->ips.last_time2 = ktime_get_raw_ns();
6492
6493         spin_unlock_irq(&mchdev_lock);
6494 }
6495
6496 static void ironlake_disable_drps(struct drm_i915_private *i915)
6497 {
6498         struct intel_uncore *uncore = &i915->uncore;
6499         u16 rgvswctl;
6500
6501         spin_lock_irq(&mchdev_lock);
6502
6503         rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
6504
6505         /* Ack interrupts, disable EFC interrupt */
6506         intel_uncore_write(uncore,
6507                            MEMINTREN,
6508                            intel_uncore_read(uncore, MEMINTREN) &
6509                            ~MEMINT_EVAL_CHG_EN);
6510         intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
6511         intel_uncore_write(uncore,
6512                            DEIER,
6513                            intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
6514         intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
6515         intel_uncore_write(uncore,
6516                            DEIMR,
6517                            intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
6518
6519         /* Go back to the starting frequency */
6520         ironlake_set_drps(i915, i915->ips.fstart);
6521         mdelay(1);
6522         rgvswctl |= MEMCTL_CMD_STS;
6523         intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
6524         mdelay(1);
6525
6526         spin_unlock_irq(&mchdev_lock);
6527 }
6528
6529 /* There's a funny hw issue where the hw returns all 0 when reading from
6530  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6531  * ourselves, instead of doing an rmw cycle (which might result in us clearing
6532  * all limits and leaving the gpu stuck at whatever frequency it is at atm).
6533  */
6534 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
6535 {
6536         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6537         u32 limits;
6538
6539         /* Only set the down limit when we've reached the lowest level to avoid
6540          * getting more interrupts, otherwise leave this clear. This prevents a
6541          * race in the hw when coming out of rc6: There's a tiny window where
6542          * the hw runs at the minimal clock before selecting the desired
6543          * frequency; if the down threshold expires in that window we will not
6544          * receive a down interrupt. */
6545         if (INTEL_GEN(dev_priv) >= 9) {
6546                 limits = (rps->max_freq_softlimit) << 23;
6547                 if (val <= rps->min_freq_softlimit)
6548                         limits |= (rps->min_freq_softlimit) << 14;
6549         } else {
6550                 limits = rps->max_freq_softlimit << 24;
6551                 if (val <= rps->min_freq_softlimit)
6552                         limits |= rps->min_freq_softlimit << 16;
6553         }
6554
6555         return limits;
6556 }
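
/*
 * Editor's sketch (not part of the original code): the gen9 limits
 * packing above, isolated, with hypothetical softlimit values. The
 * pre-gen9 branch uses bit positions 24/16 instead of 23/14.
 */
static __maybe_unused u32 example_gen9_rps_limits(u8 max_soft, u8 min_soft,
						  u8 val)
{
	u32 limits = (u32)max_soft << 23;	/* upclock ceiling */

	/* only advertise a floor once we are already sitting at it */
	if (val <= min_soft)
		limits |= (u32)min_soft << 14;

	return limits;
}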
6557
6558 static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
6559 {
6560         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6561         u32 threshold_up = 0, threshold_down = 0; /* in % */
6562         u32 ei_up = 0, ei_down = 0;
6563
6564         lockdep_assert_held(&rps->power.mutex);
6565
6566         if (new_power == rps->power.mode)
6567                 return;
6568
6569         /* Note the units here are not exactly 1us, but 1280ns. */
6570         switch (new_power) {
6571         case LOW_POWER:
6572                 /* Upclock if more than 95% busy over 16ms */
6573                 ei_up = 16000;
6574                 threshold_up = 95;
6575
6576                 /* Downclock if less than 85% busy over 32ms */
6577                 ei_down = 32000;
6578                 threshold_down = 85;
6579                 break;
6580
6581         case BETWEEN:
6582                 /* Upclock if more than 90% busy over 13ms */
6583                 ei_up = 13000;
6584                 threshold_up = 90;
6585
6586                 /* Downclock if less than 75% busy over 32ms */
6587                 ei_down = 32000;
6588                 threshold_down = 75;
6589                 break;
6590
6591         case HIGH_POWER:
6592                 /* Upclock if more than 85% busy over 10ms */
6593                 ei_up = 10000;
6594                 threshold_up = 85;
6595
6596                 /* Downclock if less than 60% busy over 32ms */
6597                 ei_down = 32000;
6598                 threshold_down = 60;
6599                 break;
6600         }
6601
6602         /* Once byt can survive dynamic sw freq adjustments without
6603          * hanging the system, this restriction can be lifted.
6604          */
6605         if (IS_VALLEYVIEW(dev_priv))
6606                 goto skip_hw_write;
6607
6608         I915_WRITE(GEN6_RP_UP_EI,
6609                    GT_INTERVAL_FROM_US(dev_priv, ei_up));
6610         I915_WRITE(GEN6_RP_UP_THRESHOLD,
6611                    GT_INTERVAL_FROM_US(dev_priv,
6612                                        ei_up * threshold_up / 100));
6613
6614         I915_WRITE(GEN6_RP_DOWN_EI,
6615                    GT_INTERVAL_FROM_US(dev_priv, ei_down));
6616         I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
6617                    GT_INTERVAL_FROM_US(dev_priv,
6618                                        ei_down * threshold_down / 100));
6619
6620         I915_WRITE(GEN6_RP_CONTROL,
6621                    (INTEL_GEN(dev_priv) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
6622                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
6623                    GEN6_RP_MEDIA_IS_GFX |
6624                    GEN6_RP_ENABLE |
6625                    GEN6_RP_UP_BUSY_AVG |
6626                    GEN6_RP_DOWN_IDLE_AVG);
6627
6628 skip_hw_write:
6629         rps->power.mode = new_power;
6630         rps->power.up_threshold = threshold_up;
6631         rps->power.down_threshold = threshold_down;
6632 }
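
/*
 * Editor's sketch: the conversion GT_INTERVAL_FROM_US() performs on
 * platforms with the classic 1280ns RPS tick (the real macro is
 * platform dependent; the 1280ns figure is our assumption here).
 * LOW_POWER's 16ms up interval, for instance, becomes
 * 16000 * 1000 / 1280 = 12500 ticks.
 */
static __maybe_unused u32 example_us_to_rps_ticks(u32 us)
{
	return us * 1000 / 1280;	/* 1 tick = 1280ns, assumed */
}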
6633
6634 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6635 {
6636         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6637         int new_power;
6638
6639         new_power = rps->power.mode;
6640         switch (rps->power.mode) {
6641         case LOW_POWER:
6642                 if (val > rps->efficient_freq + 1 &&
6643                     val > rps->cur_freq)
6644                         new_power = BETWEEN;
6645                 break;
6646
6647         case BETWEEN:
6648                 if (val <= rps->efficient_freq &&
6649                     val < rps->cur_freq)
6650                         new_power = LOW_POWER;
6651                 else if (val >= rps->rp0_freq &&
6652                          val > rps->cur_freq)
6653                         new_power = HIGH_POWER;
6654                 break;
6655
6656         case HIGH_POWER:
6657                 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6658                     val < rps->cur_freq)
6659                         new_power = BETWEEN;
6660                 break;
6661         }
6662         /* Max/min bins are special */
6663         if (val <= rps->min_freq_softlimit)
6664                 new_power = LOW_POWER;
6665         if (val >= rps->max_freq_softlimit)
6666                 new_power = HIGH_POWER;
6667
6668         mutex_lock(&rps->power.mutex);
6669         if (rps->power.interactive)
6670                 new_power = HIGH_POWER;
6671         rps_set_power(dev_priv, new_power);
6672         mutex_unlock(&rps->power.mutex);
6673 }
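
/*
 * Editor's note on the state machine above: every transition requires
 * the requested frequency to move past cur_freq as well as past the
 * band boundary, so a request that merely oscillates around RPe or RP1
 * cannot ping-pong the power mode; only the softlimit clamps and the
 * interactive flag override that hysteresis.
 */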
6674
6675 void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6676 {
6677         struct intel_rps *rps = &i915->gt_pm.rps;
6678
6679         if (INTEL_GEN(i915) < 6)
6680                 return;
6681
6682         mutex_lock(&rps->power.mutex);
6683         if (interactive) {
6684                 if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6685                         rps_set_power(i915, HIGH_POWER);
6686         } else {
6687                 GEM_BUG_ON(!rps->power.interactive);
6688                 rps->power.interactive--;
6689         }
6690         mutex_unlock(&rps->power.mutex);
6691 }
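
/*
 * Editor's sketch of the intended pairing (hypothetical caller): the
 * interactive count is a nesting counter, so marks must be balanced
 * around the latency-sensitive work.
 */
static __maybe_unused void example_interactive_section(struct drm_i915_private *i915)
{
	intel_rps_mark_interactive(i915, true);
	/* ... e.g. perform a page flip the user is waiting on ... */
	intel_rps_mark_interactive(i915, false);
}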
6692
6693 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6694 {
6695         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6696         u32 mask = 0;
6697
6698         /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
6699         if (val > rps->min_freq_softlimit)
6700                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
6701         if (val < rps->max_freq_softlimit)
6702                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
6703
6704         mask &= dev_priv->pm_rps_events;
6705
6706         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
6707 }
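
/*
 * Editor's note: because the mask is inverted before sanitizing, a val
 * sitting at min_freq_softlimit leaves only the "up" events unmasked,
 * a val at max_freq_softlimit leaves only the "down" events unmasked,
 * and anything in between keeps both directions live.
 */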
6708
6709 /* gen6_set_rps is called to update the frequency request, but should also be
6710  * called when the range (min_delay and max_delay) is modified so that we can
6711  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6712 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
6713 {
6714         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6715
6716         /* min/max delay may still have been modified so be sure to
6717          * write the limits value.
6718          */
6719         if (val != rps->cur_freq) {
6720                 gen6_set_rps_thresholds(dev_priv, val);
6721
6722                 if (INTEL_GEN(dev_priv) >= 9)
6723                         I915_WRITE(GEN6_RPNSWREQ,
6724                                    GEN9_FREQUENCY(val));
6725                 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6726                         I915_WRITE(GEN6_RPNSWREQ,
6727                                    HSW_FREQUENCY(val));
6728                 else
6729                         I915_WRITE(GEN6_RPNSWREQ,
6730                                    GEN6_FREQUENCY(val) |
6731                                    GEN6_OFFSET(0) |
6732                                    GEN6_AGGRESSIVE_TURBO);
6733         }
6734
6735         /* Make sure we continue to get interrupts
6736          * until we hit the minimum or maximum frequencies.
6737          */
6738         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
6739         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6740
6741         rps->cur_freq = val;
6742         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6743
6744         return 0;
6745 }
6746
6747 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
6748 {
6749         int err;
6750
6751         if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
6752                       "Odd GPU freq value\n"))
6753                 val &= ~1;
6754
6755         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6756
6757         if (val != dev_priv->gt_pm.rps.cur_freq) {
6758                 vlv_punit_get(dev_priv);
6759                 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6760                 vlv_punit_put(dev_priv);
6761                 if (err)
6762                         return err;
6763
6764                 gen6_set_rps_thresholds(dev_priv, val);
6765         }
6766
6767         dev_priv->gt_pm.rps.cur_freq = val;
6768         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6769
6770         return 0;
6771 }
6772
6773 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
6774  *
6775  * If Gfx is idle:
6776  * 1. Forcewake the Media well.
6777  * 2. Request the idle frequency.
6778  * 3. Release forcewake of the Media well.
6779  */
6780 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6781 {
6782         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6783         u32 val = rps->idle_freq;
6784         int err;
6785
6786         if (rps->cur_freq <= val)
6787                 return;
6788
6789         /* The punit delays the write of the frequency and voltage until it
6790          * determines the GPU is awake. During normal usage we don't want to
6791          * waste power changing the frequency if the GPU is sleeping (rc6).
6792          * However, the GPU and driver are now idle and we do not want to delay
6793          * switching to minimum voltage (reducing power whilst idle) as we do
6794          * not expect to be woken in the near future and so must flush the
6795          * change by waking the device.
6796          *
6797          * We choose to take the media powerwell (either would do to trick the
6798          * punit into committing the voltage change) as that takes a lot less
6799          * power than the render powerwell.
6800          */
6801         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_MEDIA);
6802         err = valleyview_set_rps(dev_priv, val);
6803         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_MEDIA);
6804
6805         if (err)
6806                 DRM_ERROR("Failed to set RPS for idle\n");
6807 }
6808
6809 void gen6_rps_busy(struct drm_i915_private *dev_priv)
6810 {
6811         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6812
6813         mutex_lock(&rps->lock);
6814         if (rps->enabled) {
6815                 u8 freq;
6816
6817                 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
6818                         gen6_rps_reset_ei(dev_priv);
6819                 I915_WRITE(GEN6_PMINTRMSK,
6820                            gen6_rps_pm_mask(dev_priv, rps->cur_freq));
6821
6822                 gen6_enable_rps_interrupts(dev_priv);
6823
6824                 /* Use the user's desired frequency as a guide, but for better
6825                  * performance, jump directly to RPe as our starting frequency.
6826                  */
6827                 freq = max(rps->cur_freq,
6828                            rps->efficient_freq);
6829
6830                 if (intel_set_rps(dev_priv,
6831                                   clamp(freq,
6832                                         rps->min_freq_softlimit,
6833                                         rps->max_freq_softlimit)))
6834                         DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
6835         }
6836         mutex_unlock(&rps->lock);
6837 }
6838
6839 void gen6_rps_idle(struct drm_i915_private *dev_priv)
6840 {
6841         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6842
6843         /* Flush our bottom-half so that it does not race with us
6844          * setting the idle frequency and so that it is bounded by
6845          * our rpm wakeref. And then disable the interrupts to stop any
6846          * further RPS reclocking whilst we are asleep.
6847          */
6848         gen6_disable_rps_interrupts(dev_priv);
6849
6850         mutex_lock(&rps->lock);
6851         if (rps->enabled) {
6852                 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6853                         vlv_set_rps_idle(dev_priv);
6854                 else
6855                         gen6_set_rps(dev_priv, rps->idle_freq);
6856                 rps->last_adj = 0;
6857                 I915_WRITE(GEN6_PMINTRMSK,
6858                            gen6_sanitize_rps_pm_mask(dev_priv, ~0));
6859         }
6860         mutex_unlock(&rps->lock);
6861 }
6862
6863 void gen6_rps_boost(struct i915_request *rq)
6864 {
6865         struct intel_rps *rps = &rq->i915->gt_pm.rps;
6866         unsigned long flags;
6867         bool boost;
6868
6869         /* This is intentionally racy! We peek at the state here, then
6870          * validate inside the RPS worker.
6871          */
6872         if (!rps->enabled)
6873                 return;
6874
6875         if (i915_request_signaled(rq))
6876                 return;
6877
6878         /* Serializes with i915_request_retire() */
6879         boost = false;
6880         spin_lock_irqsave(&rq->lock, flags);
6881         if (!i915_request_has_waitboost(rq) &&
6882             !dma_fence_is_signaled_locked(&rq->fence)) {
6883                 boost = !atomic_fetch_inc(&rps->num_waiters);
6884                 rq->flags |= I915_REQUEST_WAITBOOST;
6885         }
6886         spin_unlock_irqrestore(&rq->lock, flags);
6887         if (!boost)
6888                 return;
6889
6890         if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6891                 schedule_work(&rps->work);
6892
6893         atomic_inc(&rps->boosts);
6894 }
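
/*
 * Editor's note: atomic_fetch_inc() returns the old waiter count, so
 * only the 0 -> 1 transition actually schedules the RPS worker; later
 * overlapping waiters simply ride the boost already in flight.
 */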
6895
6896 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
6897 {
6898         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6899         int err;
6900
6901         lockdep_assert_held(&rps->lock);
6902         GEM_BUG_ON(val > rps->max_freq);
6903         GEM_BUG_ON(val < rps->min_freq);
6904
6905         if (!rps->enabled) {
6906                 rps->cur_freq = val;
6907                 return 0;
6908         }
6909
6910         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6911                 err = valleyview_set_rps(dev_priv, val);
6912         else
6913                 err = gen6_set_rps(dev_priv, val);
6914
6915         return err;
6916 }
6917
6918 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
6919 {
6920         I915_WRITE(GEN6_RC_CONTROL, 0);
6921         I915_WRITE(GEN9_PG_ENABLE, 0);
6922 }
6923
6924 static void gen9_disable_rps(struct drm_i915_private *dev_priv)
6925 {
6926         I915_WRITE(GEN6_RP_CONTROL, 0);
6927 }
6928
6929 static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
6930 {
6931         I915_WRITE(GEN6_RC_CONTROL, 0);
6932 }
6933
6934 static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6935 {
6936         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
6937         I915_WRITE(GEN6_RP_CONTROL, 0);
6938 }
6939
6940 static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
6941 {
6942         I915_WRITE(GEN6_RC_CONTROL, 0);
6943 }
6944
6945 static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6946 {
6947         I915_WRITE(GEN6_RP_CONTROL, 0);
6948 }
6949
6950 static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
6951 {
6952         /* We're taking forcewake before disabling RC6; this is what
6953          * the BIOS expects when going into suspend. */
6954         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
6955
6956         I915_WRITE(GEN6_RC_CONTROL, 0);
6957
6958         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
6959 }
6960
6961 static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6962 {
6963         I915_WRITE(GEN6_RP_CONTROL, 0);
6964 }
6965
6966 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
6967 {
6968         bool enable_rc6 = true;
6969         unsigned long rc6_ctx_base;
6970         u32 rc_ctl;
6971         int rc_sw_target;
6972
6973         rc_ctl = I915_READ(GEN6_RC_CONTROL);
6974         rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6975                        RC_SW_TARGET_STATE_SHIFT;
6976         DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6977                          "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6978                          onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6979                          onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6980                          rc_sw_target);
6981
6982         if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
6983                 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6984                 enable_rc6 = false;
6985         }
6986
6987         /*
6988          * The exact context size is not known for BXT, so assume a page size
6989          * for this check.
6990          */
6991         rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
6992         if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6993               (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
6994                 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6995                 enable_rc6 = false;
6996         }
6997
6998         if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6999               ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
7000               ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
7001               ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
7002                 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
7003                 enable_rc6 = false;
7004         }
7005
7006         if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
7007             !I915_READ(GEN8_PUSHBUS_ENABLE) ||
7008             !I915_READ(GEN8_PUSHBUS_SHIFT)) {
7009                 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
7010                 enable_rc6 = false;
7011         }
7012
7013         if (!I915_READ(GEN6_GFXPAUSE)) {
7014                 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
7015                 enable_rc6 = false;
7016         }
7017
7018         if (!I915_READ(GEN8_MISC_CTRL0)) {
7019                 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
7020                 enable_rc6 = false;
7021         }
7022
7023         return enable_rc6;
7024 }
7025
7026 static bool sanitize_rc6(struct drm_i915_private *i915)
7027 {
7028         struct intel_device_info *info = mkwrite_device_info(i915);
7029
7030         /* Powersaving is controlled by the host when inside a VM */
7031         if (intel_vgpu_active(i915)) {
7032                 info->has_rc6 = 0;
7033                 info->has_rps = false;
7034         }
7035
7036         if (info->has_rc6 &&
7037             IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
7038                 DRM_INFO("RC6 disabled by BIOS\n");
7039                 info->has_rc6 = 0;
7040         }
7041
7042         /*
7043          * We assume that we do not have any deep rc6 levels if we don't
7044          * have the previous rc6 level supported, i.e. we use HAS_RC6()
7045          * as the initial coarse check for rc6 in general, moving on to
7046          * progressively finer/deeper levels.
7047          */
7048         if (!info->has_rc6 && info->has_rc6p)
7049                 info->has_rc6p = 0;
7050
7051         return info->has_rc6;
7052 }
7053
7054 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
7055 {
7056         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7057
7058         /* All of these values are in units of 50MHz */
7059
7060         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
7061         if (IS_GEN9_LP(dev_priv)) {
7062                 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
7063                 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
7064                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
7065                 rps->min_freq = (rp_state_cap >>  0) & 0xff;
7066         } else {
7067                 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
7068                 rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
7069                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
7070                 rps->min_freq = (rp_state_cap >> 16) & 0xff;
7071         }
7072         /* hw_max = RP0 until we check for overclocking */
7073         rps->max_freq = rps->rp0_freq;
7074
7075         rps->efficient_freq = rps->rp1_freq;
7076         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
7077             IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7078                 u32 ddcc_status = 0;
7079
7080                 if (sandybridge_pcode_read(dev_priv,
7081                                            HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
7082                                            &ddcc_status, NULL) == 0)
7083                         rps->efficient_freq =
7084                                 clamp_t(u8,
7085                                         ((ddcc_status >> 8) & 0xff),
7086                                         rps->min_freq,
7087                                         rps->max_freq);
7088         }
7089
7090         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7091                 /* Store the frequency values in 16.66 MHz units, which is
7092                  * the natural hardware unit for SKL
7093                  */
7094                 rps->rp0_freq *= GEN9_FREQ_SCALER;
7095                 rps->rp1_freq *= GEN9_FREQ_SCALER;
7096                 rps->min_freq *= GEN9_FREQ_SCALER;
7097                 rps->max_freq *= GEN9_FREQ_SCALER;
7098                 rps->efficient_freq *= GEN9_FREQ_SCALER;
7099         }
7100 }
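
/*
 * Editor's sketch of the unit change above, assuming GEN9_FREQ_SCALER
 * is 3 (50MHz / 3 ~ 16.66MHz): either representation describes the
 * same clock, e.g. a raw RP0 fuse of 37 is ~1850MHz in both.
 */
static __maybe_unused unsigned int example_rp_units_to_khz(u8 raw, bool gen9)
{
	/* 37 * 50000 = 1850000kHz; (37 * 3) * 16667 ~ 1850000kHz */
	return gen9 ? raw * 3 * 16667 : raw * 50000;
}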
7101
7102 static void reset_rps(struct drm_i915_private *dev_priv,
7103                       int (*set)(struct drm_i915_private *, u8))
7104 {
7105         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7106         u8 freq = rps->cur_freq;
7107
7108         /* force a reset */
7109         rps->power.mode = -1;
7110         rps->cur_freq = -1;
7111
7112         if (set(dev_priv, freq))
7113                 DRM_ERROR("Failed to reset RPS to initial values\n");
7114 }
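
/*
 * Editor's note: poisoning power.mode and cur_freq with -1 guarantees
 * that both the "new_power == rps->power.mode" and "val != cur_freq"
 * early-out checks miss, so the chosen set() callback reprograms every
 * register from scratch rather than trusting stale hardware state.
 */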
7115
7116 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
7117 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
7118 {
7119         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7120
7121         /* Program defaults and thresholds for RPS */
7122         if (IS_GEN(dev_priv, 9))
7123                 I915_WRITE(GEN6_RC_VIDEO_FREQ,
7124                         GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
7125
7126         /* 1 second timeout */
7127         I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
7128                 GT_INTERVAL_FROM_US(dev_priv, 1000000));
7129
7130         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
7131
7132         /* Leaning on the below call to gen6_set_rps to program/setup the
7133          * Up/Down EI & threshold registers, as well as the RP_CONTROL,
7134          * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
7135         reset_rps(dev_priv, gen6_set_rps);
7136
7137         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7138 }
7139
7140 static void gen11_enable_rc6(struct drm_i915_private *dev_priv)
7141 {
7142         struct intel_engine_cs *engine;
7143         enum intel_engine_id id;
7144
7145         /* 1a: Software RC state - RC0 */
7146         I915_WRITE(GEN6_RC_STATE, 0);
7147
7148         /*
7149          * 1b: Get forcewake during program sequence. Although the driver
7150          * hasn't enabled a state yet where we need forcewake, BIOS may have.
7151          */
7152         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7153
7154         /* 2a: Disable RC states. */
7155         I915_WRITE(GEN6_RC_CONTROL, 0);
7156
7157         /* 2b: Program RC6 thresholds. */
7158         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
7159         I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
7160
7161         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
7162         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7163         for_each_engine(engine, dev_priv, id)
7164                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7165
7166         if (HAS_GT_UC(dev_priv))
7167                 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
7168
7169         I915_WRITE(GEN6_RC_SLEEP, 0);
7170
7171         I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
7172
7173         /*
7174          * 2c: Program Coarse Power Gating Policies.
7175          *
7176          * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
7177          * use instead is a more conservative estimate for the maximum time
7178          * it takes us to service a CS interrupt and submit a new ELSP - that
7179          * is the time which the GPU is idle waiting for the CPU to select the
7180          * next request to execute. If the idle hysteresis is less than that
7181          * interrupt service latency, the hardware will automatically gate
7182          * the power well and we will then incur the wake up cost on top of
7183          * the service latency. A similar guide from plane_state is that we
7184          * do not want the enable hysteresis to be less than the wakeup latency.
7185          *
7186          * igt/gem_exec_nop/sequential provides a rough estimate for the
7187          * service latency, and puts it around 10us for Broadwell (and other
7188          * big core) and around 40us for Broxton (and other low power cores).
7189          * [Note that for legacy ringbuffer submission, this is less than 1us!]
7190          * However, the wakeup latency on Broxton is closer to 100us. To be
7191          * conservative, we have to factor in a context switch on top (due
7192          * to ksoftirqd).
7193          */
7194         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
7195         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
7196
7197         /* 3a: Enable RC6 */
7198         I915_WRITE(GEN6_RC_CONTROL,
7199                    GEN6_RC_CTL_HW_ENABLE |
7200                    GEN6_RC_CTL_RC6_ENABLE |
7201                    GEN6_RC_CTL_EI_MODE(1));
7202
7203         /* 3b: Enable Coarse Power Gating only when RC6 is enabled. */
7204         I915_WRITE(GEN9_PG_ENABLE,
7205                    GEN9_RENDER_PG_ENABLE |
7206                    GEN9_MEDIA_PG_ENABLE |
7207                    GEN11_MEDIA_SAMPLER_PG_ENABLE);
7208
7209         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7210 }
7211
7212 static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
7213 {
7214         struct intel_engine_cs *engine;
7215         enum intel_engine_id id;
7216         u32 rc6_mode;
7217
7218         /* 1a: Software RC state - RC0 */
7219         I915_WRITE(GEN6_RC_STATE, 0);
7220
7221         /* 1b: Get forcewake during program sequence. Although the driver
7222          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
7223         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7224
7225         /* 2a: Disable RC states. */
7226         I915_WRITE(GEN6_RC_CONTROL, 0);
7227
7228         /* 2b: Program RC6 thresholds. */
7229         if (INTEL_GEN(dev_priv) >= 10) {
7230                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
7231                 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
7232         } else if (IS_SKYLAKE(dev_priv)) {
7233                 /*
7234                  * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
7235                  * when CPG is enabled
7236                  */
7237                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
7238         } else {
7239                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
7240         }
7241
7242         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
7243         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7244         for_each_engine(engine, dev_priv, id)
7245                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7246
7247         if (HAS_GT_UC(dev_priv))
7248                 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
7249
7250         I915_WRITE(GEN6_RC_SLEEP, 0);
7251
7252         /*
7253          * 2c: Program Coarse Power Gating Policies.
7254          *
7255          * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
7256          * use instead is a more conservative estimate for the maximum time
7257          * it takes us to service a CS interrupt and submit a new ELSP - that
7258          * is the time which the GPU is idle waiting for the CPU to select the
7259          * next request to execute. If the idle hysteresis is less than that
7260          * interrupt service latency, the hardware will automatically gate
7261          * the power well and we will then incur the wake up cost on top of
7262          * the service latency. A similar guide from plane_state is that we
7263          * do not want the enable hysteresis to be less than the wakeup latency.
7264          *
7265          * igt/gem_exec_nop/sequential provides a rough estimate for the
7266          * service latency, and puts it around 10us for Broadwell (and other
7267          * big core) and around 40us for Broxton (and other low power cores).
7268          * [Note that for legacy ringbuffer submission, this is less than 1us!]
7269          * However, the wakeup latency on Broxton is closer to 100us. To be
7270          * conservative, we have to factor in a context switch on top (due
7271          * to ksoftirqd).
7272          */
7273         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
7274         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
7275
7276         /* 3a: Enable RC6 */
7277         I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
7278
7279         /* WaRsUseTimeoutMode:cnl (pre-prod) */
7280         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
7281                 rc6_mode = GEN7_RC_CTL_TO_MODE;
7282         else
7283                 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
7284
7285         I915_WRITE(GEN6_RC_CONTROL,
7286                    GEN6_RC_CTL_HW_ENABLE |
7287                    GEN6_RC_CTL_RC6_ENABLE |
7288                    rc6_mode);
7289
7290         /*
7291          * 3b: Enable Coarse Power Gating only when RC6 is enabled.
7292          * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
7293          */
7294         if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
7295                 I915_WRITE(GEN9_PG_ENABLE, 0);
7296         else
7297                 I915_WRITE(GEN9_PG_ENABLE,
7298                            GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
7299
7300         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7301 }
7302
7303 static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
7304 {
7305         struct intel_engine_cs *engine;
7306         enum intel_engine_id id;
7307
7308         /* 1a: Software RC state - RC0 */
7309         I915_WRITE(GEN6_RC_STATE, 0);
7310
7311         /* 1b: Get forcewake during program sequence. Although the driver
7312          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
7313         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7314
7315         /* 2a: Disable RC states. */
7316         I915_WRITE(GEN6_RC_CONTROL, 0);
7317
7318         /* 2b: Program RC6 thresholds. */
7319         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7320         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
7321         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7322         for_each_engine(engine, dev_priv, id)
7323                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7324         I915_WRITE(GEN6_RC_SLEEP, 0);
7325         I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
7326
7327         /* 3: Enable RC6 */
7328
7329         I915_WRITE(GEN6_RC_CONTROL,
7330                    GEN6_RC_CTL_HW_ENABLE |
7331                    GEN7_RC_CTL_TO_MODE |
7332                    GEN6_RC_CTL_RC6_ENABLE);
7333
7334         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7335 }
7336
7337 static void gen8_enable_rps(struct drm_i915_private *dev_priv)
7338 {
7339         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7340
7341         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7342
7343         /* 1: Program defaults and thresholds for RPS */
7344         I915_WRITE(GEN6_RPNSWREQ,
7345                    HSW_FREQUENCY(rps->rp1_freq));
7346         I915_WRITE(GEN6_RC_VIDEO_FREQ,
7347                    HSW_FREQUENCY(rps->rp1_freq));
7348         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7349         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
7350
7351         /* Docs recommend 900 MHz and 300 MHz respectively */
7352         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
7353                    rps->max_freq_softlimit << 24 |
7354                    rps->min_freq_softlimit << 16);
7355
7356         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
7357         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
7358         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
7359         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
7360
7361         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7362
7363         /* 2: Enable RPS */
7364         I915_WRITE(GEN6_RP_CONTROL,
7365                    GEN6_RP_MEDIA_TURBO |
7366                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7367                    GEN6_RP_MEDIA_IS_GFX |
7368                    GEN6_RP_ENABLE |
7369                    GEN6_RP_UP_BUSY_AVG |
7370                    GEN6_RP_DOWN_IDLE_AVG);
7371
7372         reset_rps(dev_priv, gen6_set_rps);
7373
7374         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7375 }
7376
7377 static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
7378 {
7379         struct intel_engine_cs *engine;
7380         enum intel_engine_id id;
7381         u32 rc6vids, rc6_mask;
7382         u32 gtfifodbg;
7383         int ret;
7384
7385         I915_WRITE(GEN6_RC_STATE, 0);
7386
7387         /* Clear the DBG now so stale errors don't confuse us later */
7388         gtfifodbg = I915_READ(GTFIFODBG);
7389         if (gtfifodbg) {
7390                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
7391                 I915_WRITE(GTFIFODBG, gtfifodbg);
7392         }
7393
7394         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7395
7396         /* disable the counters and set deterministic thresholds */
7397         I915_WRITE(GEN6_RC_CONTROL, 0);
7398
7399         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
7400         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
7401         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
7402         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7403         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7404
7405         for_each_engine(engine, dev_priv, id)
7406                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7407
7408         I915_WRITE(GEN6_RC_SLEEP, 0);
7409         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
7410         if (IS_IVYBRIDGE(dev_priv))
7411                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
7412         else
7413                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
7414         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
7415         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7416
7417         /* We don't use those on Haswell */
7418         rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
7419         if (HAS_RC6p(dev_priv))
7420                 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
7421         if (HAS_RC6pp(dev_priv))
7422                 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
7423         I915_WRITE(GEN6_RC_CONTROL,
7424                    rc6_mask |
7425                    GEN6_RC_CTL_EI_MODE(1) |
7426                    GEN6_RC_CTL_HW_ENABLE);
7427
7428         rc6vids = 0;
7429         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
7430                                      &rc6vids, NULL);
7431         if (IS_GEN(dev_priv, 6) && ret) {
7432                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
7433         } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
7434                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7435                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
7436                 rc6vids &= 0xffff00;
7437                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
7438                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
7439                 if (ret)
7440                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7441         }
7442
7443         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7444 }
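
/*
 * Editor's note (assumed encoding): the RC6 VID fields above are
 * linear, 5mV per step above a 245mV base, so the 450mV minimum being
 * enforced corresponds to a raw field value of 41; BIOSes that
 * programmed anything lower get corrected via the pcode write.
 */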
7445
7446 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
7447 {
7448         /* Here begins a magic sequence of register writes to enable
7449          * auto-downclocking.
7450          *
7451          * Perhaps there might be some value in exposing these to
7452          * userspace...
7453          */
7454         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7455
7456         /* Power down if completely idle for over 50ms */
7457         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
7458         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7459
7460         reset_rps(dev_priv, gen6_set_rps);
7461
7462         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7463 }
7464
7465 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
7466 {
7467         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7468         const int min_freq = 15;
7469         const int scaling_factor = 180;
7470         unsigned int gpu_freq;
7471         unsigned int max_ia_freq, min_ring_freq;
7472         unsigned int max_gpu_freq, min_gpu_freq;
7473         struct cpufreq_policy *policy;
7474
7475         lockdep_assert_held(&rps->lock);
7476
7477         if (rps->max_freq <= rps->min_freq)
7478                 return;
7479
7480         policy = cpufreq_cpu_get(0);
7481         if (policy) {
7482                 max_ia_freq = policy->cpuinfo.max_freq;
7483                 cpufreq_cpu_put(policy);
7484         } else {
7485                 /*
7486                  * Default to measured freq if none found, PCU will ensure we
7487                  * don't go over
7488                  */
7489                 max_ia_freq = tsc_khz;
7490         }
7491
7492         /* Convert from kHz to MHz */
7493         max_ia_freq /= 1000;
7494
7495         min_ring_freq = I915_READ(DCLK) & 0xf;
7496         /* convert DDR frequency from units of 266.6MHz to bandwidth */
7497         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
7498
7499         min_gpu_freq = rps->min_freq;
7500         max_gpu_freq = rps->max_freq;
7501         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7502                 /* Convert GT frequency to 50 MHz units */
7503                 min_gpu_freq /= GEN9_FREQ_SCALER;
7504                 max_gpu_freq /= GEN9_FREQ_SCALER;
7505         }
7506
7507         /*
7508          * For each potential GPU frequency, load a ring frequency we'd like
7509          * to use for memory access.  We do this by specifying the IA frequency
7510          * the PCU should use as a reference to determine the ring frequency.
7511          */
7512         for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
7513                 const int diff = max_gpu_freq - gpu_freq;
7514                 unsigned int ia_freq = 0, ring_freq = 0;
7515
7516                 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7517                         /*
7518                          * ring_freq = 2 * GT. ring_freq is in 100MHz units
7519                          * No floor required for ring frequency on SKL.
7520                          */
7521                         ring_freq = gpu_freq;
7522                 } else if (INTEL_GEN(dev_priv) >= 8) {
7523                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
7524                         ring_freq = max(min_ring_freq, gpu_freq);
7525                 } else if (IS_HASWELL(dev_priv)) {
7526                         ring_freq = mult_frac(gpu_freq, 5, 4);
7527                         ring_freq = max(min_ring_freq, ring_freq);
7528                         /* leave ia_freq as the default, chosen by cpufreq */
7529                 } else {
7530                         /* On older processors, there is no separate ring
7531                          * clock domain, so in order to boost the bandwidth
7532                          * of the ring, we need to upclock the CPU (ia_freq).
7533                          *
7534                          * For GPU frequencies less than 750MHz,
7535                          * just use the lowest ring freq.
7536                          */
7537                         if (gpu_freq < min_freq)
7538                                 ia_freq = 800;
7539                         else
7540                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
7541                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
7542                 }
7543
7544                 sandybridge_pcode_write(dev_priv,
7545                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
7546                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
7547                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
7548                                         gpu_freq);
7549         }
7550 }
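
/*
 * Editor's worked example for the oldest branch above (numbers are
 * hypothetical): with max_ia_freq = 3400MHz and a GPU bin 4 steps
 * below max, ia_freq = 3400 - (4 * 180) / 2 = 3040MHz, which is then
 * encoded as DIV_ROUND_CLOSEST(3040, 100) = 30 in 100MHz units for
 * the pcode table.
 */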
7551
7552 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
7553 {
7554         u32 val, rp0;
7555
7556         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7557
7558         switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) {
7559         case 8:
7560                 /* (2 * 4) config */
7561                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
7562                 break;
7563         case 12:
7564                 /* (2 * 6) config */
7565                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
7566                 break;
7567         case 16:
7568                 /* (2 * 8) config */
7569         default:
7570                 /* Fall back to the (2 * 8) config's RP0 for any other combination */
7571                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
7572                 break;
7573         }
7574
7575         rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
7576
7577         return rp0;
7578 }
7579
7580 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7581 {
7582         u32 val, rpe;
7583
7584         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7585         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7586
7587         return rpe;
7588 }
7589
7590 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7591 {
7592         u32 val, rp1;
7593
7594         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7595         rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7596
7597         return rp1;
7598 }
7599
7600 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7601 {
7602         u32 val, rpn;
7603
7604         val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7605         rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7606                        FB_GFX_FREQ_FUSE_MASK);
7607
7608         return rpn;
7609 }
7610
7611 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7612 {
7613         u32 val, rp1;
7614
7615         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7616
7617         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7618
7619         return rp1;
7620 }
7621
7622 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
7623 {
7624         u32 val, rp0;
7625
7626         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7627
7628         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7629         /* Clamp to max */
7630         rp0 = min_t(u32, rp0, 0xea);
7631
7632         return rp0;
7633 }
7634
7635 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7636 {
7637         u32 val, rpe;
7638
7639         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
7640         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
7641         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
7642         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7643
7644         return rpe;
7645 }
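
/*
 * Editor's note: RPe is split across two fuses here; the LO field
 * supplies the low five bits and the HI field is shifted up by 5, so
 * e.g. lo = 0x1a with hi = 0x2 assembles to 0x5a.
 */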
7646
7647 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
7648 {
7649         u32 val;
7650
7651         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7652         /*
7653          * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7654          * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7655          * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7656          * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
7657          * to make sure it matches what Punit accepts.
7658          */
7659         return max_t(u32, val, 0xc0);
7660 }
7661
7662 /* Check that the pctx buffer wasn't moved under us. */
7663 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7664 {
7665         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7666
7667         WARN_ON(pctx_addr != dev_priv->dsm.start +
7668                              dev_priv->vlv_pctx->stolen->start);
7669 }
7670
7672 /* Check that the pcbr address is not empty. */
7673 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7674 {
7675         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7676
7677         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7678 }
7679
7680 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
7681 {
7682         resource_size_t pctx_paddr, paddr;
7683         resource_size_t pctx_size = 32*1024;
7684         u32 pcbr;
7685
7686         pcbr = I915_READ(VLV_PCBR);
7687         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
7688                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7689                 paddr = dev_priv->dsm.end + 1 - pctx_size;
7690                 GEM_BUG_ON(paddr > U32_MAX);
7691
7692                 pctx_paddr = (paddr & (~4095));
7693                 I915_WRITE(VLV_PCBR, pctx_paddr);
7694         }
7695
7696         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7697 }
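
/*
 * Editor's note: the "& (~4095)" above is plain 4KiB alignment; the
 * fixed-up power context is pinned to a page-aligned address at the
 * very top of stolen memory, i.e. dsm.end + 1 - 32KiB rounded down to
 * a page boundary.
 */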
7698
7699 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
7700 {
7701         struct drm_i915_gem_object *pctx;
7702         resource_size_t pctx_paddr;
7703         resource_size_t pctx_size = 24*1024;
7704         u32 pcbr;
7705
7706         pcbr = I915_READ(VLV_PCBR);
7707         if (pcbr) {
7708                 /* BIOS set it up already, grab the pre-alloc'd space */
7709                 resource_size_t pcbr_offset;
7710
7711                 pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
7712                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
7713                                                                       pcbr_offset,
7714                                                                       I915_GTT_OFFSET_NONE,
7715                                                                       pctx_size);
7716                 goto out;
7717         }
7718
7719         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7720
7721         /*
7722          * From the Gunit register HAS:
7723          * The Gfx driver is expected to program this register and ensure
7724          * proper allocation within Gfx stolen memory.  For example, this
7725          * register should be programmed such that the PCBR range does not
7726          * overlap with other ranges, such as the frame buffer, protected
7727          * memory, or any other relevant ranges.
7728          */
7729         pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
7730         if (!pctx) {
7731                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7732                 goto out;
7733         }
7734
7735         GEM_BUG_ON(range_overflows_t(u64,
7736                                      dev_priv->dsm.start,
7737                                      pctx->stolen->start,
7738                                      U32_MAX));
7739         pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
7740         I915_WRITE(VLV_PCBR, pctx_paddr);
7741
7742 out:
7743         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7744         dev_priv->vlv_pctx = pctx;
7745 }
7746
7747 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
7748 {
7749         struct drm_i915_gem_object *pctx;
7750
7751         pctx = fetch_and_zero(&dev_priv->vlv_pctx);
7752         if (pctx)
7753                 i915_gem_object_put(pctx);
7754 }
7755
7756 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7757 {
7758         dev_priv->gt_pm.rps.gpll_ref_freq =
7759                 vlv_get_cck_clock(dev_priv, "GPLL ref",
7760                                   CCK_GPLL_CLOCK_CONTROL,
7761                                   dev_priv->czclk_freq);
7762
7763         DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7764                          dev_priv->gt_pm.rps.gpll_ref_freq);
7765 }
7766
7767 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
7768 {
7769         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7770         u32 val;
7771
7772         valleyview_setup_pctx(dev_priv);
7773
7774         vlv_iosf_sb_get(dev_priv,
7775                         BIT(VLV_IOSF_SB_PUNIT) |
7776                         BIT(VLV_IOSF_SB_NC) |
7777                         BIT(VLV_IOSF_SB_CCK));
7778
7779         vlv_init_gpll_ref_freq(dev_priv);
7780
7781         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7782         switch ((val >> 6) & 3) {
7783         case 0:
7784         case 1:
7785                 dev_priv->mem_freq = 800;
7786                 break;
7787         case 2:
7788                 dev_priv->mem_freq = 1066;
7789                 break;
7790         case 3:
7791                 dev_priv->mem_freq = 1333;
7792                 break;
7793         }
7794         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7795
7796         rps->max_freq = valleyview_rps_max_freq(dev_priv);
7797         rps->rp0_freq = rps->max_freq;
7798         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7799                          intel_gpu_freq(dev_priv, rps->max_freq),
7800                          rps->max_freq);
7801
7802         rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
7803         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7804                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7805                          rps->efficient_freq);
7806
7807         rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
7808         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7809                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7810                          rps->rp1_freq);
7811
7812         rps->min_freq = valleyview_rps_min_freq(dev_priv);
7813         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7814                          intel_gpu_freq(dev_priv, rps->min_freq),
7815                          rps->min_freq);
7816
7817         vlv_iosf_sb_put(dev_priv,
7818                         BIT(VLV_IOSF_SB_PUNIT) |
7819                         BIT(VLV_IOSF_SB_NC) |
7820                         BIT(VLV_IOSF_SB_CCK));
7821 }
7822
7823 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
7824 {
7825         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7826         u32 val;
7827
7828         cherryview_setup_pctx(dev_priv);
7829
7830         vlv_iosf_sb_get(dev_priv,
7831                         BIT(VLV_IOSF_SB_PUNIT) |
7832                         BIT(VLV_IOSF_SB_NC) |
7833                         BIT(VLV_IOSF_SB_CCK));
7834
7835         vlv_init_gpll_ref_freq(dev_priv);
7836
7837         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
7838
7839         switch ((val >> 2) & 0x7) {
7840         case 3:
7841                 dev_priv->mem_freq = 2000;
7842                 break;
7843         default:
7844                 dev_priv->mem_freq = 1600;
7845                 break;
7846         }
7847         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7848
7849         rps->max_freq = cherryview_rps_max_freq(dev_priv);
7850         rps->rp0_freq = rps->max_freq;
7851         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7852                          intel_gpu_freq(dev_priv, rps->max_freq),
7853                          rps->max_freq);
7854
7855         rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
7856         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7857                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7858                          rps->efficient_freq);
7859
7860         rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7861         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7862                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7863                          rps->rp1_freq);
7864
7865         rps->min_freq = cherryview_rps_min_freq(dev_priv);
7866         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7867                          intel_gpu_freq(dev_priv, rps->min_freq),
7868                          rps->min_freq);
7869
7870         vlv_iosf_sb_put(dev_priv,
7871                         BIT(VLV_IOSF_SB_PUNIT) |
7872                         BIT(VLV_IOSF_SB_NC) |
7873                         BIT(VLV_IOSF_SB_CCK));
7874
7875         WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7876                    rps->min_freq) & 1,
7877                   "Odd GPU freq values\n");
7878 }
7879
7880 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7881 {
7882         valleyview_cleanup_pctx(dev_priv);
7883 }
7884
7885 static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
7886 {
7887         struct intel_engine_cs *engine;
7888         enum intel_engine_id id;
7889         u32 gtfifodbg, rc6_mode, pcbr;
7890
7891         gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7892                                              GT_FIFO_FREE_ENTRIES_CHV);
7893         if (gtfifodbg) {
7894                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7895                                  gtfifodbg);
7896                 I915_WRITE(GTFIFODBG, gtfifodbg);
7897         }
7898
7899         cherryview_check_pctx(dev_priv);
7900
7901         /* 1a & 1b: Get forcewake during program sequence. Although the driver
7902          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
7903         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7904
7905         /*  Disable RC states. */
7906         I915_WRITE(GEN6_RC_CONTROL, 0);
7907
7908         /* 2a: Program RC6 thresholds.*/
7909         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7910         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
7911         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7912
7913         for_each_engine(engine, dev_priv, id)
7914                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7915         I915_WRITE(GEN6_RC_SLEEP, 0);
7916
7917         /* TO threshold set to 500 us (0x186 * 1.28 us) */
7918         I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
7919
7920         /* Allows RC6 residency counter to work */
7921         I915_WRITE(VLV_COUNTER_CONTROL,
7922                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7923                                       VLV_MEDIA_RC6_COUNT_EN |
7924                                       VLV_RENDER_RC6_COUNT_EN));
7925
7926         /* For now we assume BIOS is allocating and populating the PCBR */
7927         pcbr = I915_READ(VLV_PCBR);
7928
7929         /* 3: Enable RC6 */
7930         rc6_mode = 0;
7931         if (pcbr >> VLV_PCBR_ADDR_SHIFT)
7932                 rc6_mode = GEN7_RC_CTL_TO_MODE;
7933         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7934
7935         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7936 }
7937
7938 static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7939 {
7940         u32 val;
7941
7942         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7943
7944         /* 1: Program defaults and thresholds for RPS */
7945         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7946         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7947         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7948         I915_WRITE(GEN6_RP_UP_EI, 66000);
7949         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7950
7951         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7952
7953         /* 2: Enable RPS */
7954         I915_WRITE(GEN6_RP_CONTROL,
7955                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7956                    GEN6_RP_MEDIA_IS_GFX |
7957                    GEN6_RP_ENABLE |
7958                    GEN6_RP_UP_BUSY_AVG |
7959                    GEN6_RP_DOWN_IDLE_AVG);
7960
7961         /* Setting Fixed Bias */
7962         vlv_punit_get(dev_priv);
7963
7964         val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
7965         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7966
7967         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7968
7969         vlv_punit_put(dev_priv);
7970
7971         /* RPS code assumes GPLL is used */
7972         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7973
7974         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7975         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7976
7977         reset_rps(dev_priv, valleyview_set_rps);
7978
7979         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
7980 }
7981
7982 static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
7983 {
7984         struct intel_engine_cs *engine;
7985         enum intel_engine_id id;
7986         u32 gtfifodbg;
7987
7988         valleyview_check_pctx(dev_priv);
7989
7990         gtfifodbg = I915_READ(GTFIFODBG);
7991         if (gtfifodbg) {
7992                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7993                                  gtfifodbg);
7994                 I915_WRITE(GTFIFODBG, gtfifodbg);
7995         }
7996
7997         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
7998
7999         /*  Disable RC states. */
8000         I915_WRITE(GEN6_RC_CONTROL, 0);
8001
8002         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
8003         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
8004         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
8005
8006         for_each_engine(engine, dev_priv, id)
8007                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
8008
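        /*
         * TO threshold of 0x557 (1367) corresponds to ~1750 us, assuming
         * the same 1.28 us units as in the CHV sequence above.
         */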
8009         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
8010
8011         /* Allows RC6 residency counter to work */
8012         I915_WRITE(VLV_COUNTER_CONTROL,
8013                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
8014                                       VLV_MEDIA_RC0_COUNT_EN |
8015                                       VLV_RENDER_RC0_COUNT_EN |
8016                                       VLV_MEDIA_RC6_COUNT_EN |
8017                                       VLV_RENDER_RC6_COUNT_EN));
8018
8019         I915_WRITE(GEN6_RC_CONTROL,
8020                    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
8021
8022         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
8023 }
8024
8025 static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
8026 {
8027         u32 val;
8028
8029         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
8030
8031         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
8032         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
8033         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
8034         I915_WRITE(GEN6_RP_UP_EI, 66000);
8035         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
8036
8037         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
8038
8039         I915_WRITE(GEN6_RP_CONTROL,
8040                    GEN6_RP_MEDIA_TURBO |
8041                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
8042                    GEN6_RP_MEDIA_IS_GFX |
8043                    GEN6_RP_ENABLE |
8044                    GEN6_RP_UP_BUSY_AVG |
8045                    GEN6_RP_DOWN_IDLE_CONT);
8046
8047         vlv_punit_get(dev_priv);
8048
8049         /* Setting Fixed Bias */
8050         val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
8051         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
8052
8053         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
8054
8055         vlv_punit_put(dev_priv);
8056
8057         /* RPS code assumes GPLL is used */
8058         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
8059
8060         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
8061         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
8062
8063         reset_rps(dev_priv, valleyview_set_rps);
8064
8065         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
8066 }
8067
8068 static unsigned long intel_pxfreq(u32 vidfreq)
8069 {
8070         unsigned long freq;
8071         int div = (vidfreq & 0x3f0000) >> 16;
8072         int post = (vidfreq & 0x3000) >> 12;
8073         int pre = (vidfreq & 0x7);
8074
8075         if (!pre)
8076                 return 0;
8077
8078         freq = ((div * 133333) / ((1<<post) * pre));
8079
8080         return freq;
8081 }
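
/*
 * Worked example of the decode above (illustrative only, and assuming the
 * 133333 constant is in kHz): vidfreq = 0x00121001 gives div = 0x12 (18),
 * post = 1, pre = 1, so freq = 18 * 133333 / ((1 << 1) * 1) = 1199997 kHz,
 * i.e. roughly 1.2 GHz.
 */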
8082
8083 static const struct cparams {
8084         u16 i;
8085         u16 t;
8086         u16 m;
8087         u16 c;
8088 } cparams[] = {
8089         { 1, 1333, 301, 28664 },
8090         { 1, 1066, 294, 24460 },
8091         { 1, 800, 294, 25192 },
8092         { 0, 1333, 276, 27605 },
8093         { 0, 1066, 276, 27605 },
8094         { 0, 800, 231, 23784 },
8095 };
8096
8097 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
8098 {
8099         u64 total_count, diff, ret;
8100         u32 count1, count2, count3, m = 0, c = 0;
8101         unsigned long now = jiffies_to_msecs(jiffies), diff1;
8102         int i;
8103
8104         lockdep_assert_held(&mchdev_lock);
8105
8106         diff1 = now - dev_priv->ips.last_time1;
8107
8108         /* Prevent division-by-zero if we are polling too fast.
8109          * Also, we don't get interesting results if we are polling
8110          * faster than once in 10ms, so just return the saved value
8111          * in such cases.
8112          */
8113         if (diff1 <= 10)
8114                 return dev_priv->ips.chipset_power;
8115
8116         count1 = I915_READ(DMIEC);
8117         count2 = I915_READ(DDREC);
8118         count3 = I915_READ(CSIEC);
8119
8120         total_count = count1 + count2 + count3;
8121
8122         /* FIXME: handle per-counter overflow */
8123         if (total_count < dev_priv->ips.last_count1) {
8124                 diff = ~0UL - dev_priv->ips.last_count1;
8125                 diff += total_count;
8126         } else {
8127                 diff = total_count - dev_priv->ips.last_count1;
8128         }
8129
8130         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
8131                 if (cparams[i].i == dev_priv->ips.c_m &&
8132                     cparams[i].t == dev_priv->ips.r_t) {
8133                         m = cparams[i].m;
8134                         c = cparams[i].c;
8135                         break;
8136                 }
8137         }
8138
8139         diff = div_u64(diff, diff1);
8140         ret = ((m * diff) + c);
8141         ret = div_u64(ret, 10);
8142
8143         dev_priv->ips.last_count1 = total_count;
8144         dev_priv->ips.last_time1 = now;
8145
8146         dev_priv->ips.chipset_power = ret;
8147
8148         return ret;
8149 }
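
/*
 * Worked example of the formula above (illustrative numbers; the result is
 * presumably in mW, matching ips.chipset_power): with cparams row
 * { 1, 1333, 301, 28664 } and the three energy counters advancing by a
 * combined 100000 over 100 ms, diff = 1000 counts/ms and
 * ret = (301 * 1000 + 28664) / 10 = 32966.
 */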
8150
8151 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
8152 {
8153         intel_wakeref_t wakeref;
8154         unsigned long val = 0;
8155
8156         if (!IS_GEN(dev_priv, 5))
8157                 return 0;
8158
8159         with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
8160                 spin_lock_irq(&mchdev_lock);
8161                 val = __i915_chipset_val(dev_priv);
8162                 spin_unlock_irq(&mchdev_lock);
8163         }
8164
8165         return val;
8166 }
8167
8168 unsigned long i915_mch_val(struct drm_i915_private *i915)
8169 {
8170         unsigned long m, x, b;
8171         u32 tsfs;
8172
8173         tsfs = intel_uncore_read(&i915->uncore, TSFS);
8174
8175         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
8176         x = intel_uncore_read8(&i915->uncore, TR1);
8177
8178         b = tsfs & TSFS_INTR_MASK;
8179
8180         return ((m * x) / 127) - b;
8181 }
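
/*
 * Illustrative only: with a TSFS slope m = 127, a TR1 reading x = 90 and an
 * intercept b = 10, this returns (127 * 90) / 127 - 10 = 80. The units and
 * calibration come from the IPS hardware and are not spelled out here.
 */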
8182
8183 static int _pxvid_to_vd(u8 pxvid)
8184 {
8185         if (pxvid == 0)
8186                 return 0;
8187
8188         if (pxvid >= 8 && pxvid < 31)
8189                 pxvid = 31;
8190
8191         return (pxvid + 2) * 125;
8192 }
8193
8194 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
8195 {
8196         const int vd = _pxvid_to_vd(pxvid);
8197         const int vm = vd - 1125;
8198
8199         if (INTEL_INFO(dev_priv)->is_mobile)
8200                 return vm > 0 ? vm : 0;
8201
8202         return vd;
8203 }
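
/*
 * Worked example (units intentionally left abstract, as in the code): a
 * pxvid of 17 falls in [8, 31) and is clamped to 31, giving
 * vd = (31 + 2) * 125 = 4125; on mobile parts the returned value is then
 * vm = 4125 - 1125 = 3000.
 */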
8204
8205 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
8206 {
8207         u64 now, diff, diffms;
8208         u32 count;
8209
8210         lockdep_assert_held(&mchdev_lock);
8211
8212         now = ktime_get_raw_ns();
8213         diffms = now - dev_priv->ips.last_time2;
8214         do_div(diffms, NSEC_PER_MSEC);
8215
8216         /* Don't divide by 0 */
8217         if (!diffms)
8218                 return;
8219
8220         count = I915_READ(GFXEC);
8221
8222         if (count < dev_priv->ips.last_count2) {
8223                 diff = ~0UL - dev_priv->ips.last_count2;
8224                 diff += count;
8225         } else {
8226                 diff = count - dev_priv->ips.last_count2;
8227         }
8228
8229         dev_priv->ips.last_count2 = count;
8230         dev_priv->ips.last_time2 = now;
8231
8232         /* More magic constants... */
8233         diff = diff * 1181;
8234         diff = div_u64(diff, diffms * 10);
8235         dev_priv->ips.gfx_power = diff;
8236 }
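
/*
 * Illustrative numbers for the scaling above: a GFXEC delta of 1000 over
 * 100 ms yields gfx_power = 1000 * 1181 / (100 * 10) = 1181 (the 1181/10
 * factor converts counter units; the exact unit is undocumented here).
 */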
8237
8238 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
8239 {
8240         intel_wakeref_t wakeref;
8241
8242         if (!IS_GEN(dev_priv, 5))
8243                 return;
8244
8245         with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
8246                 spin_lock_irq(&mchdev_lock);
8247                 __i915_update_gfx_val(dev_priv);
8248                 spin_unlock_irq(&mchdev_lock);
8249         }
8250 }
8251
8252 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
8253 {
8254         unsigned long t, corr, state1, corr2, state2;
8255         u32 pxvid, ext_v;
8256
8257         lockdep_assert_held(&mchdev_lock);
8258
8259         pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
8260         pxvid = (pxvid >> 24) & 0x7f;
8261         ext_v = pvid_to_extvid(dev_priv, pxvid);
8262
8263         state1 = ext_v;
8264
8265         t = i915_mch_val(dev_priv);
8266
8267         /* Revel in the empirically derived constants */
8268
8269         /* Correction factor in 1/100000 units */
8270         if (t > 80)
8271                 corr = ((t * 2349) + 135940);
8272         else if (t >= 50)
8273                 corr = ((t * 964) + 29317);
8274         else /* < 50 */
8275                 corr = ((t * 301) + 1004);
8276
8277         corr = corr * ((150142 * state1) / 10000 - 78642);
8278         corr /= 100000;
8279         corr2 = (corr * dev_priv->ips.corr);
8280
8281         state2 = (corr2 * state1) / 10000;
8282         state2 /= 100; /* convert to mW */
8283
8284         __i915_update_gfx_val(dev_priv);
8285
8286         return dev_priv->ips.gfx_power + state2;
8287 }
8288
8289 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
8290 {
8291         intel_wakeref_t wakeref;
8292         unsigned long val = 0;
8293
8294         if (!IS_GEN(dev_priv, 5))
8295                 return 0;
8296
8297         with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
8298                 spin_lock_irq(&mchdev_lock);
8299                 val = __i915_gfx_val(dev_priv);
8300                 spin_unlock_irq(&mchdev_lock);
8301         }
8302
8303         return val;
8304 }
8305
8306 static struct drm_i915_private __rcu *i915_mch_dev;
8307
8308 static struct drm_i915_private *mchdev_get(void)
8309 {
8310         struct drm_i915_private *i915;
8311
8312         rcu_read_lock();
8313         i915 = rcu_dereference(i915_mch_dev);
8314         if (!i915 || !kref_get_unless_zero(&i915->drm.ref))
8315                 i915 = NULL;
8316         rcu_read_unlock();
8317
8318         return i915;
8319 }
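
/*
 * Sketch of the intended caller pattern (all callers below follow it): the
 * RCU-protected pointer plus kref_get_unless_zero() acts as a weak
 * reference that is only upgraded if the device is still alive:
 *
 *	struct drm_i915_private *i915 = mchdev_get();
 *	if (!i915)
 *		return false; // i915 not registered, or mid-unload
 *	... use i915 under whatever locks the caller needs ...
 *	drm_dev_put(&i915->drm); // drop the reference mchdev_get() took
 */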
8320
8321 /**
8322  * i915_read_mch_val - return value for IPS use
8323  *
8324  * Calculate and return a value for the IPS driver to use when deciding whether
8325  * we have thermal and power headroom to increase CPU or GPU power budget.
8326  */
8327 unsigned long i915_read_mch_val(void)
8328 {
8329         struct drm_i915_private *i915;
8330         unsigned long chipset_val = 0;
8331         unsigned long graphics_val = 0;
8332         intel_wakeref_t wakeref;
8333
8334         i915 = mchdev_get();
8335         if (!i915)
8336                 return 0;
8337
8338         with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
8339                 spin_lock_irq(&mchdev_lock);
8340                 chipset_val = __i915_chipset_val(i915);
8341                 graphics_val = __i915_gfx_val(i915);
8342                 spin_unlock_irq(&mchdev_lock);
8343         }
8344
8345         drm_dev_put(&i915->drm);
8346         return chipset_val + graphics_val;
8347 }
8348 EXPORT_SYMBOL_GPL(i915_read_mch_val);
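
/*
 * Hypothetical external usage (intel_ips is the only intended consumer;
 * the power_budget threshold below is made up for illustration):
 *
 *	unsigned long mch_val = i915_read_mch_val();
 *	if (mch_val && mch_val < power_budget) // headroom left?
 *		... allow a higher CPU/GPU power budget ...
 */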
8349
8350 /**
8351  * i915_gpu_raise - raise GPU frequency limit
8352  *
8353  * Raise the limit; IPS indicates we have thermal headroom.
8354  */
8355 bool i915_gpu_raise(void)
8356 {
8357         struct drm_i915_private *i915;
8358
8359         i915 = mchdev_get();
8360         if (!i915)
8361                 return false;
8362
8363         spin_lock_irq(&mchdev_lock);
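        /*
         * Smaller delay values correspond to higher frequencies, so
         * stepping max_delay down towards fmax raises the frequency cap.
         */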
8364         if (i915->ips.max_delay > i915->ips.fmax)
8365                 i915->ips.max_delay--;
8366         spin_unlock_irq(&mchdev_lock);
8367
8368         drm_dev_put(&i915->drm);
8369         return true;
8370 }
8371 EXPORT_SYMBOL_GPL(i915_gpu_raise);
8372
8373 /**
8374  * i915_gpu_lower - lower GPU frequency limit
8375  *
8376  * IPS indicates we're close to a thermal limit, so throttle back the GPU
8377  * frequency maximum.
8378  */
8379 bool i915_gpu_lower(void)
8380 {
8381         struct drm_i915_private *i915;
8382
8383         i915 = mchdev_get();
8384         if (!i915)
8385                 return false;
8386
8387         spin_lock_irq(&mchdev_lock);
8388         if (i915->ips.max_delay < i915->ips.min_delay)
8389                 i915->ips.max_delay++;
8390         spin_unlock_irq(&mchdev_lock);
8391
8392         drm_dev_put(&i915->drm);
8393         return true;
8394 }
8395 EXPORT_SYMBOL_GPL(i915_gpu_lower);
8396
8397 /**
8398  * i915_gpu_busy - indicate GPU busyness to IPS
8399  *
8400  * Tell the IPS driver whether or not the GPU is busy.
8401  */
8402 bool i915_gpu_busy(void)
8403 {
8404         struct drm_i915_private *i915;
8405         bool ret;
8406
8407         i915 = mchdev_get();
8408         if (!i915)
8409                 return false;
8410
8411         ret = i915->gt.awake;
8412
8413         drm_dev_put(&i915->drm);
8414         return ret;
8415 }
8416 EXPORT_SYMBOL_GPL(i915_gpu_busy);
8417
8418 /**
8419  * i915_gpu_turbo_disable - disable graphics turbo
8420  *
8421  * Disable graphics turbo by resetting the max frequency and setting the
8422  * current frequency to the default.
8423  */
8424 bool i915_gpu_turbo_disable(void)
8425 {
8426         struct drm_i915_private *i915;
8427         bool ret;
8428
8429         i915 = mchdev_get();
8430         if (!i915)
8431                 return false;
8432
8433         spin_lock_irq(&mchdev_lock);
8434         i915->ips.max_delay = i915->ips.fstart;
8435         ret = ironlake_set_drps(i915, i915->ips.fstart);
8436         spin_unlock_irq(&mchdev_lock);
8437
8438         drm_dev_put(&i915->drm);
8439         return ret;
8440 }
8441 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
8442
8443 /**
8444  * Tells the intel_ips driver that the i915 driver is now loaded, if
8445  * IPS got loaded first.
8446  *
8447  * This awkward dance is so that neither module has to depend on the
8448  * other in order for IPS to do the appropriate communication of
8449  * GPU turbo limits to i915.
8450  */
8451 static void
8452 ips_ping_for_i915_load(void)
8453 {
8454         void (*link)(void);
8455
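        /*
         * symbol_get() pins the intel_ips module (if it is loaded) and
         * returns the address of its hook, or NULL; symbol_put() drops
         * that module reference again.
         */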
8456         link = symbol_get(ips_link_to_i915_driver);
8457         if (link) {
8458                 link();
8459                 symbol_put(ips_link_to_i915_driver);
8460         }
8461 }
8462
8463 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
8464 {
8465         /* We only register the i915 ips part with intel-ips once everything is
8466          * set up, to avoid intel-ips sneaking in and reading bogus values. */
8467         rcu_assign_pointer(i915_mch_dev, dev_priv);
8468
8469         ips_ping_for_i915_load();
8470 }
8471
8472 void intel_gpu_ips_teardown(void)
8473 {
8474         rcu_assign_pointer(i915_mch_dev, NULL);
8475 }
8476
8477 static void intel_init_emon(struct drm_i915_private *dev_priv)
8478 {
8479         u32 lcfuse;
8480         u8 pxw[16];
8481         int i;
8482
8483         /* Disable PMON while we program the event weights */
8484         I915_WRITE(ECR, 0);
8485         POSTING_READ(ECR);
8486
8487         /* Program energy weights for various events */
8488         I915_WRITE(SDEW, 0x15040d00);
8489         I915_WRITE(CSIEW0, 0x007f0000);
8490         I915_WRITE(CSIEW1, 0x1e220004);
8491         I915_WRITE(CSIEW2, 0x04000004);
8492
8493         for (i = 0; i < 5; i++)
8494                 I915_WRITE(PEW(i), 0);
8495         for (i = 0; i < 3; i++)
8496                 I915_WRITE(DEW(i), 0);
8497
8498         /* Program P-state weights to account for frequency power adjustment */
8499         for (i = 0; i < 16; i++) {
8500                 u32 pxvidfreq = I915_READ(PXVFREQ(i));
8501                 unsigned long freq = intel_pxfreq(pxvidfreq);
8502                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
8503                         PXVFREQ_PX_SHIFT;
8504                 unsigned long val;
8505
8506                 val = vid * vid;
8507                 val *= (freq / 1000);
8508                 val *= 255;
8509                 val /= (127*127*900);
8510                 if (val > 0xff)
8511                         DRM_ERROR("bad pxval: %ld\n", val);
8512                 pxw[i] = val;
8513         }
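        /*
         * Illustrative full-scale point, assuming intel_pxfreq() returns
         * kHz: vid = 127 at freq = 900000 (900 MHz) gives
         * val = 127 * 127 * 900 * 255 / (127 * 127 * 900) = 255, i.e. the
         * weights are normalised to 0xff at 127 vid units and 900 MHz.
         */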
8514         /* Render standby states get 0 weight */
8515         pxw[14] = 0;
8516         pxw[15] = 0;
8517
8518         for (i = 0; i < 4; i++) {
8519                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
8520                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
8521                 I915_WRITE(PXW(i), val);
8522         }
8523
8524         /* Adjust magic regs to magic values (more experimental results) */
8525         I915_WRITE(OGW0, 0);
8526         I915_WRITE(OGW1, 0);
8527         I915_WRITE(EG0, 0x00007f00);
8528         I915_WRITE(EG1, 0x0000000e);
8529         I915_WRITE(EG2, 0x000e0000);
8530         I915_WRITE(EG3, 0x68000300);
8531         I915_WRITE(EG4, 0x42000000);
8532         I915_WRITE(EG5, 0x00140031);
8533         I915_WRITE(EG6, 0);
8534         I915_WRITE(EG7, 0);
8535
8536         for (i = 0; i < 8; i++)
8537                 I915_WRITE(PXWL(i), 0);
8538
8539         /* Enable PMON + select events */
8540         I915_WRITE(ECR, 0x80000019);
8541
8542         lcfuse = I915_READ(LCFUSE02);
8543
8544         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
8545 }
8546
8547 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
8548 {
8549         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8550
8551         /*
8552          * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
8553          * requirement.
8554          */
8555         if (!sanitize_rc6(dev_priv)) {
8556                 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
8557                 pm_runtime_get(&dev_priv->drm.pdev->dev);
8558         }
8559
8560         /* Initialize RPS limits (for userspace) */
8561         if (IS_CHERRYVIEW(dev_priv))
8562                 cherryview_init_gt_powersave(dev_priv);
8563         else if (IS_VALLEYVIEW(dev_priv))
8564                 valleyview_init_gt_powersave(dev_priv);
8565         else if (INTEL_GEN(dev_priv) >= 6)
8566                 gen6_init_rps_frequencies(dev_priv);
8567
8568         /* Derive initial user preferences/limits from the hardware limits */
8569         rps->max_freq_softlimit = rps->max_freq;
8570         rps->min_freq_softlimit = rps->min_freq;
8571
8572         /* After setting max-softlimit, find the overclock max freq */
8573         if (IS_GEN(dev_priv, 6) ||
8574             IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
8575                 u32 params = 0;
8576
8577                 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS,
8578                                        &params, NULL);
8579                 if (params & BIT(31)) { /* OC supported */
8580                         DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
8581                                          (rps->max_freq & 0xff) * 50,
8582                                          (params & 0xff) * 50);
8583                         rps->max_freq = params & 0xff;
8584                 }
8585         }
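
        /*
         * Example decode (made-up value): params = 0x80000028 has BIT(31)
         * set, so overclocking is supported, and the low byte 0x28 (40)
         * becomes an overclock ceiling of 40 * 50 = 2000 MHz.
         */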
8586
8587         /* Finally allow us to boost to max by default */
8588         rps->boost_freq = rps->max_freq;
8589         rps->idle_freq = rps->min_freq;
8590         rps->cur_freq = rps->idle_freq;
8591 }
8592
8593 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
8594 {
8595         if (IS_VALLEYVIEW(dev_priv))
8596                 valleyview_cleanup_gt_powersave(dev_priv);
8597
8598         if (!HAS_RC6(dev_priv))
8599                 pm_runtime_put(&dev_priv->drm.pdev->dev);
8600 }
8601
8602 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8603 {
8604         dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8605         dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
8606         intel_disable_gt_powersave(dev_priv);
8607
8608         if (INTEL_GEN(dev_priv) >= 11)
8609                 gen11_reset_rps_interrupts(dev_priv);
8610         else if (INTEL_GEN(dev_priv) >= 6)
8611                 gen6_reset_rps_interrupts(dev_priv);
8612 }
8613
8614 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8615 {
8616         lockdep_assert_held(&i915->gt_pm.rps.lock);
8617
8618         if (!i915->gt_pm.llc_pstate.enabled)
8619                 return;
8620
8621         /* Currently there is no HW configuration to be done to disable. */
8622
8623         i915->gt_pm.llc_pstate.enabled = false;
8624 }
8625
8626 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8627 {
8628         lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
8629
8630         if (!dev_priv->gt_pm.rc6.enabled)
8631                 return;
8632
8633         if (INTEL_GEN(dev_priv) >= 9)
8634                 gen9_disable_rc6(dev_priv);
8635         else if (IS_CHERRYVIEW(dev_priv))
8636                 cherryview_disable_rc6(dev_priv);
8637         else if (IS_VALLEYVIEW(dev_priv))
8638                 valleyview_disable_rc6(dev_priv);
8639         else if (INTEL_GEN(dev_priv) >= 6)
8640                 gen6_disable_rc6(dev_priv);
8641
8642         dev_priv->gt_pm.rc6.enabled = false;
8643 }
8644
8645 static void intel_disable_rps(struct drm_i915_private *dev_priv)
8646 {
8647         lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
8648
8649         if (!dev_priv->gt_pm.rps.enabled)
8650                 return;
8651
8652         if (INTEL_GEN(dev_priv) >= 9)
8653                 gen9_disable_rps(dev_priv);
8654         else if (IS_CHERRYVIEW(dev_priv))
8655                 cherryview_disable_rps(dev_priv);
8656         else if (IS_VALLEYVIEW(dev_priv))
8657                 valleyview_disable_rps(dev_priv);
8658         else if (INTEL_GEN(dev_priv) >= 6)
8659                 gen6_disable_rps(dev_priv);
8660         else if (IS_IRONLAKE_M(dev_priv))
8661                 ironlake_disable_drps(dev_priv);
8662
8663         dev_priv->gt_pm.rps.enabled = false;
8664 }
8665
8666 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8667 {
8668         mutex_lock(&dev_priv->gt_pm.rps.lock);
8669
8670         intel_disable_rc6(dev_priv);
8671         intel_disable_rps(dev_priv);
8672         if (HAS_LLC(dev_priv))
8673                 intel_disable_llc_pstate(dev_priv);
8674
8675         mutex_unlock(&dev_priv->gt_pm.rps.lock);
8676 }
8677
8678 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8679 {
8680         lockdep_assert_held(&i915->gt_pm.rps.lock);
8681
8682         if (i915->gt_pm.llc_pstate.enabled)
8683                 return;
8684
8685         gen6_update_ring_freq(i915);
8686
8687         i915->gt_pm.llc_pstate.enabled = true;
8688 }
8689
8690 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
8691 {
8692         lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
8693
8694         if (dev_priv->gt_pm.rc6.enabled)
8695                 return;
8696
8697         if (IS_CHERRYVIEW(dev_priv))
8698                 cherryview_enable_rc6(dev_priv);
8699         else if (IS_VALLEYVIEW(dev_priv))
8700                 valleyview_enable_rc6(dev_priv);
8701         else if (INTEL_GEN(dev_priv) >= 11)
8702                 gen11_enable_rc6(dev_priv);
8703         else if (INTEL_GEN(dev_priv) >= 9)
8704                 gen9_enable_rc6(dev_priv);
8705         else if (IS_BROADWELL(dev_priv))
8706                 gen8_enable_rc6(dev_priv);
8707         else if (INTEL_GEN(dev_priv) >= 6)
8708                 gen6_enable_rc6(dev_priv);
8709
8710         dev_priv->gt_pm.rc6.enabled = true;
8711 }
8712
8713 static void intel_enable_rps(struct drm_i915_private *dev_priv)
8714 {
8715         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8716
8717         lockdep_assert_held(&rps->lock);
8718
8719         if (rps->enabled)
8720                 return;
8721
8722         if (IS_CHERRYVIEW(dev_priv)) {
8723                 cherryview_enable_rps(dev_priv);
8724         } else if (IS_VALLEYVIEW(dev_priv)) {
8725                 valleyview_enable_rps(dev_priv);
8726         } else if (INTEL_GEN(dev_priv) >= 9) {
8727                 gen9_enable_rps(dev_priv);
8728         } else if (IS_BROADWELL(dev_priv)) {
8729                 gen8_enable_rps(dev_priv);
8730         } else if (INTEL_GEN(dev_priv) >= 6) {
8731                 gen6_enable_rps(dev_priv);
8732         } else if (IS_IRONLAKE_M(dev_priv)) {
8733                 ironlake_enable_drps(dev_priv);
8734                 intel_init_emon(dev_priv);
8735         }
8736
8737         WARN_ON(rps->max_freq < rps->min_freq);
8738         WARN_ON(rps->idle_freq > rps->max_freq);
8739
8740         WARN_ON(rps->efficient_freq < rps->min_freq);
8741         WARN_ON(rps->efficient_freq > rps->max_freq);
8742
8743         rps->enabled = true;
8744 }
8745
8746 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8747 {
8748         /* Powersaving is controlled by the host when inside a VM */
8749         if (intel_vgpu_active(dev_priv))
8750                 return;
8751
8752         mutex_lock(&dev_priv->gt_pm.rps.lock);
8753
8754         if (HAS_RC6(dev_priv))
8755                 intel_enable_rc6(dev_priv);
8756         if (HAS_RPS(dev_priv))
8757                 intel_enable_rps(dev_priv);
8758         if (HAS_LLC(dev_priv))
8759                 intel_enable_llc_pstate(dev_priv);
8760
8761         mutex_unlock(&dev_priv->gt_pm.rps.lock);
8762 }
8763
8764 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
8765 {
8766         /*
8767          * On Ibex Peak and Cougar Point, we need to disable clock
8768          * gating for the panel power sequencer or it will fail to
8769          * start up when no ports are active.
8770          */
8771         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8772 }
8773
8774 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
8775 {
8776         enum pipe pipe;
8777
8778         for_each_pipe(dev_priv, pipe) {
8779                 I915_WRITE(DSPCNTR(pipe),
8780                            I915_READ(DSPCNTR(pipe)) |
8781                            DISPPLANE_TRICKLE_FEED_DISABLE);
8782
8783                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8784                 POSTING_READ(DSPSURF(pipe));
8785         }
8786 }
8787
8788 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
8789 {
8790         u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8791
8792         /*
8793          * Required for FBC
8794          * WaFbcDisableDpfcClockGating:ilk
8795          */
8796         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8797                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8798                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
8799
8800         I915_WRITE(PCH_3DCGDIS0,
8801                    MARIUNIT_CLOCK_GATE_DISABLE |
8802                    SVSMUNIT_CLOCK_GATE_DISABLE);
8803         I915_WRITE(PCH_3DCGDIS1,
8804                    VFMUNIT_CLOCK_GATE_DISABLE);
8805
8806         /*
8807          * According to the spec the following bits should be set in
8808          * order to enable memory self-refresh
8809          * The bit 22/21 of 0x42004
8810          * The bit 5 of 0x42020
8811          * The bit 15 of 0x45000
8812          */
8813         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8814                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
8815                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
8816         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
8817         I915_WRITE(DISP_ARB_CTL,
8818                    (I915_READ(DISP_ARB_CTL) |
8819                     DISP_FBC_WM_DIS));
8820
8821         /*
8822          * Based on the document from hardware guys the following bits
8823          * should be set unconditionally in order to enable FBC.
8824          * The bit 22 of 0x42000
8825          * The bit 22 of 0x42004
8826          * The bit 7,8,9 of 0x42020.
8827          */
8828         if (IS_IRONLAKE_M(dev_priv)) {
8829                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8830                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8831                            I915_READ(ILK_DISPLAY_CHICKEN1) |
8832                            ILK_FBCQ_DIS);
8833                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8834                            I915_READ(ILK_DISPLAY_CHICKEN2) |
8835                            ILK_DPARB_GATE);
8836         }
8837
8838         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8839
8840         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8841                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8842                    ILK_ELPIN_409_SELECT);
8843         I915_WRITE(_3D_CHICKEN2,
8844                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8845                    _3D_CHICKEN2_WM_READ_PIPELINED);
8846
8847         /* WaDisableRenderCachePipelinedFlush:ilk */
8848         I915_WRITE(CACHE_MODE_0,
8849                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8850
8851         /* WaDisable_RenderCache_OperationalFlush:ilk */
8852         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8853
8854         g4x_disable_trickle_feed(dev_priv);
8855
8856         ibx_init_clock_gating(dev_priv);
8857 }
8858
8859 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
8860 {
8861         int pipe;
8862         u32 val;
8863
8864         /*
8865          * On Ibex Peak and Cougar Point, we need to disable clock
8866          * gating for the panel power sequencer or it will fail to
8867          * start up when no ports are active.
8868          */
8869         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8870                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8871                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
8872         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8873                    DPLS_EDP_PPS_FIX_DIS);
8874         /* The below fixes a weird display corruption (a few pixels shifted
8875          * downward) seen only on the LVDS panels of some HP Ivy Bridge laptops.
8876          */
8877         for_each_pipe(dev_priv, pipe) {
8878                 val = I915_READ(TRANS_CHICKEN2(pipe));
8879                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8880                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8881                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
8882                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8883                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8884                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8885                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
8886                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8887         }
8888         /* WADP0ClockGatingDisable */
8889         for_each_pipe(dev_priv, pipe) {
8890                 I915_WRITE(TRANS_CHICKEN1(pipe),
8891                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8892         }
8893 }
8894
8895 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
8896 {
8897         u32 tmp;
8898
8899         tmp = I915_READ(MCH_SSKPD);
8900         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8901                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x; this can cause underruns.\n",
8902                               tmp);
8903 }
8904
8905 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
8906 {
8907         u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8908
8909         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8910
8911         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8912                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8913                    ILK_ELPIN_409_SELECT);
8914
8915         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8916         I915_WRITE(_3D_CHICKEN,
8917                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8918
8919         /* WaDisable_RenderCache_OperationalFlush:snb */
8920         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8921
8922         /*
8923          * BSpec recommends 8x4 when MSAA is used;
8924          * however, in practice 16x4 seems fastest.
8925          *
8926          * Note that PS/WM thread counts depend on the WIZ hashing
8927          * disable bit, which we don't touch here, but it's good
8928          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8929          */
8930         I915_WRITE(GEN6_GT_MODE,
8931                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8932
8933         I915_WRITE(CACHE_MODE_0,
8934                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
8935
8936         I915_WRITE(GEN6_UCGCTL1,
8937                    I915_READ(GEN6_UCGCTL1) |
8938                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8939                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8940
8941         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8942          * gating disable must be set.  Failure to set it results in
8943          * flickering pixels due to Z write ordering failures after
8944          * some amount of runtime in the Mesa "fire" demo, and Unigine
8945          * Sanctuary and Tropics, and apparently anything else with
8946          * alpha test or pixel discard.
8947          *
8948          * According to the spec, bit 11 (RCCUNIT) must also be set,
8949          * but we didn't debug actual testcases to find it out.
8950          *
8951          * WaDisableRCCUnitClockGating:snb
8952          * WaDisableRCPBUnitClockGating:snb
8953          */
8954         I915_WRITE(GEN6_UCGCTL2,
8955                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8956                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8957
8958         /* WaStripsFansDisableFastClipPerformanceFix:snb */
8959         I915_WRITE(_3D_CHICKEN3,
8960                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
8961
8962         /*
8963          * Bspec says:
8964          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8965          * 3DSTATE_SF number of SF output attributes is more than 16."
8966          */
8967         I915_WRITE(_3D_CHICKEN3,
8968                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8969
8970         /*
8971          * According to the spec the following bits should be
8972          * set in order to enable memory self-refresh and fbc:
8973          * The bit21 and bit22 of 0x42000
8974          * The bit21 and bit22 of 0x42004
8975          * The bit5 and bit7 of 0x42020
8976          * The bit14 of 0x70180
8977          * The bit14 of 0x71180
8978          *
8979          * WaFbcAsynchFlipDisableFbcQueue:snb
8980          */
8981         I915_WRITE(ILK_DISPLAY_CHICKEN1,
8982                    I915_READ(ILK_DISPLAY_CHICKEN1) |
8983                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8984         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8985                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8986                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
8987         I915_WRITE(ILK_DSPCLK_GATE_D,
8988                    I915_READ(ILK_DSPCLK_GATE_D) |
8989                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
8990                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
8991
8992         g4x_disable_trickle_feed(dev_priv);
8993
8994         cpt_init_clock_gating(dev_priv);
8995
8996         gen6_check_mch_setup(dev_priv);
8997 }
8998
8999 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
9000 {
9001         u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
9002
9003         /*
9004          * WaVSThreadDispatchOverride:ivb,vlv
9005          *
9006          * This actually overrides the dispatch
9007          * mode for all thread types.
9008          */
9009         reg &= ~GEN7_FF_SCHED_MASK;
9010         reg |= GEN7_FF_TS_SCHED_HW;
9011         reg |= GEN7_FF_VS_SCHED_HW;
9012         reg |= GEN7_FF_DS_SCHED_HW;
9013
9014         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
9015 }
9016
9017 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
9018 {
9019         /*
9020          * TODO: this bit should only be enabled when really needed, then
9021          * disabled when not needed anymore in order to save power.
9022          */
9023         if (HAS_PCH_LPT_LP(dev_priv))
9024                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
9025                            I915_READ(SOUTH_DSPCLK_GATE_D) |
9026                            PCH_LP_PARTITION_LEVEL_DISABLE);
9027
9028         /* WADPOClockGatingDisable:hsw */
9029         I915_WRITE(TRANS_CHICKEN1(PIPE_A),
9030                    I915_READ(TRANS_CHICKEN1(PIPE_A)) |
9031                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
9032 }
9033
9034 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
9035 {
9036         if (HAS_PCH_LPT_LP(dev_priv)) {
9037                 u32 val = I915_READ(SOUTH_DSPCLK_GATE_D);
9038
9039                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
9040                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
9041         }
9042 }
9043
9044 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
9045                                    int general_prio_credits,
9046                                    int high_prio_credits)
9047 {
9048         u32 misccpctl;
9049         u32 val;
9050
9051         /* WaTempDisableDOPClkGating:bdw */
9052         misccpctl = I915_READ(GEN7_MISCCPCTL);
9053         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
9054
9055         val = I915_READ(GEN8_L3SQCREG1);
9056         val &= ~L3_PRIO_CREDITS_MASK;
9057         val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
9058         val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
9059         I915_WRITE(GEN8_L3SQCREG1, val);
9060
9061         /*
9062          * Wait at least 100 clocks before re-enabling clock gating.
9063          * See the definition of L3SQCREG1 in BSpec.
9064          */
9065         POSTING_READ(GEN8_L3SQCREG1);
9066         udelay(1);
9067         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
9068 }
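
/*
 * The two call sites below use this with (30, 2) for Broadwell and (38, 2)
 * for Cherryview, per their respective WaProgramL3SqcReg1Default entries.
 */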
9069
9070 static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
9071 {
9072         /* This is not a WA. Enable to reduce Sampler power */
9073         I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
9074                    I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
9075
9076         /* WaEnable32PlaneMode:icl */
9077         I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
9078                    _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE));
9079 }
9080
9081 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
9082 {
9083         if (!HAS_PCH_CNP(dev_priv))
9084                 return;
9085
9086         /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
9087         I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
9088                    CNP_PWM_CGE_GATING_DISABLE);
9089 }
9090
9091 static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
9092 {
9093         u32 val;
9094         cnp_init_clock_gating(dev_priv);
9095
9096         /* This is not a WA. Enable for better image quality */
9097         I915_WRITE(_3D_CHICKEN3,
9098                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
9099
9100         /* WaEnableChickenDCPR:cnl */
9101         I915_WRITE(GEN8_CHICKEN_DCPR_1,
9102                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
9103
9104         /* WaFbcWakeMemOn:cnl */
9105         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
9106                    DISP_FBC_MEMORY_WAKE);
9107
9108         val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
9109         /* ReadHitWriteOnlyDisable:cnl */
9110         val |= RCCUNIT_CLKGATE_DIS;
9111         /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
9112         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
9113                 val |= SARBUNIT_CLKGATE_DIS;
9114         I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
9115
9116         /* Wa_2201832410:cnl */
9117         val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
9118         val |= GWUNIT_CLKGATE_DIS;
9119         I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
9120
9121         /* WaDisableVFclkgate:cnl */
9122         /* WaVFUnitClockGatingDisable:cnl */
9123         val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
9124         val |= VFUNIT_CLKGATE_DIS;
9125         I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
9126 }
9127
9128 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
9129 {
9130         cnp_init_clock_gating(dev_priv);
9131         gen9_init_clock_gating(dev_priv);
9132
9133         /* WaFbcNukeOnHostModify:cfl */
9134         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9135                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9136 }
9137
9138 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
9139 {
9140         gen9_init_clock_gating(dev_priv);
9141
9142         /* WaDisableSDEUnitClockGating:kbl */
9143         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9144                 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9145                            GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9146
9147         /* WaDisableGamClockGating:kbl */
9148         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
9149                 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9150                            GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
9151
9152         /* WaFbcNukeOnHostModify:kbl */
9153         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9154                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9155 }
9156
9157 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
9158 {
9159         gen9_init_clock_gating(dev_priv);
9160
9161         /* WAC6entrylatency:skl */
9162         I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
9163                    FBC_LLC_FULLY_OPEN);
9164
9165         /* WaFbcNukeOnHostModify:skl */
9166         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
9167                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
9168 }
9169
9170 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
9171 {
9172         /* The GTT cache must be disabled if the system is using 2M pages. */
9173         bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
9174                                                  I915_GTT_PAGE_SIZE_2M);
9175         enum pipe pipe;
9176
9177         /* WaSwitchSolVfFArbitrationPriority:bdw */
9178         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9179
9180         /* WaPsrDPAMaskVBlankInSRD:bdw */
9181         I915_WRITE(CHICKEN_PAR1_1,
9182                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
9183
9184         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
9185         for_each_pipe(dev_priv, pipe) {
9186                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
9187                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
9188                            BDW_DPRS_MASK_VBLANK_SRD);
9189         }
9190
9191         /* WaVSRefCountFullforceMissDisable:bdw */
9192         /* WaDSRefCountFullforceMissDisable:bdw */
9193         I915_WRITE(GEN7_FF_THREAD_MODE,
9194                    I915_READ(GEN7_FF_THREAD_MODE) &
9195                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9196
9197         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9198                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9199
9200         /* WaDisableSDEUnitClockGating:bdw */
9201         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9202                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9203
9204         /* WaProgramL3SqcReg1Default:bdw */
9205         gen8_set_l3sqc_credits(dev_priv, 30, 2);
9206
9207         /* WaGttCachingOffByDefault:bdw */
9208         I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
9209
9210         /* WaKVMNotificationOnConfigChange:bdw */
9211         I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
9212                    | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
9213
9214         lpt_init_clock_gating(dev_priv);
9215
9216         /* WaDisableDopClockGating:bdw
9217          *
9218          * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
9219          * clock gating.
9220          */
9221         I915_WRITE(GEN6_UCGCTL1,
9222                    I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
9223 }
9224
9225 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
9226 {
9227         /* L3 caching of data atomics doesn't work -- disable it. */
9228         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
9229         I915_WRITE(HSW_ROW_CHICKEN3,
9230                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
9231
9232         /* This is required by WaCatErrorRejectionIssue:hsw */
9233         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9234                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9235                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9236
9237         /* WaVSRefCountFullforceMissDisable:hsw */
9238         I915_WRITE(GEN7_FF_THREAD_MODE,
9239                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
9240
9241         /* WaDisable_RenderCache_OperationalFlush:hsw */
9242         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9243
9244         /* enable HiZ Raw Stall Optimization */
9245         I915_WRITE(CACHE_MODE_0_GEN7,
9246                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9247
9248         /* WaDisable4x2SubspanOptimization:hsw */
9249         I915_WRITE(CACHE_MODE_1,
9250                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9251
9252         /*
9253          * BSpec recommends 8x4 when MSAA is used;
9254          * however, in practice 16x4 seems fastest.
9255          *
9256          * Note that PS/WM thread counts depend on the WIZ hashing
9257          * disable bit, which we don't touch here, but it's good
9258          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9259          */
9260         I915_WRITE(GEN7_GT_MODE,
9261                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9262
9263         /* WaSampleCChickenBitEnable:hsw */
9264         I915_WRITE(HALF_SLICE_CHICKEN3,
9265                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
9266
9267         /* WaSwitchSolVfFArbitrationPriority:hsw */
9268         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9269
9270         lpt_init_clock_gating(dev_priv);
9271 }
9272
9273 static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
9274 {
9275         u32 snpcr;
9276
9277         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
9278
9279         /* WaDisableEarlyCull:ivb */
9280         I915_WRITE(_3D_CHICKEN3,
9281                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9282
9283         /* WaDisableBackToBackFlipFix:ivb */
9284         I915_WRITE(IVB_CHICKEN3,
9285                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9286                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
9287
9288         /* WaDisablePSDDualDispatchEnable:ivb */
9289         if (IS_IVB_GT1(dev_priv))
9290                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9291                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9292
9293         /* WaDisable_RenderCache_OperationalFlush:ivb */
9294         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9295
9296         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
9297         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
9298                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
9299
9300         /* WaApplyL3ControlAndL3ChickenMode:ivb */
9301         I915_WRITE(GEN7_L3CNTLREG1,
9302                         GEN7_WA_FOR_GEN7_L3_CONTROL);
9303         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
9304                    GEN7_WA_L3_CHICKEN_MODE);
9305         if (IS_IVB_GT1(dev_priv))
9306                 I915_WRITE(GEN7_ROW_CHICKEN2,
9307                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9308         else {
9309                 /* must write both registers */
9310                 I915_WRITE(GEN7_ROW_CHICKEN2,
9311                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9312                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
9313                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9314         }
9315
9316         /* WaForceL3Serialization:ivb */
9317         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9318                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9319
9320         /*
9321          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9322          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
9323          */
9324         I915_WRITE(GEN6_UCGCTL2,
9325                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9326
9327         /* This is required by WaCatErrorRejectionIssue:ivb */
9328         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9329                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9330                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9331
9332         g4x_disable_trickle_feed(dev_priv);
9333
9334         gen7_setup_fixed_func_scheduler(dev_priv);
9335
9336         if (0) { /* causes HiZ corruption on ivb:gt1 */
9337                 /* enable HiZ Raw Stall Optimization */
9338                 I915_WRITE(CACHE_MODE_0_GEN7,
9339                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9340         }
9341
9342         /* WaDisable4x2SubspanOptimization:ivb */
9343         I915_WRITE(CACHE_MODE_1,
9344                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9345
9346         /*
9347          * BSpec recommends 8x4 when MSAA is used;
9348          * however, in practice 16x4 seems fastest.
9349          *
9350          * Note that PS/WM thread counts depend on the WIZ hashing
9351          * disable bit, which we don't touch here, but it's good
9352          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9353          */
9354         I915_WRITE(GEN7_GT_MODE,
9355                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9356
9357         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
9358         snpcr &= ~GEN6_MBC_SNPCR_MASK;
9359         snpcr |= GEN6_MBC_SNPCR_MED;
9360         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
9361
9362         if (!HAS_PCH_NOP(dev_priv))
9363                 cpt_init_clock_gating(dev_priv);
9364
9365         gen6_check_mch_setup(dev_priv);
9366 }
9367
9368 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
9369 {
9370         /* WaDisableEarlyCull:vlv */
9371         I915_WRITE(_3D_CHICKEN3,
9372                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9373
9374         /* WaDisableBackToBackFlipFix:vlv */
9375         I915_WRITE(IVB_CHICKEN3,
9376                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9377                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
9378
9379         /* WaPsdDispatchEnable:vlv */
9380         /* WaDisablePSDDualDispatchEnable:vlv */
9381         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9382                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
9383                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9384
9385         /* WaDisable_RenderCache_OperationalFlush:vlv */
9386         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9387
9388         /* WaForceL3Serialization:vlv */
9389         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9390                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9391
9392         /* WaDisableDopClockGating:vlv */
9393         I915_WRITE(GEN7_ROW_CHICKEN2,
9394                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9395
9396         /* This is required by WaCatErrorRejectionIssue:vlv */
9397         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9398                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9399                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9400
9401         gen7_setup_fixed_func_scheduler(dev_priv);
9402
9403         /*
9404          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9405          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
9406          */
9407         I915_WRITE(GEN6_UCGCTL2,
9408                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9409
9410         /* WaDisableL3Bank2xClockGate:vlv
9411          * Disabling L3 clock gating - MMIO 940c[25] = 1
9412          * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
9413         I915_WRITE(GEN7_UCGCTL4,
9414                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
9415
9416         /*
9417          * BSpec says this must be set, even though
9418          * WaDisable4x2SubspanOptimization isn't listed for VLV.
9419          */
9420         I915_WRITE(CACHE_MODE_1,
9421                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9422
9423         /*
9424          * BSpec recommends 8x4 when MSAA is used;
9425          * however, in practice 16x4 seems fastest.
9426          *
9427          * Note that PS/WM thread counts depend on the WIZ hashing
9428          * disable bit, which we don't touch here, but it's good
9429          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9430          */
9431         I915_WRITE(GEN7_GT_MODE,
9432                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9433
9434         /*
9435          * WaIncreaseL3CreditsForVLVB0:vlv
9436          * This is the hardware default actually.
9437          */
9438         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
9439
9440         /*
9441          * WaDisableVLVClockGating_VBIIssue:vlv
9442          * Disable clock gating on the GCFG unit to prevent a delay
9443          * in the reporting of vblank events.
9444          */
9445         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
9446 }
9447
9448 static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
9449 {
9450         /* WaVSRefCountFullforceMissDisable:chv */
9451         /* WaDSRefCountFullforceMissDisable:chv */
9452         I915_WRITE(GEN7_FF_THREAD_MODE,
9453                    I915_READ(GEN7_FF_THREAD_MODE) &
9454                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9455
9456         /* WaDisableSemaphoreAndSyncFlipWait:chv */
9457         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9458                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9459
9460         /* WaDisableCSUnitClockGating:chv */
9461         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9462                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
9463
9464         /* WaDisableSDEUnitClockGating:chv */
9465         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9466                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9467
9468         /*
9469          * WaProgramL3SqcReg1Default:chv
9470          * See gfxspecs/Related Documents/Performance Guide/
9471          * LSQC Setting Recommendations.
9472          */
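        /* 38 general priority credits, 2 high priority credits */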
9473         gen8_set_l3sqc_credits(dev_priv, 38, 2);
9474
9475         /*
9476          * GTT cache may not work with big pages, so if those
9477          * are ever enabled, the GTT cache may need to be disabled.
9478          */
9479         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
9480 }
9481
9482 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
9483 {
9484         u32 dspclk_gate;
9485
9486         I915_WRITE(RENCLK_GATE_D1, 0);
9487         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9488                    GS_UNIT_CLOCK_GATE_DISABLE |
9489                    CL_UNIT_CLOCK_GATE_DISABLE);
9490         I915_WRITE(RAMCLK_GATE_D, 0);
9491         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9492                 OVRUNIT_CLOCK_GATE_DISABLE |
9493                 OVCUNIT_CLOCK_GATE_DISABLE;
9494         if (IS_GM45(dev_priv))
9495                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9496         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
9497
9498         /* WaDisableRenderCachePipelinedFlush */
9499         I915_WRITE(CACHE_MODE_0,
9500                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
9501
9502         /* WaDisable_RenderCache_OperationalFlush:g4x */
9503         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9504
9505         g4x_disable_trickle_feed(dev_priv);
9506 }
9507
9508 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
9509 {
9510         struct intel_uncore *uncore = &dev_priv->uncore;
9511
9512         intel_uncore_write(uncore, RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9513         intel_uncore_write(uncore, RENCLK_GATE_D2, 0);
9514         intel_uncore_write(uncore, DSPCLK_GATE_D, 0);
9515         intel_uncore_write(uncore, RAMCLK_GATE_D, 0);
9516         intel_uncore_write16(uncore, DEUC, 0);
9517         intel_uncore_write(uncore,
9518                            MI_ARB_STATE,
9519                            _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9520
9521         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9522         intel_uncore_write(uncore,
9523                            CACHE_MODE_0,
9524                            _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9525 }
9526
9527 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
9528 {
9529         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9530                    I965_RCC_CLOCK_GATE_DISABLE |
9531                    I965_RCPB_CLOCK_GATE_DISABLE |
9532                    I965_ISC_CLOCK_GATE_DISABLE |
9533                    I965_FBC_CLOCK_GATE_DISABLE);
9534         I915_WRITE(RENCLK_GATE_D2, 0);
9535         I915_WRITE(MI_ARB_STATE,
9536                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9537
9538         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9539         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9540 }
9541
9542 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
9543 {
9544         u32 dstate = I915_READ(D_STATE);
9545
9546         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9547                 DSTATE_DOT_CLOCK_GATING;
9548         I915_WRITE(D_STATE, dstate);
9549
9550         if (IS_PINEVIEW(dev_priv))
9551                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
9552
9553         /* IIR "flip pending" means done if this bit is set */
9554         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
9555
9556         /* interrupts should cause a wake up from C3 */
9557         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
9558
9559         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9560         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
9561
9562         I915_WRITE(MI_ARB_STATE,
9563                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9564 }
9565
9566 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
9567 {
9568         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
9569
9570         /* interrupts should cause a wake up from C3 */
9571         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9572                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
9573
9574         I915_WRITE(MEM_MODE,
9575                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
9576 }
9577
9578 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
9579 {
9580         I915_WRITE(MEM_MODE,
9581                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9582                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
9583 }
9584
9585 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
9586 {
9587         dev_priv->display.init_clock_gating(dev_priv);
9588 }
9589
9590 void intel_suspend_hw(struct drm_i915_private *dev_priv)
9591 {
9592         if (HAS_PCH_LPT(dev_priv))
9593                 lpt_suspend_hw(dev_priv);
9594 }
9595
9596 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
9597 {
9598         DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
9599 }
9600
9601 /**
9602  * intel_init_clock_gating_hooks - setup the clock gating hooks
9603  * @dev_priv: device private
9604  *
9605  * Set up the hooks that configure which clocks of a given platform can be
9606  * gated and also apply various GT and display specific workarounds for these
9607  * platforms. Note that some GT specific workarounds are applied separately
9608  * when GPU contexts or batchbuffers start their execution.
9609  */
9610 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9611 {
9612         if (IS_GEN(dev_priv, 11))
9613                 dev_priv->display.init_clock_gating = icl_init_clock_gating;
9614         else if (IS_CANNONLAKE(dev_priv))
9615                 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
9616         else if (IS_COFFEELAKE(dev_priv))
9617                 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
9618         else if (IS_SKYLAKE(dev_priv))
9619                 dev_priv->display.init_clock_gating = skl_init_clock_gating;
9620         else if (IS_KABYLAKE(dev_priv))
9621                 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9622         else if (IS_BROXTON(dev_priv))
9623                 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9624         else if (IS_GEMINILAKE(dev_priv))
9625                 dev_priv->display.init_clock_gating = glk_init_clock_gating;
9626         else if (IS_BROADWELL(dev_priv))
9627                 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
9628         else if (IS_CHERRYVIEW(dev_priv))
9629                 dev_priv->display.init_clock_gating = chv_init_clock_gating;
9630         else if (IS_HASWELL(dev_priv))
9631                 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
9632         else if (IS_IVYBRIDGE(dev_priv))
9633                 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
9634         else if (IS_VALLEYVIEW(dev_priv))
9635                 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
9636         else if (IS_GEN(dev_priv, 6))
9637                 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
9638         else if (IS_GEN(dev_priv, 5))
9639                 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
9640         else if (IS_G4X(dev_priv))
9641                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
9642         else if (IS_I965GM(dev_priv))
9643                 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
9644         else if (IS_I965G(dev_priv))
9645                 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
9646         else if (IS_GEN(dev_priv, 3))
9647                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9648         else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9649                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
9650         else if (IS_GEN(dev_priv, 2))
9651                 dev_priv->display.init_clock_gating = i830_init_clock_gating;
9652         else {
9653                 MISSING_CASE(INTEL_DEVID(dev_priv));
9654                 dev_priv->display.init_clock_gating = nop_init_clock_gating;
9655         }
9656 }
9657
9658 /* Set up chip specific power management-related functions */
9659 void intel_init_pm(struct drm_i915_private *dev_priv)
9660 {
9661         /* For CxSR */
9662         if (IS_PINEVIEW(dev_priv))
9663                 i915_pineview_get_mem_freq(dev_priv);
9664         else if (IS_GEN(dev_priv, 5))
9665                 i915_ironlake_get_mem_freq(dev_priv);
9666
9667         /* For FIFO watermark updates */
9668         if (INTEL_GEN(dev_priv) >= 9) {
9669                 skl_setup_wm_latency(dev_priv);
9670                 dev_priv->display.initial_watermarks = skl_initial_wm;
9671                 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
9672                 dev_priv->display.compute_global_watermarks = skl_compute_wm;
9673         } else if (HAS_PCH_SPLIT(dev_priv)) {
9674                 ilk_setup_wm_latency(dev_priv);
9675
9676                 if ((IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[1] &&
9677                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
9678                     (!IS_GEN(dev_priv, 5) && dev_priv->wm.pri_latency[0] &&
9679                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
9680                         dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
9681                         dev_priv->display.compute_intermediate_wm =
9682                                 ilk_compute_intermediate_wm;
9683                         dev_priv->display.initial_watermarks =
9684                                 ilk_initial_watermarks;
9685                         dev_priv->display.optimize_watermarks =
9686                                 ilk_optimize_watermarks;
9687                 } else {
9688                         DRM_DEBUG_KMS("Failed to read display plane latency. "
9689                                       "Disabling CxSR\n");
9690                 }
9691         } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9692                 vlv_setup_wm_latency(dev_priv);
9693                 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
9694                 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
9695                 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
9696                 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
9697                 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
9698         } else if (IS_G4X(dev_priv)) {
9699                 g4x_setup_wm_latency(dev_priv);
9700                 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9701                 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9702                 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9703                 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9704         } else if (IS_PINEVIEW(dev_priv)) {
9705                 if (!intel_get_cxsr_latency(!IS_MOBILE(dev_priv),
9706                                             dev_priv->is_ddr3,
9707                                             dev_priv->fsb_freq,
9708                                             dev_priv->mem_freq)) {
9709                         DRM_INFO("failed to find known CxSR latency "
9710                                  "(found ddr%s fsb freq %d, mem freq %d), "
9711                                  "disabling CxSR\n",
9712                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
9713                                  dev_priv->fsb_freq, dev_priv->mem_freq);
9714                         /* Disable CxSR and never update its watermark again */
9715                         intel_set_memory_cxsr(dev_priv, false);
9716                         dev_priv->display.update_wm = NULL;
9717                 } else
9718                         dev_priv->display.update_wm = pineview_update_wm;
9719         } else if (IS_GEN(dev_priv, 4)) {
9720                 dev_priv->display.update_wm = i965_update_wm;
9721         } else if (IS_GEN(dev_priv, 3)) {
9722                 dev_priv->display.update_wm = i9xx_update_wm;
9723                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
9724         } else if (IS_GEN(dev_priv, 2)) {
9725                 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
9726                         dev_priv->display.update_wm = i845_update_wm;
9727                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
9728                 } else {
9729                         dev_priv->display.update_wm = i9xx_update_wm;
9730                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
9731                 }
9732         } else {
9733                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
9734         }
9735 }
9736
9737 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9738 {
9739         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9740
9741         /*
9742          * N = val - 0xb7
9743          * Slow = Fast = GPLL ref * N
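         * GPLL ref is in kHz, so dividing by 1000 below yields MHz.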
9744          */
9745         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
9746 }
9747
9748 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
9749 {
9750         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9751
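        /* Inverse of byt_gpu_freq(): opcode = MHz * 1000 / GPLL ref + 0xb7 */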
9752         return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
9753 }
9754
9755 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
9756 {
9757         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9758
9759         /*
9760          * N = val / 2
9761          * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
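         * The 2 * 2 * 1000 divisor below folds in N = val / 2, the
         * CU2x -> CU halving, and the kHz -> MHz conversion.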
9762          */
9763         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
9764 }
9765
9766 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
9767 {
9768         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9769
9770         /* CHV needs even values */
9771         return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
9772 }
9773
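/*
 * Convert an RPS frequency opcode to MHz. Gen9+ uses units of
 * GT_FREQUENCY_MULTIPLIER / GEN9_FREQ_SCALER (50/3 MHz), older
 * big-core parts use plain 50 MHz units, and VLV/CHV derive the
 * frequency from the GPLL reference clock.
 */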
9774 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
9775 {
9776         if (INTEL_GEN(dev_priv) >= 9)
9777                 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9778                                          GEN9_FREQ_SCALER);
9779         else if (IS_CHERRYVIEW(dev_priv))
9780                 return chv_gpu_freq(dev_priv, val);
9781         else if (IS_VALLEYVIEW(dev_priv))
9782                 return byt_gpu_freq(dev_priv, val);
9783         else
9784                 return val * GT_FREQUENCY_MULTIPLIER;
9785 }
9786
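/* Inverse of intel_gpu_freq(): convert MHz back to platform opcode units. */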
9787 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9788 {
9789         if (INTEL_GEN(dev_priv) >= 9)
9790                 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9791                                          GT_FREQUENCY_MULTIPLIER);
9792         else if (IS_CHERRYVIEW(dev_priv))
9793                 return chv_freq_opcode(dev_priv, val);
9794         else if (IS_VALLEYVIEW(dev_priv))
9795                 return byt_freq_opcode(dev_priv, val);
9796         else
9797                 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
9798 }
9799
9800 void intel_pm_setup(struct drm_i915_private *dev_priv)
9801 {
9802         mutex_init(&dev_priv->gt_pm.rps.lock);
9803         mutex_init(&dev_priv->gt_pm.rps.power.mutex);
9804
9805         atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
9806
9807         dev_priv->runtime_pm.suspended = false;
9808         atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
9809 }
9810
9811 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9812                              const i915_reg_t reg)
9813 {
9814         u32 lower, upper, tmp;
9815         int loop = 2;
9816
9817         /*
9818          * The registers accessed do not need forcewake. We borrow the
9819          * uncore lock to prevent concurrent access to the range register.
9820          */
9821         lockdep_assert_held(&dev_priv->uncore.lock);
9822
9823         /*
9824          * VLV and CHV residency counters are 40 bits in width.
9825          * With a control bit, we can choose between the upper and lower
9826          * 32 bit windows into this counter.
9827          *
9828          * Although we always use the counter in high-range mode elsewhere,
9829          * userspace may attempt to read the value before rc6 is initialised,
9830          * before we have set the default VLV_COUNTER_CONTROL value. So always
9831          * set the high bit to be safe.
9832          */
9833         I915_WRITE_FW(VLV_COUNTER_CONTROL,
9834                       _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9835         upper = I915_READ_FW(reg);
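        /*
         * Read upper, then lower, then re-read upper: if the upper half
         * changed in between, a carry propagated mid-read, so retry (at
         * most twice) until both halves describe the same 40 bit value.
         */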
9836         do {
9837                 tmp = upper;
9838
9839                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9840                               _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9841                 lower = I915_READ_FW(reg);
9842
9843                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9844                               _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9845                 upper = I915_READ_FW(reg);
9846         } while (upper != tmp && --loop);
9847
9848         /*
9849          * Everywhere else we always use VLV_COUNTER_CONTROL with the
9850          * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9851          * now.
9852          */
9853
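        /*
         * The high range window exposes bits [39:8] of the 40 bit counter,
         * hence the shift by 8 before merging with the low 32 bits.
         */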
9854         return lower | (u64)upper << 8;
9855 }
9856
9857 u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
9858                            const i915_reg_t reg)
9859 {
9860         struct intel_uncore *uncore = &dev_priv->uncore;
9861         u64 time_hw, prev_hw, overflow_hw;
9862         unsigned int fw_domains;
9863         unsigned long flags;
9864         unsigned int i;
9865         u32 mul, div;
9866
9867         if (!HAS_RC6(dev_priv))
9868                 return 0;
9869
9870         /*
9871          * Store previous hw counter values for counter wrap-around handling.
9872          *
9873          * There are only four interesting registers and they live next to each
9874          * other so we can use the relative address, compared to the smallest
9875          * one as the index into driver storage.
9876          */
9877         i = (i915_mmio_reg_offset(reg) -
9878              i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
9879         if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
9880                 return 0;
9881
9882         fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);
9883
9884         spin_lock_irqsave(&uncore->lock, flags);
9885         intel_uncore_forcewake_get__locked(uncore, fw_domains);
9886
9887         /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9888         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9889                 mul = 1000000;
9890                 div = dev_priv->czclk_freq;
9891                 overflow_hw = BIT_ULL(40);
9892                 time_hw = vlv_residency_raw(dev_priv, reg);
9893         } else {
9894                 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
9895                 if (IS_GEN9_LP(dev_priv)) {
9896                         mul = 10000;
9897                         div = 12;
9898                 } else {
9899                         mul = 1280;
9900                         div = 1;
9901                 }
9902
9903                 overflow_hw = BIT_ULL(32);
9904                 time_hw = intel_uncore_read_fw(uncore, reg);
9905         }
9906
9907         /*
9908          * Counter wrap handling.
9909          *
9910          * This relies on a sufficient frequency of queries; otherwise the
9911          * counters can still wrap.
9912          */
9913         prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
9914         dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
9915
9916         /* RC6 delta from last sample. */
9917         if (time_hw >= prev_hw)
9918                 time_hw -= prev_hw;
9919         else
9920                 time_hw += overflow_hw - prev_hw;
9921
9922         /* Add delta to RC6 extended raw driver copy. */
9923         time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
9924         dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
9925
9926         intel_uncore_forcewake_put__locked(uncore, fw_domains);
9927         spin_unlock_irqrestore(&uncore->lock, flags);
9928
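        /* Scale the accumulated raw count to nanoseconds. */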
9929         return mul_u64_u32_div(time_hw, mul, div);
9930 }
9931
9932 u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
9933                            i915_reg_t reg)
9934 {
9935         return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000);
9936 }
9937
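/*
 * Extract the current actual GPU frequency (CAGF) field from a raw
 * RPSTAT readout; the field position and width vary by platform.
 */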
9938 u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
9939 {
9940         u32 cagf;
9941
9942         if (INTEL_GEN(dev_priv) >= 9)
9943                 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
9944         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
9945                 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
9946         else
9947                 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
9948
9949         return cagf;
9950 }