drm/i915: add intel_display_power_enabled

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a1794c6df1bf1b80917f60e2145c39ae741aa478..9b3e90e10ed7af64c34b0ed65e930e046ef68963 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -113,8 +113,8 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
        fbc_ctl |= obj->fence_reg;
        I915_WRITE(FBC_CONTROL, fbc_ctl);
 
-       DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %d, ",
-                     cfb_pitch, crtc->y, intel_crtc->plane);
+       DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c, ",
+                     cfb_pitch, crtc->y, plane_name(intel_crtc->plane));
 }
 
 static bool i8xx_fbc_enabled(struct drm_device *dev)
@@ -148,7 +148,7 @@ static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
        /* enable it... */
        I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);
 
-       DRM_DEBUG_KMS("enabled fbc on plane %d\n", intel_crtc->plane);
+       DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
 }
 
 static void g4x_disable_fbc(struct drm_device *dev)
@@ -228,7 +228,7 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
                sandybridge_blit_fbc_update(dev);
        }
 
-       DRM_DEBUG_KMS("enabled fbc on plane %d\n", intel_crtc->plane);
+       DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
 }
 
 static void ironlake_disable_fbc(struct drm_device *dev)
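
These hunks switch the FBC debug messages from printing a raw plane index (%d) to a letter (%c). A minimal sketch of the helpers assumed here, which simply offset the zero-based enums into ASCII (the real definitions live in i915_drv.h):

    /* Sketch, assuming the usual i915 convention: pipe/plane 0, 1, 2
     * print as 'A', 'B', 'C'. */
    #define pipe_name(p)  ((p) + 'A')
    #define plane_name(p) ((p) + 'A')
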
@@ -481,8 +481,6 @@ void intel_update_fbc(struct drm_device *dev)
                goto out_disable;
 
        if (i915_gem_stolen_setup_compression(dev, intel_fb->obj->base.size)) {
-               DRM_INFO("not enough stolen space for compressed buffer (need %zd bytes), disabling\n", intel_fb->obj->base.size);
-               DRM_INFO("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
                DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
                dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
                goto out_disable;
@@ -2146,15 +2144,15 @@ static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
                                            &sandybridge_display_wm_info,
                                            latency, &sprite_wm);
        if (!ret) {
-               DRM_DEBUG_KMS("failed to compute sprite wm for pipe %d\n",
-                             pipe);
+               DRM_DEBUG_KMS("failed to compute sprite wm for pipe %c\n",
+                             pipe_name(pipe));
                return;
        }
 
        val = I915_READ(reg);
        val &= ~WM0_PIPE_SPRITE_MASK;
        I915_WRITE(reg, val | (sprite_wm << WM0_PIPE_SPRITE_SHIFT));
-       DRM_DEBUG_KMS("sprite watermarks For pipe %d - %d\n", pipe, sprite_wm);
+       DRM_DEBUG_KMS("sprite watermarks For pipe %c - %d\n", pipe_name(pipe), sprite_wm);
 
 
        ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
@@ -2163,8 +2161,8 @@ static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
                                              SNB_READ_WM1_LATENCY() * 500,
                                              &sprite_wm);
        if (!ret) {
-               DRM_DEBUG_KMS("failed to compute sprite lp1 wm on pipe %d\n",
-                             pipe);
+               DRM_DEBUG_KMS("failed to compute sprite lp1 wm on pipe %c\n",
+                             pipe_name(pipe));
                return;
        }
        I915_WRITE(WM1S_LP_ILK, sprite_wm);
@@ -2179,8 +2177,8 @@ static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
                                              SNB_READ_WM2_LATENCY() * 500,
                                              &sprite_wm);
        if (!ret) {
-               DRM_DEBUG_KMS("failed to compute sprite lp2 wm on pipe %d\n",
-                             pipe);
+               DRM_DEBUG_KMS("failed to compute sprite lp2 wm on pipe %c\n",
+                             pipe_name(pipe));
                return;
        }
        I915_WRITE(WM2S_LP_IVB, sprite_wm);
@@ -2191,8 +2189,8 @@ static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
                                              SNB_READ_WM3_LATENCY() * 500,
                                              &sprite_wm);
        if (!ret) {
-               DRM_DEBUG_KMS("failed to compute sprite lp3 wm on pipe %d\n",
-                             pipe);
+               DRM_DEBUG_KMS("failed to compute sprite lp3 wm on pipe %c\n",
+                             pipe_name(pipe));
                return;
        }
        I915_WRITE(WM3S_LP_IVB, sprite_wm);
@@ -2460,10 +2458,14 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
        if (val == dev_priv->rps.cur_delay)
                return;
 
-       I915_WRITE(GEN6_RPNSWREQ,
-                  GEN6_FREQUENCY(val) |
-                  GEN6_OFFSET(0) |
-                  GEN6_AGGRESSIVE_TURBO);
+       if (IS_HASWELL(dev))
+               I915_WRITE(GEN6_RPNSWREQ,
+                          HSW_FREQUENCY(val));
+       else
+               I915_WRITE(GEN6_RPNSWREQ,
+                          GEN6_FREQUENCY(val) |
+                          GEN6_OFFSET(0) |
+                          GEN6_AGGRESSIVE_TURBO);
 
        /* Make sure we continue to get interrupts
         * until we hit the minimum or maximum frequencies.
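
Haswell moved the RPNSWREQ frequency field and dropped the gen6-style offset/turbo bits, hence the split write above. A sketch of the encodings this hunk assumes; the shift values are taken to match i915_reg.h of this period and should be verified there:

    #define GEN6_FREQUENCY(x)     ((x) << 25)  /* SNB/IVB: ratio in bits 31:25 */
    #define GEN6_OFFSET(x)        ((x) << 19)
    #define GEN6_AGGRESSIVE_TURBO (0 << 15)
    #define HSW_FREQUENCY(x)      ((x) << 24)  /* HSW: ratio in bits 31:24 */
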
@@ -2477,6 +2479,52 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
        trace_intel_gpu_freq_change(val * 50);
 }
 
+void valleyview_set_rps(struct drm_device *dev, u8 val)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       unsigned long timeout = jiffies + msecs_to_jiffies(10);
+       u32 limits = gen6_rps_limits(dev_priv, &val);
+       u32 pval;
+
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+       WARN_ON(val > dev_priv->rps.max_delay);
+       WARN_ON(val < dev_priv->rps.min_delay);
+
+       DRM_DEBUG_DRIVER("gpu freq request from %d to %d\n",
+                        vlv_gpu_freq(dev_priv->mem_freq,
+                                     dev_priv->rps.cur_delay),
+                        vlv_gpu_freq(dev_priv->mem_freq, val));
+
+       if (val == dev_priv->rps.cur_delay)
+               return;
+
+       valleyview_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+
+       do {
+               valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval);
+               if (time_after(jiffies, timeout)) {
+                       DRM_DEBUG_DRIVER("timed out waiting for Punit\n");
+                       break;
+               }
+               udelay(10);
+       } while (pval & 1);
+
+       valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval);
+       if ((pval >> 8) != val)
+               DRM_DEBUG_DRIVER("punit overrode freq: %d requested, but got %d\n",
+                         val, pval >> 8);
+
+       /* Make sure we continue to get interrupts
+        * until we hit the minimum or maximum frequencies.
+        */
+       I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits);
+
+       dev_priv->rps.cur_delay = pval >> 8;
+
+       trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv->mem_freq, val));
+}
+
+
 static void gen6_disable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
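
valleyview_set_rps() above writes the requested opcode to PUNIT_REG_GPU_FREQ_REQ, then polls PUNIT_REG_GPU_FREQ_STS until the busy bit clears. Hypothetical decode helpers, derived only from how this code reads the status word (bit 0 = request busy, bits 15:8 = granted frequency opcode):

    static inline bool vlv_freq_req_busy(u32 sts) { return sts & 1; }
    static inline u8 vlv_freq_granted(u32 sts)    { return (sts >> 8) & 0xff; }

The granted opcode can differ from the request when the Punit overrides it, which is why cur_delay is taken from the status read rather than from val.
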
@@ -2497,6 +2545,25 @@ static void gen6_disable_rps(struct drm_device *dev)
        I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 }
 
+static void valleyview_disable_rps(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       I915_WRITE(GEN6_RC_CONTROL, 0);
+       I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
+       I915_WRITE(GEN6_PMIER, 0);
+       /* Complete PM interrupt masking here doesn't race with the rps work
+        * item again unmasking PM interrupts because that is using a different
+        * register (PMIMR) to mask PM interrupts. The only risk is in leaving
+        * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
+
+       spin_lock_irq(&dev_priv->rps.lock);
+       dev_priv->rps.pm_iir = 0;
+       spin_unlock_irq(&dev_priv->rps.lock);
+
+       I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
+}
+
 int intel_enable_rc6(const struct drm_device *dev)
 {
        /* Respect the kernel parameter if it is set */
@@ -2554,8 +2621,8 @@ static void gen6_enable_rps(struct drm_device *dev)
        rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
        gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
-       /* In units of 100MHz */
-       dev_priv->rps.max_delay = rp_state_cap & 0xff;
+       /* In units of 50MHz */
+       dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
        dev_priv->rps.min_delay = (rp_state_cap & 0xff0000) >> 16;
        dev_priv->rps.cur_delay = 0;
 
@@ -2601,12 +2668,19 @@ static void gen6_enable_rps(struct drm_device *dev)
                   GEN6_RC_CTL_EI_MODE(1) |
                   GEN6_RC_CTL_HW_ENABLE);
 
-       I915_WRITE(GEN6_RPNSWREQ,
-                  GEN6_FREQUENCY(10) |
-                  GEN6_OFFSET(0) |
-                  GEN6_AGGRESSIVE_TURBO);
-       I915_WRITE(GEN6_RC_VIDEO_FREQ,
-                  GEN6_FREQUENCY(12));
+       if (IS_HASWELL(dev)) {
+               I915_WRITE(GEN6_RPNSWREQ,
+                          HSW_FREQUENCY(10));
+               I915_WRITE(GEN6_RC_VIDEO_FREQ,
+                          HSW_FREQUENCY(12));
+       } else {
+               I915_WRITE(GEN6_RPNSWREQ,
+                          GEN6_FREQUENCY(10) |
+                          GEN6_OFFSET(0) |
+                          GEN6_AGGRESSIVE_TURBO);
+               I915_WRITE(GEN6_RC_VIDEO_FREQ,
+                          GEN6_FREQUENCY(12));
+       }
 
        I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
        I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
@@ -2628,12 +2702,14 @@ static void gen6_enable_rps(struct drm_device *dev)
                   (IS_HASWELL(dev) ? GEN7_RP_DOWN_IDLE_AVG : GEN6_RP_DOWN_IDLE_CONT));
 
        ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
-       if (!ret) {
+       if (!ret && (IS_GEN6(dev) || IS_IVYBRIDGE(dev))) {
                pcu_mbox = 0;
                ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
-               if (ret && pcu_mbox & (1<<31)) { /* OC supported */
-                       dev_priv->rps.max_delay = pcu_mbox & 0xff;
-                       DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50);
+               if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
+                       DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
+                                        (dev_priv->rps.max_delay & 0xff) * 50,
+                                        (pcu_mbox & 0xff) * 50);
+                       dev_priv->rps.hw_max = pcu_mbox & 0xff;
                }
        } else {
                DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
@@ -2671,8 +2747,8 @@ static void gen6_update_ring_freq(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        int min_freq = 15;
-       int gpu_freq;
-       unsigned int ia_freq, max_ia_freq;
+       unsigned int gpu_freq;
+       unsigned int max_ia_freq, min_ring_freq;
        int scaling_factor = 180;
 
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
@@ -2688,6 +2764,10 @@ static void gen6_update_ring_freq(struct drm_device *dev)
        /* Convert from kHz to MHz */
        max_ia_freq /= 1000;
 
+       min_ring_freq = I915_READ(MCHBAR_MIRROR_BASE_SNB + DCLK);
+       /* convert DDR frequency from units of 133.3MHz to bandwidth */
+       min_ring_freq = (2 * 4 * min_ring_freq + 2) / 3;
+
        /*
         * For each potential GPU frequency, load a ring frequency we'd like
         * to use for memory access.  We do this by specifying the IA frequency
@@ -2696,24 +2776,187 @@ static void gen6_update_ring_freq(struct drm_device *dev)
        for (gpu_freq = dev_priv->rps.max_delay; gpu_freq >= dev_priv->rps.min_delay;
             gpu_freq--) {
                int diff = dev_priv->rps.max_delay - gpu_freq;
-
-               /*
-                * For GPU frequencies less than 750MHz, just use the lowest
-                * ring freq.
-                */
-               if (gpu_freq < min_freq)
-                       ia_freq = 800;
-               else
-                       ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
-               ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
-               ia_freq <<= GEN6_PCODE_FREQ_IA_RATIO_SHIFT;
+               unsigned int ia_freq = 0, ring_freq = 0;
+
+               if (IS_HASWELL(dev)) {
+                       ring_freq = (gpu_freq * 5 + 3) / 4;
+                       ring_freq = max(min_ring_freq, ring_freq);
+                       /* leave ia_freq as the default, chosen by cpufreq */
+               } else {
+                       /* On older processors, there is no separate ring
+                        * clock domain, so in order to boost the bandwidth
+                        * of the ring, we need to upclock the CPU (ia_freq).
+                        *
+                        * For GPU frequencies less than 750MHz,
+                        * just use the lowest ring freq.
+                        */
+                       if (gpu_freq < min_freq)
+                               ia_freq = 800;
+                       else
+                               ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
+                       ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+               }
 
                sandybridge_pcode_write(dev_priv,
                                        GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
-                                       ia_freq | gpu_freq);
+                                       ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+                                       ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+                                       gpu_freq);
        }
 }
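
Worked examples for the two branches of the loop above, with illustrative inputs. On Haswell the ring clock is derived from the GPU ratio and the `+ 3` makes the divide by four round up; on SNB/IVB the IA frequency is stepped down as the GPU ratio drops:

    /* Haswell branch: */
    unsigned int gpu_freq = 22;
    unsigned int ring_freq = (gpu_freq * 5 + 3) / 4;  /* = 28, rounded up,
                                                       * then clamped to
                                                       * min_ring_freq */
    /* SNB/IVB branch, with max_ia_freq = 3400 MHz and diff = 4:
     *   ia_freq = 3400 - (4 * 180) / 2 = 3040
     *   DIV_ROUND_CLOSEST(3040, 100)   = 30  (100 MHz units) */
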
 
+int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
+{
+       u32 val, rp0;
+
+       valleyview_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE, &val);
+
+       rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
+       /* Clamp to max */
+       rp0 = min_t(u32, rp0, 0xea);
+
+       return rp0;
+}
+
+static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
+{
+       u32 val, rpe;
+
+       valleyview_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO, &val);
+       rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
+       valleyview_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI, &val);
+       rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
+
+       return rpe;
+}
+
+int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
+{
+       u32 val;
+
+       valleyview_punit_read(dev_priv, PUNIT_REG_GPU_LFM, &val);
+
+       return val & 0xff;
+}
+
+static void vlv_rps_timer_work(struct work_struct *work)
+{
+       drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
+                                                   rps.vlv_work.work);
+
+       /*
+        * Timer fired, we must be idle.  Drop to min voltage state.
+        * Note: we use RPe here since it should match the
+        * Vmin we were shooting for.  That should give us better
+        * perf when we come back out of RC6 than if we used the
+        * min freq available.
+        */
+       mutex_lock(&dev_priv->rps.hw_lock);
+       valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
+       mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+static void valleyview_enable_rps(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_ring_buffer *ring;
+       u32 gtfifodbg, val, rpe;
+       int i;
+
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+       if ((gtfifodbg = I915_READ(GTFIFODBG))) {
+               DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
+               I915_WRITE(GTFIFODBG, gtfifodbg);
+       }
+
+       gen6_gt_force_wake_get(dev_priv);
+
+       I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
+       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
+       I915_WRITE(GEN6_RP_UP_EI, 66000);
+       I915_WRITE(GEN6_RP_DOWN_EI, 350000);
+
+       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+       I915_WRITE(GEN6_RP_CONTROL,
+                  GEN6_RP_MEDIA_TURBO |
+                  GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                  GEN6_RP_MEDIA_IS_GFX |
+                  GEN6_RP_ENABLE |
+                  GEN6_RP_UP_BUSY_AVG |
+                  GEN6_RP_DOWN_IDLE_CONT);
+
+       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+       I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
+       I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
+
+       for_each_ring(ring, dev_priv, i)
+               I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
+
+       I915_WRITE(GEN6_RC6_THRESHOLD, 0xc350);
+
+       /* allows RC6 residency counter to work */
+       I915_WRITE(0x138104, _MASKED_BIT_ENABLE(0x3));
+       I915_WRITE(GEN6_RC_CONTROL,
+                  GEN7_RC_CTL_TO_MODE);
+
+       valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &val);
+       switch ((val >> 6) & 3) {
+       case 0:
+       case 1:
+               dev_priv->mem_freq = 800;
+               break;
+       case 2:
+               dev_priv->mem_freq = 1066;
+               break;
+       case 3:
+               dev_priv->mem_freq = 1333;
+               break;
+       }
+       DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq);
+
+       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no");
+       DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+       DRM_DEBUG_DRIVER("current GPU freq: %d\n",
+                        vlv_gpu_freq(dev_priv->mem_freq, (val >> 8) & 0xff));
+       dev_priv->rps.cur_delay = (val >> 8) & 0xff;
+
+       dev_priv->rps.max_delay = valleyview_rps_max_freq(dev_priv);
+       dev_priv->rps.hw_max = dev_priv->rps.max_delay;
+       DRM_DEBUG_DRIVER("max GPU freq: %d\n", vlv_gpu_freq(dev_priv->mem_freq,
+                                                    dev_priv->rps.max_delay));
+
+       rpe = valleyview_rps_rpe_freq(dev_priv);
+       DRM_DEBUG_DRIVER("RPe GPU freq: %d\n",
+                        vlv_gpu_freq(dev_priv->mem_freq, rpe));
+       dev_priv->rps.rpe_delay = rpe;
+
+       val = valleyview_rps_min_freq(dev_priv);
+       DRM_DEBUG_DRIVER("min GPU freq: %d\n", vlv_gpu_freq(dev_priv->mem_freq,
+                                                           val));
+       dev_priv->rps.min_delay = val;
+
+       DRM_DEBUG_DRIVER("setting GPU freq to %d\n",
+                        vlv_gpu_freq(dev_priv->mem_freq, rpe));
+
+       INIT_DELAYED_WORK(&dev_priv->rps.vlv_work, vlv_rps_timer_work);
+
+       valleyview_set_rps(dev_priv->dev, rpe);
+
+       /* requires MSI enabled */
+       I915_WRITE(GEN6_PMIER, GEN6_PM_DEFERRED_EVENTS);
+       spin_lock_irq(&dev_priv->rps.lock);
+       WARN_ON(dev_priv->rps.pm_iir != 0);
+       I915_WRITE(GEN6_PMIMR, 0);
+       spin_unlock_irq(&dev_priv->rps.lock);
+       /* enable all PM interrupts */
+       I915_WRITE(GEN6_PMINTRMSK, 0);
+
+       gen6_gt_force_wake_put(dev_priv);
+}
+
 void ironlake_teardown_rc6(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2821,7 +3064,7 @@ static void ironlake_enable_rc6(struct drm_device *dev)
        ret = intel_ring_idle(ring);
        dev_priv->mm.interruptible = was_interruptible;
        if (ret) {
-               DRM_ERROR("failed to enable ironlake power power savings\n");
+               DRM_ERROR("failed to enable ironlake power savings\n");
                ironlake_teardown_rc6(dev);
                return;
        }
@@ -3437,13 +3680,22 @@ void intel_disable_gt_powersave(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
+       /* Interrupts should be disabled already to avoid re-arming. */
+       WARN_ON(dev->irq_enabled);
+
        if (IS_IRONLAKE_M(dev)) {
                ironlake_disable_drps(dev);
                ironlake_disable_rc6(dev);
-       } else if (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) {
+       } else if (INTEL_INFO(dev)->gen >= 6) {
                cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
+               cancel_work_sync(&dev_priv->rps.work);
+               if (IS_VALLEYVIEW(dev))
+                       cancel_delayed_work_sync(&dev_priv->rps.vlv_work);
                mutex_lock(&dev_priv->rps.hw_lock);
-               gen6_disable_rps(dev);
+               if (IS_VALLEYVIEW(dev))
+                       valleyview_disable_rps(dev);
+               else
+                       gen6_disable_rps(dev);
                mutex_unlock(&dev_priv->rps.hw_lock);
        }
 }
@@ -3456,8 +3708,13 @@ static void intel_gen6_powersave_work(struct work_struct *work)
        struct drm_device *dev = dev_priv->dev;
 
        mutex_lock(&dev_priv->rps.hw_lock);
-       gen6_enable_rps(dev);
-       gen6_update_ring_freq(dev);
+
+       if (IS_VALLEYVIEW(dev)) {
+               valleyview_enable_rps(dev);
+       } else {
+               gen6_enable_rps(dev);
+               gen6_update_ring_freq(dev);
+       }
        mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
@@ -3469,7 +3726,7 @@ void intel_enable_gt_powersave(struct drm_device *dev)
                ironlake_enable_drps(dev);
                ironlake_enable_rc6(dev);
                intel_init_emon(dev);
-       } else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) {
+       } else if (IS_GEN6(dev) || IS_GEN7(dev)) {
                /*
                 * PCU communication is slow and this doesn't need to be
                 * done at any specific time, so do this out of our fast path
@@ -3562,6 +3819,7 @@ static void cpt_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        int pipe;
+       uint32_t val;
 
        /*
         * On Ibex Peak and Cougar Point, we need to disable clock
@@ -3574,8 +3832,17 @@ static void cpt_init_clock_gating(struct drm_device *dev)
        /* The below fixes the weird display corruption, a few pixels shifted
         * downward, on (only) LVDS of some HP laptops with IVY.
         */
-       for_each_pipe(pipe)
-               I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_CHICKEN2_TIMING_OVERRIDE);
+       for_each_pipe(pipe) {
+               val = I915_READ(TRANS_CHICKEN2(pipe));
+               val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+               val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+               if (dev_priv->fdi_rx_polarity_inverted)
+                       val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+               val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
+               val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
+               val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
+               I915_WRITE(TRANS_CHICKEN2(pipe), val);
+       }
        /* WADP0ClockGatingDisable */
        for_each_pipe(pipe) {
                I915_WRITE(TRANS_CHICKEN1(pipe),
@@ -3768,6 +4035,9 @@ static void haswell_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
                   GEN6_MBCTL_ENABLE_BOOT_FETCH);
 
+       /* WaSwitchSolVfFArbitrationPriority */
+       I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
+
        /* XXX: This is a workaround for early silicon revisions and should be
         * removed later.
         */
@@ -3874,7 +4144,8 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
        snpcr |= GEN6_MBC_SNPCR_MED;
        I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
 
-       cpt_init_clock_gating(dev);
+       if (!HAS_PCH_NOP(dev))
+               cpt_init_clock_gating(dev);
 
        gen6_check_mch_setup(dev);
 }
@@ -3899,8 +4170,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
                   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
                   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
+       /* WaDisablePSDDualDispatchEnable */
        I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
-                  _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+                  _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
+                                     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
        /* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
        I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
@@ -3967,25 +4240,21 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
-       /*
-        * On ValleyView, the GUnit needs to signal the GT
-        * when flip and other events complete.  So enable
-        * all the GUnit->GT interrupts here
-        */
-       I915_WRITE(VLV_DPFLIPSTAT, PIPEB_LINE_COMPARE_INT_EN |
-                  PIPEB_HLINE_INT_EN | PIPEB_VBLANK_INT_EN |
-                  SPRITED_FLIPDONE_INT_EN | SPRITEC_FLIPDONE_INT_EN |
-                  PLANEB_FLIPDONE_INT_EN | PIPEA_LINE_COMPARE_INT_EN |
-                  PIPEA_HLINE_INT_EN | PIPEA_VBLANK_INT_EN |
-                  SPRITEB_FLIPDONE_INT_EN | SPRITEA_FLIPDONE_INT_EN |
-                  PLANEA_FLIPDONE_INT_EN);
-
        /*
         * WaDisableVLVClockGating_VBIIssue
         * Disable clock gating on the GCFG unit to prevent a delay
         * in the reporting of vblank events.
         */
-       I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
+       I915_WRITE(VLV_GUNIT_CLOCK_GATE, 0xffffffff);
+
+       /* Conservative clock gating settings for now */
+       I915_WRITE(0x9400, 0xffffffff);
+       I915_WRITE(0x9404, 0xffffffff);
+       I915_WRITE(0x9408, 0xffffffff);
+       I915_WRITE(0x940c, 0xffffffff);
+       I915_WRITE(0x9410, 0xffffffff);
+       I915_WRITE(0x9414, 0xffffffff);
+       I915_WRITE(0x9418, 0xffffffff);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
@@ -4070,13 +4339,48 @@ void intel_init_clock_gating(struct drm_device *dev)
        dev_priv->display.init_clock_gating(dev);
 }
 
+/**
+ * We should only use the power well if we explicitly asked the hardware to
+ * enable it, so check if it's enabled and also check if we've requested it to
+ * be enabled.
+ */
+bool intel_display_power_enabled(struct drm_device *dev,
+                                enum intel_display_power_domain domain)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       if (!HAS_POWER_WELL(dev))
+               return true;
+
+       switch (domain) {
+       case POWER_DOMAIN_PIPE_A:
+       case POWER_DOMAIN_TRANSCODER_EDP:
+               return true;
+       case POWER_DOMAIN_PIPE_B:
+       case POWER_DOMAIN_PIPE_C:
+       case POWER_DOMAIN_PIPE_A_PANEL_FITTER:
+       case POWER_DOMAIN_PIPE_B_PANEL_FITTER:
+       case POWER_DOMAIN_PIPE_C_PANEL_FITTER:
+       case POWER_DOMAIN_TRANSCODER_A:
+       case POWER_DOMAIN_TRANSCODER_B:
+       case POWER_DOMAIN_TRANSCODER_C:
+               return I915_READ(HSW_PWR_WELL_DRIVER) ==
+                      (HSW_PWR_WELL_ENABLE | HSW_PWR_WELL_STATE);
+       default:
+               BUG();
+       }
+}
+
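A hypothetical caller sketch for the new intel_display_power_enabled(): check the domain before touching registers behind the switchable well. The caller below is illustrative only and not part of this patch (pipe A and the eDP transcoder sit in the always-on well, so they always report true):

    if (!intel_display_power_enabled(dev, POWER_DOMAIN_PIPE_B))
            return;                 /* well is down; don't touch pipe B regs */
    tmp = I915_READ(PIPECONF(PIPE_B));
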
 void intel_set_power_well(struct drm_device *dev, bool enable)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        bool is_enabled, enable_requested;
        uint32_t tmp;
 
-       if (!IS_HASWELL(dev))
+       if (!HAS_POWER_WELL(dev))
+               return;
+
+       if (!i915_disable_power_well && !enable)
                return;
 
        tmp = I915_READ(HSW_PWR_WELL_DRIVER);
@@ -4111,7 +4415,7 @@ void intel_init_power_well(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (!IS_HASWELL(dev))
+       if (!HAS_POWER_WELL(dev))
                return;
 
        /* For now, we need the power well to be always enabled. */
@@ -4173,7 +4477,6 @@ void intel_init_pm(struct drm_device *dev)
                        }
                        dev_priv->display.init_clock_gating = gen6_init_clock_gating;
                } else if (IS_IVYBRIDGE(dev)) {
-                       /* FIXME: detect B0+ stepping and use auto training */
                        if (SNB_READ_WM0_LATENCY()) {
                                dev_priv->display.update_wm = ivybridge_update_wm;
                                dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
@@ -4271,21 +4574,14 @@ static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv)
 
 static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
-       u32 forcewake_ack;
-
-       if (IS_HASWELL(dev_priv->dev))
-               forcewake_ack = FORCEWAKE_ACK_HSW;
-       else
-               forcewake_ack = FORCEWAKE_ACK;
-
-       if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1) == 0,
+       if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1) == 0,
                            FORCEWAKE_ACK_TIMEOUT_MS))
                DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
-       I915_WRITE_NOTRACE(FORCEWAKE, FORCEWAKE_KERNEL);
+       I915_WRITE_NOTRACE(FORCEWAKE, 1);
        POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
 
-       if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1),
+       if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1),
                            FORCEWAKE_ACK_TIMEOUT_MS))
                DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
 
@@ -4308,7 +4604,7 @@ static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
        else
                forcewake_ack = FORCEWAKE_MT_ACK;
 
-       if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1) == 0,
+       if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL) == 0,
                            FORCEWAKE_ACK_TIMEOUT_MS))
                DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
@@ -4316,7 +4612,7 @@ static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
        /* something from same cacheline, but !FORCEWAKE_MT */
        POSTING_READ(ECOBUS);
 
-       if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1),
+       if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL),
                            FORCEWAKE_ACK_TIMEOUT_MS))
                DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
 
@@ -4406,15 +4702,22 @@ static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
 
 static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 {
-       if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1) == 0,
+       if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0,
                            FORCEWAKE_ACK_TIMEOUT_MS))
                DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
        I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
+       I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
+                          _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
 
-       if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1),
+       if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL),
                            FORCEWAKE_ACK_TIMEOUT_MS))
-               DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
+               DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n");
+
+       if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_MEDIA_VLV) &
+                            FORCEWAKE_KERNEL),
+                           FORCEWAKE_ACK_TIMEOUT_MS))
+               DRM_ERROR("Timed out waiting for media to ack forcewake request.\n");
 
        __gen6_gt_wait_for_thread_c0(dev_priv);
 }
@@ -4422,8 +4725,9 @@ static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
 {
        I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
-       /* something from same cacheline, but !FORCEWAKE_VLV */
-       POSTING_READ(FORCEWAKE_ACK_VLV);
+       I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
+                          _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
+       /* The below doubles as a POSTING_READ */
        gen6_gt_check_fifodbg(dev_priv);
 }
 
@@ -4508,3 +4812,119 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
 
        return 0;
 }
+
+static int vlv_punit_rw(struct drm_i915_private *dev_priv, u32 port, u8 opcode,
+                       u8 addr, u32 *val)
+{
+       u32 cmd, devfn, be, bar;
+
+       bar = 0;
+       be = 0xf;
+       devfn = PCI_DEVFN(2, 0);
+
+       cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
+               (port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
+               (bar << IOSF_BAR_SHIFT);
+
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+       if (I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) {
+               DRM_DEBUG_DRIVER("warning: pcode (%s) mailbox access failed\n",
+                                opcode == PUNIT_OPCODE_REG_READ ?
+                                "read" : "write");
+               return -EAGAIN;
+       }
+
+       I915_WRITE(VLV_IOSF_ADDR, addr);
+       if (opcode == PUNIT_OPCODE_REG_WRITE)
+               I915_WRITE(VLV_IOSF_DATA, *val);
+       I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd);
+
+       if (wait_for((I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) == 0,
+                    5)) {
+               DRM_ERROR("timeout waiting for pcode %s (%d) to finish\n",
+                         opcode == PUNIT_OPCODE_REG_READ ? "read" : "write",
+                         addr);
+               return -ETIMEDOUT;
+       }
+
+       if (opcode == PUNIT_OPCODE_REG_READ)
+               *val = I915_READ(VLV_IOSF_DATA);
+       I915_WRITE(VLV_IOSF_DATA, 0);
+
+       return 0;
+}
+
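vlv_punit_rw() above packs one IOSF sideband command dword. The field layout implied by the code, assuming the shift constants match i915_reg.h (DEVFN at 24, OPCODE at 16, PORT at 8, BYTE_ENABLES at 4, BAR at 1):

    /* Illustrative Punit read; devfn 2.0 (= 0x10) is the GPU. */
    u32 cmd = (0x10 << 24)            /* devfn */
            | (opcode << 16)          /* PUNIT_OPCODE_REG_READ or _WRITE */
            | (port << 8)             /* IOSF_PORT_PUNIT, IOSF_PORT_NC, ... */
            | (0xf << 4)              /* byte enables: all four bytes */
            | (0 << 1);               /* bar */
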
+int valleyview_punit_read(struct drm_i915_private *dev_priv, u8 addr, u32 *val)
+{
+       return vlv_punit_rw(dev_priv, IOSF_PORT_PUNIT, PUNIT_OPCODE_REG_READ,
+                           addr, val);
+}
+
+int valleyview_punit_write(struct drm_i915_private *dev_priv, u8 addr, u32 val)
+{
+       return vlv_punit_rw(dev_priv, IOSF_PORT_PUNIT, PUNIT_OPCODE_REG_WRITE,
+                           addr, &val);
+}
+
+int valleyview_nc_read(struct drm_i915_private *dev_priv, u8 addr, u32 *val)
+{
+       return vlv_punit_rw(dev_priv, IOSF_PORT_NC, PUNIT_OPCODE_REG_READ,
+                           addr, val);
+}
+
+int vlv_gpu_freq(int ddr_freq, int val)
+{
+       int mult, base;
+
+       switch (ddr_freq) {
+       case 800:
+               mult = 20;
+               base = 120;
+               break;
+       case 1066:
+               mult = 22;
+               base = 133;
+               break;
+       case 1333:
+               mult = 21;
+               base = 125;
+               break;
+       default:
+               return -1;
+       }
+
+       return ((val - 0xbd) * mult) + base;
+}
+
+int vlv_freq_opcode(int ddr_freq, int val)
+{
+       int mult, base;
+
+       switch (ddr_freq) {
+       case 800:
+               mult = 20;
+               base = 120;
+               break;
+       case 1066:
+               mult = 22;
+               base = 133;
+               break;
+       case 1333:
+               mult = 21;
+               base = 125;
+               break;
+       default:
+               return -1;
+       }
+
+       val /= mult;
+       val -= base / mult;
+       val += 0xbd;
+
+       if (val > 0xea)
+               val = 0xea;
+
+       return val;
+}
+
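A worked round trip through the two conversions above, for the 1066MHz-DDR case (mult = 22, base = 133, 0xbd = 189):

    vlv_gpu_freq(1066, 0xc8)   -> (200 - 189) * 22 + 133 = 375 MHz
    vlv_freq_opcode(1066, 375) -> 375 / 22 = 17, 133 / 22 = 6,
                                  17 - 6 + 189 = 200 = 0xc8

Both directions use integer division, so the round trip is exact only on 22MHz steps, and vlv_freq_opcode() clamps to the same 0xea ceiling that valleyview_rps_max_freq() applies.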