drm/radeon: fix halting UVD

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index cf1e0b1846235d1198803ee8a68f9ceb2ffdc33a..524db70aaf6e71dc384fedd2da5d81cae143bb5d 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -22,7 +22,6 @@
  * Authors: Alex Deucher
  */
 #include <linux/firmware.h>
-#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include "drmP.h"
@@ -72,7 +71,525 @@ extern int r600_ih_ring_alloc(struct radeon_device *rdev);
 extern void r600_ih_ring_fini(struct radeon_device *rdev);
 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
+extern bool evergreen_is_display_hung(struct radeon_device *rdev);
 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
+extern void si_rlc_fini(struct radeon_device *rdev);
+extern int si_rlc_init(struct radeon_device *rdev);
+static void cik_rlc_stop(struct radeon_device *rdev);
+
+/*
+ * Indirect registers accessor
+ */
+u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
+{
+       u32 r;
+
+       WREG32(PCIE_INDEX, reg);
+       (void)RREG32(PCIE_INDEX);
+       r = RREG32(PCIE_DATA);
+       return r;
+}
+
+void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+       WREG32(PCIE_INDEX, reg);
+       (void)RREG32(PCIE_INDEX);
+       WREG32(PCIE_DATA, v);
+       (void)RREG32(PCIE_DATA);
+}
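+
+/* The dummy reads of PCIE_INDEX above post the index write before the
+ * data access.  Callers normally reach these accessors through function
+ * pointers on struct radeon_device; a sketch of the radeon.h convention:
+ *
+ *     #define RREG32_PCIE_PORT(reg) rdev->pciep_rreg(rdev, (reg))
+ *     #define WREG32_PCIE_PORT(reg, v) rdev->pciep_wreg(rdev, (reg), (v))
+ */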
+
+static const u32 bonaire_golden_spm_registers[] =
+{
+       0x30800, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 bonaire_golden_common_registers[] =
+{
+       0xc770, 0xffffffff, 0x00000800,
+       0xc774, 0xffffffff, 0x00000800,
+       0xc798, 0xffffffff, 0x00007fbf,
+       0xc79c, 0xffffffff, 0x00007faf
+};
+
+static const u32 bonaire_golden_registers[] =
+{
+       0x3354, 0x00000333, 0x00000333,
+       0x3350, 0x000c0fc0, 0x00040200,
+       0x9a10, 0x00010000, 0x00058208,
+       0x3c000, 0xffff1fff, 0x00140000,
+       0x3c200, 0xfdfc0fff, 0x00000100,
+       0x3c234, 0x40000000, 0x40000200,
+       0x9830, 0xffffffff, 0x00000000,
+       0x9834, 0xf00fffff, 0x00000400,
+       0x9838, 0x0002021c, 0x00020200,
+       0xc78, 0x00000080, 0x00000000,
+       0x5bb0, 0x000000f0, 0x00000070,
+       0x5bc0, 0xf0311fff, 0x80300000,
+       0x98f8, 0x73773777, 0x12010001,
+       0x350c, 0x00810000, 0x408af000,
+       0x7030, 0x31000111, 0x00000011,
+       0x2f48, 0x73773777, 0x12010001,
+       0x220c, 0x00007fb6, 0x0021a1b1,
+       0x2210, 0x00007fb6, 0x002021b1,
+       0x2180, 0x00007fb6, 0x00002191,
+       0x2218, 0x00007fb6, 0x002121b1,
+       0x221c, 0x00007fb6, 0x002021b1,
+       0x21dc, 0x00007fb6, 0x00002191,
+       0x21e0, 0x00007fb6, 0x00002191,
+       0x3628, 0x0000003f, 0x0000000a,
+       0x362c, 0x0000003f, 0x0000000a,
+       0x2ae4, 0x00073ffe, 0x000022a2,
+       0x240c, 0x000007ff, 0x00000000,
+       0x8a14, 0xf000003f, 0x00000007,
+       0x8bf0, 0x00002001, 0x00000001,
+       0x8b24, 0xffffffff, 0x00ffffff,
+       0x30a04, 0x0000ff0f, 0x00000000,
+       0x28a4c, 0x07ffffff, 0x06000000,
+       0x4d8, 0x00000fff, 0x00000100,
+       0x3e78, 0x00000001, 0x00000002,
+       0x9100, 0x03000000, 0x0362c688,
+       0x8c00, 0x000000ff, 0x00000001,
+       0xe40, 0x00001fff, 0x00001fff,
+       0x9060, 0x0000007f, 0x00000020,
+       0x9508, 0x00010000, 0x00010000,
+       0xac14, 0x000003ff, 0x000000f3,
+       0xac0c, 0xffffffff, 0x00001032
+};
+
+static const u32 bonaire_mgcg_cgcg_init[] =
+{
+       0xc420, 0xffffffff, 0xfffffffc,
+       0x30800, 0xffffffff, 0xe0000000,
+       0x3c2a0, 0xffffffff, 0x00000100,
+       0x3c208, 0xffffffff, 0x00000100,
+       0x3c2c0, 0xffffffff, 0xc0000100,
+       0x3c2c8, 0xffffffff, 0xc0000100,
+       0x3c2c4, 0xffffffff, 0xc0000100,
+       0x55e4, 0xffffffff, 0x00600100,
+       0x3c280, 0xffffffff, 0x00000100,
+       0x3c214, 0xffffffff, 0x06000100,
+       0x3c220, 0xffffffff, 0x00000100,
+       0x3c218, 0xffffffff, 0x06000100,
+       0x3c204, 0xffffffff, 0x00000100,
+       0x3c2e0, 0xffffffff, 0x00000100,
+       0x3c224, 0xffffffff, 0x00000100,
+       0x3c200, 0xffffffff, 0x00000100,
+       0x3c230, 0xffffffff, 0x00000100,
+       0x3c234, 0xffffffff, 0x00000100,
+       0x3c250, 0xffffffff, 0x00000100,
+       0x3c254, 0xffffffff, 0x00000100,
+       0x3c258, 0xffffffff, 0x00000100,
+       0x3c25c, 0xffffffff, 0x00000100,
+       0x3c260, 0xffffffff, 0x00000100,
+       0x3c27c, 0xffffffff, 0x00000100,
+       0x3c278, 0xffffffff, 0x00000100,
+       0x3c210, 0xffffffff, 0x06000100,
+       0x3c290, 0xffffffff, 0x00000100,
+       0x3c274, 0xffffffff, 0x00000100,
+       0x3c2b4, 0xffffffff, 0x00000100,
+       0x3c2b0, 0xffffffff, 0x00000100,
+       0x3c270, 0xffffffff, 0x00000100,
+       0x30800, 0xffffffff, 0xe0000000,
+       0x3c020, 0xffffffff, 0x00010000,
+       0x3c024, 0xffffffff, 0x00030002,
+       0x3c028, 0xffffffff, 0x00040007,
+       0x3c02c, 0xffffffff, 0x00060005,
+       0x3c030, 0xffffffff, 0x00090008,
+       0x3c034, 0xffffffff, 0x00010000,
+       0x3c038, 0xffffffff, 0x00030002,
+       0x3c03c, 0xffffffff, 0x00040007,
+       0x3c040, 0xffffffff, 0x00060005,
+       0x3c044, 0xffffffff, 0x00090008,
+       0x3c048, 0xffffffff, 0x00010000,
+       0x3c04c, 0xffffffff, 0x00030002,
+       0x3c050, 0xffffffff, 0x00040007,
+       0x3c054, 0xffffffff, 0x00060005,
+       0x3c058, 0xffffffff, 0x00090008,
+       0x3c05c, 0xffffffff, 0x00010000,
+       0x3c060, 0xffffffff, 0x00030002,
+       0x3c064, 0xffffffff, 0x00040007,
+       0x3c068, 0xffffffff, 0x00060005,
+       0x3c06c, 0xffffffff, 0x00090008,
+       0x3c070, 0xffffffff, 0x00010000,
+       0x3c074, 0xffffffff, 0x00030002,
+       0x3c078, 0xffffffff, 0x00040007,
+       0x3c07c, 0xffffffff, 0x00060005,
+       0x3c080, 0xffffffff, 0x00090008,
+       0x3c084, 0xffffffff, 0x00010000,
+       0x3c088, 0xffffffff, 0x00030002,
+       0x3c08c, 0xffffffff, 0x00040007,
+       0x3c090, 0xffffffff, 0x00060005,
+       0x3c094, 0xffffffff, 0x00090008,
+       0x3c098, 0xffffffff, 0x00010000,
+       0x3c09c, 0xffffffff, 0x00030002,
+       0x3c0a0, 0xffffffff, 0x00040007,
+       0x3c0a4, 0xffffffff, 0x00060005,
+       0x3c0a8, 0xffffffff, 0x00090008,
+       0x3c000, 0xffffffff, 0x96e00200,
+       0x8708, 0xffffffff, 0x00900100,
+       0xc424, 0xffffffff, 0x0020003f,
+       0x38, 0xffffffff, 0x0140001c,
+       0x3c, 0x000f0000, 0x000f0000,
+       0x220, 0xffffffff, 0xC060000C,
+       0x224, 0xc0000fff, 0x00000100,
+       0xf90, 0xffffffff, 0x00000100,
+       0xf98, 0x00000101, 0x00000000,
+       0x20a8, 0xffffffff, 0x00000104,
+       0x55e4, 0xff000fff, 0x00000100,
+       0x30cc, 0xc0000fff, 0x00000104,
+       0xc1e4, 0x00000001, 0x00000001,
+       0xd00c, 0xff000ff0, 0x00000100,
+       0xd80c, 0xff000ff0, 0x00000100
+};
+
+static const u32 spectre_golden_spm_registers[] =
+{
+       0x30800, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 spectre_golden_common_registers[] =
+{
+       0xc770, 0xffffffff, 0x00000800,
+       0xc774, 0xffffffff, 0x00000800,
+       0xc798, 0xffffffff, 0x00007fbf,
+       0xc79c, 0xffffffff, 0x00007faf
+};
+
+static const u32 spectre_golden_registers[] =
+{
+       0x3c000, 0xffff1fff, 0x96940200,
+       0x3c00c, 0xffff0001, 0xff000000,
+       0x3c200, 0xfffc0fff, 0x00000100,
+       0x6ed8, 0x00010101, 0x00010000,
+       0x9834, 0xf00fffff, 0x00000400,
+       0x9838, 0xfffffffc, 0x00020200,
+       0x5bb0, 0x000000f0, 0x00000070,
+       0x5bc0, 0xf0311fff, 0x80300000,
+       0x98f8, 0x73773777, 0x12010001,
+       0x9b7c, 0x00ff0000, 0x00fc0000,
+       0x2f48, 0x73773777, 0x12010001,
+       0x8a14, 0xf000003f, 0x00000007,
+       0x8b24, 0xffffffff, 0x00ffffff,
+       0x28350, 0x3f3f3fff, 0x00000082,
+       0x28355, 0x0000003f, 0x00000000,
+       0x3e78, 0x00000001, 0x00000002,
+       0x913c, 0xffff03df, 0x00000004,
+       0xc768, 0x00000008, 0x00000008,
+       0x8c00, 0x000008ff, 0x00000800,
+       0x9508, 0x00010000, 0x00010000,
+       0xac0c, 0xffffffff, 0x54763210,
+       0x214f8, 0x01ff01ff, 0x00000002,
+       0x21498, 0x007ff800, 0x00200000,
+       0x2015c, 0xffffffff, 0x00000f40,
+       0x30934, 0xffffffff, 0x00000001
+};
+
+static const u32 spectre_mgcg_cgcg_init[] =
+{
+       0xc420, 0xffffffff, 0xfffffffc,
+       0x30800, 0xffffffff, 0xe0000000,
+       0x3c2a0, 0xffffffff, 0x00000100,
+       0x3c208, 0xffffffff, 0x00000100,
+       0x3c2c0, 0xffffffff, 0x00000100,
+       0x3c2c8, 0xffffffff, 0x00000100,
+       0x3c2c4, 0xffffffff, 0x00000100,
+       0x55e4, 0xffffffff, 0x00600100,
+       0x3c280, 0xffffffff, 0x00000100,
+       0x3c214, 0xffffffff, 0x06000100,
+       0x3c220, 0xffffffff, 0x00000100,
+       0x3c218, 0xffffffff, 0x06000100,
+       0x3c204, 0xffffffff, 0x00000100,
+       0x3c2e0, 0xffffffff, 0x00000100,
+       0x3c224, 0xffffffff, 0x00000100,
+       0x3c200, 0xffffffff, 0x00000100,
+       0x3c230, 0xffffffff, 0x00000100,
+       0x3c234, 0xffffffff, 0x00000100,
+       0x3c250, 0xffffffff, 0x00000100,
+       0x3c254, 0xffffffff, 0x00000100,
+       0x3c258, 0xffffffff, 0x00000100,
+       0x3c25c, 0xffffffff, 0x00000100,
+       0x3c260, 0xffffffff, 0x00000100,
+       0x3c27c, 0xffffffff, 0x00000100,
+       0x3c278, 0xffffffff, 0x00000100,
+       0x3c210, 0xffffffff, 0x06000100,
+       0x3c290, 0xffffffff, 0x00000100,
+       0x3c274, 0xffffffff, 0x00000100,
+       0x3c2b4, 0xffffffff, 0x00000100,
+       0x3c2b0, 0xffffffff, 0x00000100,
+       0x3c270, 0xffffffff, 0x00000100,
+       0x30800, 0xffffffff, 0xe0000000,
+       0x3c020, 0xffffffff, 0x00010000,
+       0x3c024, 0xffffffff, 0x00030002,
+       0x3c028, 0xffffffff, 0x00040007,
+       0x3c02c, 0xffffffff, 0x00060005,
+       0x3c030, 0xffffffff, 0x00090008,
+       0x3c034, 0xffffffff, 0x00010000,
+       0x3c038, 0xffffffff, 0x00030002,
+       0x3c03c, 0xffffffff, 0x00040007,
+       0x3c040, 0xffffffff, 0x00060005,
+       0x3c044, 0xffffffff, 0x00090008,
+       0x3c048, 0xffffffff, 0x00010000,
+       0x3c04c, 0xffffffff, 0x00030002,
+       0x3c050, 0xffffffff, 0x00040007,
+       0x3c054, 0xffffffff, 0x00060005,
+       0x3c058, 0xffffffff, 0x00090008,
+       0x3c05c, 0xffffffff, 0x00010000,
+       0x3c060, 0xffffffff, 0x00030002,
+       0x3c064, 0xffffffff, 0x00040007,
+       0x3c068, 0xffffffff, 0x00060005,
+       0x3c06c, 0xffffffff, 0x00090008,
+       0x3c070, 0xffffffff, 0x00010000,
+       0x3c074, 0xffffffff, 0x00030002,
+       0x3c078, 0xffffffff, 0x00040007,
+       0x3c07c, 0xffffffff, 0x00060005,
+       0x3c080, 0xffffffff, 0x00090008,
+       0x3c084, 0xffffffff, 0x00010000,
+       0x3c088, 0xffffffff, 0x00030002,
+       0x3c08c, 0xffffffff, 0x00040007,
+       0x3c090, 0xffffffff, 0x00060005,
+       0x3c094, 0xffffffff, 0x00090008,
+       0x3c098, 0xffffffff, 0x00010000,
+       0x3c09c, 0xffffffff, 0x00030002,
+       0x3c0a0, 0xffffffff, 0x00040007,
+       0x3c0a4, 0xffffffff, 0x00060005,
+       0x3c0a8, 0xffffffff, 0x00090008,
+       0x3c0ac, 0xffffffff, 0x00010000,
+       0x3c0b0, 0xffffffff, 0x00030002,
+       0x3c0b4, 0xffffffff, 0x00040007,
+       0x3c0b8, 0xffffffff, 0x00060005,
+       0x3c0bc, 0xffffffff, 0x00090008,
+       0x3c000, 0xffffffff, 0x96e00200,
+       0x8708, 0xffffffff, 0x00900100,
+       0xc424, 0xffffffff, 0x0020003f,
+       0x38, 0xffffffff, 0x0140001c,
+       0x3c, 0x000f0000, 0x000f0000,
+       0x220, 0xffffffff, 0xC060000C,
+       0x224, 0xc0000fff, 0x00000100,
+       0xf90, 0xffffffff, 0x00000100,
+       0xf98, 0x00000101, 0x00000000,
+       0x20a8, 0xffffffff, 0x00000104,
+       0x55e4, 0xff000fff, 0x00000100,
+       0x30cc, 0xc0000fff, 0x00000104,
+       0xc1e4, 0x00000001, 0x00000001,
+       0xd00c, 0xff000ff0, 0x00000100,
+       0xd80c, 0xff000ff0, 0x00000100
+};
+
+static const u32 kalindi_golden_spm_registers[] =
+{
+       0x30800, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 kalindi_golden_common_registers[] =
+{
+       0xc770, 0xffffffff, 0x00000800,
+       0xc774, 0xffffffff, 0x00000800,
+       0xc798, 0xffffffff, 0x00007fbf,
+       0xc79c, 0xffffffff, 0x00007faf
+};
+
+static const u32 kalindi_golden_registers[] =
+{
+       0x3c000, 0xffffdfff, 0x6e944040,
+       0x55e4, 0xff607fff, 0xfc000100,
+       0x3c220, 0xff000fff, 0x00000100,
+       0x3c224, 0xff000fff, 0x00000100,
+       0x3c200, 0xfffc0fff, 0x00000100,
+       0x6ed8, 0x00010101, 0x00010000,
+       0x9830, 0xffffffff, 0x00000000,
+       0x9834, 0xf00fffff, 0x00000400,
+       0x5bb0, 0x000000f0, 0x00000070,
+       0x5bc0, 0xf0311fff, 0x80300000,
+       0x98f8, 0x73773777, 0x12010001,
+       0x98fc, 0xffffffff, 0x00000010,
+       0x9b7c, 0x00ff0000, 0x00fc0000,
+       0x8030, 0x00001f0f, 0x0000100a,
+       0x2f48, 0x73773777, 0x12010001,
+       0x2408, 0x000fffff, 0x000c007f,
+       0x8a14, 0xf000003f, 0x00000007,
+       0x8b24, 0x3fff3fff, 0x00ffcfff,
+       0x30a04, 0x0000ff0f, 0x00000000,
+       0x28a4c, 0x07ffffff, 0x06000000,
+       0x4d8, 0x00000fff, 0x00000100,
+       0x3e78, 0x00000001, 0x00000002,
+       0xc768, 0x00000008, 0x00000008,
+       0x8c00, 0x000000ff, 0x00000003,
+       0x214f8, 0x01ff01ff, 0x00000002,
+       0x21498, 0x007ff800, 0x00200000,
+       0x2015c, 0xffffffff, 0x00000f40,
+       0x88c4, 0x001f3ae3, 0x00000082,
+       0x88d4, 0x0000001f, 0x00000010,
+       0x30934, 0xffffffff, 0x00000000
+};
+
+static const u32 kalindi_mgcg_cgcg_init[] =
+{
+       0xc420, 0xffffffff, 0xfffffffc,
+       0x30800, 0xffffffff, 0xe0000000,
+       0x3c2a0, 0xffffffff, 0x00000100,
+       0x3c208, 0xffffffff, 0x00000100,
+       0x3c2c0, 0xffffffff, 0x00000100,
+       0x3c2c8, 0xffffffff, 0x00000100,
+       0x3c2c4, 0xffffffff, 0x00000100,
+       0x55e4, 0xffffffff, 0x00600100,
+       0x3c280, 0xffffffff, 0x00000100,
+       0x3c214, 0xffffffff, 0x06000100,
+       0x3c220, 0xffffffff, 0x00000100,
+       0x3c218, 0xffffffff, 0x06000100,
+       0x3c204, 0xffffffff, 0x00000100,
+       0x3c2e0, 0xffffffff, 0x00000100,
+       0x3c224, 0xffffffff, 0x00000100,
+       0x3c200, 0xffffffff, 0x00000100,
+       0x3c230, 0xffffffff, 0x00000100,
+       0x3c234, 0xffffffff, 0x00000100,
+       0x3c250, 0xffffffff, 0x00000100,
+       0x3c254, 0xffffffff, 0x00000100,
+       0x3c258, 0xffffffff, 0x00000100,
+       0x3c25c, 0xffffffff, 0x00000100,
+       0x3c260, 0xffffffff, 0x00000100,
+       0x3c27c, 0xffffffff, 0x00000100,
+       0x3c278, 0xffffffff, 0x00000100,
+       0x3c210, 0xffffffff, 0x06000100,
+       0x3c290, 0xffffffff, 0x00000100,
+       0x3c274, 0xffffffff, 0x00000100,
+       0x3c2b4, 0xffffffff, 0x00000100,
+       0x3c2b0, 0xffffffff, 0x00000100,
+       0x3c270, 0xffffffff, 0x00000100,
+       0x30800, 0xffffffff, 0xe0000000,
+       0x3c020, 0xffffffff, 0x00010000,
+       0x3c024, 0xffffffff, 0x00030002,
+       0x3c028, 0xffffffff, 0x00040007,
+       0x3c02c, 0xffffffff, 0x00060005,
+       0x3c030, 0xffffffff, 0x00090008,
+       0x3c034, 0xffffffff, 0x00010000,
+       0x3c038, 0xffffffff, 0x00030002,
+       0x3c03c, 0xffffffff, 0x00040007,
+       0x3c040, 0xffffffff, 0x00060005,
+       0x3c044, 0xffffffff, 0x00090008,
+       0x3c000, 0xffffffff, 0x96e00200,
+       0x8708, 0xffffffff, 0x00900100,
+       0xc424, 0xffffffff, 0x0020003f,
+       0x38, 0xffffffff, 0x0140001c,
+       0x3c, 0x000f0000, 0x000f0000,
+       0x220, 0xffffffff, 0xC060000C,
+       0x224, 0xc0000fff, 0x00000100,
+       0x20a8, 0xffffffff, 0x00000104,
+       0x55e4, 0xff000fff, 0x00000100,
+       0x30cc, 0xc0000fff, 0x00000104,
+       0xc1e4, 0x00000001, 0x00000001,
+       0xd00c, 0xff000ff0, 0x00000100,
+       0xd80c, 0xff000ff0, 0x00000100
+};
+
+static void cik_init_golden_registers(struct radeon_device *rdev)
+{
+       switch (rdev->family) {
+       case CHIP_BONAIRE:
+               radeon_program_register_sequence(rdev,
+                                                bonaire_mgcg_cgcg_init,
+                                                (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
+               radeon_program_register_sequence(rdev,
+                                                bonaire_golden_registers,
+                                                (const u32)ARRAY_SIZE(bonaire_golden_registers));
+               radeon_program_register_sequence(rdev,
+                                                bonaire_golden_common_registers,
+                                                (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
+               radeon_program_register_sequence(rdev,
+                                                bonaire_golden_spm_registers,
+                                                (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
+               break;
+       case CHIP_KABINI:
+               radeon_program_register_sequence(rdev,
+                                                kalindi_mgcg_cgcg_init,
+                                                (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
+               radeon_program_register_sequence(rdev,
+                                                kalindi_golden_registers,
+                                                (const u32)ARRAY_SIZE(kalindi_golden_registers));
+               radeon_program_register_sequence(rdev,
+                                                kalindi_golden_common_registers,
+                                                (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
+               radeon_program_register_sequence(rdev,
+                                                kalindi_golden_spm_registers,
+                                                (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
+               break;
+       case CHIP_KAVERI:
+               radeon_program_register_sequence(rdev,
+                                                spectre_mgcg_cgcg_init,
+                                                (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
+               radeon_program_register_sequence(rdev,
+                                                spectre_golden_registers,
+                                                (const u32)ARRAY_SIZE(spectre_golden_registers));
+               radeon_program_register_sequence(rdev,
+                                                spectre_golden_common_registers,
+                                                (const u32)ARRAY_SIZE(spectre_golden_common_registers));
+               radeon_program_register_sequence(rdev,
+                                                spectre_golden_spm_registers,
+                                                (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
+               break;
+       default:
+               break;
+       }
+}
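+
+/* Each table above is consumed three dwords at a time as a
+ * (reg, and_mask, or_mask) triple.  radeon_program_register_sequence()
+ * applies the triples read-modify-write, roughly:
+ *
+ *     for (i = 0; i < array_size; i += 3) {
+ *             reg = registers[i + 0];
+ *             and_mask = registers[i + 1];
+ *             or_mask = registers[i + 2];
+ *             if (and_mask == 0xffffffff)
+ *                     tmp = or_mask;
+ *             else
+ *                     tmp = (RREG32(reg) & ~and_mask) | or_mask;
+ *             WREG32(reg, tmp);
+ *     }
+ */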
+
+/**
+ * cik_get_xclk - get the xclk
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Returns the reference clock used by the gfx engine
+ * (CIK).
+ */
+u32 cik_get_xclk(struct radeon_device *rdev)
+{
+       u32 reference_clock = rdev->clock.spll.reference_freq;
+
+       if (rdev->flags & RADEON_IS_IGP) {
+               if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
+                       return reference_clock / 2;
+       } else {
+               if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
+                       return reference_clock / 4;
+       }
+       return reference_clock;
+}
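+
+/* e.g. with a 100 MHz SPLL reference, a dGPU with XTALIN_DIVIDE set
+ * reports 25 MHz and an APU with GPU_COUNTER_CLK set reports 50 MHz. */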
+
+/**
+ * cik_mm_rdoorbell - read a doorbell dword
+ *
+ * @rdev: radeon_device pointer
+ * @offset: byte offset into the aperture
+ *
+ * Returns the value in the doorbell aperture at the
+ * requested offset (CIK).
+ */
+u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
+{
+       if (offset < rdev->doorbell.size) {
+               return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
+       } else {
+               DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
+               return 0;
+       }
+}
+
+/**
+ * cik_mm_wdoorbell - write a doorbell dword
+ *
+ * @rdev: radeon_device pointer
+ * @offset: byte offset into the aperture
+ * @v: value to write
+ *
+ * Writes @v to the doorbell aperture at the
+ * requested offset (CIK).
+ */
+void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
+{
+       if (offset < rdev->doorbell.size) {
+               writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
+       } else {
+               DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
+       }
+}
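+
+/* Ring code reaches these through the doorbell helpers; a sketch of the
+ * radeon.h convention:
+ *
+ *     #define RDOORBELL32(offset) cik_mm_rdoorbell(rdev, (offset))
+ *     #define WDOORBELL32(offset, v) cik_mm_wdoorbell(rdev, (offset), (v))
+ *
+ * as used by cik_compute_ring_set_wptr() below.
+ */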
 
 #define BONAIRE_IO_MC_REGS_SIZE 36
 
@@ -116,6 +633,29 @@ static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
        {0x0000009f, 0x00b48000}
 };
 
+/**
+ * cik_srbm_select - select specific register instances
+ *
+ * @rdev: radeon_device pointer
+ * @me: selected ME (micro engine)
+ * @pipe: pipe
+ * @queue: queue
+ * @vmid: VMID
+ *
+ * Switches the currently active registers instances.  Some
+ * registers are instanced per VMID, others are instanced per
+ * me/pipe/queue combination.
+ */
+static void cik_srbm_select(struct radeon_device *rdev,
+                           u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+       u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
+                            MEID(me & 0x3) |
+                            VMID(vmid & 0xf) |
+                            QUEUEID(queue & 0x7));
+       WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
+}
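+
+/* Typical usage brackets the instanced register access between a select
+ * and a restore of the broadcast instance, e.g.:
+ *
+ *     cik_srbm_select(rdev, me, pipe, queue, 0);
+ *     ... access the per-instance CP_HQD_n registers ...
+ *     cik_srbm_select(rdev, 0, 0, 0, 0);
+ *
+ * as done in cik_compute_ring_get_rptr() and cik_cp_compute_resume().
+ */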
+
 /* ucode loading */
 /**
  * ci_mc_load_microcode - load MC ucode into the hw
@@ -201,7 +741,6 @@ static int ci_mc_load_microcode(struct radeon_device *rdev)
  */
 static int cik_init_microcode(struct radeon_device *rdev)
 {
-       struct platform_device *pdev;
        const char *chip_name;
        size_t pfp_req_size, me_req_size, ce_req_size,
                mec_req_size, rlc_req_size, mc_req_size,
@@ -211,13 +750,6 @@ static int cik_init_microcode(struct radeon_device *rdev)
 
        DRM_DEBUG("\n");
 
-       pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
-       err = IS_ERR(pdev);
-       if (err) {
-               printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
-               return -EINVAL;
-       }
-
        switch (rdev->family) {
        case CHIP_BONAIRE:
                chip_name = "BONAIRE";
@@ -253,7 +785,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
        DRM_INFO("Loading %s Microcode\n", chip_name);
 
        snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
-       err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
+       err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->pfp_fw->size != pfp_req_size) {
@@ -265,7 +797,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
        }
 
        snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
-       err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
+       err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->me_fw->size != me_req_size) {
@@ -276,7 +808,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
        }
 
        snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
-       err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
+       err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->ce_fw->size != ce_req_size) {
@@ -287,7 +819,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
        }
 
        snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
-       err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
+       err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->mec_fw->size != mec_req_size) {
@@ -298,7 +830,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
        }
 
        snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
-       err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
+       err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->rlc_fw->size != rlc_req_size) {
@@ -309,7 +841,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
        }
 
        snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
-       err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
+       err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->sdma_fw->size != sdma_req_size) {
@@ -322,7 +854,7 @@ static int cik_init_microcode(struct radeon_device *rdev)
        /* No MC ucode on APUs */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
-               err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
+               err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
                if (err)
                        goto out;
                if (rdev->mc_fw->size != mc_req_size) {
@@ -334,8 +866,6 @@ static int cik_init_microcode(struct radeon_device *rdev)
        }
 
 out:
-       platform_device_unregister(pdev);
-
        if (err) {
                if (err != -EINVAL)
                        printk(KERN_ERR
@@ -518,6 +1048,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev)
                                gb_tile_moden = 0;
                                break;
                        }
+                       rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
@@ -736,6 +1267,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev)
                                        gb_tile_moden = 0;
                                        break;
                                }
+                               rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
                                WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                        }
                } else if (num_rbs < 4) {
@@ -861,6 +1393,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev)
                                        gb_tile_moden = 0;
                                        break;
                                }
+                               rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
                                WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                        }
                }
@@ -1078,6 +1611,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev)
                                gb_tile_moden = 0;
                                break;
                        }
+                       rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
@@ -1193,7 +1727,7 @@ static void cik_select_se_sh(struct radeon_device *rdev,
        u32 data = INSTANCE_BROADCAST_WRITES;
 
        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
-               data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
+               data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
        else if (se_num == 0xffffffff)
                data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
        else if (sh_num == 0xffffffff)
@@ -1448,6 +1982,9 @@ static void cik_gpu_init(struct radeon_device *rdev)
        WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
        WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
+       WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
+       WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
+       WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
 
        cik_tiling_mode_table_init(rdev);
 
@@ -1576,6 +2113,7 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
        radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
        radeon_ring_write(ring, 0xDEADBEEF);
        radeon_ring_unlock_commit(rdev, ring);
+
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
@@ -1594,7 +2132,7 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 }
 
 /**
- * cik_fence_ring_emit - emit a fence on the gfx ring
+ * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
  *
  * @rdev: radeon_device pointer
  * @fence: radeon fence object
@@ -1602,8 +2140,8 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 * Emits a fence sequence number on the gfx ring and flushes
  * GPU caches.
  */
-void cik_fence_ring_emit(struct radeon_device *rdev,
-                        struct radeon_fence *fence)
+void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
+                            struct radeon_fence *fence)
 {
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
@@ -1630,6 +2168,44 @@ void cik_fence_ring_emit(struct radeon_device *rdev,
        radeon_ring_write(ring, 0);
 }
 
+/**
+ * cik_fence_compute_ring_emit - emit a fence on the compute ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Emits a fence sequence number on the compute ring and flushes
+ * GPU caches.
+ */
+void cik_fence_compute_ring_emit(struct radeon_device *rdev,
+                                struct radeon_fence *fence)
+{
+       struct radeon_ring *ring = &rdev->ring[fence->ring];
+       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+       /* RELEASE_MEM - flush caches, send int */
+       radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
+       radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
+                                EOP_TC_ACTION_EN |
+                                EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+                                EVENT_INDEX(5)));
+       radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
+       radeon_ring_write(ring, addr & 0xfffffffc);
+       radeon_ring_write(ring, upper_32_bits(addr));
+       radeon_ring_write(ring, fence->seq);
+       radeon_ring_write(ring, 0);
+       /* HDP flush */
+       /* We should be using the new WAIT_REG_MEM special op packet here
+        * but it causes the CP to hang
+        */
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, 0);
+}
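+
+/* Note: the gfx ring signals fences with EVENT_WRITE_EOP while compute
+ * queues use RELEASE_MEM; both finish with the same WRITE_DATA to
+ * HDP_MEM_COHERENCY_FLUSH_CNTL as a stand-in for the HDP flush. */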
+
 void cik_semaphore_ring_emit(struct radeon_device *rdev,
                             struct radeon_ring *ring,
                             struct radeon_semaphore *semaphore,
@@ -2001,6 +2577,51 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev)
        return 0;
 }
 
+u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
+                             struct radeon_ring *ring)
+{
+       u32 rptr;
+
+       if (rdev->wb.enabled) {
+               rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
+       } else {
+               cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
+               rptr = RREG32(CP_HQD_PQ_RPTR);
+               cik_srbm_select(rdev, 0, 0, 0, 0);
+       }
+       rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
+
+       return rptr;
+}
+
+u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
+                             struct radeon_ring *ring)
+{
+       u32 wptr;
+
+       if (rdev->wb.enabled) {
+               wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
+       } else {
+               cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
+               wptr = RREG32(CP_HQD_PQ_WPTR);
+               cik_srbm_select(rdev, 0, 0, 0, 0);
+       }
+       wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
+
+       return wptr;
+}
+
+void cik_compute_ring_set_wptr(struct radeon_device *rdev,
+                              struct radeon_ring *ring)
+{
+       u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
+
+       rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
+       WDOORBELL32(ring->doorbell_offset, wptr);
+}
+
 /**
  * cik_cp_compute_enable - enable/disable the compute CP MEs
  *
@@ -2065,7 +2686,8 @@ static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
  */
 static int cik_cp_compute_start(struct radeon_device *rdev)
 {
-       //todo
+       cik_cp_compute_enable(rdev, true);
+
        return 0;
 }
 
@@ -2079,109 +2701,491 @@ static int cik_cp_compute_start(struct radeon_device *rdev)
  */
 static void cik_cp_compute_fini(struct radeon_device *rdev)
 {
+       int i, idx, r;
+
        cik_cp_compute_enable(rdev, false);
-       //todo
-}
 
-/**
- * cik_cp_compute_resume - setup the compute queue registers
- *
- * @rdev: radeon_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int cik_cp_compute_resume(struct radeon_device *rdev)
-{
-       int r;
+       for (i = 0; i < 2; i++) {
+               if (i == 0)
+                       idx = CAYMAN_RING_TYPE_CP1_INDEX;
+               else
+                       idx = CAYMAN_RING_TYPE_CP2_INDEX;
 
-       //todo
-       r = cik_cp_compute_start(rdev);
-       if (r)
-               return r;
-       return 0;
-}
+               if (rdev->ring[idx].mqd_obj) {
+                       r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
+                       if (unlikely(r != 0))
+                               dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
 
-/* XXX temporary wrappers to handle both compute and gfx */
-/* XXX */
-static void cik_cp_enable(struct radeon_device *rdev, bool enable)
-{
-       cik_cp_gfx_enable(rdev, enable);
-       cik_cp_compute_enable(rdev, enable);
+                       radeon_bo_unpin(rdev->ring[idx].mqd_obj);
+                       radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
+
+                       radeon_bo_unref(&rdev->ring[idx].mqd_obj);
+                       rdev->ring[idx].mqd_obj = NULL;
+               }
+       }
 }
 
-/* XXX */
-static int cik_cp_load_microcode(struct radeon_device *rdev)
+static void cik_mec_fini(struct radeon_device *rdev)
 {
        int r;
 
-       r = cik_cp_gfx_load_microcode(rdev);
-       if (r)
-               return r;
-       r = cik_cp_compute_load_microcode(rdev);
-       if (r)
-               return r;
+       if (rdev->mec.hpd_eop_obj) {
+               r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
+               if (unlikely(r != 0))
+                       dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
+               radeon_bo_unpin(rdev->mec.hpd_eop_obj);
+               radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
 
-       return 0;
+               radeon_bo_unref(&rdev->mec.hpd_eop_obj);
+               rdev->mec.hpd_eop_obj = NULL;
+       }
 }
 
-/* XXX */
-static void cik_cp_fini(struct radeon_device *rdev)
-{
-       cik_cp_gfx_fini(rdev);
-       cik_cp_compute_fini(rdev);
-}
+#define MEC_HPD_SIZE 2048
 
-/* XXX */
-static int cik_cp_resume(struct radeon_device *rdev)
+static int cik_mec_init(struct radeon_device *rdev)
 {
        int r;
+       u32 *hpd;
 
-       /* Reset all cp blocks */
-       WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
-       RREG32(GRBM_SOFT_RESET);
-       mdelay(15);
-       WREG32(GRBM_SOFT_RESET, 0);
-       RREG32(GRBM_SOFT_RESET);
+       /*
+        * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
+        * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
+        */
+       if (rdev->family == CHIP_KAVERI)
+               rdev->mec.num_mec = 2;
+       else
+               rdev->mec.num_mec = 1;
+       rdev->mec.num_pipe = 4;
+       rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
+
+       if (rdev->mec.hpd_eop_obj == NULL) {
+               r = radeon_bo_create(rdev,
+                                    rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
+                                    PAGE_SIZE, true,
+                                    RADEON_GEM_DOMAIN_GTT, NULL,
+                                    &rdev->mec.hpd_eop_obj);
+               if (r) {
+                       dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
+                       return r;
+               }
+       }
 
-       r = cik_cp_load_microcode(rdev);
-       if (r)
+       r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
+       if (unlikely(r != 0)) {
+               cik_mec_fini(rdev);
                return r;
-
-       r = cik_cp_gfx_resume(rdev);
-       if (r)
+       }
+       r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
+                         &rdev->mec.hpd_eop_gpu_addr);
+       if (r) {
+               dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
+               cik_mec_fini(rdev);
                return r;
-       r = cik_cp_compute_resume(rdev);
-       if (r)
+       }
+       r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
+       if (r) {
+               dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
+               cik_mec_fini(rdev);
                return r;
+       }
+
+       /* clear memory.  Not sure if this is required or not */
+       memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
+
+       radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
+       radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
 
        return 0;
 }
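+
+/* Sizing example: Kaveri reserves num_mec(2) * num_pipe(4) *
+ * MEC_HPD_SIZE(2048) * 2 = 32KB of GTT here; Bonaire/Kabini need 16KB. */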
 
-/*
- * sDMA - System DMA
- * Starting with CIK, the GPU has new asynchronous
- * DMA engines.  These engines are used for compute
- * and gfx.  There are two DMA engines (SDMA0, SDMA1)
- * and each one supports 1 ring buffer used for gfx
- * and 2 queues used for compute.
- *
- * The programming model is very similar to the CP
- * (ring buffer, IBs, etc.), but sDMA has it's own
- * packet format that is different from the PM4 format
- * used by the CP. sDMA supports copying data, writing
- * embedded data, solid fills, and a number of other
- * things.  It also has support for tiling/detiling of
- * buffers.
- */
+struct hqd_registers
+{
+       u32 cp_mqd_base_addr;
+       u32 cp_mqd_base_addr_hi;
+       u32 cp_hqd_active;
+       u32 cp_hqd_vmid;
+       u32 cp_hqd_persistent_state;
+       u32 cp_hqd_pipe_priority;
+       u32 cp_hqd_queue_priority;
+       u32 cp_hqd_quantum;
+       u32 cp_hqd_pq_base;
+       u32 cp_hqd_pq_base_hi;
+       u32 cp_hqd_pq_rptr;
+       u32 cp_hqd_pq_rptr_report_addr;
+       u32 cp_hqd_pq_rptr_report_addr_hi;
+       u32 cp_hqd_pq_wptr_poll_addr;
+       u32 cp_hqd_pq_wptr_poll_addr_hi;
+       u32 cp_hqd_pq_doorbell_control;
+       u32 cp_hqd_pq_wptr;
+       u32 cp_hqd_pq_control;
+       u32 cp_hqd_ib_base_addr;
+       u32 cp_hqd_ib_base_addr_hi;
+       u32 cp_hqd_ib_rptr;
+       u32 cp_hqd_ib_control;
+       u32 cp_hqd_iq_timer;
+       u32 cp_hqd_iq_rptr;
+       u32 cp_hqd_dequeue_request;
+       u32 cp_hqd_dma_offload;
+       u32 cp_hqd_sema_cmd;
+       u32 cp_hqd_msg_type;
+       u32 cp_hqd_atomic0_preop_lo;
+       u32 cp_hqd_atomic0_preop_hi;
+       u32 cp_hqd_atomic1_preop_lo;
+       u32 cp_hqd_atomic1_preop_hi;
+       u32 cp_hqd_hq_scheduler0;
+       u32 cp_hqd_hq_scheduler1;
+       u32 cp_mqd_control;
+};
+
+struct bonaire_mqd
+{
+       u32 header;
+       u32 dispatch_initiator;
+       u32 dimensions[3];
+       u32 start_idx[3];
+       u32 num_threads[3];
+       u32 pipeline_stat_enable;
+       u32 perf_counter_enable;
+       u32 pgm[2];
+       u32 tba[2];
+       u32 tma[2];
+       u32 pgm_rsrc[2];
+       u32 vmid;
+       u32 resource_limits;
+       u32 static_thread_mgmt01[2];
+       u32 tmp_ring_size;
+       u32 static_thread_mgmt23[2];
+       u32 restart[3];
+       u32 thread_trace_enable;
+       u32 reserved1;
+       u32 user_data[16];
+       u32 vgtcs_invoke_count[2];
+       struct hqd_registers queue_state;
+       u32 dequeue_cntr;
+       u32 interrupt_queue[64];
+};
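+
+/* The MQD (memory queue descriptor) is the CP's backing store for a
+ * compute queue; queue_state mirrors the CP_HQD_n registers that
+ * cik_cp_compute_resume() programs below. */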
+
 /**
- * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
+ * cik_cp_compute_resume - setup the compute queue registers
  *
  * @rdev: radeon_device pointer
- * @ib: IB object to schedule
  *
- * Schedule an IB in the DMA ring (CIK).
+ * Program the compute queues and test them to make sure they
+ * are working.
+ * Returns 0 for success, error for failure.
+ */
+static int cik_cp_compute_resume(struct radeon_device *rdev)
+{
+       int r, i, j, idx;
+       u32 tmp;
+       bool use_doorbell = true;
+       u64 hqd_gpu_addr;
+       u64 mqd_gpu_addr;
+       u64 eop_gpu_addr;
+       u64 wb_gpu_addr;
+       u32 *buf;
+       struct bonaire_mqd *mqd;
+
+       r = cik_cp_compute_start(rdev);
+       if (r)
+               return r;
+
+       /* fix up chicken bits */
+       tmp = RREG32(CP_CPF_DEBUG);
+       tmp |= (1 << 23);
+       WREG32(CP_CPF_DEBUG, tmp);
+
+       /* init the pipes */
+       for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
+               int me = (i < 4) ? 1 : 2;
+               int pipe = (i < 4) ? i : (i - 4);
+
+               eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+
+               cik_srbm_select(rdev, me, pipe, 0, 0);
+
+               /* write the EOP addr */
+               WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+               WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+
+               /* set the VMID assigned */
+               WREG32(CP_HPD_EOP_VMID, 0);
+
+               /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
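+               /* e.g. drm_order(MEC_HPD_SIZE / 8) = drm_order(256) = 8,
+                * so the EOP buffer spans 2^(8+1) = 512 dwords = 2048 bytes */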
+               tmp = RREG32(CP_HPD_EOP_CONTROL);
+               tmp &= ~EOP_SIZE_MASK;
+               tmp |= drm_order(MEC_HPD_SIZE / 8);
+               WREG32(CP_HPD_EOP_CONTROL, tmp);
+       }
+       cik_srbm_select(rdev, 0, 0, 0, 0);
+
+       /* init the queues.  Just two for now. */
+       for (i = 0; i < 2; i++) {
+               if (i == 0)
+                       idx = CAYMAN_RING_TYPE_CP1_INDEX;
+               else
+                       idx = CAYMAN_RING_TYPE_CP2_INDEX;
+
+               if (rdev->ring[idx].mqd_obj == NULL) {
+                       r = radeon_bo_create(rdev,
+                                            sizeof(struct bonaire_mqd),
+                                            PAGE_SIZE, true,
+                                            RADEON_GEM_DOMAIN_GTT, NULL,
+                                            &rdev->ring[idx].mqd_obj);
+                       if (r) {
+                               dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
+                               return r;
+                       }
+               }
+
+               r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
+               if (unlikely(r != 0)) {
+                       cik_cp_compute_fini(rdev);
+                       return r;
+               }
+               r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
+                                 &mqd_gpu_addr);
+               if (r) {
+                       dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
+                       cik_cp_compute_fini(rdev);
+                       return r;
+               }
+               r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
+               if (r) {
+                       dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
+                       cik_cp_compute_fini(rdev);
+                       return r;
+               }
+
+               /* doorbell offset */
+               rdev->ring[idx].doorbell_offset =
+                       (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
+
+               /* init the mqd struct */
+               memset(buf, 0, sizeof(struct bonaire_mqd));
+
+               mqd = (struct bonaire_mqd *)buf;
+               mqd->header = 0xC0310800;
+               mqd->static_thread_mgmt01[0] = 0xffffffff;
+               mqd->static_thread_mgmt01[1] = 0xffffffff;
+               mqd->static_thread_mgmt23[0] = 0xffffffff;
+               mqd->static_thread_mgmt23[1] = 0xffffffff;
+
+               cik_srbm_select(rdev, rdev->ring[idx].me,
+                               rdev->ring[idx].pipe,
+                               rdev->ring[idx].queue, 0);
+
+               /* disable wptr polling */
+               tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
+               tmp &= ~WPTR_POLL_EN;
+               WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
+
+               /* enable doorbell? */
+               mqd->queue_state.cp_hqd_pq_doorbell_control =
+                       RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
+               if (use_doorbell)
+                       mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
+               else
+                       mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
+               WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
+                      mqd->queue_state.cp_hqd_pq_doorbell_control);
+
+               /* disable the queue if it's active */
+               mqd->queue_state.cp_hqd_dequeue_request = 0;
+               mqd->queue_state.cp_hqd_pq_rptr = 0;
+               mqd->queue_state.cp_hqd_pq_wptr = 0;
+               if (RREG32(CP_HQD_ACTIVE) & 1) {
+                       WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
+                       for (j = 0; j < rdev->usec_timeout; j++) {
+                               if (!(RREG32(CP_HQD_ACTIVE) & 1))
+                                       break;
+                               udelay(1);
+                       }
+                       WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
+                       WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
+                       WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
+               }
+
+               /* set the pointer to the MQD */
+               mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
+               mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+               WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
+               WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
+               /* set MQD vmid to 0 */
+               mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
+               mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
+               WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
+
+               /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
+               hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
+               mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
+               mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+               WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
+               WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
+
+               /* set up the HQD, this is similar to CP_RB0_CNTL */
+               mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
+               mqd->queue_state.cp_hqd_pq_control &=
+                       ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
+
+               mqd->queue_state.cp_hqd_pq_control |=
+                       drm_order(rdev->ring[idx].ring_size / 8);
+               mqd->queue_state.cp_hqd_pq_control |=
+                       (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
+#ifdef __BIG_ENDIAN
+               mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
+#endif
+               mqd->queue_state.cp_hqd_pq_control &=
+                       ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
+               mqd->queue_state.cp_hqd_pq_control |=
+                       PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
+               WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
+
+               /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
+               if (i == 0)
+                       wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
+               else
+                       wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
+               mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
+               mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+               WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
+               WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
+                      mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
+
+               /* set the wb address whether it's enabled or not */
+               if (i == 0)
+                       wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
+               else
+                       wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
+               mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
+               mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
+                       upper_32_bits(wb_gpu_addr) & 0xffff;
+               WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
+                      mqd->queue_state.cp_hqd_pq_rptr_report_addr);
+               WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+                      mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
+
+               /* enable the doorbell if requested */
+               if (use_doorbell) {
+                       mqd->queue_state.cp_hqd_pq_doorbell_control =
+                               RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
+                       mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
+                       mqd->queue_state.cp_hqd_pq_doorbell_control |=
+                               DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
+                       mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
+                       mqd->queue_state.cp_hqd_pq_doorbell_control &=
+                               ~(DOORBELL_SOURCE | DOORBELL_HIT);
+               } else {
+                       mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+               }
+               WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
+                      mqd->queue_state.cp_hqd_pq_doorbell_control);
+
+               /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+               rdev->ring[idx].wptr = 0;
+               mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
+               WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
+               rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
+               mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
+
+               /* set the vmid for the queue */
+               mqd->queue_state.cp_hqd_vmid = 0;
+               WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+
+               /* activate the queue */
+               mqd->queue_state.cp_hqd_active = 1;
+               WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+
+               cik_srbm_select(rdev, 0, 0, 0, 0);
+
+               radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
+               radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
+
+               rdev->ring[idx].ready = true;
+               r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
+               if (r)
+                       rdev->ring[idx].ready = false;
+       }
+
+       return 0;
+}
+
+static void cik_cp_enable(struct radeon_device *rdev, bool enable)
+{
+       cik_cp_gfx_enable(rdev, enable);
+       cik_cp_compute_enable(rdev, enable);
+}
+
+static int cik_cp_load_microcode(struct radeon_device *rdev)
+{
+       int r;
+
+       r = cik_cp_gfx_load_microcode(rdev);
+       if (r)
+               return r;
+       r = cik_cp_compute_load_microcode(rdev);
+       if (r)
+               return r;
+
+       return 0;
+}
+
+static void cik_cp_fini(struct radeon_device *rdev)
+{
+       cik_cp_gfx_fini(rdev);
+       cik_cp_compute_fini(rdev);
+}
+
+static int cik_cp_resume(struct radeon_device *rdev)
+{
+       int r;
+
+       /* Reset all cp blocks */
+       WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+       RREG32(GRBM_SOFT_RESET);
+       mdelay(15);
+       WREG32(GRBM_SOFT_RESET, 0);
+       RREG32(GRBM_SOFT_RESET);
+
+       r = cik_cp_load_microcode(rdev);
+       if (r)
+               return r;
+
+       r = cik_cp_gfx_resume(rdev);
+       if (r)
+               return r;
+       r = cik_cp_compute_resume(rdev);
+       if (r)
+               return r;
+
+       return 0;
+}
+
+/*
+ * sDMA - System DMA
+ * Starting with CIK, the GPU has new asynchronous
+ * DMA engines.  These engines are used for compute
+ * and gfx.  There are two DMA engines (SDMA0, SDMA1)
+ * and each one supports 1 ring buffer used for gfx
+ * and 2 queues used for compute.
+ *
+ * The programming model is very similar to the CP
+ * (ring buffer, IBs, etc.), but sDMA has its own
+ * packet format that is different from the PM4 format
+ * used by the CP. sDMA supports copying data, writing
+ * embedded data, solid fills, and a number of other
+ * things.  It also has support for tiling/detiling of
+ * buffers.
+ */
+/**
+ * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (CIK).
  */
 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
                              struct radeon_ib *ib)
@@ -2731,90 +3735,9 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
        return r;
 }
 
-/**
- * cik_gpu_is_lockup - check if the 3D engine is locked up
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Check if the 3D engine is locked up (CIK).
- * Returns true if the engine is locked, false if not.
- */
-bool cik_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-       u32 srbm_status, srbm_status2;
-       u32 grbm_status, grbm_status2;
-       u32 grbm_status_se0, grbm_status_se1, grbm_status_se2, grbm_status_se3;
-
-       srbm_status = RREG32(SRBM_STATUS);
-       srbm_status2 = RREG32(SRBM_STATUS2);
-       grbm_status = RREG32(GRBM_STATUS);
-       grbm_status2 = RREG32(GRBM_STATUS2);
-       grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
-       grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
-       grbm_status_se2 = RREG32(GRBM_STATUS_SE2);
-       grbm_status_se3 = RREG32(GRBM_STATUS_SE3);
-       if (!(grbm_status & GUI_ACTIVE)) {
-               radeon_ring_lockup_update(ring);
-               return false;
-       }
-       /* force CP activities */
-       radeon_ring_force_activity(rdev, ring);
-       return radeon_ring_test_lockup(rdev, ring);
-}
 
-/**
- * cik_gfx_gpu_soft_reset - soft reset the 3D engine and CPG
- *
- * @rdev: radeon_device pointer
- *
- * Soft reset the GFX engine and CPG blocks (CIK).
- * XXX: deal with reseting RLC and CPF
- * Returns 0 for success.
- */
-static int cik_gfx_gpu_soft_reset(struct radeon_device *rdev)
+static void cik_print_gpu_status_regs(struct radeon_device *rdev)
 {
-       struct evergreen_mc_save save;
-       u32 grbm_reset = 0;
-
-       if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
-               return 0;
-
-       dev_info(rdev->dev, "GPU GFX softreset \n");
-       dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
-               RREG32(GRBM_STATUS));
-       dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
-               RREG32(GRBM_STATUS2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
-               RREG32(GRBM_STATUS_SE0));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
-               RREG32(GRBM_STATUS_SE1));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
-               RREG32(GRBM_STATUS_SE2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
-               RREG32(GRBM_STATUS_SE3));
-       dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
-               RREG32(SRBM_STATUS));
-       dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
-               RREG32(SRBM_STATUS2));
-       evergreen_mc_stop(rdev, &save);
-       if (radeon_mc_wait_for_idle(rdev)) {
-               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
-       }
-       /* Disable CP parsing/prefetching */
-       WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
-
-       /* reset all the gfx block and all CPG blocks */
-       grbm_reset = SOFT_RESET_CPG | SOFT_RESET_GFX;
-
-       dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
-       WREG32(GRBM_SOFT_RESET, grbm_reset);
-       (void)RREG32(GRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(GRBM_SOFT_RESET, 0);
-       (void)RREG32(GRBM_SOFT_RESET);
-       /* Wait a little for things to settle down */
-       udelay(50);
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
@@ -2831,98 +3754,286 @@ static int cik_gfx_gpu_soft_reset(struct radeon_device *rdev)
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
-       evergreen_mc_resume(rdev, &save);
-       return 0;
+       dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
+               RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
+       dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
+                RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
+       dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
+       dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
+                RREG32(CP_STALLED_STAT1));
+       dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
+                RREG32(CP_STALLED_STAT2));
+       dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
+                RREG32(CP_STALLED_STAT3));
+       dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
+                RREG32(CP_CPF_BUSY_STAT));
+       dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
+                RREG32(CP_CPF_STALLED_STAT1));
+       dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
+       dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
+       dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
+                RREG32(CP_CPC_STALLED_STAT1));
+       dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
 }
 
 /**
- * cik_compute_gpu_soft_reset - soft reset CPC
+ * cik_gpu_check_soft_reset - check which blocks are busy
  *
  * @rdev: radeon_device pointer
  *
- * Soft reset the CPC blocks (CIK).
- * XXX: deal with reseting RLC and CPF
- * Returns 0 for success.
+ * Check which blocks are busy and return the relevant reset
+ * mask to be used by cik_gpu_soft_reset().
+ * Returns a mask of the blocks to be reset.
  */
-static int cik_compute_gpu_soft_reset(struct radeon_device *rdev)
+static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
 {
-       struct evergreen_mc_save save;
-       u32 grbm_reset = 0;
+       u32 reset_mask = 0;
+       u32 tmp;
 
-       dev_info(rdev->dev, "GPU compute softreset \n");
-       dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
-               RREG32(GRBM_STATUS));
-       dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
-               RREG32(GRBM_STATUS2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
-               RREG32(GRBM_STATUS_SE0));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
-               RREG32(GRBM_STATUS_SE1));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
-               RREG32(GRBM_STATUS_SE2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
-               RREG32(GRBM_STATUS_SE3));
-       dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
-               RREG32(SRBM_STATUS));
-       dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
-               RREG32(SRBM_STATUS2));
-       evergreen_mc_stop(rdev, &save);
-       if (radeon_mc_wait_for_idle(rdev)) {
-               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
-       }
-       /* Disable CP parsing/prefetching */
-       WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
+       /* GRBM_STATUS */
+       tmp = RREG32(GRBM_STATUS);
+       if (tmp & (PA_BUSY | SC_BUSY |
+                  BCI_BUSY | SX_BUSY |
+                  TA_BUSY | VGT_BUSY |
+                  DB_BUSY | CB_BUSY |
+                  GDS_BUSY | SPI_BUSY |
+                  IA_BUSY | IA_BUSY_NO_DMA))
+               reset_mask |= RADEON_RESET_GFX;
 
-       /* reset all the CPC blocks */
-       grbm_reset = SOFT_RESET_CPG;
+       if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
+               reset_mask |= RADEON_RESET_CP;
 
-       dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
-       WREG32(GRBM_SOFT_RESET, grbm_reset);
-       (void)RREG32(GRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(GRBM_SOFT_RESET, 0);
-       (void)RREG32(GRBM_SOFT_RESET);
-       /* Wait a little for things to settle down */
-       udelay(50);
-       dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
-               RREG32(GRBM_STATUS));
-       dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
-               RREG32(GRBM_STATUS2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
-               RREG32(GRBM_STATUS_SE0));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
-               RREG32(GRBM_STATUS_SE1));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
-               RREG32(GRBM_STATUS_SE2));
-       dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
-               RREG32(GRBM_STATUS_SE3));
-       dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
-               RREG32(SRBM_STATUS));
-       dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
-               RREG32(SRBM_STATUS2));
-       evergreen_mc_resume(rdev, &save);
-       return 0;
+       /* GRBM_STATUS2 */
+       tmp = RREG32(GRBM_STATUS2);
+       if (tmp & RLC_BUSY)
+               reset_mask |= RADEON_RESET_RLC;
+
+       /* SDMA0_STATUS_REG */
+       tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
+       if (!(tmp & SDMA_IDLE))
+               reset_mask |= RADEON_RESET_DMA;
+
+       /* SDMA1_STATUS_REG */
+       tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
+       if (!(tmp & SDMA_IDLE))
+               reset_mask |= RADEON_RESET_DMA1;
+
+       /* SRBM_STATUS2 */
+       tmp = RREG32(SRBM_STATUS2);
+       if (tmp & SDMA_BUSY)
+               reset_mask |= RADEON_RESET_DMA;
+
+       if (tmp & SDMA1_BUSY)
+               reset_mask |= RADEON_RESET_DMA1;
+
+       /* SRBM_STATUS */
+       tmp = RREG32(SRBM_STATUS);
+
+       if (tmp & IH_BUSY)
+               reset_mask |= RADEON_RESET_IH;
+
+       if (tmp & SEM_BUSY)
+               reset_mask |= RADEON_RESET_SEM;
+
+       if (tmp & GRBM_RQ_PENDING)
+               reset_mask |= RADEON_RESET_GRBM;
+
+       if (tmp & VMC_BUSY)
+               reset_mask |= RADEON_RESET_VMC;
+
+       if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
+                  MCC_BUSY | MCD_BUSY))
+               reset_mask |= RADEON_RESET_MC;
+
+       if (evergreen_is_display_hung(rdev))
+               reset_mask |= RADEON_RESET_DISPLAY;
+
+       /* Skip MC reset as it's most likely not hung, just busy */
+       if (reset_mask & RADEON_RESET_MC) {
+               DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
+               reset_mask &= ~RADEON_RESET_MC;
+       }
+
+       return reset_mask;
 }
 
 /**
- * cik_asic_reset - soft reset compute and gfx
+ * cik_gpu_soft_reset - soft reset GPU
  *
  * @rdev: radeon_device pointer
+ * @reset_mask: mask of which blocks to reset
  *
- * Soft reset the CPC blocks (CIK).
- * XXX: make this more fine grained and only reset
- * what is necessary.
- * Returns 0 for success.
+ * Soft reset the blocks specified in @reset_mask.
  */
-int cik_asic_reset(struct radeon_device *rdev)
+static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
 {
-       int r;
+       struct evergreen_mc_save save;
+       u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+       u32 tmp;
 
-       r = cik_compute_gpu_soft_reset(rdev);
-       if (r)
-               dev_info(rdev->dev, "Compute reset failed!\n");
+       if (reset_mask == 0)
+               return;
+
+       dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+
+       cik_print_gpu_status_regs(rdev);
+       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
+                RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+       dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+                RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+
+       /* stop the rlc */
+       cik_rlc_stop(rdev);
+
+       /* Disable GFX parsing/prefetching */
+       WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
+
+       /* Disable MEC parsing/prefetching */
+       WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
+
+       if (reset_mask & RADEON_RESET_DMA) {
+               /* sdma0 */
+               tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
+               tmp |= SDMA_HALT;
+               WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
+       }
+       if (reset_mask & RADEON_RESET_DMA1) {
+               /* sdma1 */
+               tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
+               tmp |= SDMA_HALT;
+               WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
+       }
+
+       evergreen_mc_stop(rdev, &save);
+       if (evergreen_mc_wait_for_idle(rdev)) {
+               dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
+       }
+
+       if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
+               grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
+
+       if (reset_mask & RADEON_RESET_CP) {
+               grbm_soft_reset |= SOFT_RESET_CP;
+
+               srbm_soft_reset |= SOFT_RESET_GRBM;
+       }
+
+       if (reset_mask & RADEON_RESET_DMA)
+               srbm_soft_reset |= SOFT_RESET_SDMA;
+
+       if (reset_mask & RADEON_RESET_DMA1)
+               srbm_soft_reset |= SOFT_RESET_SDMA1;
+
+       if (reset_mask & RADEON_RESET_DISPLAY)
+               srbm_soft_reset |= SOFT_RESET_DC;
+
+       if (reset_mask & RADEON_RESET_RLC)
+               grbm_soft_reset |= SOFT_RESET_RLC;
+
+       if (reset_mask & RADEON_RESET_SEM)
+               srbm_soft_reset |= SOFT_RESET_SEM;
+
+       if (reset_mask & RADEON_RESET_IH)
+               srbm_soft_reset |= SOFT_RESET_IH;
+
+       if (reset_mask & RADEON_RESET_GRBM)
+               srbm_soft_reset |= SOFT_RESET_GRBM;
+
+       if (reset_mask & RADEON_RESET_VMC)
+               srbm_soft_reset |= SOFT_RESET_VMC;
+
+       if (!(rdev->flags & RADEON_IS_IGP)) {
+               if (reset_mask & RADEON_RESET_MC)
+                       srbm_soft_reset |= SOFT_RESET_MC;
+       }
+
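+       /* Reset sequence note: each block below is reset by setting its
+        * soft-reset bits, reading the register back to post the write,
+        * waiting ~50us, then clearing the bits and posting again; this is
+        * the same toggle the removed per-block reset helpers used.
+        */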
+       if (grbm_soft_reset) {
+               tmp = RREG32(GRBM_SOFT_RESET);
+               tmp |= grbm_soft_reset;
+               dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+               WREG32(GRBM_SOFT_RESET, tmp);
+               tmp = RREG32(GRBM_SOFT_RESET);
+
+               udelay(50);
+
+               tmp &= ~grbm_soft_reset;
+               WREG32(GRBM_SOFT_RESET, tmp);
+               tmp = RREG32(GRBM_SOFT_RESET);
+       }
+
+       if (srbm_soft_reset) {
+               tmp = RREG32(SRBM_SOFT_RESET);
+               tmp |= srbm_soft_reset;
+               dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+               WREG32(SRBM_SOFT_RESET, tmp);
+               tmp = RREG32(SRBM_SOFT_RESET);
+
+               udelay(50);
+
+               tmp &= ~srbm_soft_reset;
+               WREG32(SRBM_SOFT_RESET, tmp);
+               tmp = RREG32(SRBM_SOFT_RESET);
+       }
+
+       /* Wait a little for things to settle down */
+       udelay(50);
+
+       evergreen_mc_resume(rdev, &save);
+       udelay(50);
+
+       cik_print_gpu_status_regs(rdev);
+}
+
+/**
+ * cik_asic_reset - soft reset GPU
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+int cik_asic_reset(struct radeon_device *rdev)
+{
+       u32 reset_mask;
+
+       reset_mask = cik_gpu_check_soft_reset(rdev);
+
+       if (reset_mask)
+               r600_set_bios_scratch_engine_hung(rdev, true);
+
+       cik_gpu_soft_reset(rdev, reset_mask);
+
+       reset_mask = cik_gpu_check_soft_reset(rdev);
+
+       if (!reset_mask)
+               r600_set_bios_scratch_engine_hung(rdev, false);
+
+       return 0;
+}
+
+/**
+ * cik_gfx_is_lockup - check if the 3D engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the 3D engine is locked up (CIK).
+ * Returns true if the engine is locked, false if not.
+ */
+bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
 
-       return cik_gfx_gpu_soft_reset(rdev);
+       if (!(reset_mask & (RADEON_RESET_GFX |
+                           RADEON_RESET_COMPUTE |
+                           RADEON_RESET_CP))) {
+               radeon_ring_lockup_update(ring);
+               return false;
+       }
+       /* force CP activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
 }
 
 /**
@@ -2936,13 +4047,15 @@ int cik_asic_reset(struct radeon_device *rdev)
  */
 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
-       u32 dma_status_reg;
+       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
+       u32 mask;
 
        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
-               dma_status_reg = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
+               mask = RADEON_RESET_DMA;
        else
-               dma_status_reg = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
-       if (dma_status_reg & SDMA_IDLE) {
+               mask = RADEON_RESET_DMA1;
+
+       if (!(reset_mask & mask)) {
                radeon_ring_lockup_update(ring);
                return false;
        }
@@ -3208,7 +4321,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
        /* XXX SH_MEM regs */
        /* where to put LDS, scratch, GPUVM in FSA64 space */
        for (i = 0; i < 16; i++) {
-               WREG32(SRBM_GFX_CNTL, VMID(i));
+               cik_srbm_select(rdev, 0, 0, 0, i);
                /* CP and shaders */
                WREG32(SH_MEM_CONFIG, 0);
                WREG32(SH_MEM_APE1_BASE, 1);
@@ -3221,7 +4334,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
                WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
                /* XXX SDMA RLC - todo */
        }
-       WREG32(SRBM_GFX_CNTL, 0);
+       cik_srbm_select(rdev, 0, 0, 0, 0);
 
        cik_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -3328,6 +4441,29 @@ void cik_vm_fini(struct radeon_device *rdev)
 {
 }
 
+/**
+ * cik_vm_decode_fault - print human readable fault info
+ *
+ * @rdev: radeon_device pointer
+ * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
+ * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
+ * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
+ *
+ * Print human readable fault information (CIK).
+ */
+static void cik_vm_decode_fault(struct radeon_device *rdev,
+                               u32 status, u32 addr, u32 mc_client)
+{
+       u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
+       u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
+       u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
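+       /* the MCCLIENT register value is a four byte ASCII tag naming the
+        * offending client, so it is printed below by aliasing the u32 as a
+        * string; note it is not NUL terminated and the byte order seen is
+        * CPU dependent.
+        */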
+       char *block = (char *)&mc_client;
+
+       printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
+              protections, vmid, addr,
+              (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
+              block, mc_id);
+}
+
 /**
  * cik_vm_flush - cik vm flush using the CP
  *
@@ -3402,9 +4538,12 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 1 << vm->id);
 
-       /* sync PFP to ME, otherwise we might get invalid PFP reads */
-       radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
-       radeon_ring_write(ring, 0x0);
+       /* compute doesn't have PFP */
+       if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
+               /* sync PFP to ME, otherwise we might get invalid PFP reads */
+               radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+               radeon_ring_write(ring, 0x0);
+       }
 }
 
 /**
@@ -3941,6 +5080,8 @@ int cik_irq_set(struct radeon_device *rdev)
 {
        u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
                PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
+       u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
+       u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
        u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
        u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
        u32 grbm_int_cntl = 0;
@@ -3968,13 +5109,106 @@ int cik_irq_set(struct radeon_device *rdev)
        dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
 
+       cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+       cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+       cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+       cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+       cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+       cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+       cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
+       cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
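+       /* there is one interrupt control register per compute pipe:
+        * CP_ME<m>_PIPE<p>_INT_CNTL covers MEC <m>, pipe <p>.  The ring's
+        * me/pipe fields select which register to arm below.
+        */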
+
        /* enable CP interrupts on all rings */
        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
                DRM_DEBUG("cik_irq_set: sw int gfx\n");
                cp_int_cntl |= TIME_STAMP_INT_ENABLE;
        }
-       /* TODO: compute queues! */
-       /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
+       if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
+               struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+               DRM_DEBUG("si_irq_set: sw int cp1\n");
+               if (ring->me == 1) {
+                       switch (ring->pipe) {
+                       case 0:
+                               cp_m1p0 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 1:
+                               cp_m1p1 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 2:
+                               cp_m1p2 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 3:
+                               cp_m1p3 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       default:
+                               DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
+                               break;
+                       }
+               } else if (ring->me == 2) {
+                       switch (ring->pipe) {
+                       case 0:
+                               cp_m2p0 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 1:
+                               cp_m2p1 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 2:
+                               cp_m2p2 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 3:
+                               cp_m2p3 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       default:
+                               DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
+                               break;
+                       }
+               } else {
+                       DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
+               }
+       }
+       if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
+               struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+               DRM_DEBUG("si_irq_set: sw int cp2\n");
+               if (ring->me == 1) {
+                       switch (ring->pipe) {
+                       case 0:
+                               cp_m1p0 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 1:
+                               cp_m1p1 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 2:
+                               cp_m1p2 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 3:
+                               cp_m1p3 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       default:
+                               DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
+                               break;
+                       }
+               } else if (ring->me == 2) {
+                       switch (ring->pipe) {
+                       case 0:
+                               cp_m2p0 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 1:
+                               cp_m2p1 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 2:
+                               cp_m2p2 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       case 3:
+                               cp_m2p3 |= TIME_STAMP_INT_ENABLE;
+                               break;
+                       default:
+                               DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
+                               break;
+                       }
+               } else {
+                       DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
+               }
+       }
 
        if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
                DRM_DEBUG("cik_irq_set: sw int dma\n");
@@ -4046,6 +5280,15 @@ int cik_irq_set(struct radeon_device *rdev)
        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
 
+       WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
+       WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
+       WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
+       WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
+       WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
+       WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
+       WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
+       WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
+
        WREG32(GRBM_INT_CNTL, grbm_int_cntl);
 
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -4267,6 +5510,8 @@ static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
  */
 int cik_irq_process(struct radeon_device *rdev)
 {
+       struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+       struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
        u32 wptr;
        u32 rptr;
        u32 src_id, src_data, ring_id;
@@ -4274,6 +5519,7 @@ int cik_irq_process(struct radeon_device *rdev)
        u32 ring_index;
        bool queue_hotplug = false;
        bool queue_reset = false;
+       u32 addr, status, mc_client;
 
        if (!rdev->ih.enabled || rdev->shutdown)
                return IRQ_NONE;
@@ -4509,11 +5755,15 @@ restart_ih:
                        break;
                case 146:
                case 147:
+                       addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
+                       status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
+                       mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
                        dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
-                               RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+                               addr);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-                               RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+                               status);
+                       cik_vm_decode_fault(rdev, status, addr, mc_client);
                        /* reset addr and status */
                        WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
                        break;
@@ -4532,10 +5782,11 @@ restart_ih:
                                radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
                                break;
                        case 1:
-                               /* XXX compute */
-                               break;
                        case 2:
-                               /* XXX compute */
+                               if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
+                                       radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
+                               if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
+                                       radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
                                break;
                        }
                        break;
@@ -4554,9 +5805,11 @@ restart_ih:
                                break;
                        case 1:
                                /* XXX compute */
+                               queue_reset = true;
                                break;
                        case 2:
                                /* XXX compute */
+                               queue_reset = true;
                                break;
                        }
                        break;
@@ -4575,9 +5828,11 @@ restart_ih:
                                break;
                        case 1:
                                /* XXX compute */
+                               queue_reset = true;
                                break;
                        case 2:
                                /* XXX compute */
+                               queue_reset = true;
                                break;
                        }
                        break;
@@ -4681,3 +5936,1070 @@ restart_ih:
 
        return IRQ_HANDLED;
 }
+
+/*
+ * startup/shutdown callbacks
+ */
+/**
+ * cik_startup - program the asic to a functional state
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Programs the asic to a functional state (CIK).
+ * Called by cik_init() and cik_resume().
+ * Returns 0 for success, error for failure.
+ */
+static int cik_startup(struct radeon_device *rdev)
+{
+       struct radeon_ring *ring;
+       int r;
+
+       if (rdev->flags & RADEON_IS_IGP) {
+               if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
+                   !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
+                       r = cik_init_microcode(rdev);
+                       if (r) {
+                               DRM_ERROR("Failed to load firmware!\n");
+                               return r;
+                       }
+               }
+       } else {
+               if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
+                   !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
+                   !rdev->mc_fw) {
+                       r = cik_init_microcode(rdev);
+                       if (r) {
+                               DRM_ERROR("Failed to load firmware!\n");
+                               return r;
+                       }
+               }
+
+               r = ci_mc_load_microcode(rdev);
+               if (r) {
+                       DRM_ERROR("Failed to load MC firmware!\n");
+                       return r;
+               }
+       }
+
+       r = r600_vram_scratch_init(rdev);
+       if (r)
+               return r;
+
+       cik_mc_program(rdev);
+       r = cik_pcie_gart_enable(rdev);
+       if (r)
+               return r;
+       cik_gpu_init(rdev);
+
+       /* allocate rlc buffers */
+       r = si_rlc_init(rdev);
+       if (r) {
+               DRM_ERROR("Failed to init rlc BOs!\n");
+               return r;
+       }
+
+       /* allocate wb buffer */
+       r = radeon_wb_init(rdev);
+       if (r)
+               return r;
+
+       /* allocate mec buffers */
+       r = cik_mec_init(rdev);
+       if (r) {
+               DRM_ERROR("Failed to init MEC BOs!\n");
+               return r;
+       }
+
+       r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+       if (r) {
+               dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+               return r;
+       }
+
+       r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
+       if (r) {
+               dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+               return r;
+       }
+
+       r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
+       if (r) {
+               dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+               return r;
+       }
+
+       r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+       if (r) {
+               dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+               return r;
+       }
+
+       r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+       if (r) {
+               dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+               return r;
+       }
+
+       r = cik_uvd_resume(rdev);
+       if (!r) {
+               r = radeon_fence_driver_start_ring(rdev,
+                                                  R600_RING_TYPE_UVD_INDEX);
+               if (r)
+                       dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+       }
+       if (r)
+               rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
+       /* Enable IRQ */
+       if (!rdev->irq.installed) {
+               r = radeon_irq_kms_init(rdev);
+               if (r)
+                       return r;
+       }
+
+       r = cik_irq_init(rdev);
+       if (r) {
+               DRM_ERROR("radeon: IH init failed (%d).\n", r);
+               radeon_irq_kms_fini(rdev);
+               return r;
+       }
+       cik_irq_set(rdev);
+
+       ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+       r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
+                            CP_RB0_RPTR, CP_RB0_WPTR,
+                            0, 0xfffff, RADEON_CP_PACKET2);
+       if (r)
+               return r;
+
+       /* set up the compute queues */
+       /* type-2 packets are deprecated on MEC, use type-3 instead */
+       ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+       r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
+                            CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
+                            0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
+       if (r)
+               return r;
+       ring->me = 1; /* first MEC */
+       ring->pipe = 0; /* first pipe */
+       ring->queue = 0; /* first queue */
+       ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
+
+       /* type-2 packets are deprecated on MEC, use type-3 instead */
+       ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+       r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
+                            CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
+                            0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
+       if (r)
+               return r;
+       /* dGPUs only have 1 MEC */
+       ring->me = 1; /* first MEC */
+       ring->pipe = 0; /* first pipe */
+       ring->queue = 1; /* second queue */
+       ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
+
+       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+       r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+                            SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
+                            SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
+                            2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+       if (r)
+               return r;
+
+       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+       r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+                            SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
+                            SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
+                            2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+       if (r)
+               return r;
+
+       r = cik_cp_resume(rdev);
+       if (r)
+               return r;
+
+       r = cik_sdma_resume(rdev);
+       if (r)
+               return r;
+
+       ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       if (ring->ring_size) {
+               r = radeon_ring_init(rdev, ring, ring->ring_size,
+                                    R600_WB_UVD_RPTR_OFFSET,
+                                    UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+                                    0, 0xfffff, RADEON_CP_PACKET2);
+               if (!r)
+                       r = r600_uvd_init(rdev);
+               if (r)
+                       DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+       }
+
+       r = radeon_ib_pool_init(rdev);
+       if (r) {
+               dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+               return r;
+       }
+
+       r = radeon_vm_manager_init(rdev);
+       if (r) {
+               dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
+               return r;
+       }
+
+       return 0;
+}
+
+/**
+ * cik_resume - resume the asic to a functional state
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Programs the asic to a functional state (CIK).
+ * Called at resume.
+ * Returns 0 for success, error for failure.
+ */
+int cik_resume(struct radeon_device *rdev)
+{
+       int r;
+
+       /* post card */
+       atom_asic_init(rdev->mode_info.atom_context);
+
+       /* init golden registers */
+       cik_init_golden_registers(rdev);
+
+       rdev->accel_working = true;
+       r = cik_startup(rdev);
+       if (r) {
+               DRM_ERROR("cik startup failed on resume\n");
+               rdev->accel_working = false;
+               return r;
+       }
+
+       return r;
+}
+
+/**
+ * cik_suspend - suspend the asic
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Bring the chip into a state suitable for suspend (CIK).
+ * Called at suspend.
+ * Returns 0 for success.
+ */
+int cik_suspend(struct radeon_device *rdev)
+{
+       radeon_vm_manager_fini(rdev);
+       cik_cp_enable(rdev, false);
+       cik_sdma_enable(rdev, false);
+       r600_uvd_stop(rdev);
+       radeon_uvd_suspend(rdev);
+       cik_irq_suspend(rdev);
+       radeon_wb_disable(rdev);
+       cik_pcie_gart_disable(rdev);
+       return 0;
+}
+
+/* The plan is to move initialization into this function and use
+ * helper functions so that radeon_device_init does pretty much
+ * nothing more than call asic specific functions.  This should
+ * also allow us to remove a bunch of callback functions like
+ * vram_info.
+ */
+/**
+ * cik_init - asic specific driver and hw init
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Setup asic specific driver variables and program the hw
+ * to a functional state (CIK).
+ * Called at driver startup.
+ * Returns 0 for success, errors for failure.
+ */
+int cik_init(struct radeon_device *rdev)
+{
+       struct radeon_ring *ring;
+       int r;
+
+       /* Read BIOS */
+       if (!radeon_get_bios(rdev)) {
+               if (ASIC_IS_AVIVO(rdev))
+                       return -EINVAL;
+       }
+       /* Must be an ATOMBIOS */
+       if (!rdev->is_atom_bios) {
+               dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
+               return -EINVAL;
+       }
+       r = radeon_atombios_init(rdev);
+       if (r)
+               return r;
+
+       /* Post card if necessary */
+       if (!radeon_card_posted(rdev)) {
+               if (!rdev->bios) {
+                       dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
+                       return -EINVAL;
+               }
+               DRM_INFO("GPU not posted. posting now...\n");
+               atom_asic_init(rdev->mode_info.atom_context);
+       }
+       /* init golden registers */
+       cik_init_golden_registers(rdev);
+       /* Initialize scratch registers */
+       cik_scratch_init(rdev);
+       /* Initialize surface registers */
+       radeon_surface_init(rdev);
+       /* Initialize clocks */
+       radeon_get_clock_info(rdev->ddev);
+
+       /* Fence driver */
+       r = radeon_fence_driver_init(rdev);
+       if (r)
+               return r;
+
+       /* initialize memory controller */
+       r = cik_mc_init(rdev);
+       if (r)
+               return r;
+       /* Memory manager */
+       r = radeon_bo_init(rdev);
+       if (r)
+               return r;
+
+       ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+       ring->ring_obj = NULL;
+       r600_ring_init(rdev, ring, 1024 * 1024);
+
+       ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+       ring->ring_obj = NULL;
+       r600_ring_init(rdev, ring, 1024 * 1024);
+       r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
+       if (r)
+               return r;
+
+       ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+       ring->ring_obj = NULL;
+       r600_ring_init(rdev, ring, 1024 * 1024);
+       r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
+       if (r)
+               return r;
+
+       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+       ring->ring_obj = NULL;
+       r600_ring_init(rdev, ring, 256 * 1024);
+
+       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+       ring->ring_obj = NULL;
+       r600_ring_init(rdev, ring, 256 * 1024);
+
+       r = radeon_uvd_init(rdev);
+       if (!r) {
+               ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+               ring->ring_obj = NULL;
+               r600_ring_init(rdev, ring, 4096);
+       }
+
+       rdev->ih.ring_obj = NULL;
+       r600_ih_ring_init(rdev, 64 * 1024);
+
+       r = r600_pcie_gart_init(rdev);
+       if (r)
+               return r;
+
+       rdev->accel_working = true;
+       r = cik_startup(rdev);
+       if (r) {
+               dev_err(rdev->dev, "disabling GPU acceleration\n");
+               cik_cp_fini(rdev);
+               cik_sdma_fini(rdev);
+               cik_irq_fini(rdev);
+               si_rlc_fini(rdev);
+               cik_mec_fini(rdev);
+               radeon_wb_fini(rdev);
+               radeon_ib_pool_fini(rdev);
+               radeon_vm_manager_fini(rdev);
+               radeon_irq_kms_fini(rdev);
+               cik_pcie_gart_fini(rdev);
+               rdev->accel_working = false;
+       }
+
+       /* Don't start up if the MC ucode is missing.
+        * The default clocks and voltages before the MC ucode
+        * is loaded are not sufficient for advanced operations.
+        */
+       if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
+               DRM_ERROR("radeon: MC ucode required for NI+.\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/**
+ * cik_fini - asic specific driver and hw fini
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the asic specific driver variables and program the hw
+ * to an idle state (CIK).
+ * Called at driver unload.
+ */
+void cik_fini(struct radeon_device *rdev)
+{
+       cik_cp_fini(rdev);
+       cik_sdma_fini(rdev);
+       cik_irq_fini(rdev);
+       si_rlc_fini(rdev);
+       cik_mec_fini(rdev);
+       radeon_wb_fini(rdev);
+       radeon_vm_manager_fini(rdev);
+       radeon_ib_pool_fini(rdev);
+       radeon_irq_kms_fini(rdev);
+       r600_uvd_stop(rdev);
+       radeon_uvd_fini(rdev);
+       cik_pcie_gart_fini(rdev);
+       r600_vram_scratch_fini(rdev);
+       radeon_gem_fini(rdev);
+       radeon_fence_driver_fini(rdev);
+       radeon_bo_fini(rdev);
+       radeon_atombios_fini(rdev);
+       kfree(rdev->bios);
+       rdev->bios = NULL;
+}
+
+/* display watermark setup */
+/**
+ * dce8_line_buffer_adjust - Set up the line buffer
+ *
+ * @rdev: radeon_device pointer
+ * @radeon_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ *
+ * Set up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
+ */
+static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
+                                  struct radeon_crtc *radeon_crtc,
+                                  struct drm_display_mode *mode)
+{
+       u32 tmp;
+
+       /*
+        * Line Buffer Setup
+        * There are 6 line buffers, one for each display controller.
+        * There are 3 partitions per LB. Select the number of partitions
+        * to enable based on the display width.  For display widths larger
+        * than 4096, you need to use 2 display controllers and combine
+        * them using the stereo blender.
+        */
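+       /* e.g. a 1920 pixel wide mode lands in the second bucket below
+        * (1920 <= width < 2560), selecting memory config 2 and a
+        * 2560 * 2 pixel line buffer allocation.
+        */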
+       if (radeon_crtc->base.enabled && mode) {
+               if (mode->crtc_hdisplay < 1920)
+                       tmp = 1;
+               else if (mode->crtc_hdisplay < 2560)
+                       tmp = 2;
+               else if (mode->crtc_hdisplay < 4096)
+                       tmp = 0;
+               else {
+                       DRM_DEBUG_KMS("Mode too big for LB!\n");
+                       tmp = 0;
+               }
+       } else
+               tmp = 1;
+
+       WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
+              LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
+
+       if (radeon_crtc->base.enabled && mode) {
+               switch (tmp) {
+               case 0:
+               default:
+                       return 4096 * 2;
+               case 1:
+                       return 1920 * 2;
+               case 2:
+                       return 2560 * 2;
+               }
+       }
+
+       /* controller not enabled, so no lb used */
+       return 0;
+}
+
+/**
+ * cik_get_number_of_dram_channels - get the number of dram channels
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Look up the number of video ram channels (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the number of dram channels
+ */
+static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
+{
+       u32 tmp = RREG32(MC_SHARED_CHMAP);
+
+       switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
+       case 0:
+       default:
+               return 1;
+       case 1:
+               return 2;
+       case 2:
+               return 4;
+       case 3:
+               return 8;
+       case 4:
+               return 3;
+       case 5:
+               return 6;
+       case 6:
+               return 10;
+       case 7:
+               return 12;
+       case 8:
+               return 16;
+       }
+}
+
+struct dce8_wm_params {
+       u32 dram_channels; /* number of dram channels */
+       u32 yclk;          /* bandwidth per dram data pin in kHz */
+       u32 sclk;          /* engine clock in kHz */
+       u32 disp_clk;      /* display clock in kHz */
+       u32 src_width;     /* viewport width */
+       u32 active_time;   /* active display time in ns */
+       u32 blank_time;    /* blank time in ns */
+       bool interlaced;    /* mode is interlaced */
+       fixed20_12 vsc;    /* vertical scale ratio */
+       u32 num_heads;     /* number of active crtcs */
+       u32 bytes_per_pixel; /* bytes per pixel display + overlay */
+       u32 lb_size;       /* line buffer allocated to pipe */
+       u32 vtaps;         /* vertical scaler taps */
+};
+
+/**
+ * dce8_dram_bandwidth - get the dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the raw dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth in MBytes/s
+ */
+static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
+{
+       /* Calculate raw DRAM Bandwidth */
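+       /* i.e. bandwidth = yclk(MHz) * dram_channels * 4 bytes * 0.7;
+        * e.g. a 1000000 kHz yclk with 4 channels gives roughly
+        * 1000 * 16 * 0.7 = 11200 MBytes/s.
+        */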
+       fixed20_12 dram_efficiency; /* 0.7 */
+       fixed20_12 yclk, dram_channels, bandwidth;
+       fixed20_12 a;
+
+       a.full = dfixed_const(1000);
+       yclk.full = dfixed_const(wm->yclk);
+       yclk.full = dfixed_div(yclk, a);
+       dram_channels.full = dfixed_const(wm->dram_channels * 4);
+       a.full = dfixed_const(10);
+       dram_efficiency.full = dfixed_const(7);
+       dram_efficiency.full = dfixed_div(dram_efficiency, a);
+       bandwidth.full = dfixed_mul(dram_channels, yclk);
+       bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
+
+       return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dram bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth for display in MBytes/s
+ */
+static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
+{
+       /* Calculate DRAM Bandwidth and the part allocated to display. */
+       fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
+       fixed20_12 yclk, dram_channels, bandwidth;
+       fixed20_12 a;
+
+       a.full = dfixed_const(1000);
+       yclk.full = dfixed_const(wm->yclk);
+       yclk.full = dfixed_div(yclk, a);
+       dram_channels.full = dfixed_const(wm->dram_channels * 4);
+       a.full = dfixed_const(10);
+       disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
+       disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
+       bandwidth.full = dfixed_mul(dram_channels, yclk);
+       bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
+
+       return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce8_data_return_bandwidth - get the data return bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the data return bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the data return bandwidth in MBytes/s
+ */
+static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
+{
+       /* Calculate the display Data return Bandwidth */
+       fixed20_12 return_efficiency; /* 0.8 */
+       fixed20_12 sclk, bandwidth;
+       fixed20_12 a;
+
+       a.full = dfixed_const(1000);
+       sclk.full = dfixed_const(wm->sclk);
+       sclk.full = dfixed_div(sclk, a);
+       a.full = dfixed_const(10);
+       return_efficiency.full = dfixed_const(8);
+       return_efficiency.full = dfixed_div(return_efficiency, a);
+       a.full = dfixed_const(32);
+       bandwidth.full = dfixed_mul(a, sclk);
+       bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
+
+       return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce8_dmif_request_bandwidth - get the dmif bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dmif bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dmif bandwidth in MBytes/s
+ */
+static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
+{
+       /* Calculate the DMIF Request Bandwidth */
+       fixed20_12 disp_clk_request_efficiency; /* 0.8 */
+       fixed20_12 disp_clk, bandwidth;
+       fixed20_12 a, b;
+
+       a.full = dfixed_const(1000);
+       disp_clk.full = dfixed_const(wm->disp_clk);
+       disp_clk.full = dfixed_div(disp_clk, a);
+       a.full = dfixed_const(32);
+       b.full = dfixed_mul(a, disp_clk);
+
+       a.full = dfixed_const(10);
+       disp_clk_request_efficiency.full = dfixed_const(8);
+       disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
+
+       bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
+
+       return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce8_available_bandwidth - get the min available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the min available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the min available bandwidth in MBytes/s
+ */
+static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
+{
+       /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
+       u32 dram_bandwidth = dce8_dram_bandwidth(wm);
+       u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
+       u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
+
+       return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
+}
+
+/**
+ * dce8_average_bandwidth - get the average available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the average available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the average available bandwidth in MBytes/s
+ */
+static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
+{
+       /* Calculate the display mode Average Bandwidth
+        * DisplayMode should contain the source and destination dimensions,
+        * timing, etc.
+        */
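+       /* i.e. bandwidth = src_width * bytes_per_pixel * vsc / line_time(us);
+        * e.g. 1920 pixels * 4 bytes over a ~14.8us line time is roughly
+        * 519 MBytes/s per head at vsc == 1.
+        */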
+       fixed20_12 bpp;
+       fixed20_12 line_time;
+       fixed20_12 src_width;
+       fixed20_12 bandwidth;
+       fixed20_12 a;
+
+       a.full = dfixed_const(1000);
+       line_time.full = dfixed_const(wm->active_time + wm->blank_time);
+       line_time.full = dfixed_div(line_time, a);
+       bpp.full = dfixed_const(wm->bytes_per_pixel);
+       src_width.full = dfixed_const(wm->src_width);
+       bandwidth.full = dfixed_mul(src_width, bpp);
+       bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
+       bandwidth.full = dfixed_div(bandwidth, line_time);
+
+       return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce8_latency_watermark - get the latency watermark
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the latency watermark (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the latency watermark in ns
+ */
+static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
+{
+       /* First calculate the latency in ns */
+       u32 mc_latency = 2000; /* 2000 ns. */
+       u32 available_bandwidth = dce8_available_bandwidth(wm);
+       u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
+       u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
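+       /* with bandwidth in MBytes/s (== bytes/us), (512 * 8 * 1000) / bw
+        * is the time in ns to return one 4 KiB chunk, and
+        * (128 * 4 * 1000) / bw the time for a 512 byte cursor line pair.
+        */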
+       u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
+       u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
+               (wm->num_heads * cursor_line_pair_return_time);
+       u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
+       u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
+       u32 tmp, dmif_size = 12288;
+       fixed20_12 a, b, c;
+
+       if (wm->num_heads == 0)
+               return 0;
+
+       a.full = dfixed_const(2);
+       b.full = dfixed_const(1);
+       if ((wm->vsc.full > a.full) ||
+           ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
+           (wm->vtaps >= 5) ||
+           ((wm->vsc.full >= a.full) && wm->interlaced))
+               max_src_lines_per_dst_line = 4;
+       else
+               max_src_lines_per_dst_line = 2;
+
+       a.full = dfixed_const(available_bandwidth);
+       b.full = dfixed_const(wm->num_heads);
+       a.full = dfixed_div(a, b);
+
+       b.full = dfixed_const(mc_latency + 512);
+       c.full = dfixed_const(wm->disp_clk);
+       b.full = dfixed_div(b, c);
+
+       c.full = dfixed_const(dmif_size);
+       b.full = dfixed_div(c, b);
+
+       tmp = min(dfixed_trunc(a), dfixed_trunc(b));
+
+       b.full = dfixed_const(1000);
+       c.full = dfixed_const(wm->disp_clk);
+       b.full = dfixed_div(c, b);
+       c.full = dfixed_const(wm->bytes_per_pixel);
+       b.full = dfixed_mul(b, c);
+
+       lb_fill_bw = min(tmp, dfixed_trunc(b));
+
+       a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
+       b.full = dfixed_const(1000);
+       c.full = dfixed_const(lb_fill_bw);
+       b.full = dfixed_div(c, b);
+       a.full = dfixed_div(a, b);
+       line_fill_time = dfixed_trunc(a);
+
+       if (line_fill_time < wm->active_time)
+               return latency;
+       else
+               return latency + (line_fill_time - wm->active_time);
+}
+
+/**
+ * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
+ * average and available dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
+{
+       if (dce8_average_bandwidth(wm) <=
+           (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
+               return true;
+       else
+               return false;
+}
+
+/**
+ * dce8_average_bandwidth_vs_available_bandwidth - check
+ * average and available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * available bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
+{
+       if (dce8_average_bandwidth(wm) <=
+           (dce8_available_bandwidth(wm) / wm->num_heads))
+               return true;
+       else
+               return false;
+}
+
+/**
+ * dce8_check_latency_hiding - check latency hiding
+ *
+ * @wm: watermark calculation data
+ *
+ * Check latency hiding (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
+{
+       u32 lb_partitions = wm->lb_size / wm->src_width;
+       u32 line_time = wm->active_time + wm->blank_time;
+       u32 latency_tolerant_lines;
+       u32 latency_hiding;
+       fixed20_12 a;
+
+       a.full = dfixed_const(1);
+       if (wm->vsc.full > a.full)
+               latency_tolerant_lines = 1;
+       else {
+               if (lb_partitions <= (wm->vtaps + 1))
+                       latency_tolerant_lines = 1;
+               else
+                       latency_tolerant_lines = 2;
+       }
+
+       latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
+
+       if (dce8_latency_watermark(wm) <= latency_hiding)
+               return true;
+       else
+               return false;
+}
+
+/**
+ * dce8_program_watermarks - program display watermarks
+ *
+ * @rdev: radeon_device pointer
+ * @radeon_crtc: the selected display controller
+ * @lb_size: line buffer size
+ * @num_heads: number of display controllers in use
+ *
+ * Calculate and program the display watermarks for the
+ * selected display controller (CIK).
+ */
+static void dce8_program_watermarks(struct radeon_device *rdev,
+                                   struct radeon_crtc *radeon_crtc,
+                                   u32 lb_size, u32 num_heads)
+{
+       struct drm_display_mode *mode = &radeon_crtc->base.mode;
+       struct dce8_wm_params wm;
+       u32 pixel_period;
+       u32 line_time = 0;
+       u32 latency_watermark_a = 0, latency_watermark_b = 0;
+       u32 tmp, wm_mask;
+
+       if (radeon_crtc->base.enabled && num_heads && mode) {
+               pixel_period = 1000000 / (u32)mode->clock;
+               line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+
+               wm.yclk = rdev->pm.current_mclk * 10;
+               wm.sclk = rdev->pm.current_sclk * 10;
+               wm.disp_clk = mode->clock;
+               wm.src_width = mode->crtc_hdisplay;
+               wm.active_time = mode->crtc_hdisplay * pixel_period;
+               wm.blank_time = line_time - wm.active_time;
+               wm.interlaced = false;
+               if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+                       wm.interlaced = true;
+               wm.vsc = radeon_crtc->vsc;
+               wm.vtaps = 1;
+               if (radeon_crtc->rmx_type != RMX_OFF)
+                       wm.vtaps = 2;
+               wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
+               wm.lb_size = lb_size;
+               wm.dram_channels = cik_get_number_of_dram_channels(rdev);
+               wm.num_heads = num_heads;
+
+               /* set for high clocks */
+               latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
+               /* set for low clocks */
+               /* XXX: wm.yclk and wm.sclk should use the low clock values
+                * here; until then the high clock watermark is reused
+                */
+               latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
+
+               /* possibly force display priority to high */
+               /* should really do this at mode validation time... */
+               if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
+                   !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
+                   !dce8_check_latency_hiding(&wm) ||
+                   (rdev->disp_priority == 2)) {
+                       DRM_DEBUG_KMS("force priority to high\n");
+               }
+       }
+
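+       /* watermarks A and B are selected through a mask; program each in
+        * turn, then restore the original selection
+        */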
+       /* select wm A */
+       wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
+       tmp = wm_mask;
+       tmp &= ~LATENCY_WATERMARK_MASK(3);
+       tmp |= LATENCY_WATERMARK_MASK(1);
+       WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
+       WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
+              (LATENCY_LOW_WATERMARK(latency_watermark_a) |
+               LATENCY_HIGH_WATERMARK(line_time)));
+       /* select wm B */
+       tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
+       tmp &= ~LATENCY_WATERMARK_MASK(3);
+       tmp |= LATENCY_WATERMARK_MASK(2);
+       WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
+       WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
+              (LATENCY_LOW_WATERMARK(latency_watermark_b) |
+               LATENCY_HIGH_WATERMARK(line_time)));
+       /* restore original selection */
+       WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
+}
+
+/**
+ * dce8_bandwidth_update - program display watermarks
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Calculate and program the display watermarks and line
+ * buffer allocation (CIK).
+ */
+void dce8_bandwidth_update(struct radeon_device *rdev)
+{
+       struct drm_display_mode *mode = NULL;
+       u32 num_heads = 0, lb_size;
+       int i;
+
+       radeon_update_display_priority(rdev);
+
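+       /* count the enabled heads; each takes an equal share of the
+        * available display bandwidth
+        */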
+       for (i = 0; i < rdev->num_crtc; i++) {
+               if (rdev->mode_info.crtcs[i]->base.enabled)
+                       num_heads++;
+       }
+       for (i = 0; i < rdev->num_crtc; i++) {
+               mode = &rdev->mode_info.crtcs[i]->base.mode;
+               lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
+               dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
+       }
+}
+
+/**
+ * cik_get_gpu_clock_counter - return GPU clock counter snapshot
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Fetches a GPU clock counter snapshot (CIK).
+ * Returns the 64 bit clock counter snapshot.
+ */
+uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
+{
+       uint64_t clock;
+
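+       /* serialize the capture and the split 64-bit read so concurrent
+        * callers cannot interleave
+        */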
+       mutex_lock(&rdev->gpu_clock_mutex);
+       WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+       clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
+               ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+       mutex_unlock(&rdev->gpu_clock_mutex);
+       return clock;
+}
+
+static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
+                              u32 cntl_reg, u32 status_reg)
+{
+       int r, i;
+       struct atom_clock_dividers dividers;
+       uint32_t tmp;
+
+       r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+                                          clock, false, &dividers);
+       if (r)
+               return r;
+
+       tmp = RREG32_SMC(cntl_reg);
+       tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
+       tmp |= dividers.post_divider;
+       WREG32_SMC(cntl_reg, tmp);
+
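+       /* poll up to 1s (100 * 10ms) for the new divider to take effect */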
+       for (i = 0; i < 100; i++) {
+               if (RREG32_SMC(status_reg) & DCLK_STATUS)
+                       break;
+               mdelay(10);
+       }
+       if (i == 100)
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
+{
+       int r = 0;
+
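+       /* VCLK and DCLK are set independently, each through its own
+        * control/status register pair
+        */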
+       r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
+       if (r)
+               return r;
+
+       r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
+       return r;
+}
+
+int cik_uvd_resume(struct radeon_device *rdev)
+{
+       uint64_t addr;
+       uint32_t size;
+       int r;
+
+       r = radeon_uvd_resume(rdev);
+       if (r)
+               return r;
+
+       /* program the VCPU memory controller bits 0-27 */
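+       /* offsets and sizes below are in 8-byte units, hence the >> 3 */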
+       addr = rdev->uvd.gpu_addr >> 3;
+       size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3;
+       WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
+       WREG32(UVD_VCPU_CACHE_SIZE0, size);
+
+       addr += size;
+       size = RADEON_UVD_STACK_SIZE >> 3;
+       WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
+       WREG32(UVD_VCPU_CACHE_SIZE1, size);
+
+       addr += size;
+       size = RADEON_UVD_HEAP_SIZE >> 3;
+       WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
+       WREG32(UVD_VCPU_CACHE_SIZE2, size);
+
+       /* bits 28-31 */
+       addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
+       WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+       /* bits 32-39 */
+       addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
+       WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+       return 0;
+}