/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
        u32 r;

        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        r = RREG32(PCIE_DATA);
        return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        WREG32(PCIE_DATA, v);
        (void)RREG32(PCIE_DATA);
}
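
/*
 * Example (illustrative only, not part of the driver): the accessors
 * above are the usual way to do a read-modify-write of a PCIE port
 * register.  A minimal sketch, where EXAMPLE_PCIE_REG and EXAMPLE_BIT
 * are hypothetical names:
 *
 *	u32 tmp = cik_pciep_rreg(rdev, EXAMPLE_PCIE_REG);
 *	tmp |= EXAMPLE_BIT;
 *	cik_pciep_wreg(rdev, EXAMPLE_PCIE_REG, tmp);
 *
 * The dummy RREG32(PCIE_INDEX)/RREG32(PCIE_DATA) reads in the accessors
 * flush posted writes so the index and data accesses reach the hw in
 * order.
 */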

static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28355, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
        switch (rdev->family) {
        case CHIP_BONAIRE:
                radeon_program_register_sequence(rdev,
                                                 bonaire_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
                break;
        case CHIP_KABINI:
                radeon_program_register_sequence(rdev,
                                                 kalindi_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
                break;
        case CHIP_KAVERI:
                radeon_program_register_sequence(rdev,
                                                 spectre_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
                break;
        default:
                break;
        }
}
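
/*
 * Each golden table above is a flat array of (offset, and-mask, value)
 * triplets.  radeon_program_register_sequence() applies them roughly as
 * in the sketch below: a full mask means a plain write, anything else
 * is a masked read-modify-write.
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff) {
 *			tmp = or_mask;
 *		} else {
 *			tmp = RREG32(reg);
 *			tmp &= ~and_mask;
 *			tmp |= or_mask;
 *		}
 *		WREG32(reg, tmp);
 *	}
 */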

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
        u32 reference_clock = rdev->clock.spll.reference_freq;

        if (rdev->flags & RADEON_IS_IGP) {
                if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
                        return reference_clock / 2;
        } else {
                if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
                        return reference_clock / 4;
        }
        return reference_clock;
}
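
/*
 * Worked example (illustrative numbers only): with a 100 MHz SPLL
 * reference on a dGPU, cik_get_xclk() returns 25 MHz (100 / 4) when
 * XTALIN_DIVIDE is set and the full 100 MHz otherwise; on an APU with
 * GPU_COUNTER_CLK set it would return 50 MHz (100 / 2).
 */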

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
        if (offset < rdev->doorbell.size) {
                return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
                return 0;
        }
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
        if (offset < rdev->doorbell.size) {
                writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
        }
}
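
/*
 * Example (illustrative): rings that use doorbells commit their write
 * pointer through the aperture rather than an MMIO ring register.
 * Assuming a ring whose byte offset into the aperture is stored in
 * ring->doorbell_offset:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 *
 * The bounds checks above keep a bogus offset from reading or writing
 * outside the mapped doorbell BAR.
 */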

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
        {0x00000070, 0x04400000},
        {0x00000071, 0x80c01803},
        {0x00000072, 0x00004004},
        {0x00000073, 0x00000100},
        {0x00000074, 0x00ff0000},
        {0x00000075, 0x34000000},
        {0x00000076, 0x08000014},
        {0x00000077, 0x00cc08ec},
        {0x00000078, 0x00000400},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x04090000},
        {0x0000007c, 0x00000000},
        {0x0000007e, 0x4408a8e8},
        {0x0000007f, 0x00000304},
        {0x00000080, 0x00000000},
        {0x00000082, 0x00000001},
        {0x00000083, 0x00000002},
        {0x00000084, 0xf3e4f400},
        {0x00000085, 0x052024e3},
        {0x00000087, 0x00000000},
        {0x00000088, 0x01000000},
        {0x0000008a, 0x1c0a0000},
        {0x0000008b, 0xff010000},
        {0x0000008d, 0xffffefff},
        {0x0000008e, 0xfff3efff},
        {0x0000008f, 0xfff3efbf},
        {0x00000092, 0xf7ffffff},
        {0x00000093, 0xffffff7f},
        {0x00000095, 0x00101101},
        {0x00000096, 0x00000fff},
        {0x00000097, 0x00116fff},
        {0x00000098, 0x60010000},
        {0x00000099, 0x10010000},
        {0x0000009a, 0x00006000},
        {0x0000009b, 0x00001000},
        {0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
                            u32 me, u32 pipe, u32 queue, u32 vmid)
{
        u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
                             MEID(me & 0x3) |
                             VMID(vmid & 0xf) |
                             QUEUEID(queue & 0x7));
        WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
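
/*
 * Typical usage (illustrative): select the instance, program the
 * instanced registers, then restore the default instance so that
 * unrelated register accesses are not silently redirected:
 *
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program per-VMID or per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 */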

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        u32 running;
        u32 *io_mc_regs;
        int i, ucode_size, regs_size;

        if (!rdev->mc_fw)
                return -EINVAL;

        switch (rdev->family) {
        case CHIP_BONAIRE:
        default:
                io_mc_regs = (u32 *)&bonaire_io_mc_regs;
                ucode_size = CIK_MC_UCODE_SIZE;
                regs_size = BONAIRE_IO_MC_REGS_SIZE;
                break;
        }

        running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

        /* only load the ucode if the MC engine isn't already running */
        if (running == 0) {
                /* reset the engine and set to writable */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

                /* load mc io regs */
                for (i = 0; i < regs_size; i++) {
                        WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
                        WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
                }
                /* load the MC ucode */
                fw_data = (const __be32 *)rdev->mc_fw->data;
                for (i = 0; i < ucode_size; i++)
                        WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

                /* put the engine back into the active state */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

                /* wait for training to complete */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
                                break;
                        udelay(1);
                }
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
                                break;
                        udelay(1);
                }
        }

        return 0;
}
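
/*
 * Illustrative call site: MC ucode is only needed on dGPUs (APUs carry
 * no MC firmware image), so startup code would do something like:
 *
 *	if (!(rdev->flags & RADEON_IS_IGP)) {
 *		r = ci_mc_load_microcode(rdev);
 *		if (r)
 *			DRM_ERROR("Failed to load MC firmware!\n");
 *	}
 */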

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
        struct platform_device *pdev;
        const char *chip_name;
        size_t pfp_req_size, me_req_size, ce_req_size,
                mec_req_size, rlc_req_size, mc_req_size,
                sdma_req_size;
        char fw_name[30];
        int err;

        DRM_DEBUG("\n");

        pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
        err = IS_ERR(pdev);
        if (err) {
                printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
                return -EINVAL;
        }

        switch (rdev->family) {
        case CHIP_BONAIRE:
                chip_name = "BONAIRE";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
                mc_req_size = CIK_MC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_KAVERI:
                chip_name = "KAVERI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KV_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_KABINI:
                chip_name = "KABINI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KB_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        default: BUG();
        }

        DRM_INFO("Loading %s Microcode\n", chip_name);

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
        err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
        if (err)
                goto out;
        if (rdev->pfp_fw->size != pfp_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->pfp_fw->size, fw_name);
                err = -EINVAL;
                goto out;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
        err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
        if (err)
                goto out;
        if (rdev->me_fw->size != me_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->me_fw->size, fw_name);
                err = -EINVAL;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
        err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
        if (err)
                goto out;
        if (rdev->ce_fw->size != ce_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->ce_fw->size, fw_name);
                err = -EINVAL;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
        err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
        if (err)
                goto out;
        if (rdev->mec_fw->size != mec_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->mec_fw->size, fw_name);
                err = -EINVAL;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
        err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
        if (err)
                goto out;
        if (rdev->rlc_fw->size != rlc_req_size) {
                printk(KERN_ERR
                       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
                       rdev->rlc_fw->size, fw_name);
                err = -EINVAL;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
        err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
        if (err)
                goto out;
        if (rdev->sdma_fw->size != sdma_req_size) {
                printk(KERN_ERR
                       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
                       rdev->sdma_fw->size, fw_name);
                err = -EINVAL;
        }

        /* No MC ucode on APUs */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
                err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
                if (err)
                        goto out;
                if (rdev->mc_fw->size != mc_req_size) {
                        printk(KERN_ERR
                               "cik_mc: Bogus length %zu in firmware \"%s\"\n",
                               rdev->mc_fw->size, fw_name);
                        err = -EINVAL;
                }
        }

out:
        platform_device_unregister(pdev);

        if (err) {
                if (err != -EINVAL)
                        printk(KERN_ERR
                               "cik_cp: Failed to load firmware \"%s\"\n",
                               fw_name);
                release_firmware(rdev->pfp_fw);
                rdev->pfp_fw = NULL;
                release_firmware(rdev->me_fw);
                rdev->me_fw = NULL;
                release_firmware(rdev->ce_fw);
                rdev->ce_fw = NULL;
                release_firmware(rdev->mec_fw);
                rdev->mec_fw = NULL;
                release_firmware(rdev->rlc_fw);
                rdev->rlc_fw = NULL;
                release_firmware(rdev->sdma_fw);
                rdev->sdma_fw = NULL;
                release_firmware(rdev->mc_fw);
                rdev->mc_fw = NULL;
        }
        return err;
}
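
/*
 * Note (illustrative): the request_firmware() calls above resolve to
 * files such as /lib/firmware/radeon/BONAIRE_pfp.bin, and the
 * MODULE_FIRMWARE() tags near the top of this file advertise those
 * names so initramfs tooling can bundle the blobs with the module.
 */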

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
        const u32 num_tile_mode_states = 32;
        const u32 num_secondary_tile_mode_states = 16;
        u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
        u32 num_pipe_configs;
        u32 num_rbs = rdev->config.cik.max_backends_per_se *
                rdev->config.cik.max_shader_engines;

        switch (rdev->config.cik.mem_row_size_in_kb) {
        case 1:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
                break;
        case 2:
        default:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
                break;
        case 4:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
                break;
        }
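
        /*
         * Illustrative note: mem_row_size_in_kb is the DRAM row size, so
         * with the default 2KB row the depth tile modes below that use
         * TILE_SPLIT(split_equal_to_row_size) split their tiles at 2KB,
         * keeping a single tile split from spanning more than one row.
         */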

        num_pipe_configs = rdev->config.cik.max_tile_pipes;
        if (num_pipe_configs > 8)
                num_pipe_configs = 8; /* ??? */

        if (num_pipe_configs == 8) {
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                        switch (reg_offset) {
                        case 0:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
                                break;
                        case 1:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
                                break;
                        case 2:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                break;
                        case 3:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
                                break;
                        case 4:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(split_equal_to_row_size));
                                break;
                        case 5:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                                break;
                        case 6:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                break;
                        case 7:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(split_equal_to_row_size));
                                break;
                        case 8:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
                                break;
                        case 9:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
                                break;
                        case 10:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 11:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 12:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 13:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
                                break;
                        case 14:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 16:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 17:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 27:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
                                break;
                        case 28:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 29:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 30:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        default:
                                gb_tile_moden = 0;
                                break;
                        }
                        rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
                        switch (reg_offset) {
                        case 0:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 1:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 2:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 3:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 4:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_8_BANK));
                                break;
                        case 5:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_4_BANK));
                                break;
                        case 6:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_2_BANK));
                                break;
                        case 8:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 9:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 10:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 11:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 12:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_8_BANK));
                                break;
                        case 13:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_4_BANK));
                                break;
                        case 14:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_2_BANK));
                                break;
                        default:
                                gb_tile_moden = 0;
                                break;
                        }
                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
        } else if (num_pipe_configs == 4) {
                if (num_rbs == 4) {
                        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                                switch (reg_offset) {
                                case 0:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
                                        break;
                                case 1:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
                                        break;
                                case 2:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                        break;
                                case 3:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
                                        break;
                                case 4:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(split_equal_to_row_size));
                                        break;
                                case 5:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                                        break;
                                case 6:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                        break;
                                case 7:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(split_equal_to_row_size));
                                        break;
                                case 8:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16));
                                        break;
                                case 9:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
                                        break;
                                case 10:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 11:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 12:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1230                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1231                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1232                                         break;
1233                                 case 13:
1234                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1235                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1236                                         break;
1237                                 case 14:
1238                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1239                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1240                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1241                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1242                                         break;
1243                                 case 16:
1244                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1245                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1246                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1247                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1248                                         break;
1249                                 case 17:
1250                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1251                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1252                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1253                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1254                                         break;
1255                                 case 27:
1256                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1257                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1258                                         break;
1259                                 case 28:
1260                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1261                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1262                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1263                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1264                                         break;
1265                                 case 29:
1266                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1267                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1268                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1269                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1270                                         break;
1271                                 case 30:
1272                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1273                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1274                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1275                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1276                                         break;
1277                                 default:
1278                                         gb_tile_moden = 0;
1279                                         break;
1280                                 }
1281                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1282                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1283                         }
1284                 } else if (num_rbs < 4) {
1285                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1286                                 switch (reg_offset) {
1287                                 case 0:
1288                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1289                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1290                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1291                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1292                                         break;
1293                                 case 1:
1294                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1295                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1296                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1297                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1298                                         break;
1299                                 case 2:
1300                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1301                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1302                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1303                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1304                                         break;
1305                                 case 3:
1306                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1307                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1308                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1309                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1310                                         break;
1311                                 case 4:
1312                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1313                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1314                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1315                                                          TILE_SPLIT(split_equal_to_row_size));
1316                                         break;
1317                                 case 5:
1318                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1319                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1320                                         break;
1321                                 case 6:
1322                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1323                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1324                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1325                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1326                                         break;
1327                                 case 7:
1328                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1329                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1330                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1331                                                          TILE_SPLIT(split_equal_to_row_size));
1332                                         break;
1333                                 case 8:
1334                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1335                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
1336                                         break;
1337                                 case 9:
1338                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1339                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1340                                         break;
1341                                 case 10:
1342                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1343                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1344                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1345                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1346                                         break;
1347                                 case 11:
1348                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1349                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1350                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1351                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1352                                         break;
1353                                 case 12:
1354                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1355                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1356                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1357                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1358                                         break;
1359                                 case 13:
1360                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1361                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1362                                         break;
1363                                 case 14:
1364                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1365                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1366                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1367                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1368                                         break;
1369                                 case 16:
1370                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1371                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1372                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1373                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1374                                         break;
1375                                 case 17:
1376                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1377                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1378                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1379                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1380                                         break;
1381                                 case 27:
1382                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1383                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1384                                         break;
1385                                 case 28:
1386                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1387                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1388                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1389                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1390                                         break;
1391                                 case 29:
1392                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1393                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1394                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1395                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1396                                         break;
1397                                 case 30:
1398                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1399                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1400                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1401                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1402                                         break;
1403                                 default:
1404                                         gb_tile_moden = 0;
1405                                         break;
1406                                 }
1407                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1408                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1409                         }
1410                 }
1411                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1412                         switch (reg_offset) {
1413                         case 0:
1414                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1415                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1416                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1417                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1418                                 break;
1419                         case 1:
1420                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1421                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1422                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1423                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1424                                 break;
1425                         case 2:
1426                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1427                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1428                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1429                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1430                                 break;
1431                         case 3:
1432                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1433                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1434                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1435                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1436                                 break;
1437                         case 4:
1438                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1439                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1440                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1441                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1442                                 break;
1443                         case 5:
1444                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1447                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1448                                 break;
1449                         case 6:
1450                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1451                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1452                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1453                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1454                                 break;
1455                         case 8:
1456                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1457                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1458                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1459                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1460                                 break;
1461                         case 9:
1462                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1463                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1464                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1465                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1466                                 break;
1467                         case 10:
1468                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1471                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1472                                 break;
1473                         case 11:
1474                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1475                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1476                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1477                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1478                                 break;
1479                         case 12:
1480                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1481                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1482                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1483                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1484                                 break;
1485                         case 13:
1486                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1487                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1488                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1489                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1490                                 break;
1491                         case 14:
1492                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1493                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1494                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1495                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1496                                 break;
1497                         default:
1498                                 gb_tile_moden = 0;
1499                                 break;
1500                         }
1501                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1502                 }
1503         } else if (num_pipe_configs == 2) {
1504                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1505                         switch (reg_offset) {
1506                         case 0:
1507                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1508                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1509                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1510                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1511                                 break;
1512                         case 1:
1513                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1514                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1515                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1516                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1517                                 break;
1518                         case 2:
1519                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1520                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1521                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1522                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1523                                 break;
1524                         case 3:
1525                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1526                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1527                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1528                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1529                                 break;
1530                         case 4:
1531                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1532                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1533                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1534                                                  TILE_SPLIT(split_equal_to_row_size));
1535                                 break;
1536                         case 5:
1537                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1538                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1539                                 break;
1540                         case 6:
1541                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1542                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1543                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1544                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1545                                 break;
1546                         case 7:
1547                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1548                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1549                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1550                                                  TILE_SPLIT(split_equal_to_row_size));
1551                                 break;
1552                         case 8:
1553                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1554                                 break;
1555                         case 9:
1556                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1557                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1558                                 break;
1559                         case 10:
1560                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1561                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1562                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1563                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1564                                 break;
1565                         case 11:
1566                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1567                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1568                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1569                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1570                                 break;
1571                         case 12:
1572                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1573                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1574                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1575                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1576                                 break;
1577                         case 13:
1578                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1579                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1580                                 break;
1581                         case 14:
1582                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1583                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1584                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1585                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1586                                 break;
1587                         case 16:
1588                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1589                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1590                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1591                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1592                                 break;
1593                         case 17:
1594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1595                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1596                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1597                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1598                                 break;
1599                         case 27:
1600                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1601                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1602                                 break;
1603                         case 28:
1604                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1605                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1606                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1607                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1608                                 break;
1609                         case 29:
1610                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1611                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1612                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1613                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1614                                 break;
1615                         case 30:
1616                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1617                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1618                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1619                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1620                                 break;
1621                         default:
1622                                 gb_tile_moden = 0;
1623                                 break;
1624                         }
1625                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1626                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1627                 }
1628                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1629                         switch (reg_offset) {
1630                         case 0:
1631                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1632                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1633                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1634                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1635                                 break;
1636                         case 1:
1637                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1638                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1639                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1640                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1641                                 break;
1642                         case 2:
1643                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1644                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1645                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1646                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1647                                 break;
1648                         case 3:
1649                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1650                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1651                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1652                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1653                                 break;
1654                         case 4:
1655                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1656                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1657                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1658                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1659                                 break;
1660                         case 5:
1661                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1662                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1663                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1664                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1665                                 break;
1666                         case 6:
1667                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1668                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1669                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1670                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1671                                 break;
1672                         case 8:
1673                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1674                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1675                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1676                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1677                                 break;
1678                         case 9:
1679                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1682                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1683                                 break;
1684                         case 10:
1685                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1686                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1687                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1688                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1689                                 break;
1690                         case 11:
1691                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1692                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1693                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1694                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1695                                 break;
1696                         case 12:
1697                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1698                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1699                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1700                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1701                                 break;
1702                         case 13:
1703                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1704                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1705                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1706                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1707                                 break;
1708                         case 14:
1709                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1710                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1711                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1712                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1713                                 break;
1714                         default:
1715                                 gb_tile_moden = 0;
1716                                 break;
1717                         }
1718                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1719                 }
1720         } else
1721                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1722 }
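
/*
 * Reading the tables above (illustrative recap, not driver code): for a
 * given tile mode index n, GB_TILE_MODE<n> carries the array mode, micro
 * tile mode, pipe config and tile/sample split, while GB_MACROTILE_MODE<n>
 * carries the bank width/height, macro tile aspect and bank count.  E.g.
 * tile mode 0 on a 2-pipe part decodes as:
 *
 *   ARRAY_2D_TILED_THIN1 | depth micro tiling | P2 pipe config | 64B split
 *
 * The GB_TILE_MODE words are also cached in
 * rdev->config.cik.tile_mode_array so they can be handed back to userspace.
 */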
1723
1724 /**
1725  * cik_select_se_sh - select which SE, SH to address
1726  *
1727  * @rdev: radeon_device pointer
1728  * @se_num: shader engine to address
1729  * @sh_num: sh block to address
1730  *
1731  * Select which SE, SH combinations to address. Certain
1732  * registers are instanced per SE or SH.  0xffffffff means
1733  * broadcast to all SEs or SHs (CIK).
1734  */
1735 static void cik_select_se_sh(struct radeon_device *rdev,
1736                              u32 se_num, u32 sh_num)
1737 {
1738         u32 data = INSTANCE_BROADCAST_WRITES;
1739
1740         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1741                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1742         else if (se_num == 0xffffffff)
1743                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1744         else if (sh_num == 0xffffffff)
1745                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1746         else
1747                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1748         WREG32(GRBM_GFX_INDEX, data);
1749 }
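
/*
 * For illustration only (not driver code): how the three addressing modes
 * above encode into GRBM_GFX_INDEX, using the field helpers from the code:
 *
 *   cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *       -> INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
 *          SE_BROADCAST_WRITES
 *   cik_select_se_sh(rdev, 1, 0xffffffff);
 *       -> INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES | SE_INDEX(1)
 *   cik_select_se_sh(rdev, 1, 0);
 *       -> INSTANCE_BROADCAST_WRITES | SH_INDEX(0) | SE_INDEX(1)
 */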
1750
1751 /**
1752  * cik_create_bitmask - create a bitmask
1753  *
1754  * @bit_width: length of the mask
1755  *
1756  * Create a variable-length bit mask (CIK).
1757  * Returns the bitmask.
1758  */
1759 static u32 cik_create_bitmask(u32 bit_width)
1760 {
1761         u32 i, mask = 0;
1762
1763         for (i = 0; i < bit_width; i++) {
1764                 mask <<= 1;
1765                 mask |= 1;
1766         }
1767         return mask;
1768 }
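
/*
 * Equivalence sketch (illustrative, not compiled into the driver): for
 * bit_width < 32 the loop above computes (1 << bit_width) - 1, e.g.
 * cik_create_bitmask(2) == 0x3 and cik_create_bitmask(4) == 0xf, so a
 * hypothetical closed form would be:
 *
 *   static u32 cik_create_bitmask_closed(u32 bit_width)
 *   {
 *           return (1u << bit_width) - 1;
 *   }
 */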
1769
1770 /**
1771  * cik_get_rb_disabled - compute the bitmask of disabled RBs
1772  *
1773  * @rdev: radeon_device pointer
1774  * @max_rb_num: max RBs (render backends) for the asic
1775  * @se_num: number of SEs (shader engines) for the asic
1776  * @sh_per_se: number of SH blocks per SE for the asic
1777  *
1778  * Calculates the bitmask of disabled RBs (CIK).
1779  * Returns the disabled RB bitmask.
1780  */
1781 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1782                               u32 max_rb_num, u32 se_num,
1783                               u32 sh_per_se)
1784 {
1785         u32 data, mask;
1786
1787         data = RREG32(CC_RB_BACKEND_DISABLE);
1788         if (data & 1)
1789                 data &= BACKEND_DISABLE_MASK;
1790         else
1791                 data = 0;
1792         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1793
1794         data >>= BACKEND_DISABLE_SHIFT;
1795
1796         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1797
1798         return data & mask;
1799 }
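
/*
 * Worked example (hypothetical numbers): with max_rb_num = 4, se_num = 2
 * and sh_per_se = 1, each SE/SH pair owns 4 / 2 / 1 = 2 RB bits, so
 * mask = cik_create_bitmask(2) = 0x3 and only the two low bits of the
 * combined CC/GC_USER backend-disable field are reported for the
 * currently selected SE/SH.
 */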
1800
1801 /**
1802  * cik_setup_rb - setup the RBs on the asic
1803  *
1804  * @rdev: radeon_device pointer
1805  * @se_num: number of SEs (shader engines) for the asic
1806  * @sh_per_se: number of SH blocks per SE for the asic
1807  * @max_rb_num: max RBs (render backends) for the asic
1808  *
1809  * Configures per-SE/SH RB registers (CIK).
1810  */
1811 static void cik_setup_rb(struct radeon_device *rdev,
1812                          u32 se_num, u32 sh_per_se,
1813                          u32 max_rb_num)
1814 {
1815         int i, j;
1816         u32 data, mask;
1817         u32 disabled_rbs = 0;
1818         u32 enabled_rbs = 0;
1819
1820         for (i = 0; i < se_num; i++) {
1821                 for (j = 0; j < sh_per_se; j++) {
1822                         cik_select_se_sh(rdev, i, j);
1823                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1824                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1825                 }
1826         }
1827         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1828
1829         mask = 1;
1830         for (i = 0; i < max_rb_num; i++) {
1831                 if (!(disabled_rbs & mask))
1832                         enabled_rbs |= mask;
1833                 mask <<= 1;
1834         }
1835
1836         for (i = 0; i < se_num; i++) {
1837                 cik_select_se_sh(rdev, i, 0xffffffff);
1838                 data = 0;
1839                 for (j = 0; j < sh_per_se; j++) {
1840                         switch (enabled_rbs & 3) {
1841                         case 1:
1842                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1843                                 break;
1844                         case 2:
1845                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1846                                 break;
1847                         case 3:
1848                         default:
1849                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1850                                 break;
1851                         }
1852                         enabled_rbs >>= 2;
1853                 }
1854                 WREG32(PA_SC_RASTER_CONFIG, data);
1855         }
1856         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1857 }
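
/*
 * Packing illustration (assumes CIK_RB_BITMAP_WIDTH_PER_SH == 2, per the
 * shift used above): with se_num = 2 and sh_per_se = 1, the harvest data
 * gathered per SE/SH lands in disabled_rbs as
 *
 *   disabled_rbs = (rb_disabled(se1, sh0) << 2) | rb_disabled(se0, sh0);
 *
 * enabled_rbs is then its bitwise complement over max_rb_num bits and is
 * consumed two bits at a time when programming PA_SC_RASTER_CONFIG.
 */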
1858
1859 /**
1860  * cik_gpu_init - setup the 3D engine
1861  *
1862  * @rdev: radeon_device pointer
1863  *
1864  * Configures the 3D engine and tiling configuration
1865  * registers so that the 3D engine is usable.
1866  */
1867 static void cik_gpu_init(struct radeon_device *rdev)
1868 {
1869         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1870         u32 mc_shared_chmap, mc_arb_ramcfg;
1871         u32 hdp_host_path_cntl;
1872         u32 tmp;
1873         int i, j;
1874
1875         switch (rdev->family) {
1876         case CHIP_BONAIRE:
1877                 rdev->config.cik.max_shader_engines = 2;
1878                 rdev->config.cik.max_tile_pipes = 4;
1879                 rdev->config.cik.max_cu_per_sh = 7;
1880                 rdev->config.cik.max_sh_per_se = 1;
1881                 rdev->config.cik.max_backends_per_se = 2;
1882                 rdev->config.cik.max_texture_channel_caches = 4;
1883                 rdev->config.cik.max_gprs = 256;
1884                 rdev->config.cik.max_gs_threads = 32;
1885                 rdev->config.cik.max_hw_contexts = 8;
1886
1887                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1888                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1889                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1890                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1891                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1892                 break;
1893         case CHIP_KAVERI:
1894                 /* TODO */
1895                 break;
1896         case CHIP_KABINI:
1897         default:
1898                 rdev->config.cik.max_shader_engines = 1;
1899                 rdev->config.cik.max_tile_pipes = 2;
1900                 rdev->config.cik.max_cu_per_sh = 2;
1901                 rdev->config.cik.max_sh_per_se = 1;
1902                 rdev->config.cik.max_backends_per_se = 1;
1903                 rdev->config.cik.max_texture_channel_caches = 2;
1904                 rdev->config.cik.max_gprs = 256;
1905                 rdev->config.cik.max_gs_threads = 16;
1906                 rdev->config.cik.max_hw_contexts = 8;
1907
1908                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1909                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1910                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1911                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1912                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1913                 break;
1914         }
1915
1916         /* Initialize HDP */
1917         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1918                 WREG32((0x2c14 + j), 0x00000000);
1919                 WREG32((0x2c18 + j), 0x00000000);
1920                 WREG32((0x2c1c + j), 0x00000000);
1921                 WREG32((0x2c20 + j), 0x00000000);
1922                 WREG32((0x2c24 + j), 0x00000000);
1923         }
1924
1925         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1926
1927         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1928
1929         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1930         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1931
1932         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1933         rdev->config.cik.mem_max_burst_length_bytes = 256;
1934         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1935         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1936         if (rdev->config.cik.mem_row_size_in_kb > 4)
1937                 rdev->config.cik.mem_row_size_in_kb = 4;
1938         /* XXX use MC settings? */
1939         rdev->config.cik.shader_engine_tile_size = 32;
1940         rdev->config.cik.num_gpus = 1;
1941         rdev->config.cik.multi_gpu_tile_size = 64;
1942
1943         /* fix up row size */
1944         gb_addr_config &= ~ROW_SIZE_MASK;
1945         switch (rdev->config.cik.mem_row_size_in_kb) {
1946         case 1:
1947         default:
1948                 gb_addr_config |= ROW_SIZE(0);
1949                 break;
1950         case 2:
1951                 gb_addr_config |= ROW_SIZE(1);
1952                 break;
1953         case 4:
1954                 gb_addr_config |= ROW_SIZE(2);
1955                 break;
1956         }
1957
1958         /* setup tiling info dword.  gb_addr_config is not adequate since it does
1959          * not have bank info, so create a custom tiling dword.
1960          * bits 3:0   num_pipes
1961          * bits 7:4   num_banks
1962          * bits 11:8  group_size
1963          * bits 15:12 row_size
1964          */
1965         rdev->config.cik.tile_config = 0;
1966         switch (rdev->config.cik.num_tile_pipes) {
1967         case 1:
1968                 rdev->config.cik.tile_config |= (0 << 0);
1969                 break;
1970         case 2:
1971                 rdev->config.cik.tile_config |= (1 << 0);
1972                 break;
1973         case 4:
1974                 rdev->config.cik.tile_config |= (2 << 0);
1975                 break;
1976         case 8:
1977         default:
1978                 /* XXX what about 12? */
1979                 rdev->config.cik.tile_config |= (3 << 0);
1980                 break;
1981         }
1982         if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1983                 rdev->config.cik.tile_config |= 1 << 4;
1984         else
1985                 rdev->config.cik.tile_config |= 0 << 4;
1986         rdev->config.cik.tile_config |=
1987                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1988         rdev->config.cik.tile_config |=
1989                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
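
        /*
         * Worked example (hypothetical values): 4 pipes encodes as 2 in
         * bits 3:0; a nonzero NOOFBANK sets bit 4; a 256-byte pipe
         * interleave would leave bits 11:8 at 0; a 4KB row (ROW_SIZE(2))
         * puts 2 in bits 15:12, giving tile_config = 0x2012.
         */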
1990
1991         WREG32(GB_ADDR_CONFIG, gb_addr_config);
1992         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1993         WREG32(DMIF_ADDR_CALC, gb_addr_config);
1994         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1995         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1996         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1997         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1998         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1999
2000         cik_tiling_mode_table_init(rdev);
2001
2002         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2003                      rdev->config.cik.max_sh_per_se,
2004                      rdev->config.cik.max_backends_per_se);
2005
2006         /* set HW defaults for 3D engine */
2007         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2008
2009         WREG32(SX_DEBUG_1, 0x20);
2010
2011         WREG32(TA_CNTL_AUX, 0x00010000);
2012
2013         tmp = RREG32(SPI_CONFIG_CNTL);
2014         tmp |= 0x03000000;
2015         WREG32(SPI_CONFIG_CNTL, tmp);
2016
2017         WREG32(SQ_CONFIG, 1);
2018
2019         WREG32(DB_DEBUG, 0);
2020
2021         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2022         tmp |= 0x00000400;
2023         WREG32(DB_DEBUG2, tmp);
2024
2025         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2026         tmp |= 0x00020200;
2027         WREG32(DB_DEBUG3, tmp);
2028
2029         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2030         tmp |= 0x00018208;
2031         WREG32(CB_HW_CONTROL, tmp);
2032
2033         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2034
2035         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2036                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2037                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2038                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2039
2040         WREG32(VGT_NUM_INSTANCES, 1);
2041
2042         WREG32(CP_PERFMON_CNTL, 0);
2043
2044         WREG32(SQ_CONFIG, 0);
2045
2046         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2047                                           FORCE_EOV_MAX_REZ_CNT(255)));
2048
2049         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2050                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2051
2052         WREG32(VGT_GS_VERTEX_REUSE, 16);
2053         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2054
2055         tmp = RREG32(HDP_MISC_CNTL);
2056         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2057         WREG32(HDP_MISC_CNTL, tmp);
2058
2059         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2060         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2061
2062         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2063         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2064
2065         udelay(50);
2066 }
2067
2068 /*
2069  * GPU scratch register helper functions.
2070  */
2071 /**
2072  * cik_scratch_init - setup driver info for CP scratch regs
2073  *
2074  * @rdev: radeon_device pointer
2075  *
2076  * Set up the number and offset of the CP scratch registers.
2077  * NOTE: use of CP scratch registers is a legacy interface and
2078  * is not used by default on newer asics (r6xx+).  On newer asics,
2079  * memory buffers are used for fences rather than scratch regs.
2080  */
2081 static void cik_scratch_init(struct radeon_device *rdev)
2082 {
2083         int i;
2084
2085         rdev->scratch.num_reg = 7;
2086         rdev->scratch.reg_base = SCRATCH_REG0;
2087         for (i = 0; i < rdev->scratch.num_reg; i++) {
2088                 rdev->scratch.free[i] = true;
2089                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2090         }
2091 }
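
/*
 * Resulting layout, for illustration: with num_reg = 7 and a 4-byte
 * stride, the pool hands out SCRATCH_REG0, SCRATCH_REG0 + 4, ...
 * SCRATCH_REG0 + 24 through radeon_scratch_get()/radeon_scratch_free().
 */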
2092
2093 /**
2094  * cik_ring_test - basic gfx ring test
2095  *
2096  * @rdev: radeon_device pointer
2097  * @ring: radeon_ring structure holding ring information
2098  *
2099  * Allocate a scratch register and write to it using the gfx ring (CIK).
2100  * Provides a basic gfx ring test to verify that the ring is working.
2101  * Used by cik_cp_gfx_resume().
2102  * Returns 0 on success, error on failure.
2103  */
2104 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2105 {
2106         uint32_t scratch;
2107         uint32_t tmp = 0;
2108         unsigned i;
2109         int r;
2110
2111         r = radeon_scratch_get(rdev, &scratch);
2112         if (r) {
2113                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2114                 return r;
2115         }
2116         WREG32(scratch, 0xCAFEDEAD);
2117         r = radeon_ring_lock(rdev, ring, 3);
2118         if (r) {
2119                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2120                 radeon_scratch_free(rdev, scratch);
2121                 return r;
2122         }
2123         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2124         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2125         radeon_ring_write(ring, 0xDEADBEEF);
2126         radeon_ring_unlock_commit(rdev, ring);
2127
2128         for (i = 0; i < rdev->usec_timeout; i++) {
2129                 tmp = RREG32(scratch);
2130                 if (tmp == 0xDEADBEEF)
2131                         break;
2132                 DRM_UDELAY(1);
2133         }
2134         if (i < rdev->usec_timeout) {
2135                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2136         } else {
2137                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2138                           ring->idx, scratch, tmp);
2139                 r = -EINVAL;
2140         }
2141         radeon_scratch_free(rdev, scratch);
2142         return r;
2143 }
2144
2145 /**
2146  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2147  *
2148  * @rdev: radeon_device pointer
2149  * @fence: radeon fence object
2150  *
2151  * Emits a fence sequence number on the gfx ring and flushes
2152  * GPU caches.
2153  */
2154 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2155                              struct radeon_fence *fence)
2156 {
2157         struct radeon_ring *ring = &rdev->ring[fence->ring];
2158         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2159
2160         /* EVENT_WRITE_EOP - flush caches, send int */
2161         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2162         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2163                                  EOP_TC_ACTION_EN |
2164                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2165                                  EVENT_INDEX(5)));
2166         radeon_ring_write(ring, addr & 0xfffffffc);
2167         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2168         radeon_ring_write(ring, fence->seq);
2169         radeon_ring_write(ring, 0);
2170         /* HDP flush */
2171         /* We should be using the new WAIT_REG_MEM special op packet here
2172          * but it causes the CP to hang
2173          */
2174         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2175         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2176                                  WRITE_DATA_DST_SEL(0)));
2177         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2178         radeon_ring_write(ring, 0);
2179         radeon_ring_write(ring, 0);
2180 }
2181
2182 /**
2183  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2184  *
2185  * @rdev: radeon_device pointer
2186  * @fence: radeon fence object
2187  *
2188  * Emits a fence sequence number on the compute ring and flushes
2189  * GPU caches.
2190  */
2191 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2192                                  struct radeon_fence *fence)
2193 {
2194         struct radeon_ring *ring = &rdev->ring[fence->ring];
2195         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2196
2197         /* RELEASE_MEM - flush caches, send int */
2198         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2199         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2200                                  EOP_TC_ACTION_EN |
2201                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2202                                  EVENT_INDEX(5)));
2203         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2204         radeon_ring_write(ring, addr & 0xfffffffc);
2205         radeon_ring_write(ring, upper_32_bits(addr));
2206         radeon_ring_write(ring, fence->seq);
2207         radeon_ring_write(ring, 0);
2208         /* HDP flush */
2209         /* We should be using the new WAIT_REG_MEM special op packet here
2210          * but it causes the CP to hang
2211          */
2212         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2213         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2214                                  WRITE_DATA_DST_SEL(0)));
2215         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2216         radeon_ring_write(ring, 0);
2217         radeon_ring_write(ring, 0);
2218 }
2219
2220 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2221                              struct radeon_ring *ring,
2222                              struct radeon_semaphore *semaphore,
2223                              bool emit_wait)
2224 {
2225         uint64_t addr = semaphore->gpu_addr;
2226         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2227
2228         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2229         radeon_ring_write(ring, addr & 0xffffffff);
2230         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2231 }
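
     /*
      * Illustrative sketch (not part of the driver): the intended pairing of
      * the MEM_SEMAPHORE packet above.  One ring emits the signal and the
      * other emits the wait against the same semaphore BO, serializing the
      * two rings entirely on the GPU.
      */
     static void __maybe_unused cik_semaphore_pair_example(struct radeon_device *rdev,
                                                           struct radeon_ring *signaler,
                                                           struct radeon_ring *waiter,
                                                           struct radeon_semaphore *sem)
     {
             cik_semaphore_ring_emit(rdev, signaler, sem, false); /* signal */
             cik_semaphore_ring_emit(rdev, waiter, sem, true);    /* wait   */
     }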
2232
2233 /*
2234  * IB stuff
2235  */
2236 /**
2237  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2238  *
2239  * @rdev: radeon_device pointer
2240  * @ib: radeon indirect buffer object
2241  *
2242  * Emits a DE (drawing engine) or CE (constant engine) IB
2243  * on the gfx ring.  IBs are usually generated by userspace
2244  * acceleration drivers and submitted to the kernel for
2245  * scheduling on the ring.  This function schedules the IB
2246  * on the gfx ring for execution by the GPU.
2247  */
2248 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2249 {
2250         struct radeon_ring *ring = &rdev->ring[ib->ring];
2251         u32 header, control = INDIRECT_BUFFER_VALID;
2252
2253         if (ib->is_const_ib) {
2254                 /* set switch buffer packet before const IB */
2255                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2256                 radeon_ring_write(ring, 0);
2257
2258                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2259         } else {
2260                 u32 next_rptr;
2261                 if (ring->rptr_save_reg) {
2262                         next_rptr = ring->wptr + 3 + 4;
2263                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2264                         radeon_ring_write(ring, ((ring->rptr_save_reg -
2265                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
2266                         radeon_ring_write(ring, next_rptr);
2267                 } else if (rdev->wb.enabled) {
2268                         next_rptr = ring->wptr + 5 + 4;
2269                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2270                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2271                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2272                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2273                         radeon_ring_write(ring, next_rptr);
2274                 }
2275
2276                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2277         }
2278
2279         control |= ib->length_dw |
2280                 (ib->vm ? (ib->vm->id << 24) : 0);
2281
2282         radeon_ring_write(ring, header);
2283         radeon_ring_write(ring,
2284 #ifdef __BIG_ENDIAN
2285                           (2 << 0) |
2286 #endif
2287                           (ib->gpu_addr & 0xFFFFFFFC));
2288         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2289         radeon_ring_write(ring, control);
2290 }
2291
2292 /**
2293  * cik_ib_test - basic gfx ring IB test
2294  *
2295  * @rdev: radeon_device pointer
2296  * @ring: radeon_ring structure holding ring information
2297  *
2298  * Allocate an IB and execute it on the gfx ring (CIK).
2299  * Provides a basic gfx ring test to verify that IBs are working.
2300  * Returns 0 on success, error on failure.
2301  */
2302 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2303 {
2304         struct radeon_ib ib;
2305         uint32_t scratch;
2306         uint32_t tmp = 0;
2307         unsigned i;
2308         int r;
2309
2310         r = radeon_scratch_get(rdev, &scratch);
2311         if (r) {
2312                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2313                 return r;
2314         }
2315         WREG32(scratch, 0xCAFEDEAD);
2316         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2317         if (r) {
2318                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
                     radeon_scratch_free(rdev, scratch);
2319                 return r;
2320         }
2321         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2322         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2323         ib.ptr[2] = 0xDEADBEEF;
2324         ib.length_dw = 3;
2325         r = radeon_ib_schedule(rdev, &ib, NULL);
2326         if (r) {
2327                 radeon_scratch_free(rdev, scratch);
2328                 radeon_ib_free(rdev, &ib);
2329                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2330                 return r;
2331         }
2332         r = radeon_fence_wait(ib.fence, false);
2333         if (r) {
2334                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                     radeon_scratch_free(rdev, scratch);
                     radeon_ib_free(rdev, &ib);
2335                 return r;
2336         }
2337         for (i = 0; i < rdev->usec_timeout; i++) {
2338                 tmp = RREG32(scratch);
2339                 if (tmp == 0xDEADBEEF)
2340                         break;
2341                 DRM_UDELAY(1);
2342         }
2343         if (i < rdev->usec_timeout) {
2344                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2345         } else {
2346                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2347                           scratch, tmp);
2348                 r = -EINVAL;
2349         }
2350         radeon_scratch_free(rdev, scratch);
2351         radeon_ib_free(rdev, &ib);
2352         return r;
2353 }
2354
2355 /*
2356  * CP.
2357  * On CIK, gfx and compute now have independent command processors.
2358  *
2359  * GFX
2360  * Gfx consists of a single ring and can process both gfx jobs and
2361  * compute jobs.  The gfx CP consists of three microengines (ME):
2362  * PFP - Pre-Fetch Parser
2363  * ME - Micro Engine
2364  * CE - Constant Engine
2365  * The PFP and ME make up what is considered the Drawing Engine (DE).
2366  * The CE is an asynchronous engine used for updating buffer descriptors
2367  * used by the DE so that they can be loaded into cache in parallel
2368  * while the DE is processing state update packets.
2369  *
2370  * Compute
2371  * The compute CP consists of two microengines (ME):
2372  * MEC1 - Compute MicroEngine 1
2373  * MEC2 - Compute MicroEngine 2
2374  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2375  * The queues are exposed to userspace and are programmed directly
2376  * by the compute runtime.
2377  */
2378 /**
2379  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2380  *
2381  * @rdev: radeon_device pointer
2382  * @enable: enable or disable the MEs
2383  *
2384  * Halts or unhalts the gfx MEs.
2385  */
2386 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2387 {
2388         if (enable)
2389                 WREG32(CP_ME_CNTL, 0);
2390         else {
2391                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2392                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2393         }
2394         udelay(50);
2395 }
2396
2397 /**
2398  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2399  *
2400  * @rdev: radeon_device pointer
2401  *
2402  * Loads the gfx PFP, ME, and CE ucode.
2403  * Returns 0 for success, -EINVAL if the ucode is not available.
2404  */
2405 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2406 {
2407         const __be32 *fw_data;
2408         int i;
2409
2410         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2411                 return -EINVAL;
2412
2413         cik_cp_gfx_enable(rdev, false);
2414
2415         /* PFP */
2416         fw_data = (const __be32 *)rdev->pfp_fw->data;
2417         WREG32(CP_PFP_UCODE_ADDR, 0);
2418         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2419                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2420         WREG32(CP_PFP_UCODE_ADDR, 0);
2421
2422         /* CE */
2423         fw_data = (const __be32 *)rdev->ce_fw->data;
2424         WREG32(CP_CE_UCODE_ADDR, 0);
2425         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2426                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2427         WREG32(CP_CE_UCODE_ADDR, 0);
2428
2429         /* ME */
2430         fw_data = (const __be32 *)rdev->me_fw->data;
2431         WREG32(CP_ME_RAM_WADDR, 0);
2432         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2433                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2434         WREG32(CP_ME_RAM_WADDR, 0);
2435
2436         WREG32(CP_PFP_UCODE_ADDR, 0);
2437         WREG32(CP_CE_UCODE_ADDR, 0);
2438         WREG32(CP_ME_RAM_WADDR, 0);
2439         WREG32(CP_ME_RAM_RADDR, 0);
2440         return 0;
2441 }
2442
2443 /**
2444  * cik_cp_gfx_start - start the gfx ring
2445  *
2446  * @rdev: radeon_device pointer
2447  *
2448  * Enables the ring and loads the clear state context and other
2449  * packets required to init the ring.
2450  * Returns 0 for success, error for failure.
2451  */
2452 static int cik_cp_gfx_start(struct radeon_device *rdev)
2453 {
2454         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2455         int r, i;
2456
2457         /* init the CP */
2458         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2459         WREG32(CP_ENDIAN_SWAP, 0);
2460         WREG32(CP_DEVICE_ID, 1);
2461
2462         cik_cp_gfx_enable(rdev, true);
2463
2464         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2465         if (r) {
2466                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2467                 return r;
2468         }
2469
2470         /* init the CE partitions.  CE only used for gfx on CIK */
2471         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2472         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2473         radeon_ring_write(ring, 0xc000);
2474         radeon_ring_write(ring, 0xc000);
2475
2476         /* setup clear context state */
2477         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2478         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2479
2480         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2481         radeon_ring_write(ring, 0x80000000);
2482         radeon_ring_write(ring, 0x80000000);
2483
2484         for (i = 0; i < cik_default_size; i++)
2485                 radeon_ring_write(ring, cik_default_state[i]);
2486
2487         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2488         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2489
2490         /* set clear context state */
2491         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2492         radeon_ring_write(ring, 0);
2493
2494         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2495         radeon_ring_write(ring, 0x00000316);
2496         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2497         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2498
2499         radeon_ring_unlock_commit(rdev, ring);
2500
2501         return 0;
2502 }
2503
2504 /**
2505  * cik_cp_gfx_fini - stop the gfx ring
2506  *
2507  * @rdev: radeon_device pointer
2508  *
2509  * Stop the gfx ring and tear down the driver ring
2510  * info.
2511  */
2512 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2513 {
2514         cik_cp_gfx_enable(rdev, false);
2515         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2516 }
2517
2518 /**
2519  * cik_cp_gfx_resume - setup the gfx ring buffer registers
2520  *
2521  * @rdev: radeon_device pointer
2522  *
2523  * Program the location and size of the gfx ring buffer
2524  * and test it to make sure it's working.
2525  * Returns 0 for success, error for failure.
2526  */
2527 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2528 {
2529         struct radeon_ring *ring;
2530         u32 tmp;
2531         u32 rb_bufsz;
2532         u64 rb_addr;
2533         int r;
2534
2535         WREG32(CP_SEM_WAIT_TIMER, 0x0);
2536         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2537
2538         /* Set the write pointer delay */
2539         WREG32(CP_RB_WPTR_DELAY, 0);
2540
2541         /* set the RB to use vmid 0 */
2542         WREG32(CP_RB_VMID, 0);
2543
2544         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2545
2546         /* ring 0 - compute and gfx */
2547         /* Set ring buffer size */
2548         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
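             /*
              * Informational note (assumed encoding): CP_RB0_CNTL takes
              * log2-encoded sizes, so a 1 MB ring gives
              * drm_order(1048576 / 8) = 17 in the low bits; the field
              * shifted in at bit 8 below appears to be the log2 block size
              * after which the CP reports the read pointer.
              */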
2549         rb_bufsz = drm_order(ring->ring_size / 8);
2550         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2551 #ifdef __BIG_ENDIAN
2552         tmp |= BUF_SWAP_32BIT;
2553 #endif
2554         WREG32(CP_RB0_CNTL, tmp);
2555
2556         /* Initialize the ring buffer's read and write pointers */
2557         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2558         ring->wptr = 0;
2559         WREG32(CP_RB0_WPTR, ring->wptr);
2560
2561         /* set the wb address whether it's enabled or not */
2562         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2563         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2564
2565         /* scratch register shadowing is no longer supported */
2566         WREG32(SCRATCH_UMSK, 0);
2567
2568         if (!rdev->wb.enabled)
2569                 tmp |= RB_NO_UPDATE;
2570
2571         mdelay(1);
2572         WREG32(CP_RB0_CNTL, tmp);
2573
2574         rb_addr = ring->gpu_addr >> 8;
2575         WREG32(CP_RB0_BASE, rb_addr);
2576         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2577
2578         ring->rptr = RREG32(CP_RB0_RPTR);
2579
2580         /* start the ring */
2581         cik_cp_gfx_start(rdev);
2582         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2583         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2584         if (r) {
2585                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2586                 return r;
2587         }
2588         return 0;
2589 }
2590
2591 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2592                               struct radeon_ring *ring)
2593 {
2594         u32 rptr;
2595
2598         if (rdev->wb.enabled) {
2599                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2600         } else {
2601                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2602                 rptr = RREG32(CP_HQD_PQ_RPTR);
2603                 cik_srbm_select(rdev, 0, 0, 0, 0);
2604         }
2605         rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2606
2607         return rptr;
2608 }
2609
2610 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2611                               struct radeon_ring *ring)
2612 {
2613         u32 wptr;
2614
2615         if (rdev->wb.enabled) {
2616                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2617         } else {
2618                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2619                 wptr = RREG32(CP_HQD_PQ_WPTR);
2620                 cik_srbm_select(rdev, 0, 0, 0, 0);
2621         }
2622         wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2623
2624         return wptr;
2625 }
2626
2627 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2628                                struct radeon_ring *ring)
2629 {
2630         u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2631
2632         rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2633         WDOORBELL32(ring->doorbell_offset, wptr);
2634 }
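
     /*
      * Informational note on the doorbell path above: the masked wptr is
      * first mirrored into the writeback page so the CP can fetch it, then
      * the WDOORBELL32() store to the ring's doorbell_offset kicks the HQD,
      * avoiding the cik_srbm_select() register round trip that the
      * non-writeback get_rptr/get_wptr paths need.
      */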
2635
2636 /**
2637  * cik_cp_compute_enable - enable/disable the compute CP MEs
2638  *
2639  * @rdev: radeon_device pointer
2640  * @enable: enable or disable the MEs
2641  *
2642  * Halts or unhalts the compute MEs.
2643  */
2644 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2645 {
2646         if (enable)
2647                 WREG32(CP_MEC_CNTL, 0);
2648         else
2649                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2650         udelay(50);
2651 }
2652
2653 /**
2654  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2655  *
2656  * @rdev: radeon_device pointer
2657  *
2658  * Loads the compute MEC1&2 ucode.
2659  * Returns 0 for success, -EINVAL if the ucode is not available.
2660  */
2661 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2662 {
2663         const __be32 *fw_data;
2664         int i;
2665
2666         if (!rdev->mec_fw)
2667                 return -EINVAL;
2668
2669         cik_cp_compute_enable(rdev, false);
2670
2671         /* MEC1 */
2672         fw_data = (const __be32 *)rdev->mec_fw->data;
2673         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2674         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2675                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2676         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2677
2678         if (rdev->family == CHIP_KAVERI) {
2679                 /* MEC2 */
2680                 fw_data = (const __be32 *)rdev->mec_fw->data;
2681                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2682                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2683                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2684                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2685         }
2686
2687         return 0;
2688 }
2689
2690 /**
2691  * cik_cp_compute_start - start the compute queues
2692  *
2693  * @rdev: radeon_device pointer
2694  *
2695  * Enable the compute queues.
2696  * Returns 0 for success, error for failure.
2697  */
2698 static int cik_cp_compute_start(struct radeon_device *rdev)
2699 {
2700         cik_cp_compute_enable(rdev, true);
2701
2702         return 0;
2703 }
2704
2705 /**
2706  * cik_cp_compute_fini - stop the compute queues
2707  *
2708  * @rdev: radeon_device pointer
2709  *
2710  * Stop the compute queues and tear down the driver queue
2711  * info.
2712  */
2713 static void cik_cp_compute_fini(struct radeon_device *rdev)
2714 {
2715         int i, idx, r;
2716
2717         cik_cp_compute_enable(rdev, false);
2718
2719         for (i = 0; i < 2; i++) {
2720                 if (i == 0)
2721                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2722                 else
2723                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2724
2725                 if (rdev->ring[idx].mqd_obj) {
2726                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2727                         if (unlikely(r != 0))
2728                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2729
2730                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2731                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2732
2733                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2734                         rdev->ring[idx].mqd_obj = NULL;
2735                 }
2736         }
2737 }
2738
2739 static void cik_mec_fini(struct radeon_device *rdev)
2740 {
2741         int r;
2742
2743         if (rdev->mec.hpd_eop_obj) {
2744                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2745                 if (unlikely(r != 0))
2746                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2747                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2748                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2749
2750                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2751                 rdev->mec.hpd_eop_obj = NULL;
2752         }
2753 }
2754
2755 #define MEC_HPD_SIZE 2048
2756
2757 static int cik_mec_init(struct radeon_device *rdev)
2758 {
2759         int r;
2760         u32 *hpd;
2761
2762         /*
2763          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2764          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2765          */
2766         if (rdev->family == CHIP_KAVERI)
2767                 rdev->mec.num_mec = 2;
2768         else
2769                 rdev->mec.num_mec = 1;
2770         rdev->mec.num_pipe = 4;
2771         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2772
2773         if (rdev->mec.hpd_eop_obj == NULL) {
2774                 r = radeon_bo_create(rdev,
2775                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2776                                      PAGE_SIZE, true,
2777                                      RADEON_GEM_DOMAIN_GTT, NULL,
2778                                      &rdev->mec.hpd_eop_obj);
2779                 if (r) {
2780                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2781                         return r;
2782                 }
2783         }
2784
2785         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2786         if (unlikely(r != 0)) {
2787                 cik_mec_fini(rdev);
2788                 return r;
2789         }
2790         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2791                           &rdev->mec.hpd_eop_gpu_addr);
2792         if (r) {
2793                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2794                 cik_mec_fini(rdev);
2795                 return r;
2796         }
2797         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2798         if (r) {
2799                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2800                 cik_mec_fini(rdev);
2801                 return r;
2802         }
2803
2804         /* clear memory.  Not sure if this is required or not */
2805         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2806
2807         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2808         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2809
2810         return 0;
2811 }
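
     /*
      * Illustrative sketch (not part of the driver): walking every compute
      * queue implied by the MEC topology set up above.  For Kaveri (2 MECs)
      * this visits 2 * 4 * 8 = 64 (me, pipe, queue) triples; 32 on
      * Bonaire/Kabini.  Note the ME index is 1-based for the SRBM.
      */
     static void __maybe_unused cik_for_each_queue_example(struct radeon_device *rdev)
     {
             u32 me, pipe, queue;

             for (me = 1; me <= rdev->mec.num_mec; me++)
                     for (pipe = 0; pipe < rdev->mec.num_pipe; pipe++)
                             for (queue = 0; queue < 8; queue++)
                                     cik_srbm_select(rdev, me, pipe, queue, 0);
             /* always restore the default selection when done */
             cik_srbm_select(rdev, 0, 0, 0, 0);
     }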
2812
2813 struct hqd_registers
2814 {
2815         u32 cp_mqd_base_addr;
2816         u32 cp_mqd_base_addr_hi;
2817         u32 cp_hqd_active;
2818         u32 cp_hqd_vmid;
2819         u32 cp_hqd_persistent_state;
2820         u32 cp_hqd_pipe_priority;
2821         u32 cp_hqd_queue_priority;
2822         u32 cp_hqd_quantum;
2823         u32 cp_hqd_pq_base;
2824         u32 cp_hqd_pq_base_hi;
2825         u32 cp_hqd_pq_rptr;
2826         u32 cp_hqd_pq_rptr_report_addr;
2827         u32 cp_hqd_pq_rptr_report_addr_hi;
2828         u32 cp_hqd_pq_wptr_poll_addr;
2829         u32 cp_hqd_pq_wptr_poll_addr_hi;
2830         u32 cp_hqd_pq_doorbell_control;
2831         u32 cp_hqd_pq_wptr;
2832         u32 cp_hqd_pq_control;
2833         u32 cp_hqd_ib_base_addr;
2834         u32 cp_hqd_ib_base_addr_hi;
2835         u32 cp_hqd_ib_rptr;
2836         u32 cp_hqd_ib_control;
2837         u32 cp_hqd_iq_timer;
2838         u32 cp_hqd_iq_rptr;
2839         u32 cp_hqd_dequeue_request;
2840         u32 cp_hqd_dma_offload;
2841         u32 cp_hqd_sema_cmd;
2842         u32 cp_hqd_msg_type;
2843         u32 cp_hqd_atomic0_preop_lo;
2844         u32 cp_hqd_atomic0_preop_hi;
2845         u32 cp_hqd_atomic1_preop_lo;
2846         u32 cp_hqd_atomic1_preop_hi;
2847         u32 cp_hqd_hq_scheduler0;
2848         u32 cp_hqd_hq_scheduler1;
2849         u32 cp_mqd_control;
2850 };
2851
2852 struct bonaire_mqd
2853 {
2854         u32 header;
2855         u32 dispatch_initiator;
2856         u32 dimensions[3];
2857         u32 start_idx[3];
2858         u32 num_threads[3];
2859         u32 pipeline_stat_enable;
2860         u32 perf_counter_enable;
2861         u32 pgm[2];
2862         u32 tba[2];
2863         u32 tma[2];
2864         u32 pgm_rsrc[2];
2865         u32 vmid;
2866         u32 resource_limits;
2867         u32 static_thread_mgmt01[2];
2868         u32 tmp_ring_size;
2869         u32 static_thread_mgmt23[2];
2870         u32 restart[3];
2871         u32 thread_trace_enable;
2872         u32 reserved1;
2873         u32 user_data[16];
2874         u32 vgtcs_invoke_count[2];
2875         struct hqd_registers queue_state;
2876         u32 dequeue_cntr;
2877         u32 interrupt_queue[64];
2878 };
2879
2880 /**
2881  * cik_cp_compute_resume - setup the compute queue registers
2882  *
2883  * @rdev: radeon_device pointer
2884  *
2885  * Program the compute queues and test them to make sure they
2886  * are working.
2887  * Returns 0 for success, error for failure.
2888  */
2889 static int cik_cp_compute_resume(struct radeon_device *rdev)
2890 {
2891         int r, i, j, idx;
2892         u32 tmp;
2893         bool use_doorbell = true;
2894         u64 hqd_gpu_addr;
2895         u64 mqd_gpu_addr;
2896         u64 eop_gpu_addr;
2897         u64 wb_gpu_addr;
2898         u32 *buf;
2899         struct bonaire_mqd *mqd;
2900
2901         r = cik_cp_compute_start(rdev);
2902         if (r)
2903                 return r;
2904
2905         /* fix up chicken bits */
2906         tmp = RREG32(CP_CPF_DEBUG);
2907         tmp |= (1 << 23);
2908         WREG32(CP_CPF_DEBUG, tmp);
2909
2910         /* init the pipes */
2911         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2912                 int me = (i < 4) ? 1 : 2;
2913                 int pipe = (i < 4) ? i : (i - 4);
2914
2915                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2916
2917                 cik_srbm_select(rdev, me, pipe, 0, 0);
2918
2919                 /* write the EOP addr */
2920                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2921                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2922
2923                 /* set the VMID assigned */
2924                 WREG32(CP_HPD_EOP_VMID, 0);
2925
2926                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2927                 tmp = RREG32(CP_HPD_EOP_CONTROL);
2928                 tmp &= ~EOP_SIZE_MASK;
2929                 tmp |= drm_order(MEC_HPD_SIZE / 8);
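                     /*
                      * Worked example: MEC_HPD_SIZE = 2048 bytes = 512 dwords,
                      * so drm_order(2048 / 8) = 8 and the hardware decodes
                      * 2^(8+1) = 512 dwords, matching the buffer carved out
                      * per pipe.
                      */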
2930                 WREG32(CP_HPD_EOP_CONTROL, tmp);
2931         }
2932         cik_srbm_select(rdev, 0, 0, 0, 0);
2933
2934         /* init the queues.  Just two for now. */
2935         for (i = 0; i < 2; i++) {
2936                 if (i == 0)
2937                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2938                 else
2939                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2940
2941                 if (rdev->ring[idx].mqd_obj == NULL) {
2942                         r = radeon_bo_create(rdev,
2943                                              sizeof(struct bonaire_mqd),
2944                                              PAGE_SIZE, true,
2945                                              RADEON_GEM_DOMAIN_GTT, NULL,
2946                                              &rdev->ring[idx].mqd_obj);
2947                         if (r) {
2948                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2949                                 return r;
2950                         }
2951                 }
2952
2953                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2954                 if (unlikely(r != 0)) {
2955                         cik_cp_compute_fini(rdev);
2956                         return r;
2957                 }
2958                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2959                                   &mqd_gpu_addr);
2960                 if (r) {
2961                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2962                         cik_cp_compute_fini(rdev);
2963                         return r;
2964                 }
2965                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2966                 if (r) {
2967                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2968                         cik_cp_compute_fini(rdev);
2969                         return r;
2970                 }
2971
2972                 /* doorbell offset */
2973                 rdev->ring[idx].doorbell_offset =
2974                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2975
2976                 /* init the mqd struct */
2977                 memset(buf, 0, sizeof(struct bonaire_mqd));
2978
2979                 mqd = (struct bonaire_mqd *)buf;
2980                 mqd->header = 0xC0310800;
2981                 mqd->static_thread_mgmt01[0] = 0xffffffff;
2982                 mqd->static_thread_mgmt01[1] = 0xffffffff;
2983                 mqd->static_thread_mgmt23[0] = 0xffffffff;
2984                 mqd->static_thread_mgmt23[1] = 0xffffffff;
2985
2986                 cik_srbm_select(rdev, rdev->ring[idx].me,
2987                                 rdev->ring[idx].pipe,
2988                                 rdev->ring[idx].queue, 0);
2989
2990                 /* disable wptr polling */
2991                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2992                 tmp &= ~WPTR_POLL_EN;
2993                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2994
2995                 /* enable doorbell? */
2996                 mqd->queue_state.cp_hqd_pq_doorbell_control =
2997                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2998                 if (use_doorbell)
2999                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3000                 else
3001                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3002                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3003                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3004
3005                 /* disable the queue if it's active */
3006                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3007                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3008                 mqd->queue_state.cp_hqd_pq_wptr = 0;
3009                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3010                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3011                         for (j = 0; j < rdev->usec_timeout; j++) {
3012                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3013                                         break;
3014                                 udelay(1);
3015                         }
3016                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3017                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3018                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3019                 }
3020
3021                 /* set the pointer to the MQD */
3022                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3023                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3024                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3025                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3026                 /* set MQD vmid to 0 */
3027                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3028                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3029                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3030
3031                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3032                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3033                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3034                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3035                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3036                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3037
3038                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3039                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3040                 mqd->queue_state.cp_hqd_pq_control &=
3041                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3042
3043                 mqd->queue_state.cp_hqd_pq_control |=
3044                         drm_order(rdev->ring[idx].ring_size / 8);
3045                 mqd->queue_state.cp_hqd_pq_control |=
3046                         (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3047 #ifdef __BIG_ENDIAN
3048                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3049 #endif
3050                 mqd->queue_state.cp_hqd_pq_control &=
3051                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3052                 mqd->queue_state.cp_hqd_pq_control |=
3053                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3054                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3055
3056                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3057                 if (i == 0)
3058                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3059                 else
3060                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3061                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3062                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3063                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3064                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3065                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3066
3067                 /* set the wb address whether it's enabled or not */
3068                 if (i == 0)
3069                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3070                 else
3071                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3072                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3073                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3074                         upper_32_bits(wb_gpu_addr) & 0xffff;
3075                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3076                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3077                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3078                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3079
3080                 /* enable the doorbell if requested */
3081                 if (use_doorbell) {
3082                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3083                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3084                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3085                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3086                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3087                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3088                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3089                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3090
3091                 } else {
3092                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3093                 }
3094                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3095                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3096
3097                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3098                 rdev->ring[idx].wptr = 0;
3099                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3100                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3101                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3102                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3103
3104                 /* set the vmid for the queue */
3105                 mqd->queue_state.cp_hqd_vmid = 0;
3106                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3107
3108                 /* activate the queue */
3109                 mqd->queue_state.cp_hqd_active = 1;
3110                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3111
3112                 cik_srbm_select(rdev, 0, 0, 0, 0);
3113
3114                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3115                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3116
3117                 rdev->ring[idx].ready = true;
3118                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3119                 if (r)
3120                         rdev->ring[idx].ready = false;
3121         }
3122
3123         return 0;
3124 }
3125
3126 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3127 {
3128         cik_cp_gfx_enable(rdev, enable);
3129         cik_cp_compute_enable(rdev, enable);
3130 }
3131
3132 static int cik_cp_load_microcode(struct radeon_device *rdev)
3133 {
3134         int r;
3135
3136         r = cik_cp_gfx_load_microcode(rdev);
3137         if (r)
3138                 return r;
3139         r = cik_cp_compute_load_microcode(rdev);
3140         if (r)
3141                 return r;
3142
3143         return 0;
3144 }
3145
3146 static void cik_cp_fini(struct radeon_device *rdev)
3147 {
3148         cik_cp_gfx_fini(rdev);
3149         cik_cp_compute_fini(rdev);
3150 }
3151
3152 static int cik_cp_resume(struct radeon_device *rdev)
3153 {
3154         int r;
3155
3156         /* Reset all cp blocks */
3157         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3158         RREG32(GRBM_SOFT_RESET);
3159         mdelay(15);
3160         WREG32(GRBM_SOFT_RESET, 0);
3161         RREG32(GRBM_SOFT_RESET);
3162
3163         r = cik_cp_load_microcode(rdev);
3164         if (r)
3165                 return r;
3166
3167         r = cik_cp_gfx_resume(rdev);
3168         if (r)
3169                 return r;
3170         r = cik_cp_compute_resume(rdev);
3171         if (r)
3172                 return r;
3173
3174         return 0;
3175 }
3176
3177 /*
3178  * sDMA - System DMA
3179  * Starting with CIK, the GPU has new asynchronous
3180  * DMA engines.  These engines are used for compute
3181  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
3182  * and each one supports 1 ring buffer used for gfx
3183  * and 2 queues used for compute.
3184  *
3185  * The programming model is very similar to the CP
3186  * (ring buffer, IBs, etc.), but sDMA has its own
3187  * packet format that is different from the PM4 format
3188  * used by the CP. sDMA supports copying data, writing
3189  * embedded data, solid fills, and a number of other
3190  * things.  It also has support for tiling/detiling of
3191  * buffers.
3192  */
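
     /*
      * Illustrative sketch (not part of the driver): the sDMA packet format
      * mentioned above, shown for the write-embedded-data packet exactly as
      * cik_sdma_ring_ib_execute() uses it below: a header dword, a 64-bit
      * destination, a dword count, then the payload.
      */
     static void __maybe_unused cik_sdma_write_dword_example(struct radeon_ring *ring,
                                                             u64 dst, u32 value)
     {
             radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
                                                 SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
             radeon_ring_write(ring, dst & 0xfffffffc);   /* dst addr lo */
             radeon_ring_write(ring, upper_32_bits(dst)); /* dst addr hi */
             radeon_ring_write(ring, 1);                  /* dwords to follow */
             radeon_ring_write(ring, value);
     }
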
3193 /**
3194  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3195  *
3196  * @rdev: radeon_device pointer
3197  * @ib: IB object to schedule
3198  *
3199  * Schedule an IB in the DMA ring (CIK).
3200  */
3201 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3202                               struct radeon_ib *ib)
3203 {
3204         struct radeon_ring *ring = &rdev->ring[ib->ring];
3205         u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3206
3207         if (rdev->wb.enabled) {
3208                 u32 next_rptr = ring->wptr + 5;
3209                 while ((next_rptr & 7) != 4)
3210                         next_rptr++;
3211                 next_rptr += 4;
3212                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3213                 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3214                 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3215                 radeon_ring_write(ring, 1); /* number of DWs to follow */
3216                 radeon_ring_write(ring, next_rptr);
3217         }
3218
3219         /* IB packet must end on an 8 DW boundary */
3220         while ((ring->wptr & 7) != 4)
3221                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3222         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3223         radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3224         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3225         radeon_ring_write(ring, ib->length_dw);
3227 }
3228
3229 /**
3230  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3231  *
3232  * @rdev: radeon_device pointer
3233  * @fence: radeon fence object
3234  *
3235  * Add a DMA fence packet to the ring to write
3236  * the fence seq number and DMA trap packet to generate
3237  * an interrupt if needed (CIK).
3238  */
3239 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3240                               struct radeon_fence *fence)
3241 {
3242         struct radeon_ring *ring = &rdev->ring[fence->ring];
3243         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3244         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3245                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3246         u32 ref_and_mask;
3247
3248         if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3249                 ref_and_mask = SDMA0;
3250         else
3251                 ref_and_mask = SDMA1;
3252
3253         /* write the fence */
3254         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3255         radeon_ring_write(ring, addr & 0xffffffff);
3256         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3257         radeon_ring_write(ring, fence->seq);
3258         /* generate an interrupt */
3259         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3260         /* flush HDP */
3261         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3262         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3263         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3264         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3265         radeon_ring_write(ring, ref_and_mask); /* MASK */
3266         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3267 }
3268
3269 /**
3270  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3271  *
3272  * @rdev: radeon_device pointer
3273  * @ring: radeon_ring structure holding ring information
3274  * @semaphore: radeon semaphore object
3275  * @emit_wait: wait or signal semaphore
3276  *
3277  * Add a DMA semaphore packet to the ring to wait on or signal
3278  * other rings (CIK).
3279  */
3280 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3281                                   struct radeon_ring *ring,
3282                                   struct radeon_semaphore *semaphore,
3283                                   bool emit_wait)
3284 {
3285         u64 addr = semaphore->gpu_addr;
3286         u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3287
3288         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3289         radeon_ring_write(ring, addr & 0xfffffff8);
3290         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3291 }
3292
3293 /**
3294  * cik_sdma_gfx_stop - stop the gfx async dma engines
3295  *
3296  * @rdev: radeon_device pointer
3297  *
3298  * Stop the gfx async dma ring buffers (CIK).
3299  */
3300 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3301 {
3302         u32 rb_cntl, reg_offset;
3303         int i;
3304
3305         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3306
3307         for (i = 0; i < 2; i++) {
3308                 if (i == 0)
3309                         reg_offset = SDMA0_REGISTER_OFFSET;
3310                 else
3311                         reg_offset = SDMA1_REGISTER_OFFSET;
3312                 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3313                 rb_cntl &= ~SDMA_RB_ENABLE;
3314                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3315                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3316         }
3317 }
3318
3319 /**
3320  * cik_sdma_rlc_stop - stop the compute async dma engines
3321  *
3322  * @rdev: radeon_device pointer
3323  *
3324  * Stop the compute async dma queues (CIK).
3325  */
3326 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3327 {
3328         /* XXX todo */
3329 }
3330
3331 /**
3332  * cik_sdma_enable - stop the async dma engines
3333  *
3334  * @rdev: radeon_device pointer
3335  * @enable: enable/disable the DMA MEs.
3336  *
3337  * Halt or unhalt the async dma engines (CIK).
3338  */
3339 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3340 {
3341         u32 me_cntl, reg_offset;
3342         int i;
3343
3344         for (i = 0; i < 2; i++) {
3345                 if (i == 0)
3346                         reg_offset = SDMA0_REGISTER_OFFSET;
3347                 else
3348                         reg_offset = SDMA1_REGISTER_OFFSET;
3349                 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3350                 if (enable)
3351                         me_cntl &= ~SDMA_HALT;
3352                 else
3353                         me_cntl |= SDMA_HALT;
3354                 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3355         }
3356 }
3357
3358 /**
3359  * cik_sdma_gfx_resume - setup and start the async dma engines
3360  *
3361  * @rdev: radeon_device pointer
3362  *
3363  * Set up the gfx DMA ring buffers and enable them (CIK).
3364  * Returns 0 for success, error for failure.
3365  */
3366 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3367 {
3368         struct radeon_ring *ring;
3369         u32 rb_cntl, ib_cntl;
3370         u32 rb_bufsz;
3371         u32 reg_offset, wb_offset;
3372         int i, r;
3373
3374         for (i = 0; i < 2; i++) {
3375                 if (i == 0) {
3376                         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3377                         reg_offset = SDMA0_REGISTER_OFFSET;
3378                         wb_offset = R600_WB_DMA_RPTR_OFFSET;
3379                 } else {
3380                         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3381                         reg_offset = SDMA1_REGISTER_OFFSET;
3382                         wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3383                 }
3384
3385                 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3386                 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3387
3388                 /* Set ring buffer size in dwords */
3389                 rb_bufsz = drm_order(ring->ring_size / 4);
3390                 rb_cntl = rb_bufsz << 1;
3391 #ifdef __BIG_ENDIAN
3392                 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3393 #endif
3394                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3395
3396                 /* Initialize the ring buffer's read and write pointers */
3397                 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3398                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3399
3400                 /* set the wb address whether it's enabled or not */
3401                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3402                        upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3403                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3404                        ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3405
3406                 if (rdev->wb.enabled)
3407                         rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3408
3409                 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3410                 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3411
3412                 ring->wptr = 0;
3413                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3414
3415                 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3416
3417                 /* enable DMA RB */
3418                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3419
3420                 ib_cntl = SDMA_IB_ENABLE;
3421 #ifdef __BIG_ENDIAN
3422                 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3423 #endif
3424                 /* enable DMA IBs */
3425                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3426
3427                 ring->ready = true;
3428
3429                 r = radeon_ring_test(rdev, ring->idx, ring);
3430                 if (r) {
3431                         ring->ready = false;
3432                         return r;
3433                 }
3434         }
3435
3436         radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3437
3438         return 0;
3439 }
3440
3441 /**
3442  * cik_sdma_rlc_resume - setup and start the async dma engines
3443  *
3444  * @rdev: radeon_device pointer
3445  *
3446  * Set up the compute DMA queues and enable them (CIK).
3447  * Returns 0 for success, error for failure.
3448  */
3449 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3450 {
3451         /* XXX todo */
3452         return 0;
3453 }
3454
3455 /**
3456  * cik_sdma_load_microcode - load the sDMA ME ucode
3457  *
3458  * @rdev: radeon_device pointer
3459  *
3460  * Loads the sDMA0/1 ucode.
3461  * Returns 0 for success, -EINVAL if the ucode is not available.
3462  */
3463 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3464 {
3465         const __be32 *fw_data;
3466         int i;
3467
3468         if (!rdev->sdma_fw)
3469                 return -EINVAL;
3470
3471         /* stop the gfx rings and rlc compute queues */
3472         cik_sdma_gfx_stop(rdev);
3473         cik_sdma_rlc_stop(rdev);
3474
3475         /* halt the MEs */
3476         cik_sdma_enable(rdev, false);
3477
3478         /* sdma0 */
3479         fw_data = (const __be32 *)rdev->sdma_fw->data;
3480         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3481         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3482                 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3483         WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3484
3485         /* sdma1 */
3486         fw_data = (const __be32 *)rdev->sdma_fw->data;
3487         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3488         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3489                 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3490         WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3491
3492         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3493         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3494         return 0;
3495 }
3496
3497 /**
3498  * cik_sdma_resume - setup and start the async dma engines
3499  *
3500  * @rdev: radeon_device pointer
3501  *
3502  * Set up the DMA engines and enable them (CIK).
3503  * Returns 0 for success, error for failure.
3504  */
3505 static int cik_sdma_resume(struct radeon_device *rdev)
3506 {
3507         int r;
3508
3509         /* Reset dma */
3510         WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3511         RREG32(SRBM_SOFT_RESET);
3512         udelay(50);
3513         WREG32(SRBM_SOFT_RESET, 0);
3514         RREG32(SRBM_SOFT_RESET);
3515
3516         r = cik_sdma_load_microcode(rdev);
3517         if (r)
3518                 return r;
3519
3520         /* unhalt the MEs */
3521         cik_sdma_enable(rdev, true);
3522
3523         /* start the gfx rings and rlc compute queues */
3524         r = cik_sdma_gfx_resume(rdev);
3525         if (r)
3526                 return r;
3527         r = cik_sdma_rlc_resume(rdev);
3528         if (r)
3529                 return r;
3530
3531         return 0;
3532 }
3533
3534 /**
3535  * cik_sdma_fini - tear down the async dma engines
3536  *
3537  * @rdev: radeon_device pointer
3538  *
3539  * Stop the async dma engines and free the rings (CIK).
3540  */
3541 static void cik_sdma_fini(struct radeon_device *rdev)
3542 {
3543         /* stop the gfx rings and rlc compute queues */
3544         cik_sdma_gfx_stop(rdev);
3545         cik_sdma_rlc_stop(rdev);
3546         /* halt the MEs */
3547         cik_sdma_enable(rdev, false);
3548         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3549         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3550         /* XXX - compute dma queue tear down */
3551 }
3552
3553 /**
3554  * cik_copy_dma - copy pages using the DMA engine
3555  *
3556  * @rdev: radeon_device pointer
3557  * @src_offset: src GPU address
3558  * @dst_offset: dst GPU address
3559  * @num_gpu_pages: number of GPU pages to xfer
3560  * @fence: radeon fence object
3561  *
3562  * Copy GPU pages using the DMA engine (CIK).
3563  * Used by the radeon ttm implementation to move pages if
3564  * registered as the asic copy callback.
3565  */
3566 int cik_copy_dma(struct radeon_device *rdev,
3567                  uint64_t src_offset, uint64_t dst_offset,
3568                  unsigned num_gpu_pages,
3569                  struct radeon_fence **fence)
3570 {
3571         struct radeon_semaphore *sem = NULL;
3572         int ring_index = rdev->asic->copy.dma_ring_index;
3573         struct radeon_ring *ring = &rdev->ring[ring_index];
3574         u32 size_in_bytes, cur_size_in_bytes;
3575         int i, num_loops;
3576         int r = 0;
3577
3578         r = radeon_semaphore_create(rdev, &sem);
3579         if (r) {
3580                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3581                 return r;
3582         }
3583
3584         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3585         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3586         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3587         if (r) {
3588                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3589                 radeon_semaphore_free(rdev, &sem, NULL);
3590                 return r;
3591         }
3592
3593         if (radeon_fence_need_sync(*fence, ring->idx)) {
3594                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3595                                             ring->idx);
3596                 radeon_fence_note_sync(*fence, ring->idx);
3597         } else {
3598                 radeon_semaphore_free(rdev, &sem, NULL);
3599         }
3600
3601         for (i = 0; i < num_loops; i++) {
3602                 cur_size_in_bytes = size_in_bytes;
3603                 if (cur_size_in_bytes > 0x1fffff)
3604                         cur_size_in_bytes = 0x1fffff;
3605                 size_in_bytes -= cur_size_in_bytes;
3606                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3607                 radeon_ring_write(ring, cur_size_in_bytes);
3608                 radeon_ring_write(ring, 0); /* src/dst endian swap */
3609                 radeon_ring_write(ring, src_offset & 0xffffffff);
3610                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3611                 radeon_ring_write(ring, dst_offset & 0xffffffff);
3612                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3613                 src_offset += cur_size_in_bytes;
3614                 dst_offset += cur_size_in_bytes;
3615         }
3616
3617         r = radeon_fence_emit(rdev, fence, ring->idx);
3618         if (r) {
3619                 radeon_ring_unlock_undo(rdev, ring);
3620                 return r;
3621         }
3622
3623         radeon_ring_unlock_commit(rdev, ring);
3624         radeon_semaphore_free(rdev, &sem, *fence);
3625
3626         return r;
3627 }
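
/*
 * Worked example (illustrative): each COPY_LINEAR packet above takes 7
 * ring dwords and moves at most 0x1fffff bytes (just under 2 MiB), and
 * the extra 14 dwords reserved in radeon_ring_lock() cover the optional
 * semaphore sync and the fence emit.  Copying 8 MiB (2048 GPU pages of
 * 4 KiB) therefore needs DIV_ROUND_UP(8388608, 0x1fffff) = 5 loops,
 * i.e. a reservation of 5 * 7 + 14 = 49 dwords.
 */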
3628
3629 /**
3630  * cik_sdma_ring_test - simple async dma engine test
3631  *
3632  * @rdev: radeon_device pointer
3633  * @ring: radeon_ring structure holding ring information
3634  *
3635  * Test the DMA engine by using it to write a value
3636  * to memory (CIK).
3637  * Returns 0 for success, error for failure.
3638  */
3639 int cik_sdma_ring_test(struct radeon_device *rdev,
3640                        struct radeon_ring *ring)
3641 {
3642         unsigned i;
3643         int r;
3644         void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3645         u32 tmp;
3646
3647         if (!ptr) {
3648                 DRM_ERROR("invalid vram scratch pointer\n");
3649                 return -EINVAL;
3650         }
3651
3652         tmp = 0xCAFEDEAD;
3653         writel(tmp, ptr);
3654
3655         r = radeon_ring_lock(rdev, ring, 4);
3656         if (r) {
3657                 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3658                 return r;
3659         }
3660         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3661         radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3662         radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3663         radeon_ring_write(ring, 1); /* number of DWs to follow */
3664         radeon_ring_write(ring, 0xDEADBEEF);
3665         radeon_ring_unlock_commit(rdev, ring);
3666
3667         for (i = 0; i < rdev->usec_timeout; i++) {
3668                 tmp = readl(ptr);
3669                 if (tmp == 0xDEADBEEF)
3670                         break;
3671                 DRM_UDELAY(1);
3672         }
3673
3674         if (i < rdev->usec_timeout) {
3675                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3676         } else {
3677                 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3678                           ring->idx, tmp);
3679                 r = -EINVAL;
3680         }
3681         return r;
3682 }
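
/*
 * Illustrative sketch, not driver code: the 5-dword SDMA linear write
 * emitted by the ring and IB tests, factored out.  The caller is assumed
 * to have already reserved at least 5 dwords on the ring.
 */
static void cik_sdma_emit_write_sketch(struct radeon_ring *ring,
                                       u64 gpu_addr, u32 value)
{
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
                                            SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
        radeon_ring_write(ring, gpu_addr & 0xfffffffc); /* dst addr lo */
        radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); /* dst addr hi */
        radeon_ring_write(ring, 1); /* number of DWs to follow */
        radeon_ring_write(ring, value);
}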
3683
3684 /**
3685  * cik_sdma_ib_test - test an IB on the DMA engine
3686  *
3687  * @rdev: radeon_device pointer
3688  * @ring: radeon_ring structure holding ring information
3689  *
3690  * Test a simple IB in the DMA ring (CIK).
3691  * Returns 0 on success, error on failure.
3692  */
3693 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3694 {
3695         struct radeon_ib ib;
3696         unsigned i;
3697         int r;
3698         void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3699         u32 tmp = 0;
3700
3701         if (!ptr) {
3702                 DRM_ERROR("invalid vram scratch pointer\n");
3703                 return -EINVAL;
3704         }
3705
3706         tmp = 0xCAFEDEAD;
3707         writel(tmp, ptr);
3708
3709         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3710         if (r) {
3711                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3712                 return r;
3713         }
3714
3715         ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3716         ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3717         ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3718         ib.ptr[3] = 1;
3719         ib.ptr[4] = 0xDEADBEEF;
3720         ib.length_dw = 5;
3721
3722         r = radeon_ib_schedule(rdev, &ib, NULL);
3723         if (r) {
3724                 radeon_ib_free(rdev, &ib);
3725                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3726                 return r;
3727         }
3728         r = radeon_fence_wait(ib.fence, false);
3729         if (r) {
3730                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3731                 radeon_ib_free(rdev, &ib);
3732                 return r;
3733         }
3734         for (i = 0; i < rdev->usec_timeout; i++) {
3735                 tmp = readl(ptr);
3736                 if (tmp == 0xDEADBEEF)
3737                         break;
3738                 DRM_UDELAY(1);
3739         }
3740         if (i < rdev->usec_timeout) {
3741                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3742         } else {
3743                 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3744                 r = -EINVAL;
3745         }
3746         radeon_ib_free(rdev, &ib);
3747         return r;
3748 }
3749
3750 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3751 {
3752         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3753                 RREG32(GRBM_STATUS));
3754         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3755                 RREG32(GRBM_STATUS2));
3756         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3757                 RREG32(GRBM_STATUS_SE0));
3758         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3759                 RREG32(GRBM_STATUS_SE1));
3760         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3761                 RREG32(GRBM_STATUS_SE2));
3762         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3763                 RREG32(GRBM_STATUS_SE3));
3764         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3765                 RREG32(SRBM_STATUS));
3766         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3767                 RREG32(SRBM_STATUS2));
3768         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3769                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3770         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3771                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3772         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3773         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3774                  RREG32(CP_STALLED_STAT1));
3775         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3776                  RREG32(CP_STALLED_STAT2));
3777         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3778                  RREG32(CP_STALLED_STAT3));
3779         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3780                  RREG32(CP_CPF_BUSY_STAT));
3781         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3782                  RREG32(CP_CPF_STALLED_STAT1));
3783         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3784         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3785         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3786                  RREG32(CP_CPC_STALLED_STAT1));
3787         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3788 }
3789
3790 /**
3791  * cik_gpu_check_soft_reset - check which blocks are busy
3792  *
3793  * @rdev: radeon_device pointer
3794  *
3795  * Check which blocks are busy and return the relevant reset
3796  * mask to be used by cik_gpu_soft_reset().
3797  * Returns a mask of the blocks to be reset.
3798  */
3799 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3800 {
3801         u32 reset_mask = 0;
3802         u32 tmp;
3803
3804         /* GRBM_STATUS */
3805         tmp = RREG32(GRBM_STATUS);
3806         if (tmp & (PA_BUSY | SC_BUSY |
3807                    BCI_BUSY | SX_BUSY |
3808                    TA_BUSY | VGT_BUSY |
3809                    DB_BUSY | CB_BUSY |
3810                    GDS_BUSY | SPI_BUSY |
3811                    IA_BUSY | IA_BUSY_NO_DMA))
3812                 reset_mask |= RADEON_RESET_GFX;
3813
3814         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3815                 reset_mask |= RADEON_RESET_CP;
3816
3817         /* GRBM_STATUS2 */
3818         tmp = RREG32(GRBM_STATUS2);
3819         if (tmp & RLC_BUSY)
3820                 reset_mask |= RADEON_RESET_RLC;
3821
3822         /* SDMA0_STATUS_REG */
3823         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3824         if (!(tmp & SDMA_IDLE))
3825                 reset_mask |= RADEON_RESET_DMA;
3826
3827         /* SDMA1_STATUS_REG */
3828         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3829         if (!(tmp & SDMA_IDLE))
3830                 reset_mask |= RADEON_RESET_DMA1;
3831
3832         /* SRBM_STATUS2 */
3833         tmp = RREG32(SRBM_STATUS2);
3834         if (tmp & SDMA_BUSY)
3835                 reset_mask |= RADEON_RESET_DMA;
3836
3837         if (tmp & SDMA1_BUSY)
3838                 reset_mask |= RADEON_RESET_DMA1;
3839
3840         /* SRBM_STATUS */
3841         tmp = RREG32(SRBM_STATUS);
3842
3843         if (tmp & IH_BUSY)
3844                 reset_mask |= RADEON_RESET_IH;
3845
3846         if (tmp & SEM_BUSY)
3847                 reset_mask |= RADEON_RESET_SEM;
3848
3849         if (tmp & GRBM_RQ_PENDING)
3850                 reset_mask |= RADEON_RESET_GRBM;
3851
3852         if (tmp & VMC_BUSY)
3853                 reset_mask |= RADEON_RESET_VMC;
3854
3855         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3856                    MCC_BUSY | MCD_BUSY))
3857                 reset_mask |= RADEON_RESET_MC;
3858
3859         if (evergreen_is_display_hung(rdev))
3860                 reset_mask |= RADEON_RESET_DISPLAY;
3861
3862         /* Skip MC reset as it's most likely not hung, just busy */
3863         if (reset_mask & RADEON_RESET_MC) {
3864                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3865                 reset_mask &= ~RADEON_RESET_MC;
3866         }
3867
3868         return reset_mask;
3869 }
3870
3871 /**
3872  * cik_gpu_soft_reset - soft reset GPU
3873  *
3874  * @rdev: radeon_device pointer
3875  * @reset_mask: mask of which blocks to reset
3876  *
3877  * Soft reset the blocks specified in @reset_mask.
3878  */
3879 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3880 {
3881         struct evergreen_mc_save save;
3882         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3883         u32 tmp;
3884
3885         if (reset_mask == 0)
3886                 return;
3887
3888         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3889
3890         cik_print_gpu_status_regs(rdev);
3891         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3892                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3893         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3894                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3895
3896         /* stop the rlc */
3897         cik_rlc_stop(rdev);
3898
3899         /* Disable GFX parsing/prefetching */
3900         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3901
3902         /* Disable MEC parsing/prefetching */
3903         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3904
3905         if (reset_mask & RADEON_RESET_DMA) {
3906                 /* sdma0 */
3907                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3908                 tmp |= SDMA_HALT;
3909                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3910         }
3911         if (reset_mask & RADEON_RESET_DMA1) {
3912                 /* sdma1 */
3913                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3914                 tmp |= SDMA_HALT;
3915                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3916         }
3917
3918         evergreen_mc_stop(rdev, &save);
3919         if (evergreen_mc_wait_for_idle(rdev)) {
3920                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3921         }
3922
3923         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3924                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3925
3926         if (reset_mask & RADEON_RESET_CP) {
3927                 grbm_soft_reset |= SOFT_RESET_CP;
3928
3929                 srbm_soft_reset |= SOFT_RESET_GRBM;
3930         }
3931
3932         if (reset_mask & RADEON_RESET_DMA)
3933                 srbm_soft_reset |= SOFT_RESET_SDMA;
3934
3935         if (reset_mask & RADEON_RESET_DMA1)
3936                 srbm_soft_reset |= SOFT_RESET_SDMA1;
3937
3938         if (reset_mask & RADEON_RESET_DISPLAY)
3939                 srbm_soft_reset |= SOFT_RESET_DC;
3940
3941         if (reset_mask & RADEON_RESET_RLC)
3942                 grbm_soft_reset |= SOFT_RESET_RLC;
3943
3944         if (reset_mask & RADEON_RESET_SEM)
3945                 srbm_soft_reset |= SOFT_RESET_SEM;
3946
3947         if (reset_mask & RADEON_RESET_IH)
3948                 srbm_soft_reset |= SOFT_RESET_IH;
3949
3950         if (reset_mask & RADEON_RESET_GRBM)
3951                 srbm_soft_reset |= SOFT_RESET_GRBM;
3952
3953         if (reset_mask & RADEON_RESET_VMC)
3954                 srbm_soft_reset |= SOFT_RESET_VMC;
3955
3956         if (!(rdev->flags & RADEON_IS_IGP)) {
3957                 if (reset_mask & RADEON_RESET_MC)
3958                         srbm_soft_reset |= SOFT_RESET_MC;
3959         }
3960
3961         if (grbm_soft_reset) {
3962                 tmp = RREG32(GRBM_SOFT_RESET);
3963                 tmp |= grbm_soft_reset;
3964                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3965                 WREG32(GRBM_SOFT_RESET, tmp);
3966                 tmp = RREG32(GRBM_SOFT_RESET);
3967
3968                 udelay(50);
3969
3970                 tmp &= ~grbm_soft_reset;
3971                 WREG32(GRBM_SOFT_RESET, tmp);
3972                 tmp = RREG32(GRBM_SOFT_RESET);
3973         }
3974
3975         if (srbm_soft_reset) {
3976                 tmp = RREG32(SRBM_SOFT_RESET);
3977                 tmp |= srbm_soft_reset;
3978                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3979                 WREG32(SRBM_SOFT_RESET, tmp);
3980                 tmp = RREG32(SRBM_SOFT_RESET);
3981
3982                 udelay(50);
3983
3984                 tmp &= ~srbm_soft_reset;
3985                 WREG32(SRBM_SOFT_RESET, tmp);
3986                 tmp = RREG32(SRBM_SOFT_RESET);
3987         }
3988
3989         /* Wait a little for things to settle down */
3990         udelay(50);
3991
3992         evergreen_mc_resume(rdev, &save);
3993         udelay(50);
3994
3995         cik_print_gpu_status_regs(rdev);
3996 }
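
/*
 * Illustrative sketch, not driver code: both the GRBM and SRBM blocks in
 * cik_gpu_soft_reset() above pulse their reset bits the same way -- set
 * the bits, read the register back to post the write, hold for 50 us,
 * then clear the bits and read back again.
 */
static void cik_soft_reset_pulse_sketch(struct radeon_device *rdev,
                                        u32 reg, u32 reset_bits)
{
        u32 tmp = RREG32(reg) | reset_bits;

        dev_info(rdev->dev, "soft reset 0x%08X\n", tmp);
        WREG32(reg, tmp);
        tmp = RREG32(reg); /* post the write */

        udelay(50);

        WREG32(reg, tmp & ~reset_bits);
        RREG32(reg);
}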
3997
3998 /**
3999  * cik_asic_reset - soft reset GPU
4000  *
4001  * @rdev: radeon_device pointer
4002  *
4003  * Look up which blocks are hung and attempt
4004  * to reset them.
4005  * Returns 0 for success.
4006  */
4007 int cik_asic_reset(struct radeon_device *rdev)
4008 {
4009         u32 reset_mask;
4010
4011         reset_mask = cik_gpu_check_soft_reset(rdev);
4012
4013         if (reset_mask)
4014                 r600_set_bios_scratch_engine_hung(rdev, true);
4015
4016         cik_gpu_soft_reset(rdev, reset_mask);
4017
4018         reset_mask = cik_gpu_check_soft_reset(rdev);
4019
4020         if (!reset_mask)
4021                 r600_set_bios_scratch_engine_hung(rdev, false);
4022
4023         return 0;
4024 }
4025
4026 /**
4027  * cik_gfx_is_lockup - check if the 3D engine is locked up
4028  *
4029  * @rdev: radeon_device pointer
4030  * @ring: radeon_ring structure holding ring information
4031  *
4032  * Check if the 3D engine is locked up (CIK).
4033  * Returns true if the engine is locked, false if not.
4034  */
4035 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4036 {
4037         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4038
4039         if (!(reset_mask & (RADEON_RESET_GFX |
4040                             RADEON_RESET_COMPUTE |
4041                             RADEON_RESET_CP))) {
4042                 radeon_ring_lockup_update(ring);
4043                 return false;
4044         }
4045         /* force CP activities */
4046         radeon_ring_force_activity(rdev, ring);
4047         return radeon_ring_test_lockup(rdev, ring);
4048 }
4049
4050 /**
4051  * cik_sdma_is_lockup - Check if the DMA engine is locked up
4052  *
4053  * @rdev: radeon_device pointer
4054  * @ring: radeon_ring structure holding ring information
4055  *
4056  * Check if the async DMA engine is locked up (CIK).
4057  * Returns true if the engine appears to be locked up, false if not.
4058  */
4059 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4060 {
4061         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4062         u32 mask;
4063
4064         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4065                 mask = RADEON_RESET_DMA;
4066         else
4067                 mask = RADEON_RESET_DMA1;
4068
4069         if (!(reset_mask & mask)) {
4070                 radeon_ring_lockup_update(ring);
4071                 return false;
4072         }
4073         /* force ring activities */
4074         radeon_ring_force_activity(rdev, ring);
4075         return radeon_ring_test_lockup(rdev, ring);
4076 }
4077
4078 /* MC */
4079 /**
4080  * cik_mc_program - program the GPU memory controller
4081  *
4082  * @rdev: radeon_device pointer
4083  *
4084  * Set the location of vram, gart, and AGP in the GPU's
4085  * physical address space (CIK).
4086  */
4087 static void cik_mc_program(struct radeon_device *rdev)
4088 {
4089         struct evergreen_mc_save save;
4090         u32 tmp;
4091         int i, j;
4092
4093         /* Initialize HDP */
4094         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4095                 WREG32((0x2c14 + j), 0x00000000);
4096                 WREG32((0x2c18 + j), 0x00000000);
4097                 WREG32((0x2c1c + j), 0x00000000);
4098                 WREG32((0x2c20 + j), 0x00000000);
4099                 WREG32((0x2c24 + j), 0x00000000);
4100         }
4101         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4102
4103         evergreen_mc_stop(rdev, &save);
4104         if (radeon_mc_wait_for_idle(rdev)) {
4105                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4106         }
4107         /* Lock out access through VGA aperture */
4108         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4109         /* Update configuration */
4110         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4111                rdev->mc.vram_start >> 12);
4112         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4113                rdev->mc.vram_end >> 12);
4114         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4115                rdev->vram_scratch.gpu_addr >> 12);
4116         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4117         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4118         WREG32(MC_VM_FB_LOCATION, tmp);
4119         /* XXX double check these! */
4120         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4121         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4122         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4123         WREG32(MC_VM_AGP_BASE, 0);
4124         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4125         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4126         if (radeon_mc_wait_for_idle(rdev)) {
4127                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4128         }
4129         evergreen_mc_resume(rdev, &save);
4130         /* we need to own VRAM, so turn off the VGA renderer here
4131          * to stop it overwriting our objects */
4132         rv515_vga_render_disable(rdev);
4133 }
4134
4135 /**
4136  * cik_mc_init - initialize the memory controller driver params
4137  *
4138  * @rdev: radeon_device pointer
4139  *
4140  * Look up the amount of vram, vram width, and decide how to place
4141  * vram and gart within the GPU's physical address space (CIK).
4142  * Returns 0 for success.
4143  */
4144 static int cik_mc_init(struct radeon_device *rdev)
4145 {
4146         u32 tmp;
4147         int chansize, numchan;
4148
4149         /* Get VRAM information */
4150         rdev->mc.vram_is_ddr = true;
4151         tmp = RREG32(MC_ARB_RAMCFG);
4152         if (tmp & CHANSIZE_MASK) {
4153                 chansize = 64;
4154         } else {
4155                 chansize = 32;
4156         }
4157         tmp = RREG32(MC_SHARED_CHMAP);
4158         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4159         case 0:
4160         default:
4161                 numchan = 1;
4162                 break;
4163         case 1:
4164                 numchan = 2;
4165                 break;
4166         case 2:
4167                 numchan = 4;
4168                 break;
4169         case 3:
4170                 numchan = 8;
4171                 break;
4172         case 4:
4173                 numchan = 3;
4174                 break;
4175         case 5:
4176                 numchan = 6;
4177                 break;
4178         case 6:
4179                 numchan = 10;
4180                 break;
4181         case 7:
4182                 numchan = 12;
4183                 break;
4184         case 8:
4185                 numchan = 16;
4186                 break;
4187         }
4188         rdev->mc.vram_width = numchan * chansize;
4189         /* Could aper size report 0? */
4190         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4191         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4192         /* size in MB on CIK */
4193         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4194         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4195         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4196         si_vram_gtt_location(rdev, &rdev->mc);
4197         radeon_update_bandwidth_info(rdev);
4198
4199         return 0;
4200 }
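
/*
 * Illustrative alternative, not driver code: the NOOFCHAN switch in
 * cik_mc_init() is a straight field-to-count decode and could equally be
 * written as a table (encodings past index 8 are not documented here):
 *   numchan = cik_noofchan_to_numchan_sketch[field];
 * The switch form is kept in the driver because it also provides the
 * default case for out-of-range encodings.
 */
static const unsigned char cik_noofchan_to_numchan_sketch[] = {
        1, 2, 4, 8, 3, 6, 10, 12, 16,
};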
4201
4202 /*
4203  * GART
4204  * VMID 0 is the physical GPU address space used by the kernel.
4205  * VMIDs 1-15 are used for userspace clients and are handled
4206  * by the radeon vm/hsa code.
4207  */
4208 /**
4209  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4210  *
4211  * @rdev: radeon_device pointer
4212  *
4213  * Flush the TLB for the VMID 0 page table (CIK).
4214  */
4215 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4216 {
4217         /* flush hdp cache */
4218         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4219
4220         /* bits 0-15 are the VM contexts0-15 */
4221         WREG32(VM_INVALIDATE_REQUEST, 0x1);
4222 }
4223
4224 /**
4225  * cik_pcie_gart_enable - gart enable
4226  *
4227  * @rdev: radeon_device pointer
4228  *
4229  * This sets up the TLBs, programs the page tables for VMID0,
4230  * sets up the hw for VMIDs 1-15 which are allocated on
4231  * demand, and sets up the global locations for the LDS, GDS,
4232  * and GPUVM for FSA64 clients (CIK).
4233  * Returns 0 for success, errors for failure.
4234  */
4235 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4236 {
4237         int r, i;
4238
4239         if (rdev->gart.robj == NULL) {
4240                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4241                 return -EINVAL;
4242         }
4243         r = radeon_gart_table_vram_pin(rdev);
4244         if (r)
4245                 return r;
4246         radeon_gart_restore(rdev);
4247         /* Setup TLB control */
4248         WREG32(MC_VM_MX_L1_TLB_CNTL,
4249                (0xA << 7) |
4250                ENABLE_L1_TLB |
4251                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4252                ENABLE_ADVANCED_DRIVER_MODEL |
4253                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4254         /* Setup L2 cache */
4255         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4256                ENABLE_L2_FRAGMENT_PROCESSING |
4257                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4258                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4259                EFFECTIVE_L2_QUEUE_SIZE(7) |
4260                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4261         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4262         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4263                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4264         /* setup context0 */
4265         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4266         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4267         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4268         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4269                         (u32)(rdev->dummy_page.addr >> 12));
4270         WREG32(VM_CONTEXT0_CNTL2, 0);
4271         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4272                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4273
4274         WREG32(0x15D4, 0);
4275         WREG32(0x15D8, 0);
4276         WREG32(0x15DC, 0);
4277
4278         /* empty context1-15 */
4279         /* FIXME start with 4G, once using 2 level pt switch to full
4280          * vm size space
4281          */
4282         /* set vm size, must be a multiple of 4 */
4283         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4284         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4285         for (i = 1; i < 16; i++) {
4286                 if (i < 8)
4287                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4288                                rdev->gart.table_addr >> 12);
4289                 else
4290                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4291                                rdev->gart.table_addr >> 12);
4292         }
4293
4294         /* enable context1-15 */
4295         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4296                (u32)(rdev->dummy_page.addr >> 12));
4297         WREG32(VM_CONTEXT1_CNTL2, 4);
4298         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4299                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4300                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4301                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4302                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4303                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4304                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4305                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4306                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4307                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4308                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4309                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4310                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4311
4312         /* TC cache setup ??? */
4313         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4314         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4315         WREG32(TC_CFG_L1_STORE_POLICY, 0);
4316
4317         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4318         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4319         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4320         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4321         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4322
4323         WREG32(TC_CFG_L1_VOLATILE, 0);
4324         WREG32(TC_CFG_L2_VOLATILE, 0);
4325
4326         if (rdev->family == CHIP_KAVERI) {
4327                 u32 tmp = RREG32(CHUB_CONTROL);
4328                 tmp &= ~BYPASS_VM;
4329                 WREG32(CHUB_CONTROL, tmp);
4330         }
4331
4332         /* XXX SH_MEM regs */
4333         /* where to put LDS, scratch, GPUVM in FSA64 space */
4334         for (i = 0; i < 16; i++) {
4335                 cik_srbm_select(rdev, 0, 0, 0, i);
4336                 /* CP and shaders */
4337                 WREG32(SH_MEM_CONFIG, 0);
4338                 WREG32(SH_MEM_APE1_BASE, 1);
4339                 WREG32(SH_MEM_APE1_LIMIT, 0);
4340                 WREG32(SH_MEM_BASES, 0);
4341                 /* SDMA GFX */
4342                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4343                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4344                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4345                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4346                 /* XXX SDMA RLC - todo */
4347         }
4348         cik_srbm_select(rdev, 0, 0, 0, 0);
4349
4350         cik_pcie_gart_tlb_flush(rdev);
4351         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4352                  (unsigned)(rdev->mc.gtt_size >> 20),
4353                  (unsigned long long)rdev->gart.table_addr);
4354         rdev->gart.ready = true;
4355         return 0;
4356 }
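
/*
 * Illustrative sketch, not driver code: VMIDs 0-7 and 8-15 keep their
 * page table base pointers in two separate register banks, which is why
 * the (vm_id < 8) test above recurs in cik_vm_flush() and
 * cik_dma_vm_flush() below.
 */
static u32 cik_vm_pt_base_reg_sketch(unsigned vm_id)
{
        if (vm_id < 8)
                return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2);
        return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2);
}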
4357
4358 /**
4359  * cik_pcie_gart_disable - gart disable
4360  *
4361  * @rdev: radeon_device pointer
4362  *
4363  * This disables all VM page tables (CIK).
4364  */
4365 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4366 {
4367         /* Disable all tables */
4368         WREG32(VM_CONTEXT0_CNTL, 0);
4369         WREG32(VM_CONTEXT1_CNTL, 0);
4370         /* Setup TLB control */
4371         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4372                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4373         /* Setup L2 cache */
4374         WREG32(VM_L2_CNTL,
4375                ENABLE_L2_FRAGMENT_PROCESSING |
4376                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4377                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4378                EFFECTIVE_L2_QUEUE_SIZE(7) |
4379                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4380         WREG32(VM_L2_CNTL2, 0);
4381         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4382                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4383         radeon_gart_table_vram_unpin(rdev);
4384 }
4385
4386 /**
4387  * cik_pcie_gart_fini - vm fini callback
4388  *
4389  * @rdev: radeon_device pointer
4390  *
4391  * Tears down the driver GART/VM setup (CIK).
4392  */
4393 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4394 {
4395         cik_pcie_gart_disable(rdev);
4396         radeon_gart_table_vram_free(rdev);
4397         radeon_gart_fini(rdev);
4398 }
4399
4400 /* vm parser */
4401 /**
4402  * cik_ib_parse - vm ib_parse callback
4403  *
4404  * @rdev: radeon_device pointer
4405  * @ib: indirect buffer pointer
4406  *
4407  * CIK uses hw IB checking so this is a nop (CIK).
4408  */
4409 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4410 {
4411         return 0;
4412 }
4413
4414 /*
4415  * vm
4416  * VMID 0 is the physical GPU address space used by the kernel.
4417  * VMIDs 1-15 are used for userspace clients and are handled
4418  * by the radeon vm/hsa code.
4419  */
4420 /**
4421  * cik_vm_init - cik vm init callback
4422  *
4423  * @rdev: radeon_device pointer
4424  *
4425  * Inits cik specific vm parameters (number of VMs, base of vram for
4426  * VMIDs 1-15) (CIK).
4427  * Returns 0 for success.
4428  */
4429 int cik_vm_init(struct radeon_device *rdev)
4430 {
4431         /* number of VMs */
4432         rdev->vm_manager.nvm = 16;
4433         /* base offset of vram pages */
4434         if (rdev->flags & RADEON_IS_IGP) {
4435                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4436                 tmp <<= 22;
4437                 rdev->vm_manager.vram_base_offset = tmp;
4438         } else
4439                 rdev->vm_manager.vram_base_offset = 0;
4440
4441         return 0;
4442 }
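
/*
 * Note (illustrative): MC_VM_FB_OFFSET is assumed here to hold the
 * carve-out base in 4 MiB units on IGP parts, so the << 22 above turns
 * it into the byte address that the VM manager expects.
 */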
4443
4444 /**
4445  * cik_vm_fini - cik vm fini callback
4446  *
4447  * @rdev: radeon_device pointer
4448  *
4449  * Tear down any asic specific VM setup (CIK).
4450  */
4451 void cik_vm_fini(struct radeon_device *rdev)
4452 {
4453 }
4454
4455 /**
4456  * cik_vm_flush - cik vm flush using the CP
4457  *
4458  * @rdev: radeon_device pointer
4459  *
4460  * Update the page table base and flush the VM TLB
4461  * using the CP (CIK).
4462  */
4463 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4464 {
4465         struct radeon_ring *ring = &rdev->ring[ridx];
4466
4467         if (vm == NULL)
4468                 return;
4469
4470         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4471         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4472                                  WRITE_DATA_DST_SEL(0)));
4473         if (vm->id < 8) {
4474                 radeon_ring_write(ring,
4475                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4476         } else {
4477                 radeon_ring_write(ring,
4478                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4479         }
4480         radeon_ring_write(ring, 0);
4481         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4482
4483         /* update SH_MEM_* regs */
4484         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4485         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4486                                  WRITE_DATA_DST_SEL(0)));
4487         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4488         radeon_ring_write(ring, 0);
4489         radeon_ring_write(ring, VMID(vm->id));
4490
4491         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4492         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4493                                  WRITE_DATA_DST_SEL(0)));
4494         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4495         radeon_ring_write(ring, 0);
4496
4497         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4498         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4499         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4500         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4501
4502         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4503         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4504                                  WRITE_DATA_DST_SEL(0)));
4505         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4506         radeon_ring_write(ring, 0);
4507         radeon_ring_write(ring, VMID(0));
4508
4509         /* HDP flush */
4510         /* We should be using the WAIT_REG_MEM packet here like in
4511          * cik_fence_ring_emit(), but it causes the CP to hang in this
4512          * context...
4513          */
4514         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4515         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4516                                  WRITE_DATA_DST_SEL(0)));
4517         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4518         radeon_ring_write(ring, 0);
4519         radeon_ring_write(ring, 0);
4520
4521         /* bits 0-15 are the VM contexts0-15 */
4522         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4523         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4524                                  WRITE_DATA_DST_SEL(0)));
4525         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4526         radeon_ring_write(ring, 0);
4527         radeon_ring_write(ring, 1 << vm->id);
4528
4529         /* compute doesn't have PFP */
4530         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4531                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4532                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4533                 radeon_ring_write(ring, 0x0);
4534         }
4535 }
4536
4537 /**
4538  * cik_vm_set_page - update the page tables using CP or sDMA
4539  *
4540  * @rdev: radeon_device pointer
4541  * @ib: indirect buffer to fill with commands
4542  * @pe: addr of the page entry
4543  * @addr: dst addr to write into pe
4544  * @count: number of page entries to update
4545  * @incr: increase next addr by incr bytes
4546  * @flags: access flags
4547  *
4548  * Update the page tables using CP or sDMA (CIK).
4549  */
4550 void cik_vm_set_page(struct radeon_device *rdev,
4551                      struct radeon_ib *ib,
4552                      uint64_t pe,
4553                      uint64_t addr, unsigned count,
4554                      uint32_t incr, uint32_t flags)
4555 {
4556         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4557         uint64_t value;
4558         unsigned ndw;
4559
4560         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4561                 /* CP */
4562                 while (count) {
4563                         ndw = 2 + count * 2;
4564                         if (ndw > 0x3FFE)
4565                                 ndw = 0x3FFE;
4566
4567                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4568                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4569                                                     WRITE_DATA_DST_SEL(1));
4570                         ib->ptr[ib->length_dw++] = pe;
4571                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4572                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4573                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4574                                         value = radeon_vm_map_gart(rdev, addr);
4575                                         value &= 0xFFFFFFFFFFFFF000ULL;
4576                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4577                                         value = addr;
4578                                 } else {
4579                                         value = 0;
4580                                 }
4581                                 addr += incr;
4582                                 value |= r600_flags;
4583                                 ib->ptr[ib->length_dw++] = value;
4584                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4585                         }
4586                 }
4587         } else {
4588                 /* DMA */
4589                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4590                         while (count) {
4591                                 ndw = count * 2;
4592                                 if (ndw > 0xFFFFE)
4593                                         ndw = 0xFFFFE;
4594
4595                                 /* for non-physically contiguous pages (system) */
4596                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4597                                 ib->ptr[ib->length_dw++] = pe;
4598                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4599                                 ib->ptr[ib->length_dw++] = ndw;
4600                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4601                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4602                                                 value = radeon_vm_map_gart(rdev, addr);
4603                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4604                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4605                                                 value = addr;
4606                                         } else {
4607                                                 value = 0;
4608                                         }
4609                                         addr += incr;
4610                                         value |= r600_flags;
4611                                         ib->ptr[ib->length_dw++] = value;
4612                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4613                                 }
4614                         }
4615                 } else {
4616                         while (count) {
4617                                 ndw = count;
4618                                 if (ndw > 0x7FFFF)
4619                                         ndw = 0x7FFFF;
4620
4621                                 if (flags & RADEON_VM_PAGE_VALID)
4622                                         value = addr;
4623                                 else
4624                                         value = 0;
4625                                 /* for physically contiguous pages (vram) */
4626                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4627                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4628                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4629                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4630                                 ib->ptr[ib->length_dw++] = 0;
4631                                 ib->ptr[ib->length_dw++] = value; /* value */
4632                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4633                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4634                                 ib->ptr[ib->length_dw++] = 0;
4635                                 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4636                                 pe += ndw * 8;
4637                                 addr += ndw * incr;
4638                                 count -= ndw;
4639                         }
4640                 }
4641                 while (ib->length_dw & 0x7)
4642                         ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4643         }
4644 }
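
/*
 * Worked example (illustrative): updating 1024 system-memory PTEs via
 * sDMA emits a 4-dword WRITE_LINEAR header plus 2 dwords per entry,
 * i.e. 4 + 2048 = 2052 dwords; the trailing NOP loop then pads the IB
 * from 2052 up to 2056 dwords, presumably because the sDMA engine
 * fetches IBs in 8-dword (256-bit) chunks.
 */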
4645
4646 /**
4647  * cik_dma_vm_flush - cik vm flush using sDMA
4648  *
4649  * @rdev: radeon_device pointer
4650  *
4651  * Update the page table base and flush the VM TLB
4652  * using sDMA (CIK).
4653  */
4654 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4655 {
4656         struct radeon_ring *ring = &rdev->ring[ridx];
4657         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4658                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4659         u32 ref_and_mask;
4660
4661         if (vm == NULL)
4662                 return;
4663
4664         if (ridx == R600_RING_TYPE_DMA_INDEX)
4665                 ref_and_mask = SDMA0;
4666         else
4667                 ref_and_mask = SDMA1;
4668
4669         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4670         if (vm->id < 8) {
4671                 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4672         } else {
4673                 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4674         }
4675         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4676
4677         /* update SH_MEM_* regs */
4678         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4679         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4680         radeon_ring_write(ring, VMID(vm->id));
4681
4682         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4683         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4684         radeon_ring_write(ring, 0);
4685
4686         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4687         radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4688         radeon_ring_write(ring, 0);
4689
4690         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4691         radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4692         radeon_ring_write(ring, 1);
4693
4694         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695         radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4696         radeon_ring_write(ring, 0);
4697
4698         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4700         radeon_ring_write(ring, VMID(0));
4701
4702         /* flush HDP */
4703         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4704         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4705         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4706         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4707         radeon_ring_write(ring, ref_and_mask); /* MASK */
4708         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4709
4710         /* flush TLB */
4711         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4712         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4713         radeon_ring_write(ring, 1 << vm->id);
4714 }
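
/*
 * Note (illustrative): in the SRBM_WRITE packets above, the 0xf000
 * passed as the third SDMA_PACKET() argument is taken to land in the
 * packet's upper header bits as an all-bytes-enabled write mask; the
 * packet body is then just the register dword offset and the value.
 */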
4715
4716 /*
4717  * RLC
4718  * The RLC is a multi-purpose microengine that handles a
4719  * variety of functions, the most important of which is
4720  * the interrupt controller.
4721  */
4722 /**
4723  * cik_rlc_stop - stop the RLC ME
4724  *
4725  * @rdev: radeon_device pointer
4726  *
4727  * Halt the RLC ME (MicroEngine) (CIK).
4728  */
4729 static void cik_rlc_stop(struct radeon_device *rdev)
4730 {
4731         int i, j, k;
4732         u32 mask, tmp;
4733
4734         tmp = RREG32(CP_INT_CNTL_RING0);
4735         tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4736         WREG32(CP_INT_CNTL_RING0, tmp);
4737
4738         RREG32(CB_CGTT_SCLK_CTRL);
4739         RREG32(CB_CGTT_SCLK_CTRL);
4740         RREG32(CB_CGTT_SCLK_CTRL);
4741         RREG32(CB_CGTT_SCLK_CTRL);
4742
4743         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4744         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4745
4746         WREG32(RLC_CNTL, 0);
4747
4748         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4749                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4750                         cik_select_se_sh(rdev, i, j);
4751                         for (k = 0; k < rdev->usec_timeout; k++) {
4752                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4753                                         break;
4754                                 udelay(1);
4755                         }
4756                 }
4757         }
4758         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4759
4760         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4761         for (k = 0; k < rdev->usec_timeout; k++) {
4762                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4763                         break;
4764                 udelay(1);
4765         }
4766 }
4767
4768 /**
4769  * cik_rlc_start - start the RLC ME
4770  *
4771  * @rdev: radeon_device pointer
4772  *
4773  * Unhalt the RLC ME (MicroEngine) (CIK).
4774  */
4775 static void cik_rlc_start(struct radeon_device *rdev)
4776 {
4777         u32 tmp;
4778
4779         WREG32(RLC_CNTL, RLC_ENABLE);
4780
4781         tmp = RREG32(CP_INT_CNTL_RING0);
4782         tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4783         WREG32(CP_INT_CNTL_RING0, tmp);
4784
4785         udelay(50);
4786 }
4787
4788 /**
4789  * cik_rlc_resume - setup the RLC hw
4790  *
4791  * @rdev: radeon_device pointer
4792  *
4793  * Initialize the RLC registers, load the ucode,
4794  * and start the RLC (CIK).
4795  * Returns 0 for success, -EINVAL if the ucode is not available.
4796  */
4797 static int cik_rlc_resume(struct radeon_device *rdev)
4798 {
4799         u32 i, size;
4800         u32 clear_state_info[3];
4801         const __be32 *fw_data;
4802
4803         if (!rdev->rlc_fw)
4804                 return -EINVAL;
4805
4806         switch (rdev->family) {
4807         case CHIP_BONAIRE:
4808         default:
4809                 size = BONAIRE_RLC_UCODE_SIZE;
4810                 break;
4811         case CHIP_KAVERI:
4812                 size = KV_RLC_UCODE_SIZE;
4813                 break;
4814         case CHIP_KABINI:
4815                 size = KB_RLC_UCODE_SIZE;
4816                 break;
4817         }
4818
4819         cik_rlc_stop(rdev);
4820
4821         WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4822         RREG32(GRBM_SOFT_RESET);
4823         udelay(50);
4824         WREG32(GRBM_SOFT_RESET, 0);
4825         RREG32(GRBM_SOFT_RESET);
4826         udelay(50);
4827
4828         WREG32(RLC_LB_CNTR_INIT, 0);
4829         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4830
4831         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4832         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4833         WREG32(RLC_LB_PARAMS, 0x00600408);
4834         WREG32(RLC_LB_CNTL, 0x80000004);
4835
4836         WREG32(RLC_MC_CNTL, 0);
4837         WREG32(RLC_UCODE_CNTL, 0);
4838
4839         fw_data = (const __be32 *)rdev->rlc_fw->data;
4840         WREG32(RLC_GPM_UCODE_ADDR, 0);
4841         for (i = 0; i < size; i++)
4842                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4843         WREG32(RLC_GPM_UCODE_ADDR, 0);
4844
4845         /* XXX */
4846         clear_state_info[0] = 0; /* upper_32_bits(rdev->rlc.save_restore_gpu_addr) */
4847         clear_state_info[1] = 0; /* rdev->rlc.save_restore_gpu_addr */
4848         clear_state_info[2] = 0; /* cik_default_size */
4849         WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4850         for (i = 0; i < 3; i++)
4851                 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4852         WREG32(RLC_DRIVER_DMA_STATUS, 0);
4853
4854         cik_rlc_start(rdev);
4855
4856         return 0;
4857 }
4858
4859 /*
4860  * Interrupts
4861  * Starting with r6xx, interrupts are handled via a ring buffer.
4862  * Ring buffers are areas of GPU accessible memory that the GPU
4863  * writes interrupt vectors into and the host reads vectors out of.
4864  * There is a rptr (read pointer) that determines where the
4865  * host is currently reading, and a wptr (write pointer)
4866  * which determines where the GPU has written.  When the
4867  * pointers are equal, the ring is idle.  When the GPU
4868  * writes vectors to the ring buffer, it increments the
4869  * wptr.  When there is an interrupt, the host then starts
4870  * fetching vectors and processing them until the pointers are
4871  * equal again at which point it updates the rptr.
4872  */
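
/*
 * Illustrative sketch, not driver code, of the rptr/wptr protocol
 * described above.  It assumes the CIK convention of one 16-byte vector
 * per ring entry; the real handler also deals with overflow and
 * wrap-around masking.
 */
static u32 cik_ih_drain_sketch(u32 rptr, u32 wptr, u32 ring_size)
{
        while (rptr != wptr) {
                /* fetch and dispatch the vector at rptr here ... */
                rptr = (rptr + 16) % ring_size;
        }
        return rptr; /* written back to IH_RB_RPTR so the GPU can reuse the space */
}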
4873
4874 /**
4875  * cik_enable_interrupts - Enable the interrupt ring buffer
4876  *
4877  * @rdev: radeon_device pointer
4878  *
4879  * Enable the interrupt ring buffer (CIK).
4880  */
4881 static void cik_enable_interrupts(struct radeon_device *rdev)
4882 {
4883         u32 ih_cntl = RREG32(IH_CNTL);
4884         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4885
4886         ih_cntl |= ENABLE_INTR;
4887         ih_rb_cntl |= IH_RB_ENABLE;
4888         WREG32(IH_CNTL, ih_cntl);
4889         WREG32(IH_RB_CNTL, ih_rb_cntl);
4890         rdev->ih.enabled = true;
4891 }
4892
4893 /**
4894  * cik_disable_interrupts - Disable the interrupt ring buffer
4895  *
4896  * @rdev: radeon_device pointer
4897  *
4898  * Disable the interrupt ring buffer (CIK).
4899  */
4900 static void cik_disable_interrupts(struct radeon_device *rdev)
4901 {
4902         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4903         u32 ih_cntl = RREG32(IH_CNTL);
4904
4905         ih_rb_cntl &= ~IH_RB_ENABLE;
4906         ih_cntl &= ~ENABLE_INTR;
4907         WREG32(IH_RB_CNTL, ih_rb_cntl);
4908         WREG32(IH_CNTL, ih_cntl);
4909         /* set rptr, wptr to 0 */
4910         WREG32(IH_RB_RPTR, 0);
4911         WREG32(IH_RB_WPTR, 0);
4912         rdev->ih.enabled = false;
4913         rdev->ih.rptr = 0;
4914 }
4915
4916 /**
4917  * cik_disable_interrupt_state - Disable all interrupt sources
4918  *
4919  * @rdev: radeon_device pointer
4920  *
4921  * Clear all interrupt enable bits used by the driver (CIK).
4922  */
4923 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4924 {
4925         u32 tmp;
4926
4927         /* gfx ring */
4928         WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4929         /* sdma */
4930         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4931         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4932         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4933         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4934         /* compute queues */
4935         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4936         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4937         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4938         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4939         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4940         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4941         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4942         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4943         /* grbm */
4944         WREG32(GRBM_INT_CNTL, 0);
4945         /* vline/vblank, etc. */
4946         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4947         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4948         if (rdev->num_crtc >= 4) {
4949                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4950                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4951         }
4952         if (rdev->num_crtc >= 6) {
4953                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4954                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4955         }
4956
4957         /* dac hotplug */
4958         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4959
4960         /* digital hotplug */
4961         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4962         WREG32(DC_HPD1_INT_CONTROL, tmp);
4963         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4964         WREG32(DC_HPD2_INT_CONTROL, tmp);
4965         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4966         WREG32(DC_HPD3_INT_CONTROL, tmp);
4967         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4968         WREG32(DC_HPD4_INT_CONTROL, tmp);
4969         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4970         WREG32(DC_HPD5_INT_CONTROL, tmp);
4971         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4972         WREG32(DC_HPD6_INT_CONTROL, tmp);
4973
4974 }
4975
4976 /**
4977  * cik_irq_init - init and enable the interrupt ring
4978  *
4979  * @rdev: radeon_device pointer
4980  *
4981  * Allocate a ring buffer for the interrupt controller,
4982  * enable the RLC, disable interrupts, set up the IH
4983  * ring buffer and enable it (CIK).
4984  * Called at device load and resume.
4985  * Returns 0 for success, errors for failure.
4986  */
4987 static int cik_irq_init(struct radeon_device *rdev)
4988 {
4989         int ret = 0;
4990         int rb_bufsz;
4991         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4992
4993         /* allocate ring */
4994         ret = r600_ih_ring_alloc(rdev);
4995         if (ret)
4996                 return ret;
4997
4998         /* disable irqs */
4999         cik_disable_interrupts(rdev);
5000
5001         /* init rlc */
5002         ret = cik_rlc_resume(rdev);
5003         if (ret) {
5004                 r600_ih_ring_fini(rdev);
5005                 return ret;
5006         }
5007
5008         /* setup interrupt control */
5009         /* XXX this should actually be a bus address, not an MC address. same on older asics */
5010         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5011         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5012         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5013          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5014          */
5015         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5016         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5017         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5018         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5019
5020         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5021         rb_bufsz = drm_order(rdev->ih.ring_size / 4);
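        /* drm_order() returns the log2 of the ring size in dwords; the
         * order is shifted up by one below so it sits above the enable
         * bit in IH_RB_CNTL */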
5022
5023         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5024                       IH_WPTR_OVERFLOW_CLEAR |
5025                       (rb_bufsz << 1));
5026
5027         if (rdev->wb.enabled)
5028                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5029
5030         /* set the writeback address whether it's enabled or not */
5031         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5032         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5033
5034         WREG32(IH_RB_CNTL, ih_rb_cntl);
5035
5036         /* set rptr, wptr to 0 */
5037         WREG32(IH_RB_RPTR, 0);
5038         WREG32(IH_RB_WPTR, 0);
5039
5040         /* Default settings for IH_CNTL (disabled at first) */
5041         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5042         /* RPTR_REARM only works if msi's are enabled */
5043         if (rdev->msi_enabled)
5044                 ih_cntl |= RPTR_REARM;
5045         WREG32(IH_CNTL, ih_cntl);
5046
5047         /* force the active interrupt state to all disabled */
5048         cik_disable_interrupt_state(rdev);
5049
5050         pci_set_master(rdev->pdev);
5051
5052         /* enable irqs */
5053         cik_enable_interrupts(rdev);
5054
5055         return ret;
5056 }
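/*
 * Worked example (illustrative only, assuming the 64KB IH ring this
 * driver family typically allocates): the ring size math above then
 * works out as follows.
 */
#if 0
	u32 ring_size = 64 * 1024;			/* bytes */
	int rb_bufsz = drm_order(ring_size / 4);	/* 16384 dwords -> order 14 */
	u32 ih_rb_cntl = IH_WPTR_OVERFLOW_ENABLE |
			 IH_WPTR_OVERFLOW_CLEAR |
			 (rb_bufsz << 1);		/* size order above the enable bit */
#endif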
5057
5058 /**
5059  * cik_irq_set - enable/disable interrupt sources
5060  *
5061  * @rdev: radeon_device pointer
5062  *
5063  * Enable interrupt sources on the GPU (vblanks, hpd,
5064  * etc.) (CIK).
5065  * Returns 0 for success, errors for failure.
5066  */
5067 int cik_irq_set(struct radeon_device *rdev)
5068 {
5069         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5070                 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5071         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5072         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5073         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5074         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5075         u32 grbm_int_cntl = 0;
5076         u32 dma_cntl, dma_cntl1;
5077
5078         if (!rdev->irq.installed) {
5079                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5080                 return -EINVAL;
5081         }
5082         /* don't enable anything if the ih is disabled */
5083         if (!rdev->ih.enabled) {
5084                 cik_disable_interrupts(rdev);
5085                 /* force the active interrupt state to all disabled */
5086                 cik_disable_interrupt_state(rdev);
5087                 return 0;
5088         }
5089
5090         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5091         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5092         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5093         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5094         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5095         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5096
5097         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5098         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5099
5100         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5101         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5102         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5103         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5104         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5105         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5106         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5107         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5108
5109         /* enable CP interrupts on all rings */
5110         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5111                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5112                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5113         }
5114         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5115                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5116                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5117                 if (ring->me == 1) {
5118                         switch (ring->pipe) {
5119                         case 0:
5120                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5121                                 break;
5122                         case 1:
5123                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5124                                 break;
5125                         case 2:
5126                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5127                                 break;
5128                         case 3:
5129                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5130                                 break;
5131                         default:
5132                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5133                                 break;
5134                         }
5135                 } else if (ring->me == 2) {
5136                         switch (ring->pipe) {
5137                         case 0:
5138                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5139                                 break;
5140                         case 1:
5141                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5142                                 break;
5143                         case 2:
5144                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5145                                 break;
5146                         case 3:
5147                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5148                                 break;
5149                         default:
5150                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5151                                 break;
5152                         }
5153                 } else {
5154                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5155                 }
5156         }
5157         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5158                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5159                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5160                 if (ring->me == 1) {
5161                         switch (ring->pipe) {
5162                         case 0:
5163                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5164                                 break;
5165                         case 1:
5166                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5167                                 break;
5168                         case 2:
5169                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5170                                 break;
5171                         case 3:
5172                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5173                                 break;
5174                         default:
5175                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5176                                 break;
5177                         }
5178                 } else if (ring->me == 2) {
5179                         switch (ring->pipe) {
5180                         case 0:
5181                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5182                                 break;
5183                         case 1:
5184                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5185                                 break;
5186                         case 2:
5187                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5188                                 break;
5189                         case 3:
5190                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5191                                 break;
5192                         default:
5193                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5194                                 break;
5195                         }
5196                 } else {
5197                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5198                 }
5199         }
5200
5201         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5202                 DRM_DEBUG("cik_irq_set: sw int dma\n");
5203                 dma_cntl |= TRAP_ENABLE;
5204         }
5205
5206         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5207                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5208                 dma_cntl1 |= TRAP_ENABLE;
5209         }
5210
5211         if (rdev->irq.crtc_vblank_int[0] ||
5212             atomic_read(&rdev->irq.pflip[0])) {
5213                 DRM_DEBUG("cik_irq_set: vblank 0\n");
5214                 crtc1 |= VBLANK_INTERRUPT_MASK;
5215         }
5216         if (rdev->irq.crtc_vblank_int[1] ||
5217             atomic_read(&rdev->irq.pflip[1])) {
5218                 DRM_DEBUG("cik_irq_set: vblank 1\n");
5219                 crtc2 |= VBLANK_INTERRUPT_MASK;
5220         }
5221         if (rdev->irq.crtc_vblank_int[2] ||
5222             atomic_read(&rdev->irq.pflip[2])) {
5223                 DRM_DEBUG("cik_irq_set: vblank 2\n");
5224                 crtc3 |= VBLANK_INTERRUPT_MASK;
5225         }
5226         if (rdev->irq.crtc_vblank_int[3] ||
5227             atomic_read(&rdev->irq.pflip[3])) {
5228                 DRM_DEBUG("cik_irq_set: vblank 3\n");
5229                 crtc4 |= VBLANK_INTERRUPT_MASK;
5230         }
5231         if (rdev->irq.crtc_vblank_int[4] ||
5232             atomic_read(&rdev->irq.pflip[4])) {
5233                 DRM_DEBUG("cik_irq_set: vblank 4\n");
5234                 crtc5 |= VBLANK_INTERRUPT_MASK;
5235         }
5236         if (rdev->irq.crtc_vblank_int[5] ||
5237             atomic_read(&rdev->irq.pflip[5])) {
5238                 DRM_DEBUG("cik_irq_set: vblank 5\n");
5239                 crtc6 |= VBLANK_INTERRUPT_MASK;
5240         }
5241         if (rdev->irq.hpd[0]) {
5242                 DRM_DEBUG("cik_irq_set: hpd 1\n");
5243                 hpd1 |= DC_HPDx_INT_EN;
5244         }
5245         if (rdev->irq.hpd[1]) {
5246                 DRM_DEBUG("cik_irq_set: hpd 2\n");
5247                 hpd2 |= DC_HPDx_INT_EN;
5248         }
5249         if (rdev->irq.hpd[2]) {
5250                 DRM_DEBUG("cik_irq_set: hpd 3\n");
5251                 hpd3 |= DC_HPDx_INT_EN;
5252         }
5253         if (rdev->irq.hpd[3]) {
5254                 DRM_DEBUG("cik_irq_set: hpd 4\n");
5255                 hpd4 |= DC_HPDx_INT_EN;
5256         }
5257         if (rdev->irq.hpd[4]) {
5258                 DRM_DEBUG("cik_irq_set: hpd 5\n");
5259                 hpd5 |= DC_HPDx_INT_EN;
5260         }
5261         if (rdev->irq.hpd[5]) {
5262                 DRM_DEBUG("cik_irq_set: hpd 6\n");
5263                 hpd6 |= DC_HPDx_INT_EN;
5264         }
5265
5266         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5267
5268         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5269         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5270
5271         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5272         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5273         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5274         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5275         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5276         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5277         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5278         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5279
5280         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5281
5282         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5283         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5284         if (rdev->num_crtc >= 4) {
5285                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5286                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5287         }
5288         if (rdev->num_crtc >= 6) {
5289                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5290                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5291         }
5292
5293         WREG32(DC_HPD1_INT_CONTROL, hpd1);
5294         WREG32(DC_HPD2_INT_CONTROL, hpd2);
5295         WREG32(DC_HPD3_INT_CONTROL, hpd3);
5296         WREG32(DC_HPD4_INT_CONTROL, hpd4);
5297         WREG32(DC_HPD5_INT_CONTROL, hpd5);
5298         WREG32(DC_HPD6_INT_CONTROL, hpd6);
5299
5300         return 0;
5301 }
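/*
 * Usage sketch (illustrative, not part of this file): the KMS layer
 * flips the per-source flags in rdev->irq and then calls this function
 * to commit them to the hardware, e.g. to enable the vblank interrupt
 * for CRTC 0:
 */
#if 0
	rdev->irq.crtc_vblank_int[0] = true;
	cik_irq_set(rdev);	/* programs LB_INTERRUPT_MASK for CRTC0 */
#endif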
5302
5303 /**
5304  * cik_irq_ack - ack interrupt sources
5305  *
5306  * @rdev: radeon_device pointer
5307  *
5308  * Ack interrupt sources on the GPU (vblanks, hpd,
5309  * etc.) (CIK).  Certain interrupt sources are sw
5310  * generated and do not require an explicit ack.
5311  */
5312 static inline void cik_irq_ack(struct radeon_device *rdev)
5313 {
5314         u32 tmp;
5315
5316         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5317         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5318         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5319         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5320         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5321         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5322         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5323
5324         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5325                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5326         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5327                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5328         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5329                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5330         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5331                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5332
5333         if (rdev->num_crtc >= 4) {
5334                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5335                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5336                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5337                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5338                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5339                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5340                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5341                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5342         }
5343
5344         if (rdev->num_crtc >= 6) {
5345                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5346                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5347                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5348                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5349                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5350                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5351                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5352                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5353         }
5354
5355         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5356                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5357                 tmp |= DC_HPDx_INT_ACK;
5358                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5359         }
5360         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5361                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5362                 tmp |= DC_HPDx_INT_ACK;
5363                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5364         }
5365         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5366                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5367                 tmp |= DC_HPDx_INT_ACK;
5368                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5369         }
5370         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5371                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5372                 tmp |= DC_HPDx_INT_ACK;
5373                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5374         }
5375         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5376                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5377                 tmp |= DC_HPDx_INT_ACK;
5378                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5379         }
5380         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5381                 tmp = RREG32(DC_HPD6_INT_CONTROL);
5382                 tmp |= DC_HPDx_INT_ACK;
5383                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5384         }
5385 }
5386
5387 /**
5388  * cik_irq_disable - disable interrupts
5389  *
5390  * @rdev: radeon_device pointer
5391  *
5392  * Disable interrupts on the hw (CIK).
5393  */
5394 static void cik_irq_disable(struct radeon_device *rdev)
5395 {
5396         cik_disable_interrupts(rdev);
5397         /* Wait and acknowledge irq */
5398         mdelay(1);
5399         cik_irq_ack(rdev);
5400         cik_disable_interrupt_state(rdev);
5401 }
5402
5403 /**
5404  * cik_irq_suspend - disable interrupts for suspend
5405  *
5406  * @rdev: radeon_device pointer
5407  *
5408  * Disable interrupts and stop the RLC (CIK).
5409  * Used for suspend.
5410  */
5411 static void cik_irq_suspend(struct radeon_device *rdev)
5412 {
5413         cik_irq_disable(rdev);
5414         cik_rlc_stop(rdev);
5415 }
5416
5417 /**
5418  * cik_irq_fini - tear down interrupt support
5419  *
5420  * @rdev: radeon_device pointer
5421  *
5422  * Disable interrupts on the hw and free the IH ring
5423  * buffer (CIK).
5424  * Used for driver unload.
5425  */
5426 static void cik_irq_fini(struct radeon_device *rdev)
5427 {
5428         cik_irq_suspend(rdev);
5429         r600_ih_ring_fini(rdev);
5430 }
5431
5432 /**
5433  * cik_get_ih_wptr - get the IH ring buffer wptr
5434  *
5435  * @rdev: radeon_device pointer
5436  *
5437  * Get the IH ring buffer wptr from either the register
5438  * or the writeback memory buffer (CIK).  Also check for
5439  * ring buffer overflow and deal with it.
5440  * Used by cik_irq_process().
5441  * Returns the value of the wptr.
5442  */
5443 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5444 {
5445         u32 wptr, tmp;
5446
5447         if (rdev->wb.enabled)
5448                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5449         else
5450                 wptr = RREG32(IH_RB_WPTR);
5451
5452         if (wptr & RB_OVERFLOW) {
5453                 /* When a ring buffer overflow happens, start parsing interrupts
5454                  * from the last not-overwritten vector (wptr + 16). Hopefully
5455                  * this should allow us to catch up.
5456                  */
5457                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5458                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5459                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5460                 tmp = RREG32(IH_RB_CNTL);
5461                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5462                 WREG32(IH_RB_CNTL, tmp);
5463         }
5464         return (wptr & rdev->ih.ptr_mask);
5465 }
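/*
 * Worked example (illustrative): with a 64KB IH ring, ptr_mask is
 * 0xffff and each IV entry is 16 bytes.  If an overflow is flagged
 * while the write pointer reads back as 0x8010, the vector at the
 * write position is the one being overwritten, so the read pointer is
 * resynced to (0x8010 + 16) & 0xffff = 0x8020, the last vector known
 * to be intact.
 */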
5466
5467 /*        CIK IV Ring
5468  * Each IV ring entry is 128 bits:
5469  * [7:0]    - interrupt source id
5470  * [31:8]   - reserved
5471  * [59:32]  - interrupt source data
5472  * [63:60]  - reserved
5473  * [71:64]  - RINGID
5474  *            CP:
5475  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5476  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5477  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5478  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5479  *            PIPE_ID - ME0 0=3D
5480  *                    - ME1&2 compute dispatcher (4 pipes each)
5481  *            SDMA:
5482  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
5483  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
5484  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5485  * [79:72]  - VMID
5486  * [95:80]  - PASID
5487  * [127:96] - reserved
5488  */
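/*
 * Illustrative decode of one IV ring entry per the layout above; this
 * mirrors the masking done in cik_irq_process() below:
 */
#if 0
	u32 dw0 = le32_to_cpu(rdev->ih.ring[ring_index + 0]);
	u32 dw1 = le32_to_cpu(rdev->ih.ring[ring_index + 1]);
	u32 dw2 = le32_to_cpu(rdev->ih.ring[ring_index + 2]);
	u8  src_id   = dw0 & 0xff;		/* [7:0] interrupt source id */
	u32 src_data = dw1 & 0xfffffff;		/* [59:32], 28 bits of data */
	u8  ring_id  = dw2 & 0xff;		/* [71:64] RINGID */
	u8  vm_id    = (dw2 >> 8) & 0xff;	/* [79:72] VMID */
	u16 pasid    = (dw2 >> 16) & 0xffff;	/* [95:80] PASID */
#endif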
5489 /**
5490  * cik_irq_process - interrupt handler
5491  *
5492  * @rdev: radeon_device pointer
5493  *
5494  * Interrupt handler (CIK).  Walk the IH ring,
5495  * ack interrupts and schedule work to handle
5496  * interrupt events.
5497  * Returns irq process return code.
5498  */
5499 int cik_irq_process(struct radeon_device *rdev)
5500 {
5501         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5502         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5503         u32 wptr;
5504         u32 rptr;
5505         u32 src_id, src_data, ring_id;
5506         u8 me_id, pipe_id, queue_id;
5507         u32 ring_index;
5508         bool queue_hotplug = false;
5509         bool queue_reset = false;
5510
5511         if (!rdev->ih.enabled || rdev->shutdown)
5512                 return IRQ_NONE;
5513
5514         wptr = cik_get_ih_wptr(rdev);
5515
5516 restart_ih:
5517         /* is somebody else already processing irqs? */
5518         if (atomic_xchg(&rdev->ih.lock, 1))
5519                 return IRQ_NONE;
5520
5521         rptr = rdev->ih.rptr;
5522         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5523
5524         /* Order reading of wptr vs. reading of IH ring data */
5525         rmb();
5526
5527         /* display interrupts */
5528         cik_irq_ack(rdev);
5529
5530         while (rptr != wptr) {
5531                 /* wptr/rptr are in bytes! */
5532                 ring_index = rptr / 4;
5533                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5534                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5535                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5536
5537                 switch (src_id) {
5538                 case 1: /* D1 vblank/vline */
5539                         switch (src_data) {
5540                         case 0: /* D1 vblank */
5541                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5542                                         if (rdev->irq.crtc_vblank_int[0]) {
5543                                                 drm_handle_vblank(rdev->ddev, 0);
5544                                                 rdev->pm.vblank_sync = true;
5545                                                 wake_up(&rdev->irq.vblank_queue);
5546                                         }
5547                                         if (atomic_read(&rdev->irq.pflip[0]))
5548                                                 radeon_crtc_handle_flip(rdev, 0);
5549                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5550                                         DRM_DEBUG("IH: D1 vblank\n");
5551                                 }
5552                                 break;
5553                         case 1: /* D1 vline */
5554                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5555                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5556                                         DRM_DEBUG("IH: D1 vline\n");
5557                                 }
5558                                 break;
5559                         default:
5560                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5561                                 break;
5562                         }
5563                         break;
5564                 case 2: /* D2 vblank/vline */
5565                         switch (src_data) {
5566                         case 0: /* D2 vblank */
5567                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5568                                         if (rdev->irq.crtc_vblank_int[1]) {
5569                                                 drm_handle_vblank(rdev->ddev, 1);
5570                                                 rdev->pm.vblank_sync = true;
5571                                                 wake_up(&rdev->irq.vblank_queue);
5572                                         }
5573                                         if (atomic_read(&rdev->irq.pflip[1]))
5574                                                 radeon_crtc_handle_flip(rdev, 1);
5575                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5576                                         DRM_DEBUG("IH: D2 vblank\n");
5577                                 }
5578                                 break;
5579                         case 1: /* D2 vline */
5580                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5581                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5582                                         DRM_DEBUG("IH: D2 vline\n");
5583                                 }
5584                                 break;
5585                         default:
5586                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5587                                 break;
5588                         }
5589                         break;
5590                 case 3: /* D3 vblank/vline */
5591                         switch (src_data) {
5592                         case 0: /* D3 vblank */
5593                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5594                                         if (rdev->irq.crtc_vblank_int[2]) {
5595                                                 drm_handle_vblank(rdev->ddev, 2);
5596                                                 rdev->pm.vblank_sync = true;
5597                                                 wake_up(&rdev->irq.vblank_queue);
5598                                         }
5599                                         if (atomic_read(&rdev->irq.pflip[2]))
5600                                                 radeon_crtc_handle_flip(rdev, 2);
5601                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5602                                         DRM_DEBUG("IH: D3 vblank\n");
5603                                 }
5604                                 break;
5605                         case 1: /* D3 vline */
5606                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5607                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5608                                         DRM_DEBUG("IH: D3 vline\n");
5609                                 }
5610                                 break;
5611                         default:
5612                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5613                                 break;
5614                         }
5615                         break;
5616                 case 4: /* D4 vblank/vline */
5617                         switch (src_data) {
5618                         case 0: /* D4 vblank */
5619                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5620                                         if (rdev->irq.crtc_vblank_int[3]) {
5621                                                 drm_handle_vblank(rdev->ddev, 3);
5622                                                 rdev->pm.vblank_sync = true;
5623                                                 wake_up(&rdev->irq.vblank_queue);
5624                                         }
5625                                         if (atomic_read(&rdev->irq.pflip[3]))
5626                                                 radeon_crtc_handle_flip(rdev, 3);
5627                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5628                                         DRM_DEBUG("IH: D4 vblank\n");
5629                                 }
5630                                 break;
5631                         case 1: /* D4 vline */
5632                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5633                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5634                                         DRM_DEBUG("IH: D4 vline\n");
5635                                 }
5636                                 break;
5637                         default:
5638                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5639                                 break;
5640                         }
5641                         break;
5642                 case 5: /* D5 vblank/vline */
5643                         switch (src_data) {
5644                         case 0: /* D5 vblank */
5645                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5646                                         if (rdev->irq.crtc_vblank_int[4]) {
5647                                                 drm_handle_vblank(rdev->ddev, 4);
5648                                                 rdev->pm.vblank_sync = true;
5649                                                 wake_up(&rdev->irq.vblank_queue);
5650                                         }
5651                                         if (atomic_read(&rdev->irq.pflip[4]))
5652                                                 radeon_crtc_handle_flip(rdev, 4);
5653                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5654                                         DRM_DEBUG("IH: D5 vblank\n");
5655                                 }
5656                                 break;
5657                         case 1: /* D5 vline */
5658                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5659                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5660                                         DRM_DEBUG("IH: D5 vline\n");
5661                                 }
5662                                 break;
5663                         default:
5664                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5665                                 break;
5666                         }
5667                         break;
5668                 case 6: /* D6 vblank/vline */
5669                         switch (src_data) {
5670                         case 0: /* D6 vblank */
5671                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5672                                         if (rdev->irq.crtc_vblank_int[5]) {
5673                                                 drm_handle_vblank(rdev->ddev, 5);
5674                                                 rdev->pm.vblank_sync = true;
5675                                                 wake_up(&rdev->irq.vblank_queue);
5676                                         }
5677                                         if (atomic_read(&rdev->irq.pflip[5]))
5678                                                 radeon_crtc_handle_flip(rdev, 5);
5679                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5680                                         DRM_DEBUG("IH: D6 vblank\n");
5681                                 }
5682                                 break;
5683                         case 1: /* D6 vline */
5684                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5685                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5686                                         DRM_DEBUG("IH: D6 vline\n");
5687                                 }
5688                                 break;
5689                         default:
5690                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5691                                 break;
5692                         }
5693                         break;
5694                 case 42: /* HPD hotplug */
5695                         switch (src_data) {
5696                         case 0:
5697                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5698                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5699                                         queue_hotplug = true;
5700                                         DRM_DEBUG("IH: HPD1\n");
5701                                 }
5702                                 break;
5703                         case 1:
5704                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5705                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5706                                         queue_hotplug = true;
5707                                         DRM_DEBUG("IH: HPD2\n");
5708                                 }
5709                                 break;
5710                         case 2:
5711                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5712                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5713                                         queue_hotplug = true;
5714                                         DRM_DEBUG("IH: HPD3\n");
5715                                 }
5716                                 break;
5717                         case 3:
5718                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5719                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5720                                         queue_hotplug = true;
5721                                         DRM_DEBUG("IH: HPD4\n");
5722                                 }
5723                                 break;
5724                         case 4:
5725                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5726                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5727                                         queue_hotplug = true;
5728                                         DRM_DEBUG("IH: HPD5\n");
5729                                 }
5730                                 break;
5731                         case 5:
5732                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5733                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5734                                         queue_hotplug = true;
5735                                         DRM_DEBUG("IH: HPD6\n");
5736                                 }
5737                                 break;
5738                         default:
5739                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5740                                 break;
5741                         }
5742                         break;
5743                 case 146:
5744                 case 147:
5745                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5746                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5747                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5748                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5749                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5750                         /* reset addr and status */
5751                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5752                         break;
5753                 case 176: /* GFX RB CP_INT */
5754                 case 177: /* GFX IB CP_INT */
5755                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5756                         break;
5757                 case 181: /* CP EOP event */
5758                         DRM_DEBUG("IH: CP EOP\n");
5759                         /* XXX check the bitfield order! */
5760                         me_id = (ring_id & 0x60) >> 5;
5761                         pipe_id = (ring_id & 0x18) >> 3;
5762                         queue_id = (ring_id & 0x7) >> 0;
5763                         switch (me_id) {
5764                         case 0:
5765                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5766                                 break;
5767                         case 1:
5768                         case 2:
5769                         if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5770                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5771                         if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5772                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5773                                 break;
5774                         }
5775                         break;
5776                 case 184: /* CP Privileged reg access */
5777                         DRM_ERROR("Illegal register access in command stream\n");
5778                         /* XXX check the bitfield order! */
5779                         me_id = (ring_id & 0x60) >> 5;
5780                         pipe_id = (ring_id & 0x18) >> 3;
5781                         queue_id = (ring_id & 0x7) >> 0;
5782                         switch (me_id) {
5783                         case 0:
5784                                 /* This results in a full GPU reset, but all we need to do is soft
5785                                  * reset the CP for gfx
5786                                  */
5787                                 queue_reset = true;
5788                                 break;
5789                         case 1:
5790                                 /* XXX compute */
5791                                 queue_reset = true;
5792                                 break;
5793                         case 2:
5794                                 /* XXX compute */
5795                                 queue_reset = true;
5796                                 break;
5797                         }
5798                         break;
5799                 case 185: /* CP Privileged inst */
5800                         DRM_ERROR("Illegal instruction in command stream\n");
5801                         /* XXX check the bitfield order! */
5802                         me_id = (ring_id & 0x60) >> 5;
5803                         pipe_id = (ring_id & 0x18) >> 3;
5804                         queue_id = (ring_id & 0x7) >> 0;
5805                         switch (me_id) {
5806                         case 0:
5807                                 /* This results in a full GPU reset, but all we need to do is soft
5808                                  * reset the CP for gfx
5809                                  */
5810                                 queue_reset = true;
5811                                 break;
5812                         case 1:
5813                                 /* XXX compute */
5814                                 queue_reset = true;
5815                                 break;
5816                         case 2:
5817                                 /* XXX compute */
5818                                 queue_reset = true;
5819                                 break;
5820                         }
5821                         break;
5822                 case 224: /* SDMA trap event */
5823                         /* XXX check the bitfield order! */
5824                         me_id = (ring_id & 0x3) >> 0;
5825                         queue_id = (ring_id & 0xc) >> 2;
5826                         DRM_DEBUG("IH: SDMA trap\n");
5827                         switch (me_id) {
5828                         case 0:
5829                                 switch (queue_id) {
5830                                 case 0:
5831                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5832                                         break;
5833                                 case 1:
5834                                         /* XXX compute */
5835                                         break;
5836                                 case 2:
5837                                         /* XXX compute */
5838                                         break;
5839                                 }
5840                                 break;
5841                         case 1:
5842                                 switch (queue_id) {
5843                                 case 0:
5844                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5845                                         break;
5846                                 case 1:
5847                                         /* XXX compute */
5848                                         break;
5849                                 case 2:
5850                                         /* XXX compute */
5851                                         break;
5852                                 }
5853                                 break;
5854                         }
5855                         break;
5856                 case 241: /* SDMA Privileged inst */
5857                 case 247: /* SDMA Privileged inst */
5858                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
5859                         /* XXX check the bitfield order! */
5860                         me_id = (ring_id & 0x3) >> 0;
5861                         queue_id = (ring_id & 0xc) >> 2;
5862                         switch (me_id) {
5863                         case 0:
5864                                 switch (queue_id) {
5865                                 case 0:
5866                                         queue_reset = true;
5867                                         break;
5868                                 case 1:
5869                                         /* XXX compute */
5870                                         queue_reset = true;
5871                                         break;
5872                                 case 2:
5873                                         /* XXX compute */
5874                                         queue_reset = true;
5875                                         break;
5876                                 }
5877                                 break;
5878                         case 1:
5879                                 switch (queue_id) {
5880                                 case 0:
5881                                         queue_reset = true;
5882                                         break;
5883                                 case 1:
5884                                         /* XXX compute */
5885                                         queue_reset = true;
5886                                         break;
5887                                 case 2:
5888                                         /* XXX compute */
5889                                         queue_reset = true;
5890                                         break;
5891                                 }
5892                                 break;
5893                         }
5894                         break;
5895                 case 233: /* GUI IDLE */
5896                         DRM_DEBUG("IH: GUI idle\n");
5897                         break;
5898                 default:
5899                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5900                         break;
5901                 }
5902
5903                 /* wptr/rptr are in bytes! */
5904                 rptr += 16;
5905                 rptr &= rdev->ih.ptr_mask;
5906         }
5907         if (queue_hotplug)
5908                 schedule_work(&rdev->hotplug_work);
5909         if (queue_reset)
5910                 schedule_work(&rdev->reset_work);
5911         rdev->ih.rptr = rptr;
5912         WREG32(IH_RB_RPTR, rdev->ih.rptr);
5913         atomic_set(&rdev->ih.lock, 0);
5914
5915         /* make sure wptr hasn't changed while processing */
5916         wptr = cik_get_ih_wptr(rdev);
5917         if (wptr != rptr)
5918                 goto restart_ih;
5919
5920         return IRQ_HANDLED;
5921 }
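/*
 * Call-path sketch (illustrative): the DRM core's interrupt handler
 * reaches this function through the asic dispatch table, roughly
 *
 *   radeon_driver_irq_handler()
 *     -> radeon_irq_process(rdev)	(asic irq.process hook)
 *       -> cik_irq_process(rdev)
 *
 * and any hotplug/reset work scheduled above runs later in process
 * context.
 */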
5922
5923 /*
5924  * startup/shutdown callbacks
5925  */
5926 /**
5927  * cik_startup - program the asic to a functional state
5928  *
5929  * @rdev: radeon_device pointer
5930  *
5931  * Programs the asic to a functional state (CIK).
5932  * Called by cik_init() and cik_resume().
5933  * Returns 0 for success, error for failure.
5934  */
5935 static int cik_startup(struct radeon_device *rdev)
5936 {
5937         struct radeon_ring *ring;
5938         int r;
5939
5940         if (rdev->flags & RADEON_IS_IGP) {
5941                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5942                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5943                         r = cik_init_microcode(rdev);
5944                         if (r) {
5945                                 DRM_ERROR("Failed to load firmware!\n");
5946                                 return r;
5947                         }
5948                 }
5949         } else {
5950                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5951                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5952                     !rdev->mc_fw) {
5953                         r = cik_init_microcode(rdev);
5954                         if (r) {
5955                                 DRM_ERROR("Failed to load firmware!\n");
5956                                 return r;
5957                         }
5958                 }
5959
5960                 r = ci_mc_load_microcode(rdev);
5961                 if (r) {
5962                         DRM_ERROR("Failed to load MC firmware!\n");
5963                         return r;
5964                 }
5965         }
5966
5967         r = r600_vram_scratch_init(rdev);
5968         if (r)
5969                 return r;
5970
5971         cik_mc_program(rdev);
5972         r = cik_pcie_gart_enable(rdev);
5973         if (r)
5974                 return r;
5975         cik_gpu_init(rdev);
5976
5977         /* allocate rlc buffers */
5978         r = si_rlc_init(rdev);
5979         if (r) {
5980                 DRM_ERROR("Failed to init rlc BOs!\n");
5981                 return r;
5982         }
5983
5984         /* allocate wb buffer */
5985         r = radeon_wb_init(rdev);
5986         if (r)
5987                 return r;
5988
5989         /* allocate mec buffers */
5990         r = cik_mec_init(rdev);
5991         if (r) {
5992                 DRM_ERROR("Failed to init MEC BOs!\n");
5993                 return r;
5994         }
5995
5996         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5997         if (r) {
5998                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5999                 return r;
6000         }
6001
6002         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6003         if (r) {
6004                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6005                 return r;
6006         }
6007
6008         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6009         if (r) {
6010                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6011                 return r;
6012         }
6013
6014         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6015         if (r) {
6016                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6017                 return r;
6018         }
6019
6020         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6021         if (r) {
6022                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6023                 return r;
6024         }
6025
6026         r = cik_uvd_resume(rdev);
6027         if (!r) {
6028                 r = radeon_fence_driver_start_ring(rdev,
6029                                                    R600_RING_TYPE_UVD_INDEX);
6030                 if (r)
6031                         dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6032         }
6033         if (r)
6034                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6035
6036         /* Enable IRQ */
6037         if (!rdev->irq.installed) {
6038                 r = radeon_irq_kms_init(rdev);
6039                 if (r)
6040                         return r;
6041         }
6042
6043         r = cik_irq_init(rdev);
6044         if (r) {
6045                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6046                 radeon_irq_kms_fini(rdev);
6047                 return r;
6048         }
6049         cik_irq_set(rdev);
6050
6051         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6052         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6053                              CP_RB0_RPTR, CP_RB0_WPTR,
6054                              0, 0xfffff, RADEON_CP_PACKET2);
6055         if (r)
6056                 return r;
6057
6058         /* set up the compute queues */
6059         /* type-2 packets are deprecated on MEC, use type-3 instead */
6060         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6061         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6062                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6063                              0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6064         if (r)
6065                 return r;
6066         ring->me = 1; /* first MEC */
6067         ring->pipe = 0; /* first pipe */
6068         ring->queue = 0; /* first queue */
6069         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6070
6071         /* type-2 packets are deprecated on MEC, use type-3 instead */
6072         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6073         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6074                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6075                              0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6076         if (r)
6077                 return r;
6078         /* dGPUs only have 1 MEC */
6079         ring->me = 1; /* first MEC */
6080         ring->pipe = 0; /* first pipe */
6081         ring->queue = 1; /* second queue */
6082         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6083
6084         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6085         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6086                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6087                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6088                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6089         if (r)
6090                 return r;
6091
6092         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6093         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6094                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6095                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6096                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6097         if (r)
6098                 return r;
6099
6100         r = cik_cp_resume(rdev);
6101         if (r)
6102                 return r;
6103
6104         r = cik_sdma_resume(rdev);
6105         if (r)
6106                 return r;
6107
6108         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6109         if (ring->ring_size) {
6110                 r = radeon_ring_init(rdev, ring, ring->ring_size,
6111                                      R600_WB_UVD_RPTR_OFFSET,
6112                                      UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6113                                      0, 0xfffff, RADEON_CP_PACKET2);
6114                 if (!r)
6115                         r = r600_uvd_init(rdev);
6116                 if (r)
6117                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6118         }
6119
6120         r = radeon_ib_pool_init(rdev);
6121         if (r) {
6122                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6123                 return r;
6124         }
6125
6126         r = radeon_vm_manager_init(rdev);
6127         if (r) {
6128                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6129                 return r;
6130         }
6131
6132         return 0;
6133 }
6134
6135 /**
6136  * cik_resume - resume the asic to a functional state
6137  *
6138  * @rdev: radeon_device pointer
6139  *
6140  * Programs the asic to a functional state (CIK).
6141  * Called at resume.
6142  * Returns 0 for success, error for failure.
6143  */
6144 int cik_resume(struct radeon_device *rdev)
6145 {
6146         int r;
6147
6148         /* post card */
6149         atom_asic_init(rdev->mode_info.atom_context);
6150
6151         /* init golden registers */
6152         cik_init_golden_registers(rdev);
6153
6154         rdev->accel_working = true;
6155         r = cik_startup(rdev);
6156         if (r) {
6157                 DRM_ERROR("cik startup failed on resume\n");
6158                 rdev->accel_working = false;
6159                 return r;
6160         }
6161
6162         return r;
6163
6164 }
6165
6166 /**
6167  * cik_suspend - suspend the asic
6168  *
6169  * @rdev: radeon_device pointer
6170  *
6171  * Bring the chip into a state suitable for suspend (CIK).
6172  * Called at suspend.
6173  * Returns 0 for success.
6174  */
6175 int cik_suspend(struct radeon_device *rdev)
6176 {
6177         radeon_vm_manager_fini(rdev);
6178         cik_cp_enable(rdev, false);
6179         cik_sdma_enable(rdev, false);
6180         r600_uvd_rbc_stop(rdev);
6181         radeon_uvd_suspend(rdev);
6182         cik_irq_suspend(rdev);
6183         radeon_wb_disable(rdev);
6184         cik_pcie_gart_disable(rdev);
6185         return 0;
6186 }
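
/*
 * Hedged usage sketch (ours, not from the original source): the driver
 * core is expected to pair these entry points across a sleep cycle; the
 * exact call sites live in the shared radeon PM code.
 *
 *   cik_suspend(rdev);   -- quiesce VM, CP, SDMA, UVD, IRQs, write-back, GART
 *   ... system sleeps ...
 *   cik_resume(rdev);    -- re-post the card via atom_asic_init() and
 *                           bring everything back up through cik_startup()
 */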
6187
6188 /* The plan is to move initialization into this function and to
6189  * use helper functions so that radeon_device_init does little
6190  * more than call the asic specific functions. This should also
6191  * allow us to remove a number of callbacks, such as
6192  * vram_info.
6193  */
6194 /**
6195  * cik_init - asic specific driver and hw init
6196  *
6197  * @rdev: radeon_device pointer
6198  *
6199  * Setup asic specific driver variables and program the hw
6200  * to a functional state (CIK).
6201  * Called at driver startup.
6202  * Returns 0 for success, errors for failure.
6203  */
6204 int cik_init(struct radeon_device *rdev)
6205 {
6206         struct radeon_ring *ring;
6207         int r;
6208
6209         /* Read BIOS */
6210         if (!radeon_get_bios(rdev)) {
6211                 if (ASIC_IS_AVIVO(rdev))
6212                         return -EINVAL;
6213         }
6214         /* Must be an ATOMBIOS */
6215         if (!rdev->is_atom_bios) {
6216                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6217                 return -EINVAL;
6218         }
6219         r = radeon_atombios_init(rdev);
6220         if (r)
6221                 return r;
6222
6223         /* Post card if necessary */
6224         if (!radeon_card_posted(rdev)) {
6225                 if (!rdev->bios) {
6226                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6227                         return -EINVAL;
6228                 }
6229                 DRM_INFO("GPU not posted. posting now...\n");
6230                 atom_asic_init(rdev->mode_info.atom_context);
6231         }
6232         /* init golden registers */
6233         cik_init_golden_registers(rdev);
6234         /* Initialize scratch registers */
6235         cik_scratch_init(rdev);
6236         /* Initialize surface registers */
6237         radeon_surface_init(rdev);
6238         /* Initialize clocks */
6239         radeon_get_clock_info(rdev->ddev);
6240
6241         /* Fence driver */
6242         r = radeon_fence_driver_init(rdev);
6243         if (r)
6244                 return r;
6245
6246         /* initialize memory controller */
6247         r = cik_mc_init(rdev);
6248         if (r)
6249                 return r;
6250         /* Memory manager */
6251         r = radeon_bo_init(rdev);
6252         if (r)
6253                 return r;
6254
6255         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6256         ring->ring_obj = NULL;
6257         r600_ring_init(rdev, ring, 1024 * 1024);
6258
6259         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6260         ring->ring_obj = NULL;
6261         r600_ring_init(rdev, ring, 1024 * 1024);
6262         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6263         if (r)
6264                 return r;
6265
6266         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6267         ring->ring_obj = NULL;
6268         r600_ring_init(rdev, ring, 1024 * 1024);
6269         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6270         if (r)
6271                 return r;
6272
6273         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6274         ring->ring_obj = NULL;
6275         r600_ring_init(rdev, ring, 256 * 1024);
6276
6277         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6278         ring->ring_obj = NULL;
6279         r600_ring_init(rdev, ring, 256 * 1024);
6280
6281         r = radeon_uvd_init(rdev);
6282         if (!r) {
6283                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6284                 ring->ring_obj = NULL;
6285                 r600_ring_init(rdev, ring, 4096);
6286         }
6287
6288         rdev->ih.ring_obj = NULL;
6289         r600_ih_ring_init(rdev, 64 * 1024);
6290
6291         r = r600_pcie_gart_init(rdev);
6292         if (r)
6293                 return r;
6294
6295         rdev->accel_working = true;
6296         r = cik_startup(rdev);
6297         if (r) {
6298                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6299                 cik_cp_fini(rdev);
6300                 cik_sdma_fini(rdev);
6301                 cik_irq_fini(rdev);
6302                 si_rlc_fini(rdev);
6303                 cik_mec_fini(rdev);
6304                 radeon_wb_fini(rdev);
6305                 radeon_ib_pool_fini(rdev);
6306                 radeon_vm_manager_fini(rdev);
6307                 radeon_irq_kms_fini(rdev);
6308                 cik_pcie_gart_fini(rdev);
6309                 rdev->accel_working = false;
6310         }
6311
6312         /* Don't start up if the MC ucode is missing.
6313          * The default clocks and voltages before the MC ucode
6314          * is loaded are not sufficient for advanced operations.
6315          */
6316         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6317                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6318                 return -EINVAL;
6319         }
6320
6321         return 0;
6322 }
6323
6324 /**
6325  * cik_fini - asic specific driver and hw fini
6326  *
6327  * @rdev: radeon_device pointer
6328  *
6329  * Tear down the asic specific driver variables and program the hw
6330  * to an idle state (CIK).
6331  * Called at driver unload.
6332  */
6333 void cik_fini(struct radeon_device *rdev)
6334 {
6335         cik_cp_fini(rdev);
6336         cik_sdma_fini(rdev);
6337         cik_irq_fini(rdev);
6338         si_rlc_fini(rdev);
6339         cik_mec_fini(rdev);
6340         radeon_wb_fini(rdev);
6341         radeon_vm_manager_fini(rdev);
6342         radeon_ib_pool_fini(rdev);
6343         radeon_irq_kms_fini(rdev);
6344         radeon_uvd_fini(rdev);
6345         cik_pcie_gart_fini(rdev);
6346         r600_vram_scratch_fini(rdev);
6347         radeon_gem_fini(rdev);
6348         radeon_fence_driver_fini(rdev);
6349         radeon_bo_fini(rdev);
6350         radeon_atombios_fini(rdev);
6351         kfree(rdev->bios);
6352         rdev->bios = NULL;
6353 }
6354
6355 /* display watermark setup */
6356 /**
6357  * dce8_line_buffer_adjust - Set up the line buffer
6358  *
6359  * @rdev: radeon_device pointer
6360  * @radeon_crtc: the selected display controller
6361  * @mode: the current display mode on the selected display
6362  * controller
6363  *
6364  * Set up the line buffer allocation for
6365  * the selected display controller (CIK).
6366  * Returns the line buffer size in pixels.
6367  */
6368 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6369                                    struct radeon_crtc *radeon_crtc,
6370                                    struct drm_display_mode *mode)
6371 {
6372         u32 tmp;
6373
6374         /*
6375          * Line Buffer Setup
6376          * There are 6 line buffers, one for each display controller.
6377          * There are 3 partitions per LB. Select the number of partitions
6378          * to enable based on the display width.  For display widths larger
6379          * than 4096, you need to use 2 display controllers and combine
6380          * them using the stereo blender.
6381          */
6382         if (radeon_crtc->base.enabled && mode) {
6383                 if (mode->crtc_hdisplay < 1920)
6384                         tmp = 1;
6385                 else if (mode->crtc_hdisplay < 2560)
6386                         tmp = 2;
6387                 else if (mode->crtc_hdisplay < 4096)
6388                         tmp = 0;
6389                 else {
6390                         DRM_DEBUG_KMS("Mode too big for LB!\n");
6391                         tmp = 0;
6392                 }
6393         } else
6394                 tmp = 1;
6395
6396         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6397                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6398
6399         if (radeon_crtc->base.enabled && mode) {
6400                 switch (tmp) {
6401                 case 0:
6402                 default:
6403                         return 4096 * 2;
6404                 case 1:
6405                         return 1920 * 2;
6406                 case 2:
6407                         return 2560 * 2;
6408                 }
6409         }
6410
6411         /* controller not enabled, so no lb used */
6412         return 0;
6413 }
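
/*
 * Worked example (ours): for a 1920x1080 mode, crtc_hdisplay == 1920 is
 * not < 1920 but is < 2560, so tmp = 2 is programmed via
 * LB_MEMORY_CONFIG(2) and the function returns 2560 * 2 = 5120 pixels
 * of line buffer for this controller.
 */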
6414
6415 /**
6416  * cik_get_number_of_dram_channels - get the number of dram channels
6417  *
6418  * @rdev: radeon_device pointer
6419  *
6420  * Look up the number of video ram channels (CIK).
6421  * Used for display watermark bandwidth calculations
6422  * Returns the number of dram channels
6423  */
6424 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6425 {
6426         u32 tmp = RREG32(MC_SHARED_CHMAP);
6427
6428         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6429         case 0:
6430         default:
6431                 return 1;
6432         case 1:
6433                 return 2;
6434         case 2:
6435                 return 4;
6436         case 3:
6437                 return 8;
6438         case 4:
6439                 return 3;
6440         case 5:
6441                 return 6;
6442         case 6:
6443                 return 10;
6444         case 7:
6445                 return 12;
6446         case 8:
6447                 return 16;
6448         }
6449 }
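
/*
 * Note (ours): the NOOFCHAN field is an enumeration, not a count, so the
 * mapping above is non-monotonic (e.g. encoding 4 means 3 channels); the
 * result is used as wm->dram_channels in the watermark math below.
 */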
6450
6451 struct dce8_wm_params {
6452         u32 dram_channels; /* number of dram channels */
6453         u32 yclk;          /* bandwidth per dram data pin in kHz */
6454         u32 sclk;          /* engine clock in kHz */
6455         u32 disp_clk;      /* display clock in kHz */
6456         u32 src_width;     /* viewport width */
6457         u32 active_time;   /* active display time in ns */
6458         u32 blank_time;    /* blank time in ns */
6459         bool interlaced;    /* mode is interlaced */
6460         fixed20_12 vsc;    /* vertical scale ratio */
6461         u32 num_heads;     /* number of active crtcs */
6462         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6463         u32 lb_size;       /* line buffer allocated to pipe */
6464         u32 vtaps;         /* vertical scaler taps */
6465 };
6466
6467 /**
6468  * dce8_dram_bandwidth - get the dram bandwidth
6469  *
6470  * @wm: watermark calculation data
6471  *
6472  * Calculate the raw dram bandwidth (CIK).
6473  * Used for display watermark bandwidth calculations
6474  * Returns the dram bandwidth in MBytes/s
6475  */
6476 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6477 {
6478         /* Calculate raw DRAM Bandwidth */
6479         fixed20_12 dram_efficiency; /* 0.7 */
6480         fixed20_12 yclk, dram_channels, bandwidth;
6481         fixed20_12 a;
6482
6483         a.full = dfixed_const(1000);
6484         yclk.full = dfixed_const(wm->yclk);
6485         yclk.full = dfixed_div(yclk, a);
6486         dram_channels.full = dfixed_const(wm->dram_channels * 4);
6487         a.full = dfixed_const(10);
6488         dram_efficiency.full = dfixed_const(7);
6489         dram_efficiency.full = dfixed_div(dram_efficiency, a);
6490         bandwidth.full = dfixed_mul(dram_channels, yclk);
6491         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6492
6493         return dfixed_trunc(bandwidth);
6494 }
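
/*
 * Worked example (ours): with wm->yclk = 1000000 (a 1 GHz effective DRAM
 * clock in kHz) and wm->dram_channels = 2, the fixed-point math above
 * yields (2 * 4) * (1000000 / 1000) * 0.7 = 5600 MBytes/s.  The
 * _for_display, data-return and DMIF variants below follow the same
 * fixed-point pattern with different constants.
 */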
6495
6496 /**
6497  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6498  *
6499  * @wm: watermark calculation data
6500  *
6501  * Calculate the dram bandwidth used for display (CIK).
6502  * Used for display watermark bandwidth calculations
6503  * Returns the dram bandwidth for display in MBytes/s
6504  */
6505 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6506 {
6507         /* Calculate DRAM Bandwidth and the part allocated to display. */
6508         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6509         fixed20_12 yclk, dram_channels, bandwidth;
6510         fixed20_12 a;
6511
6512         a.full = dfixed_const(1000);
6513         yclk.full = dfixed_const(wm->yclk);
6514         yclk.full = dfixed_div(yclk, a);
6515         dram_channels.full = dfixed_const(wm->dram_channels * 4);
6516         a.full = dfixed_const(10);
6517         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6518         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6519         bandwidth.full = dfixed_mul(dram_channels, yclk);
6520         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6521
6522         return dfixed_trunc(bandwidth);
6523 }
6524
6525 /**
6526  * dce8_data_return_bandwidth - get the data return bandwidth
6527  *
6528  * @wm: watermark calculation data
6529  *
6530  * Calculate the data return bandwidth used for display (CIK).
6531  * Used for display watermark bandwidth calculations
6532  * Returns the data return bandwidth in MBytes/s
6533  */
6534 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6535 {
6536         /* Calculate the display Data return Bandwidth */
6537         fixed20_12 return_efficiency; /* 0.8 */
6538         fixed20_12 sclk, bandwidth;
6539         fixed20_12 a;
6540
6541         a.full = dfixed_const(1000);
6542         sclk.full = dfixed_const(wm->sclk);
6543         sclk.full = dfixed_div(sclk, a);
6544         a.full = dfixed_const(10);
6545         return_efficiency.full = dfixed_const(8);
6546         return_efficiency.full = dfixed_div(return_efficiency, a);
6547         a.full = dfixed_const(32);
6548         bandwidth.full = dfixed_mul(a, sclk);
6549         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6550
6551         return dfixed_trunc(bandwidth);
6552 }
6553
6554 /**
6555  * dce8_dmif_request_bandwidth - get the dmif bandwidth
6556  *
6557  * @wm: watermark calculation data
6558  *
6559  * Calculate the dmif bandwidth used for display (CIK).
6560  * Used for display watermark bandwidth calculations
6561  * Returns the dmif bandwidth in MBytes/s
6562  */
6563 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6564 {
6565         /* Calculate the DMIF Request Bandwidth */
6566         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6567         fixed20_12 disp_clk, bandwidth;
6568         fixed20_12 a, b;
6569
6570         a.full = dfixed_const(1000);
6571         disp_clk.full = dfixed_const(wm->disp_clk);
6572         disp_clk.full = dfixed_div(disp_clk, a);
6573         a.full = dfixed_const(32);
6574         b.full = dfixed_mul(a, disp_clk);
6575
6576         a.full = dfixed_const(10);
6577         disp_clk_request_efficiency.full = dfixed_const(8);
6578         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6579
6580         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6581
6582         return dfixed_trunc(bandwidth);
6583 }
6584
6585 /**
6586  * dce8_available_bandwidth - get the min available bandwidth
6587  *
6588  * @wm: watermark calculation data
6589  *
6590  * Calculate the min available bandwidth used for display (CIK).
6591  * Used for display watermark bandwidth calculations
6592  * Returns the min available bandwidth in MBytes/s
6593  */
6594 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6595 {
6596         /* Calculate the available bandwidth. The display can use this temporarily but not on average. */
6597         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6598         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6599         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6600
6601         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6602 }
6603
6604 /**
6605  * dce8_average_bandwidth - get the average available bandwidth
6606  *
6607  * @wm: watermark calculation data
6608  *
6609  * Calculate the average available bandwidth used for display (CIK).
6610  * Used for display watermark bandwidth calculations
6611  * Returns the average available bandwidth in MBytes/s
6612  */
6613 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6614 {
6615         /* Calculate the display mode average bandwidth.
6616          * DisplayMode should contain the source and destination dimensions,
6617          * timing, etc.
6618          */
6619         fixed20_12 bpp;
6620         fixed20_12 line_time;
6621         fixed20_12 src_width;
6622         fixed20_12 bandwidth;
6623         fixed20_12 a;
6624
6625         a.full = dfixed_const(1000);
6626         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6627         line_time.full = dfixed_div(line_time, a);
6628         bpp.full = dfixed_const(wm->bytes_per_pixel);
6629         src_width.full = dfixed_const(wm->src_width);
6630         bandwidth.full = dfixed_mul(src_width, bpp);
6631         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6632         bandwidth.full = dfixed_div(bandwidth, line_time);
6633
6634         return dfixed_trunc(bandwidth);
6635 }
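
/*
 * Worked example (ours): for an unscaled 1080p mode (vsc = 1.0) with
 * 4 bytes per pixel, a 148.5 MHz pixel clock and 2200 pixel htotal,
 * dce8_program_watermarks() below derives an integer pixel period of
 * 6 ns, so line_time = 2200 * 6 = 13200 ns and the math above gives
 * roughly 1920 * 4 * 1.0 / 13.2 ~= 582 MBytes/s of display traffic.
 */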
6636
6637 /**
6638  * dce8_latency_watermark - get the latency watermark
6639  *
6640  * @wm: watermark calculation data
6641  *
6642  * Calculate the latency watermark (CIK).
6643  * Used for display watermark bandwidth calculations
6644  * Returns the latency watermark in ns
6645  */
6646 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6647 {
6648         /* First calculate the latency in ns */
6649         u32 mc_latency = 2000; /* 2000 ns. */
6650         u32 available_bandwidth = dce8_available_bandwidth(wm);
6651         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6652         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6653         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6654         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6655                 (wm->num_heads * cursor_line_pair_return_time);
6656         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6657         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6658         u32 tmp, dmif_size = 12288;
6659         fixed20_12 a, b, c;
6660
6661         if (wm->num_heads == 0)
6662                 return 0;
6663
6664         a.full = dfixed_const(2);
6665         b.full = dfixed_const(1);
6666         if ((wm->vsc.full > a.full) ||
6667             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6668             (wm->vtaps >= 5) ||
6669             ((wm->vsc.full >= a.full) && wm->interlaced))
6670                 max_src_lines_per_dst_line = 4;
6671         else
6672                 max_src_lines_per_dst_line = 2;
6673
6674         a.full = dfixed_const(available_bandwidth);
6675         b.full = dfixed_const(wm->num_heads);
6676         a.full = dfixed_div(a, b);
6677
6678         b.full = dfixed_const(mc_latency + 512);
6679         c.full = dfixed_const(wm->disp_clk);
6680         b.full = dfixed_div(b, c);
6681
6682         c.full = dfixed_const(dmif_size);
6683         b.full = dfixed_div(c, b);
6684
6685         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6686
6687         b.full = dfixed_const(1000);
6688         c.full = dfixed_const(wm->disp_clk);
6689         b.full = dfixed_div(c, b);
6690         c.full = dfixed_const(wm->bytes_per_pixel);
6691         b.full = dfixed_mul(b, c);
6692
6693         lb_fill_bw = min(tmp, dfixed_trunc(b));
6694
6695         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6696         b.full = dfixed_const(1000);
6697         c.full = dfixed_const(lb_fill_bw);
6698         b.full = dfixed_div(c, b);
6699         a.full = dfixed_div(a, b);
6700         line_fill_time = dfixed_trunc(a);
6701
6702         if (line_fill_time < wm->active_time)
6703                 return latency;
6704         else
6705                 return latency + (line_fill_time - wm->active_time);
6706
6707 }
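
/*
 * Worked numbers (ours, reusing the example values from the sketches
 * above): with available_bandwidth = 5600 MB/s, disp_clk = 148500 kHz
 * and num_heads = 1, the latency terms come out as
 *   worst_chunk_return_time      = 512 * 8 * 1000 / 5600 = 731 ns
 *   cursor_line_pair_return_time = 128 * 4 * 1000 / 5600 = 91 ns
 *   dc_latency                   = 40000000 / 148500     = 269 ns
 *   latency = 2000 + (2 * 731 + 1 * 91) + 269            = 3822 ns
 * before the line-fill-time correction at the end of the function.
 */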
6708
6709 /**
6710  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6711  * average and available dram bandwidth
6712  *
6713  * @wm: watermark calculation data
6714  *
6715  * Check if the display average bandwidth fits in the display
6716  * dram bandwidth (CIK).
6717  * Used for display watermark bandwidth calculations
6718  * Returns true if the display fits, false if not.
6719  */
6720 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6721 {
6722         if (dce8_average_bandwidth(wm) <=
6723             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6724                 return true;
6725         else
6726                 return false;
6727 }
6728
6729 /**
6730  * dce8_average_bandwidth_vs_available_bandwidth - check
6731  * average and available bandwidth
6732  *
6733  * @wm: watermark calculation data
6734  *
6735  * Check if the display average bandwidth fits in the display
6736  * available bandwidth (CIK).
6737  * Used for display watermark bandwidth calculations
6738  * Returns true if the display fits, false if not.
6739  */
6740 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6741 {
6742         if (dce8_average_bandwidth(wm) <=
6743             (dce8_available_bandwidth(wm) / wm->num_heads))
6744                 return true;
6745         else
6746                 return false;
6747 }
6748
6749 /**
6750  * dce8_check_latency_hiding - check latency hiding
6751  *
6752  * @wm: watermark calculation data
6753  *
6754  * Check latency hiding (CIK).
6755  * Used for display watermark bandwidth calculations
6756  * Returns true if the display fits, false if not.
6757  */
6758 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6759 {
6760         u32 lb_partitions = wm->lb_size / wm->src_width;
6761         u32 line_time = wm->active_time + wm->blank_time;
6762         u32 latency_tolerant_lines;
6763         u32 latency_hiding;
6764         fixed20_12 a;
6765
6766         a.full = dfixed_const(1);
6767         if (wm->vsc.full > a.full)
6768                 latency_tolerant_lines = 1;
6769         else {
6770                 if (lb_partitions <= (wm->vtaps + 1))
6771                         latency_tolerant_lines = 1;
6772                 else
6773                         latency_tolerant_lines = 2;
6774         }
6775
6776         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6777
6778         if (dce8_latency_watermark(wm) <= latency_hiding)
6779                 return true;
6780         else
6781                 return false;
6782 }
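
/*
 * Worked example (ours): continuing the 1080p case, lb_size = 5120 and
 * src_width = 1920 give lb_partitions = 2; with vsc = 1.0 and vtaps = 1
 * we get 2 <= (1 + 1), so latency_tolerant_lines = 1 and
 * latency_hiding = line_time + blank_time.  The mode fits if the
 * watermark computed above stays below that budget.
 */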
6783
6784 /**
6785  * dce8_program_watermarks - program display watermarks
6786  *
6787  * @rdev: radeon_device pointer
6788  * @radeon_crtc: the selected display controller
6789  * @lb_size: line buffer size
6790  * @num_heads: number of display controllers in use
6791  *
6792  * Calculate and program the display watermarks for the
6793  * selected display controller (CIK).
6794  */
6795 static void dce8_program_watermarks(struct radeon_device *rdev,
6796                                     struct radeon_crtc *radeon_crtc,
6797                                     u32 lb_size, u32 num_heads)
6798 {
6799         struct drm_display_mode *mode = &radeon_crtc->base.mode;
6800         struct dce8_wm_params wm;
6801         u32 pixel_period;
6802         u32 line_time = 0;
6803         u32 latency_watermark_a = 0, latency_watermark_b = 0;
6804         u32 tmp, wm_mask;
6805
6806         if (radeon_crtc->base.enabled && num_heads && mode) {
6807                 pixel_period = 1000000 / (u32)mode->clock;
6808                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6809
6810                 wm.yclk = rdev->pm.current_mclk * 10;
6811                 wm.sclk = rdev->pm.current_sclk * 10;
6812                 wm.disp_clk = mode->clock;
6813                 wm.src_width = mode->crtc_hdisplay;
6814                 wm.active_time = mode->crtc_hdisplay * pixel_period;
6815                 wm.blank_time = line_time - wm.active_time;
6816                 wm.interlaced = false;
6817                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6818                         wm.interlaced = true;
6819                 wm.vsc = radeon_crtc->vsc;
6820                 wm.vtaps = 1;
6821                 if (radeon_crtc->rmx_type != RMX_OFF)
6822                         wm.vtaps = 2;
6823                 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6824                 wm.lb_size = lb_size;
6825                 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6826                 wm.num_heads = num_heads;
6827
6828                 /* set for high clocks */
6829                 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6830                 /* set for low clocks */
6831                 /* wm.yclk = low clk; wm.sclk = low clk */
6832                 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6833
6834                 /* possibly force display priority to high */
6835                 /* should really do this at mode validation time... */
6836                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6837                     !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6838                     !dce8_check_latency_hiding(&wm) ||
6839                     (rdev->disp_priority == 2)) {
6840                         DRM_DEBUG_KMS("force priority to high\n");
6841                 }
6842         }
6843
6844         /* select wm A */
6845         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6846         tmp = wm_mask;
6847         tmp &= ~LATENCY_WATERMARK_MASK(3);
6848         tmp |= LATENCY_WATERMARK_MASK(1);
6849         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6850         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6851                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6852                 LATENCY_HIGH_WATERMARK(line_time)));
6853         /* select wm B */
6854         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6855         tmp &= ~LATENCY_WATERMARK_MASK(3);
6856         tmp |= LATENCY_WATERMARK_MASK(2);
6857         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6858         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6859                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6860                 LATENCY_HIGH_WATERMARK(line_time)));
6861         /* restore original selection */
6862         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6863 }
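
/*
 * Note (ours): the read-modify-write sequence above uses
 * LATENCY_WATERMARK_MASK(3) to clear the watermark-select field and
 * LATENCY_WATERMARK_MASK(1)/(2) to address set A or B, so each
 * DPG_PIPE_LATENCY_CONTROL write lands in a different watermark set;
 * the original mask register value is restored afterwards.  For the
 * 1080p example, pixel_period = 1000000 / 148500 = 6 ns and
 * line_time = min(2200 * 6, 65535) = 13200 ns.
 */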
6864
6865 /**
6866  * dce8_bandwidth_update - program display watermarks
6867  *
6868  * @rdev: radeon_device pointer
6869  *
6870  * Calculate and program the display watermarks and line
6871  * buffer allocation (CIK).
6872  */
6873 void dce8_bandwidth_update(struct radeon_device *rdev)
6874 {
6875         struct drm_display_mode *mode = NULL;
6876         u32 num_heads = 0, lb_size;
6877         int i;
6878
6879         radeon_update_display_priority(rdev);
6880
6881         for (i = 0; i < rdev->num_crtc; i++) {
6882                 if (rdev->mode_info.crtcs[i]->base.enabled)
6883                         num_heads++;
6884         }
6885         for (i = 0; i < rdev->num_crtc; i++) {
6886                 mode = &rdev->mode_info.crtcs[i]->base.mode;
6887                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6888                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6889         }
6890 }
6891
6892 /**
6893  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6894  *
6895  * @rdev: radeon_device pointer
6896  *
6897  * Fetches a GPU clock counter snapshot (CIK).
6898  * Returns the 64 bit clock counter snapshot.
6899  */
6900 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6901 {
6902         uint64_t clock;
6903
6904         mutex_lock(&rdev->gpu_clock_mutex);
6905         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6906         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6907                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6908         mutex_unlock(&rdev->gpu_clock_mutex);
6909         return clock;
6910 }
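
/*
 * Hedged usage sketch (ours): measuring elapsed GPU clocks around a
 * workload; the surrounding submission/wait code is assumed, not shown.
 *
 *   uint64_t start, elapsed;
 *
 *   start = cik_get_gpu_clock_counter(rdev);
 *   ... submit work and wait for it to finish ...
 *   elapsed = cik_get_gpu_clock_counter(rdev) - start;
 */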
6911
6912 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6913                               u32 cntl_reg, u32 status_reg)
6914 {
6915         int r, i;
6916         struct atom_clock_dividers dividers;
6917         uint32_t tmp;
6918
6919         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6920                                            clock, false, &dividers);
6921         if (r)
6922                 return r;
6923
6924         tmp = RREG32_SMC(cntl_reg);
6925         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6926         tmp |= dividers.post_divider;
6927         WREG32_SMC(cntl_reg, tmp);
6928
6929         for (i = 0; i < 100; i++) {
6930                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6931                         break;
6932                 mdelay(10);
6933         }
6934         if (i == 100)
6935                 return -ETIMEDOUT;
6936
6937         return 0;
6938 }
6939
6940 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6941 {
6942         int r = 0;
6943
6944         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6945         if (r)
6946                 return r;
6947
6948         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6949         return r;
6950 }
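
/*
 * Hedged usage sketch (ours): the UVD code picks the VCLK/DCLK pair;
 * the 53300/40000 kHz values are illustrative playback clocks, not a
 * requirement of this function.
 *
 *   r = cik_set_uvd_clocks(rdev, 53300, 40000);
 *   if (r)
 *           DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r);
 */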
6951
6952 int cik_uvd_resume(struct radeon_device *rdev)
6953 {
6954         uint64_t addr;
6955         uint32_t size;
6956         int r;
6957
6958         r = radeon_uvd_resume(rdev);
6959         if (r)
6960                 return r;
6961
6962         /* program the VCPU memory controller bits 0-27 */
6963         addr = rdev->uvd.gpu_addr >> 3;
6964         size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6965         WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6966         WREG32(UVD_VCPU_CACHE_SIZE0, size);
6967
6968         addr += size;
6969         size = RADEON_UVD_STACK_SIZE >> 3;
6970         WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6971         WREG32(UVD_VCPU_CACHE_SIZE1, size);
6972
6973         addr += size;
6974         size = RADEON_UVD_HEAP_SIZE >> 3;
6975         WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6976         WREG32(UVD_VCPU_CACHE_SIZE2, size);
6977
6978         /* bits 28-31 */
6979         addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6980         WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6981
6982         /* bits 32-39 */
6983         addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
6984         WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
6985
6986         return 0;
6987 }
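
/*
 * Layout sketch (ours): the three VCPU cache regions above are carved
 * consecutively out of the single UVD buffer, with offsets and sizes
 * programmed in 8-byte units (hence the >> 3 shifts):
 *
 *   rdev->uvd.gpu_addr
 *   +---------------------------+  OFFSET0: firmware image (fw size + 4,
 *   |                                        page aligned)
 *   +---------------------------+  OFFSET1: VCPU stack (RADEON_UVD_STACK_SIZE)
 *   +---------------------------+  OFFSET2: VCPU heap  (RADEON_UVD_HEAP_SIZE)
 */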