/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64
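/* Note: the ucode sizes above are in dwords; cik_init_microcode() below
 * multiplies by 4 to get the expected on-disk firmware size in bytes.
 */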

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
        u32 r;

        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        r = RREG32(PCIE_DATA);
        return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        WREG32(PCIE_DATA, v);
        (void)RREG32(PCIE_DATA);
}
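
/*
 * Illustrative sketch (not from the original file): a read-modify-write
 * helper built on the two indirect accessors above.  The helper name is
 * hypothetical; real callers normally go through the driver's indirect
 * register macros instead of open-coding this.
 */
static inline void cik_pciep_rmw_example(struct radeon_device *rdev,
                                         u32 reg, u32 clr, u32 set)
{
        u32 tmp = cik_pciep_rreg(rdev, reg);

        tmp = (tmp & ~clr) | set;
        cik_pciep_wreg(rdev, reg, tmp);
}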

static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28355, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
        switch (rdev->family) {
        case CHIP_BONAIRE:
                radeon_program_register_sequence(rdev,
                                                 bonaire_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
                break;
        case CHIP_KABINI:
                radeon_program_register_sequence(rdev,
                                                 kalindi_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
                break;
        case CHIP_KAVERI:
                radeon_program_register_sequence(rdev,
                                                 spectre_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
                break;
        default:
                break;
        }
}
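
/*
 * For reference: each golden-register entry above is an
 * {offset, and_mask, or_mask} triplet.  A sketch of how
 * radeon_program_register_sequence() (defined in radeon_device.c)
 * applies one table -- illustrative, from memory of that helper, not a
 * verbatim copy:
 */
#if 0
        for (i = 0; i < array_size; i += 3) {
                reg = registers[i + 0];
                and_mask = registers[i + 1];
                or_mask = registers[i + 2];

                if (and_mask == 0xffffffff) {
                        tmp = or_mask;
                } else {
                        tmp = RREG32(reg);
                        tmp &= ~and_mask;
                        tmp |= or_mask;
                }
                WREG32(reg, tmp);
        }
#endif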

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
        u32 reference_clock = rdev->clock.spll.reference_freq;

        if (rdev->flags & RADEON_IS_IGP) {
                if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
                        return reference_clock / 2;
        } else {
                if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
                        return reference_clock / 4;
        }
        return reference_clock;
}
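
/*
 * Illustrative sketch (not from the original file): the value returned
 * by cik_get_xclk() is in the same units as
 * rdev->clock.spll.reference_freq.  Assuming the 10 kHz convention used
 * by the other radeon clock fields, converting to MHz would look
 * roughly like this:
 */
static inline u32 cik_xclk_mhz_example(struct radeon_device *rdev)
{
        return cik_get_xclk(rdev) / 100;
}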

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
        if (offset < rdev->doorbell.size) {
                return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
                return 0;
        }
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
        if (offset < rdev->doorbell.size) {
                writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
        }
}
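
/*
 * Hypothetical usage sketch (not from the original file): a compute
 * ring pushing its write pointer through the doorbell aperture.  The
 * ring->doorbell_offset and ring->wptr field names are assumptions
 * based on how the compute rings are wired up elsewhere in the driver.
 */
#if 0
        /* in a compute ring's set_wptr callback: */
        cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
#endif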

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
        {0x00000070, 0x04400000},
        {0x00000071, 0x80c01803},
        {0x00000072, 0x00004004},
        {0x00000073, 0x00000100},
        {0x00000074, 0x00ff0000},
        {0x00000075, 0x34000000},
        {0x00000076, 0x08000014},
        {0x00000077, 0x00cc08ec},
        {0x00000078, 0x00000400},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x04090000},
        {0x0000007c, 0x00000000},
        {0x0000007e, 0x4408a8e8},
        {0x0000007f, 0x00000304},
        {0x00000080, 0x00000000},
        {0x00000082, 0x00000001},
        {0x00000083, 0x00000002},
        {0x00000084, 0xf3e4f400},
        {0x00000085, 0x052024e3},
        {0x00000087, 0x00000000},
        {0x00000088, 0x01000000},
        {0x0000008a, 0x1c0a0000},
        {0x0000008b, 0xff010000},
        {0x0000008d, 0xffffefff},
        {0x0000008e, 0xfff3efff},
        {0x0000008f, 0xfff3efbf},
        {0x00000092, 0xf7ffffff},
        {0x00000093, 0xffffff7f},
        {0x00000095, 0x00101101},
        {0x00000096, 0x00000fff},
        {0x00000097, 0x00116fff},
        {0x00000098, 0x60010000},
        {0x00000099, 0x10010000},
        {0x0000009a, 0x00006000},
        {0x0000009b, 0x00001000},
        {0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
                            u32 me, u32 pipe, u32 queue, u32 vmid)
{
        u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
                             MEID(me & 0x3) |
                             VMID(vmid & 0xf) |
                             QUEUEID(queue & 0x7));
        WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
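
/*
 * Typical usage sketch (hypothetical, not from the original file):
 * select an instance, program the instanced registers, then restore the
 * default instance.  Serializing with an rdev->srbm_mutex is an
 * assumption about how callers avoid racing on SRBM_GFX_CNTL.
 */
#if 0
        mutex_lock(&rdev->srbm_mutex);
        cik_srbm_select(rdev, me, pipe, queue, vmid);
        /* ... program per-queue/per-vmid registers ... */
        cik_srbm_select(rdev, 0, 0, 0, 0);
        mutex_unlock(&rdev->srbm_mutex);
#endif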

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        u32 running;
        u32 *io_mc_regs;
        int i, ucode_size, regs_size;

        if (!rdev->mc_fw)
                return -EINVAL;

        switch (rdev->family) {
        case CHIP_BONAIRE:
        default:
                io_mc_regs = (u32 *)&bonaire_io_mc_regs;
                ucode_size = CIK_MC_UCODE_SIZE;
                regs_size = BONAIRE_IO_MC_REGS_SIZE;
                break;
        }

        running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

        /* only load the ucode when the MC engine is halted; the old
         * blackout save/restore here was dead code since it was guarded
         * by the same (running == 0) condition and could never execute.
         */
        if (running == 0) {
                /* reset the engine and set to writable */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

                /* load mc io regs */
                for (i = 0; i < regs_size; i++) {
                        WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
                        WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
                }
                /* load the MC ucode */
                fw_data = (const __be32 *)rdev->mc_fw->data;
                for (i = 0; i < ucode_size; i++)
                        WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

                /* put the engine back into the active state */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

                /* wait for training to complete */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
                                break;
                        udelay(1);
                }
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
                                break;
                        udelay(1);
                }
        }

        return 0;
}
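
/*
 * Illustrative call site (hypothetical placement, not from the original
 * file): the startup path would load the MC ucode before bringing up
 * the memory controller and treat failure as fatal, along these lines:
 */
#if 0
        r = ci_mc_load_microcode(rdev);
        if (r) {
                DRM_ERROR("Failed to load MC firmware!\n");
                return r;
        }
#endif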

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
        const char *chip_name;
        size_t pfp_req_size, me_req_size, ce_req_size,
                mec_req_size, rlc_req_size, mc_req_size,
                sdma_req_size;
        char fw_name[30];
        int err;

        DRM_DEBUG("\n");

        switch (rdev->family) {
        case CHIP_BONAIRE:
                chip_name = "BONAIRE";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
                mc_req_size = CIK_MC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_KAVERI:
                chip_name = "KAVERI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KV_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_KABINI:
                chip_name = "KABINI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KB_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        default: BUG();
        }

        DRM_INFO("Loading %s Microcode\n", chip_name);

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
        err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->pfp_fw->size != pfp_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->pfp_fw->size, fw_name);
                err = -EINVAL;
                goto out;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->me_fw->size != me_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->me_fw->size, fw_name);
                err = -EINVAL;
                goto out;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
        err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->ce_fw->size != ce_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->ce_fw->size, fw_name);
                err = -EINVAL;
                goto out;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
        err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->mec_fw->size != mec_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->mec_fw->size, fw_name);
                err = -EINVAL;
                goto out;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
        err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->rlc_fw->size != rlc_req_size) {
                printk(KERN_ERR
                       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
                       rdev->rlc_fw->size, fw_name);
                err = -EINVAL;
                goto out;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
        err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->sdma_fw->size != sdma_req_size) {
                printk(KERN_ERR
                       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
                       rdev->sdma_fw->size, fw_name);
                err = -EINVAL;
                goto out;
        }

        /* No MC ucode on APUs */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
                if (err)
                        goto out;
                if (rdev->mc_fw->size != mc_req_size) {
                        printk(KERN_ERR
                               "cik_mc: Bogus length %zu in firmware \"%s\"\n",
                               rdev->mc_fw->size, fw_name);
                        err = -EINVAL;
                }
        }

out:
        if (err) {
                if (err != -EINVAL)
                        printk(KERN_ERR
                               "cik_cp: Failed to load firmware \"%s\"\n",
                               fw_name);
                release_firmware(rdev->pfp_fw);
                rdev->pfp_fw = NULL;
                release_firmware(rdev->me_fw);
                rdev->me_fw = NULL;
                release_firmware(rdev->ce_fw);
                rdev->ce_fw = NULL;
                release_firmware(rdev->mec_fw);
                rdev->mec_fw = NULL;
                release_firmware(rdev->rlc_fw);
                rdev->rlc_fw = NULL;
                release_firmware(rdev->sdma_fw);
                rdev->sdma_fw = NULL;
                release_firmware(rdev->mc_fw);
                rdev->mc_fw = NULL;
        }
        return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
        const u32 num_tile_mode_states = 32;
        const u32 num_secondary_tile_mode_states = 16;
        u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
        u32 num_pipe_configs;
        u32 num_rbs = rdev->config.cik.max_backends_per_se *
                rdev->config.cik.max_shader_engines;

        switch (rdev->config.cik.mem_row_size_in_kb) {
        case 1:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
                break;
        case 2:
        default:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
                break;
        case 4:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
                break;
        }

        num_pipe_configs = rdev->config.cik.max_tile_pipes;
        if (num_pipe_configs > 8)
                num_pipe_configs = 8; /* ??? */

        if (num_pipe_configs == 8) {
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                        switch (reg_offset) {
                        case 0:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
                                break;
                        case 1:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
                                break;
                        case 2:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                break;
                        case 3:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
                                break;
                        case 4:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(split_equal_to_row_size));
                                break;
                        case 5:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                                break;
                        case 6:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                break;
                        case 7:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(split_equal_to_row_size));
                                break;
                        case 8:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
                                break;
                        case 9:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
                                break;
                        case 10:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 11:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 12:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 13:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
                                break;
                        case 14:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 16:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 17:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 27:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
                                break;
                        case 28:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 29:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 30:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        default:
                                gb_tile_moden = 0;
                                break;
                        }
                        rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
                        switch (reg_offset) {
                        case 0:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 1:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 2:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 3:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 4:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_8_BANK));
                                break;
                        case 5:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_4_BANK));
                                break;
                        case 6:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_2_BANK));
                                break;
                        case 8:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 9:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 10:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 11:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 12:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_8_BANK));
                                break;
                        case 13:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_4_BANK));
                                break;
                        case 14:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_2_BANK));
                                break;
                        default:
                                gb_tile_moden = 0;
                                break;
                        }
                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
        } else if (num_pipe_configs == 4) {
                if (num_rbs == 4) {
                        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                                switch (reg_offset) {
                                case 0:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
                                        break;
                                case 1:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
                                        break;
                                case 2:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                        break;
                                case 3:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
                                        break;
                                case 4:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(split_equal_to_row_size));
                                        break;
                                case 5:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                                        break;
                                case 6:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                        break;
                                case 7:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(split_equal_to_row_size));
                                        break;
                                case 8:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16));
                                        break;
                                case 9:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
                                        break;
                                case 10:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 11:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 12:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 13:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1224                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1225                                         break;
1226                                 case 14:
1227                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1229                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1230                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1231                                         break;
1232                                 case 16:
1233                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1234                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1235                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1236                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1237                                         break;
1238                                 case 17:
1239                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1240                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1241                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1242                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1243                                         break;
1244                                 case 27:
1245                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1246                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1247                                         break;
1248                                 case 28:
1249                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1250                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1251                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1253                                         break;
1254                                 case 29:
1255                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1256                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1257                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1258                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1259                                         break;
1260                                 case 30:
1261                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1262                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1263                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1264                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1265                                         break;
1266                                 default:
1267                                         gb_tile_moden = 0;
1268                                         break;
1269                                 }
1270                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1271                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1272                         }
1273                 } else if (num_rbs < 4) {
1274                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1275                                 switch (reg_offset) {
1276                                 case 0:
1277                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1279                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1281                                         break;
1282                                 case 1:
1283                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1284                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1285                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1286                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1287                                         break;
1288                                 case 2:
1289                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1290                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1291                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1292                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1293                                         break;
1294                                 case 3:
1295                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1296                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1297                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1298                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1299                                         break;
1300                                 case 4:
1301                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1302                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1303                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1304                                                          TILE_SPLIT(split_equal_to_row_size));
1305                                         break;
1306                                 case 5:
1307                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1308                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1309                                         break;
1310                                 case 6:
1311                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1312                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1313                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1314                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1315                                         break;
1316                                 case 7:
1317                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1318                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1319                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320                                                          TILE_SPLIT(split_equal_to_row_size));
1321                                         break;
1322                                 case 8:
1323                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1324                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
1325                                         break;
1326                                 case 9:
1327                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1328                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1329                                         break;
1330                                 case 10:
1331                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1332                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1333                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1334                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1335                                         break;
1336                                 case 11:
1337                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1338                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1339                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1340                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1341                                         break;
1342                                 case 12:
1343                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1344                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1345                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1346                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1347                                         break;
1348                                 case 13:
1349                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1350                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1351                                         break;
1352                                 case 14:
1353                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1354                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1355                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1356                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1357                                         break;
1358                                 case 16:
1359                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1360                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1361                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1362                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1363                                         break;
1364                                 case 17:
1365                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1366                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1367                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1368                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1369                                         break;
1370                                 case 27:
1371                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1372                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1373                                         break;
1374                                 case 28:
1375                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1376                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1377                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1378                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1379                                         break;
1380                                 case 29:
1381                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1382                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1383                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1384                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1385                                         break;
1386                                 case 30:
1387                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1388                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1389                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1390                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391                                         break;
1392                                 default:
1393                                         gb_tile_moden = 0;
1394                                         break;
1395                                 }
1396                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1397                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1398                         }
1399                 }
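                     /* second table: macrotile (bank) parameters - bank
                      * width/height, macro tile aspect and bank count for
                      * each macrotile mode
                      */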
1400                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1401                         switch (reg_offset) {
1402                         case 0:
1403                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1404                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1405                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1406                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1407                                 break;
1408                         case 1:
1409                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1410                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1411                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1412                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1413                                 break;
1414                         case 2:
1415                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1416                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1417                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1418                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1419                                 break;
1420                         case 3:
1421                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1424                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1425                                 break;
1426                         case 4:
1427                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1429                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1430                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1431                                 break;
1432                         case 5:
1433                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1434                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1435                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1436                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1437                                 break;
1438                         case 6:
1439                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1440                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1441                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1442                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1443                                 break;
1444                         case 8:
1445                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1446                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1447                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1448                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1449                                 break;
1450                         case 9:
1451                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1452                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1453                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1454                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1455                                 break;
1456                         case 10:
1457                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1460                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1461                                 break;
1462                         case 11:
1463                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1465                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1466                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1467                                 break;
1468                         case 12:
1469                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1470                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1471                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1472                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1473                                 break;
1474                         case 13:
1475                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1476                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1477                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1478                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1479                                 break;
1480                         case 14:
1481                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1482                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1483                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1484                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1485                                 break;
1486                         default:
1487                                 gb_tile_moden = 0;
1488                                 break;
1489                         }
1490                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1491                 }
1492         } else if (num_pipe_configs == 2) {
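                     /* 2-pipe parts (e.g. Kabini): every tiled mode that sets
                      * a pipe config uses ADDR_SURF_P2
                      */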
1493                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1494                         switch (reg_offset) {
1495                         case 0:
1496                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1497                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1498                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1499                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1500                                 break;
1501                         case 1:
1502                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1503                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1504                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1505                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1506                                 break;
1507                         case 2:
1508                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1509                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1510                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1511                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1512                                 break;
1513                         case 3:
1514                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1515                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1516                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1517                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1518                                 break;
1519                         case 4:
1520                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1521                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1522                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1523                                                  TILE_SPLIT(split_equal_to_row_size));
1524                                 break;
1525                         case 5:
1526                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1527                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1528                                 break;
1529                         case 6:
1530                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1531                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1532                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1533                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1534                                 break;
1535                         case 7:
1536                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1537                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1538                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1539                                                  TILE_SPLIT(split_equal_to_row_size));
1540                                 break;
1541                         case 8:
1542                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1543                                 break;
1544                         case 9:
1545                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1546                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1547                                 break;
1548                         case 10:
1549                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1550                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1551                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1552                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1553                                 break;
1554                         case 11:
1555                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1556                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1557                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1558                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1559                                 break;
1560                         case 12:
1561                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1562                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1563                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1564                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1565                                 break;
1566                         case 13:
1567                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1568                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1569                                 break;
1570                         case 14:
1571                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1572                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1573                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1574                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1575                                 break;
1576                         case 16:
1577                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1578                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1579                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1580                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1581                                 break;
1582                         case 17:
1583                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1584                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1585                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1586                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1587                                 break;
1588                         case 27:
1589                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1590                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1591                                 break;
1592                         case 28:
1593                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1594                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1595                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1596                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1597                                 break;
1598                         case 29:
1599                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1600                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1601                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1602                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1603                                 break;
1604                         case 30:
1605                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1606                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1607                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1608                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1609                                 break;
1610                         default:
1611                                 gb_tile_moden = 0;
1612                                 break;
1613                         }
1614                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1615                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1616                 }
1617                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1618                         switch (reg_offset) {
1619                         case 0:
1620                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1621                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1622                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1623                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1624                                 break;
1625                         case 1:
1626                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1627                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1628                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1629                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1630                                 break;
1631                         case 2:
1632                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1633                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1634                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1635                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1636                                 break;
1637                         case 3:
1638                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1639                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1640                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1641                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1642                                 break;
1643                         case 4:
1644                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1645                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1646                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1647                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1648                                 break;
1649                         case 5:
1650                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1651                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1652                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1653                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1654                                 break;
1655                         case 6:
1656                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1657                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1658                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1659                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1660                                 break;
1661                         case 8:
1662                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1663                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1664                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1665                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1666                                 break;
1667                         case 9:
1668                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1669                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1670                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1671                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1672                                 break;
1673                         case 10:
1674                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1675                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1676                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1677                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1678                                 break;
1679                         case 11:
1680                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1681                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1682                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1683                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1684                                 break;
1685                         case 12:
1686                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1687                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1688                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1689                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1690                                 break;
1691                         case 13:
1692                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1693                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1694                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1695                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1696                                 break;
1697                         case 14:
1698                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1699                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1700                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1701                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1702                                 break;
1703                         default:
1704                                 gb_tile_moden = 0;
1705                                 break;
1706                         }
1707                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1708                 }
1709         } else
1710                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1711 }
1712
1713 /**
1714  * cik_select_se_sh - select which SE, SH to address
1715  *
1716  * @rdev: radeon_device pointer
1717  * @se_num: shader engine to address
1718  * @sh_num: sh block to address
1719  *
1720  * Select which SE, SH combinations to address. Certain
1721  * registers are instanced per SE or SH.  0xffffffff means
1722  * broadcast to all SEs or SHs (CIK).
1723  */
1724 static void cik_select_se_sh(struct radeon_device *rdev,
1725                              u32 se_num, u32 sh_num)
1726 {
1727         u32 data = INSTANCE_BROADCAST_WRITES;
1728
1729         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1730                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1731         else if (se_num == 0xffffffff)
1732                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1733         else if (sh_num == 0xffffffff)
1734                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1735         else
1736                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1737         WREG32(GRBM_GFX_INDEX, data);
1738 }
1739
1740 /**
1741  * cik_create_bitmask - create a bitmask
1742  *
1743  * @bit_width: length of the mask
1744  *
1745  * Create a variable length bit mask (CIK).
1746  * Returns the bitmask.
1747  */
1748 static u32 cik_create_bitmask(u32 bit_width)
1749 {
1750         u32 i, mask = 0;
1751
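             /* shift in one set bit per iteration, e.g. bit_width = 4
              * yields 0xf; equivalent to (1 << bit_width) - 1 for
              * bit_width < 32
              */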
1752         for (i = 0; i < bit_width; i++) {
1753                 mask <<= 1;
1754                 mask |= 1;
1755         }
1756         return mask;
1757 }
1758
1759 /**
1760  * cik_get_rb_disabled - get the disabled render backends
1761  *
1762  * @rdev: radeon_device pointer
1763  * @max_rb_num: max RBs (render backends) for the asic
1764  * @se_num: number of SEs (shader engines) for the asic
1765  * @sh_per_se: number of SH blocks per SE for the asic
1766  *
1767  * Calculates the bitmask of disabled RBs (CIK).
1768  * Returns the disabled RB bitmask.
1769  */
1770 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1771                               u32 max_rb_num, u32 se_num,
1772                               u32 sh_per_se)
1773 {
1774         u32 data, mask;
1775
1776         data = RREG32(CC_RB_BACKEND_DISABLE);
1777         if (data & 1)
1778                 data &= BACKEND_DISABLE_MASK;
1779         else
1780                 data = 0;
1781         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1782
1783         data >>= BACKEND_DISABLE_SHIFT;
1784
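             /* keep only one SH's worth of RB bits */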
1785         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1786
1787         return data & mask;
1788 }
1789
1790 /**
1791  * cik_setup_rb - setup the RBs on the asic
1792  *
1793  * @rdev: radeon_device pointer
1794  * @se_num: number of SEs (shader engines) for the asic
1795  * @sh_per_se: number of SH blocks per SE for the asic
1796  * @max_rb_num: max RBs (render backends) for the asic
1797  *
1798  * Configures per-SE/SH RB registers (CIK).
1799  */
1800 static void cik_setup_rb(struct radeon_device *rdev,
1801                          u32 se_num, u32 sh_per_se,
1802                          u32 max_rb_num)
1803 {
1804         int i, j;
1805         u32 data, mask;
1806         u32 disabled_rbs = 0;
1807         u32 enabled_rbs = 0;
1808
1809         for (i = 0; i < se_num; i++) {
1810                 for (j = 0; j < sh_per_se; j++) {
1811                         cik_select_se_sh(rdev, i, j);
1812                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1813                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1814                 }
1815         }
1816         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1817
1818         mask = 1;
1819         for (i = 0; i < max_rb_num; i++) {
1820                 if (!(disabled_rbs & mask))
1821                         enabled_rbs |= mask;
1822                 mask <<= 1;
1823         }
1824
1825         for (i = 0; i < se_num; i++) {
1826                 cik_select_se_sh(rdev, i, 0xffffffff);
1827                 data = 0;
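                     /* pack a 2-bit RB_MAP field per SH, chosen by which
                      * RBs of the current pair are still enabled
                      */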
1828                 for (j = 0; j < sh_per_se; j++) {
1829                         switch (enabled_rbs & 3) {
1830                         case 1:
1831                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1832                                 break;
1833                         case 2:
1834                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1835                                 break;
1836                         case 3:
1837                         default:
1838                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1839                                 break;
1840                         }
1841                         enabled_rbs >>= 2;
1842                 }
1843                 WREG32(PA_SC_RASTER_CONFIG, data);
1844         }
1845         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1846 }
1847
1848 /**
1849  * cik_gpu_init - setup the 3D engine
1850  *
1851  * @rdev: radeon_device pointer
1852  *
1853  * Configures the 3D engine and tiling configuration
1854  * registers so that the 3D engine is usable.
1855  */
1856 static void cik_gpu_init(struct radeon_device *rdev)
1857 {
1858         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1859         u32 mc_shared_chmap, mc_arb_ramcfg;
1860         u32 hdp_host_path_cntl;
1861         u32 tmp;
1862         int i, j;
1863
1864         switch (rdev->family) {
1865         case CHIP_BONAIRE:
1866                 rdev->config.cik.max_shader_engines = 2;
1867                 rdev->config.cik.max_tile_pipes = 4;
1868                 rdev->config.cik.max_cu_per_sh = 7;
1869                 rdev->config.cik.max_sh_per_se = 1;
1870                 rdev->config.cik.max_backends_per_se = 2;
1871                 rdev->config.cik.max_texture_channel_caches = 4;
1872                 rdev->config.cik.max_gprs = 256;
1873                 rdev->config.cik.max_gs_threads = 32;
1874                 rdev->config.cik.max_hw_contexts = 8;
1875
1876                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1877                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1878                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1879                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1880                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1881                 break;
1882         case CHIP_KAVERI:
1883                 /* TODO */
1884                 break;
1885         case CHIP_KABINI:
1886         default:
1887                 rdev->config.cik.max_shader_engines = 1;
1888                 rdev->config.cik.max_tile_pipes = 2;
1889                 rdev->config.cik.max_cu_per_sh = 2;
1890                 rdev->config.cik.max_sh_per_se = 1;
1891                 rdev->config.cik.max_backends_per_se = 1;
1892                 rdev->config.cik.max_texture_channel_caches = 2;
1893                 rdev->config.cik.max_gprs = 256;
1894                 rdev->config.cik.max_gs_threads = 16;
1895                 rdev->config.cik.max_hw_contexts = 8;
1896
1897                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1898                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1899                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1900                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1901                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1902                 break;
1903         }
1904
1905         /* Initialize HDP */
1906         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1907                 WREG32((0x2c14 + j), 0x00000000);
1908                 WREG32((0x2c18 + j), 0x00000000);
1909                 WREG32((0x2c1c + j), 0x00000000);
1910                 WREG32((0x2c20 + j), 0x00000000);
1911                 WREG32((0x2c24 + j), 0x00000000);
1912         }
1913
1914         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1915
1916         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1917
1918         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1919         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1920
1921         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1922         rdev->config.cik.mem_max_burst_length_bytes = 256;
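             /* row size = 4 bytes/column * 2^(8 + NOOFCOLS) columns,
              * converted to KB and capped at 4 below
              */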
1923         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1924         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1925         if (rdev->config.cik.mem_row_size_in_kb > 4)
1926                 rdev->config.cik.mem_row_size_in_kb = 4;
1927         /* XXX use MC settings? */
1928         rdev->config.cik.shader_engine_tile_size = 32;
1929         rdev->config.cik.num_gpus = 1;
1930         rdev->config.cik.multi_gpu_tile_size = 64;
1931
1932         /* fix up row size */
1933         gb_addr_config &= ~ROW_SIZE_MASK;
1934         switch (rdev->config.cik.mem_row_size_in_kb) {
1935         case 1:
1936         default:
1937                 gb_addr_config |= ROW_SIZE(0);
1938                 break;
1939         case 2:
1940                 gb_addr_config |= ROW_SIZE(1);
1941                 break;
1942         case 4:
1943                 gb_addr_config |= ROW_SIZE(2);
1944                 break;
1945         }
1946
1947         /* setup tiling info dword.  gb_addr_config is not adequate since it does
1948          * not have bank info, so create a custom tiling dword.
1949          * bits 3:0   num_pipes
1950          * bits 7:4   num_banks
1951          * bits 11:8  group_size
1952          * bits 15:12 row_size
1953          */
1954         rdev->config.cik.tile_config = 0;
1955         switch (rdev->config.cik.num_tile_pipes) {
1956         case 1:
1957                 rdev->config.cik.tile_config |= (0 << 0);
1958                 break;
1959         case 2:
1960                 rdev->config.cik.tile_config |= (1 << 0);
1961                 break;
1962         case 4:
1963                 rdev->config.cik.tile_config |= (2 << 0);
1964                 break;
1965         case 8:
1966         default:
1967                 /* XXX what about 12? */
1968                 rdev->config.cik.tile_config |= (3 << 0);
1969                 break;
1970         }
1971         if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1972                 rdev->config.cik.tile_config |= 1 << 4;
1973         else
1974                 rdev->config.cik.tile_config |= 0 << 4;
1975         rdev->config.cik.tile_config |=
1976                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1977         rdev->config.cik.tile_config |=
1978                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
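             /* e.g. a 4-pipe part stores 2 in bits 3:0; the bank, group
              * and row fields come from the MC/addr-config values read
              * above
              */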
1979
1980         WREG32(GB_ADDR_CONFIG, gb_addr_config);
1981         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1982         WREG32(DMIF_ADDR_CALC, gb_addr_config);
1983         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1984         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1985         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1986         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1987         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1988
1989         cik_tiling_mode_table_init(rdev);
1990
1991         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1992                      rdev->config.cik.max_sh_per_se,
1993                      rdev->config.cik.max_backends_per_se);
1994
1995         /* set HW defaults for 3D engine */
1996         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1997
1998         WREG32(SX_DEBUG_1, 0x20);
1999
2000         WREG32(TA_CNTL_AUX, 0x00010000);
2001
2002         tmp = RREG32(SPI_CONFIG_CNTL);
2003         tmp |= 0x03000000;
2004         WREG32(SPI_CONFIG_CNTL, tmp);
2005
2006         WREG32(SQ_CONFIG, 1);
2007
2008         WREG32(DB_DEBUG, 0);
2009
2010         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2011         tmp |= 0x00000400;
2012         WREG32(DB_DEBUG2, tmp);
2013
2014         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2015         tmp |= 0x00020200;
2016         WREG32(DB_DEBUG3, tmp);
2017
2018         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2019         tmp |= 0x00018208;
2020         WREG32(CB_HW_CONTROL, tmp);
2021
2022         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2023
2024         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2025                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2026                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2027                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2028
2029         WREG32(VGT_NUM_INSTANCES, 1);
2030
2031         WREG32(CP_PERFMON_CNTL, 0);
2032
2033         WREG32(SQ_CONFIG, 0);
2034
2035         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2036                                           FORCE_EOV_MAX_REZ_CNT(255)));
2037
2038         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2039                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2040
2041         WREG32(VGT_GS_VERTEX_REUSE, 16);
2042         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2043
2044         tmp = RREG32(HDP_MISC_CNTL);
2045         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2046         WREG32(HDP_MISC_CNTL, tmp);
2047
2048         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2049         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2050
2051         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2052         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2053
2054         udelay(50);
2055 }
2056
2057 /*
2058  * GPU scratch registers helper functions.
2059  */
2060 /**
2061  * cik_scratch_init - setup driver info for CP scratch regs
2062  *
2063  * @rdev: radeon_device pointer
2064  *
2065  * Set up the number and offset of the CP scratch registers.
2066  * NOTE: use of CP scratch registers is a legacy interface and
2067  * is not used by default on newer asics (r6xx+), where memory
2068  * buffers are used for fences rather than scratch regs.
2069  */
2070 static void cik_scratch_init(struct radeon_device *rdev)
2071 {
2072         int i;
2073
2074         rdev->scratch.num_reg = 7;
2075         rdev->scratch.reg_base = SCRATCH_REG0;
2076         for (i = 0; i < rdev->scratch.num_reg; i++) {
2077                 rdev->scratch.free[i] = true;
2078                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2079         }
2080 }
2081
2082 /**
2083  * cik_ring_test - basic gfx ring test
2084  *
2085  * @rdev: radeon_device pointer
2086  * @ring: radeon_ring structure holding ring information
2087  *
2088  * Allocate a scratch register and write to it using the gfx ring (CIK).
2089  * Provides a basic gfx ring test to verify that the ring is working.
2090  * Used by cik_cp_gfx_resume().
2091  * Returns 0 on success, error on failure.
2092  */
2093 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2094 {
2095         uint32_t scratch;
2096         uint32_t tmp = 0;
2097         unsigned i;
2098         int r;
2099
2100         r = radeon_scratch_get(rdev, &scratch);
2101         if (r) {
2102                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2103                 return r;
2104         }
2105         WREG32(scratch, 0xCAFEDEAD);
2106         r = radeon_ring_lock(rdev, ring, 3);
2107         if (r) {
2108                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2109                 radeon_scratch_free(rdev, scratch);
2110                 return r;
2111         }
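             /* 3 dwords: SET_UCONFIG_REG header, register offset in dword
              * units from the UCONFIG register base, then the value
              */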
2112         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2113         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2114         radeon_ring_write(ring, 0xDEADBEEF);
2115         radeon_ring_unlock_commit(rdev, ring);
2116
2117         for (i = 0; i < rdev->usec_timeout; i++) {
2118                 tmp = RREG32(scratch);
2119                 if (tmp == 0xDEADBEEF)
2120                         break;
2121                 DRM_UDELAY(1);
2122         }
2123         if (i < rdev->usec_timeout) {
2124                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2125         } else {
2126                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2127                           ring->idx, scratch, tmp);
2128                 r = -EINVAL;
2129         }
2130         radeon_scratch_free(rdev, scratch);
2131         return r;
2132 }
2133
2134 /**
2135  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2136  *
2137  * @rdev: radeon_device pointer
2138  * @fence: radeon fence object
2139  *
2140  * Emits a fence sequence number on the gfx ring and flushes
2141  * GPU caches.
2142  */
2143 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2144                              struct radeon_fence *fence)
2145 {
2146         struct radeon_ring *ring = &rdev->ring[fence->ring];
2147         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2148
2149         /* EVENT_WRITE_EOP - flush caches, send int */
2150         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2151         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2152                                  EOP_TC_ACTION_EN |
2153                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2154                                  EVENT_INDEX(5)));
2155         radeon_ring_write(ring, addr & 0xfffffffc);
2156         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2157         radeon_ring_write(ring, fence->seq);
2158         radeon_ring_write(ring, 0);
2159         /* HDP flush */
2160         /* We should be using the new WAIT_REG_MEM special op packet here
2161          * but it causes the CP to hang
2162          */
2163         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2164         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2165                                  WRITE_DATA_DST_SEL(0)));
2166         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2167         radeon_ring_write(ring, 0);
2168         radeon_ring_write(ring, 0);
2169 }
2170
2171 /**
2172  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2173  *
2174  * @rdev: radeon_device pointer
2175  * @fence: radeon fence object
2176  *
2177  * Emits a fence sequence number on the compute ring and flushes
2178  * GPU caches.
2179  */
2180 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2181                                  struct radeon_fence *fence)
2182 {
2183         struct radeon_ring *ring = &rdev->ring[fence->ring];
2184         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2185
2186         /* RELEASE_MEM - flush caches, send int */
2187         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2188         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2189                                  EOP_TC_ACTION_EN |
2190                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2191                                  EVENT_INDEX(5)));
2192         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2193         radeon_ring_write(ring, addr & 0xfffffffc);
2194         radeon_ring_write(ring, upper_32_bits(addr));
2195         radeon_ring_write(ring, fence->seq);
2196         radeon_ring_write(ring, 0);
2197         /* HDP flush */
2198         /* We should be using the new WAIT_REG_MEM special op packet here
2199          * but it causes the CP to hang
2200          */
2201         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2202         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2203                                  WRITE_DATA_DST_SEL(0)));
2204         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2205         radeon_ring_write(ring, 0);
2206         radeon_ring_write(ring, 0);
2207 }
2208
2209 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2210                              struct radeon_ring *ring,
2211                              struct radeon_semaphore *semaphore,
2212                              bool emit_wait)
2213 {
2214         uint64_t addr = semaphore->gpu_addr;
2215         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2216
2217         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2218         radeon_ring_write(ring, addr & 0xffffffff);
2219         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2220 }
2221
2222 /*
2223  * IB stuff
2224  */
2225 /**
2226  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2227  *
2228  * @rdev: radeon_device pointer
2229  * @ib: radeon indirect buffer object
2230  *
2231  * Emits a DE (drawing engine) or CE (constant engine) IB
2232  * on the gfx ring.  IBs are usually generated by userspace
2233  * acceleration drivers and submitted to the kernel for
2234  * scheduling on the ring.  This function schedules the IB
2235  * on the gfx ring for execution by the GPU.
2236  */
2237 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2238 {
2239         struct radeon_ring *ring = &rdev->ring[ib->ring];
2240         u32 header, control = INDIRECT_BUFFER_VALID;
2241
2242         if (ib->is_const_ib) {
2243                 /* set switch buffer packet before const IB */
2244                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2245                 radeon_ring_write(ring, 0);
2246
2247                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2248         } else {
2249                 u32 next_rptr;
2250                 if (ring->rptr_save_reg) {
2251                         next_rptr = ring->wptr + 3 + 4;
2252                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2253                         radeon_ring_write(ring, ((ring->rptr_save_reg -
2254                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
2255                         radeon_ring_write(ring, next_rptr);
2256                 } else if (rdev->wb.enabled) {
2257                         next_rptr = ring->wptr + 5 + 4;
2258                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2259                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2260                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2261                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2262                         radeon_ring_write(ring, next_rptr);
2263                 }
2264
2265                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2266         }
2267
2268         control |= ib->length_dw |
2269                 (ib->vm ? (ib->vm->id << 24) : 0);
2270
2271         radeon_ring_write(ring, header);
2272         radeon_ring_write(ring,
2273 #ifdef __BIG_ENDIAN
2274                           (2 << 0) |
2275 #endif
2276                           (ib->gpu_addr & 0xFFFFFFFC));
2277         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2278         radeon_ring_write(ring, control);
2279 }
2280
2281 /**
2282  * cik_ib_test - basic gfx ring IB test
2283  *
2284  * @rdev: radeon_device pointer
2285  * @ring: radeon_ring structure holding ring information
2286  *
2287  * Allocate an IB and execute it on the gfx ring (CIK).
2288  * Provides a basic gfx ring test to verify that IBs are working.
2289  * Returns 0 on success, error on failure.
2290  */
2291 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2292 {
2293         struct radeon_ib ib;
2294         uint32_t scratch;
2295         uint32_t tmp = 0;
2296         unsigned i;
2297         int r;
2298
2299         r = radeon_scratch_get(rdev, &scratch);
2300         if (r) {
2301                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2302                 return r;
2303         }
2304         WREG32(scratch, 0xCAFEDEAD);
2305         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2306         if (r) {
2307                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
                radeon_scratch_free(rdev, scratch);     /* don't leak the scratch reg on error */
2308                 return r;
2309         }
2310         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2311         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2312         ib.ptr[2] = 0xDEADBEEF;
2313         ib.length_dw = 3;
2314         r = radeon_ib_schedule(rdev, &ib, NULL);
2315         if (r) {
2316                 radeon_scratch_free(rdev, scratch);
2317                 radeon_ib_free(rdev, &ib);
2318                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2319                 return r;
2320         }
2321         r = radeon_fence_wait(ib.fence, false);
2322         if (r) {
2323                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                radeon_scratch_free(rdev, scratch);     /* don't leak on error */
                radeon_ib_free(rdev, &ib);
2324                 return r;
2325         }
2326         for (i = 0; i < rdev->usec_timeout; i++) {
2327                 tmp = RREG32(scratch);
2328                 if (tmp == 0xDEADBEEF)
2329                         break;
2330                 DRM_UDELAY(1);
2331         }
2332         if (i < rdev->usec_timeout) {
2333                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2334         } else {
2335                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2336                           scratch, tmp);
2337                 r = -EINVAL;
2338         }
2339         radeon_scratch_free(rdev, scratch);
2340         radeon_ib_free(rdev, &ib);
2341         return r;
2342 }
2343
2344 /*
2345  * CP.
2346  * On CIK, gfx and compute now have independent command processors.
2347  *
2348  * GFX
2349  * Gfx consists of a single ring and can process both gfx jobs and
2350  * compute jobs.  The gfx CP consists of three microengines (ME):
2351  * PFP - Pre-Fetch Parser
2352  * ME - Micro Engine
2353  * CE - Constant Engine
2354  * The PFP and ME make up what is considered the Drawing Engine (DE).
2355  * The CE is an asynchronous engine used for updating buffer descriptors
2356  * used by the DE so that they can be loaded into cache in parallel
2357  * while the DE is processing state update packets.
2358  *
2359  * Compute
2360  * The compute CP consists of two microengines (ME):
2361  * MEC1 - Compute MicroEngine 1
2362  * MEC2 - Compute MicroEngine 2
2363  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2364  * The queues are exposed to userspace and are programmed directly
2365  * by the compute runtime.
2366  */
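
/* Sketch (example only, not driver code) of how the topology described
 * above maps onto register access: per-queue registers such as
 * CP_HQD_PQ_RPTR are banked, so an ME/pipe/queue must be selected via
 * cik_srbm_select() before touching them, as the resume code below does.
 */
#if 0 /* example only */
static void compute_queue_walk_example(struct radeon_device *rdev)
{
        int me, pipe, queue;

        for (me = 1; me <= rdev->mec.num_mec; me++) {
                for (pipe = 0; pipe < rdev->mec.num_pipe; pipe++) {
                        for (queue = 0; queue < 8; queue++) {
                                cik_srbm_select(rdev, me, pipe, queue, 0);
                                DRM_INFO("me %d pipe %d queue %d rptr 0x%08x\n",
                                         me, pipe, queue,
                                         RREG32(CP_HQD_PQ_RPTR));
                        }
                }
        }
        cik_srbm_select(rdev, 0, 0, 0, 0);      /* restore the default select */
}
#endif
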
2367 /**
2368  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2369  *
2370  * @rdev: radeon_device pointer
2371  * @enable: enable or disable the MEs
2372  *
2373  * Halts or unhalts the gfx MEs.
2374  */
2375 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2376 {
2377         if (enable)
2378                 WREG32(CP_ME_CNTL, 0);
2379         else {
2380                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2381                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2382         }
2383         udelay(50);
2384 }
2385
2386 /**
2387  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2388  *
2389  * @rdev: radeon_device pointer
2390  *
2391  * Loads the gfx PFP, ME, and CE ucode.
2392  * Returns 0 for success, -EINVAL if the ucode is not available.
2393  */
2394 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2395 {
2396         const __be32 *fw_data;
2397         int i;
2398
2399         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2400                 return -EINVAL;
2401
2402         cik_cp_gfx_enable(rdev, false);
2403
2404         /* PFP */
2405         fw_data = (const __be32 *)rdev->pfp_fw->data;
2406         WREG32(CP_PFP_UCODE_ADDR, 0);
2407         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2408                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2409         WREG32(CP_PFP_UCODE_ADDR, 0);
2410
2411         /* CE */
2412         fw_data = (const __be32 *)rdev->ce_fw->data;
2413         WREG32(CP_CE_UCODE_ADDR, 0);
2414         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2415                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2416         WREG32(CP_CE_UCODE_ADDR, 0);
2417
2418         /* ME */
2419         fw_data = (const __be32 *)rdev->me_fw->data;
2420         WREG32(CP_ME_RAM_WADDR, 0);
2421         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2422                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2423         WREG32(CP_ME_RAM_WADDR, 0);
2424
2425         WREG32(CP_PFP_UCODE_ADDR, 0);
2426         WREG32(CP_CE_UCODE_ADDR, 0);
2427         WREG32(CP_ME_RAM_WADDR, 0);
2428         WREG32(CP_ME_RAM_RADDR, 0);
2429         return 0;
2430 }
2431
2432 /**
2433  * cik_cp_gfx_start - start the gfx ring
2434  *
2435  * @rdev: radeon_device pointer
2436  *
2437  * Enables the ring and loads the clear state context and other
2438  * packets required to init the ring.
2439  * Returns 0 for success, error for failure.
2440  */
2441 static int cik_cp_gfx_start(struct radeon_device *rdev)
2442 {
2443         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2444         int r, i;
2445
2446         /* init the CP */
2447         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2448         WREG32(CP_ENDIAN_SWAP, 0);
2449         WREG32(CP_DEVICE_ID, 1);
2450
2451         cik_cp_gfx_enable(rdev, true);
2452
2453         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2454         if (r) {
2455                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2456                 return r;
2457         }
2458
2459         /* init the CE partitions.  CE only used for gfx on CIK */
2460         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2461         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2462         radeon_ring_write(ring, 0xc000);
2463         radeon_ring_write(ring, 0xc000);
2464
2465         /* setup clear context state */
2466         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2467         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2468
2469         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2470         radeon_ring_write(ring, 0x80000000);
2471         radeon_ring_write(ring, 0x80000000);
2472
2473         for (i = 0; i < cik_default_size; i++)
2474                 radeon_ring_write(ring, cik_default_state[i]);
2475
2476         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2477         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2478
2479         /* set clear context state */
2480         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2481         radeon_ring_write(ring, 0);
2482
2483         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2484         radeon_ring_write(ring, 0x00000316);
2485         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2486         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2487
2488         radeon_ring_unlock_commit(rdev, ring);
2489
2490         return 0;
2491 }
2492
2493 /**
2494  * cik_cp_gfx_fini - stop the gfx ring
2495  *
2496  * @rdev: radeon_device pointer
2497  *
2498  * Stop the gfx ring and tear down the driver ring
2499  * info.
2500  */
2501 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2502 {
2503         cik_cp_gfx_enable(rdev, false);
2504         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2505 }
2506
2507 /**
2508  * cik_cp_gfx_resume - setup the gfx ring buffer registers
2509  *
2510  * @rdev: radeon_device pointer
2511  *
2512  * Program the location and size of the gfx ring buffer
2513  * and test it to make sure it's working.
2514  * Returns 0 for success, error for failure.
2515  */
2516 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2517 {
2518         struct radeon_ring *ring;
2519         u32 tmp;
2520         u32 rb_bufsz;
2521         u64 rb_addr;
2522         int r;
2523
2524         WREG32(CP_SEM_WAIT_TIMER, 0x0);
2525         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2526
2527         /* Set the write pointer delay */
2528         WREG32(CP_RB_WPTR_DELAY, 0);
2529
2530         /* set the RB to use vmid 0 */
2531         WREG32(CP_RB_VMID, 0);
2532
2533         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2534
2535         /* ring 0 - compute and gfx */
2536         /* Set ring buffer size */
2537         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2538         rb_bufsz = drm_order(ring->ring_size / 8);
2539         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2540 #ifdef __BIG_ENDIAN
2541         tmp |= BUF_SWAP_32BIT;
2542 #endif
2543         WREG32(CP_RB0_CNTL, tmp);
2544
2545         /* Initialize the ring buffer's read and write pointers */
2546         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2547         ring->wptr = 0;
2548         WREG32(CP_RB0_WPTR, ring->wptr);
2549
2550         /* set the wb address whether it's enabled or not */
2551         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2552         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2553
2554         /* scratch register shadowing is no longer supported */
2555         WREG32(SCRATCH_UMSK, 0);
2556
2557         if (!rdev->wb.enabled)
2558                 tmp |= RB_NO_UPDATE;
2559
2560         mdelay(1);
2561         WREG32(CP_RB0_CNTL, tmp);
2562
2563         rb_addr = ring->gpu_addr >> 8;
2564         WREG32(CP_RB0_BASE, rb_addr);
2565         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2566
2567         ring->rptr = RREG32(CP_RB0_RPTR);
2568
2569         /* start the ring */
2570         cik_cp_gfx_start(rdev);
2571         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2572         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2573         if (r) {
2574                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2575                 return r;
2576         }
2577         return 0;
2578 }
2579
2580 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2581                               struct radeon_ring *ring)
2582 {
2583         u32 rptr;
2584
2587         if (rdev->wb.enabled) {
2588                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2589         } else {
2590                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2591                 rptr = RREG32(CP_HQD_PQ_RPTR);
2592                 cik_srbm_select(rdev, 0, 0, 0, 0);
2593         }
2594         rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2595
2596         return rptr;
2597 }
2598
2599 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2600                               struct radeon_ring *ring)
2601 {
2602         u32 wptr;
2603
2604         if (rdev->wb.enabled) {
2605                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2606         } else {
2607                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2608                 wptr = RREG32(CP_HQD_PQ_WPTR);
2609                 cik_srbm_select(rdev, 0, 0, 0, 0);
2610         }
2611         wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2612
2613         return wptr;
2614 }
2615
2616 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2617                                struct radeon_ring *ring)
2618 {
2619         u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2620
2621         rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2622         WDOORBELL32(ring->doorbell_offset, wptr);
2623 }
2624
2625 /**
2626  * cik_cp_compute_enable - enable/disable the compute CP MEs
2627  *
2628  * @rdev: radeon_device pointer
2629  * @enable: enable or disable the MEs
2630  *
2631  * Halts or unhalts the compute MEs.
2632  */
2633 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2634 {
2635         if (enable)
2636                 WREG32(CP_MEC_CNTL, 0);
2637         else
2638                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2639         udelay(50);
2640 }
2641
2642 /**
2643  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2644  *
2645  * @rdev: radeon_device pointer
2646  *
2647  * Loads the compute MEC1&2 ucode.
2648  * Returns 0 for success, -EINVAL if the ucode is not available.
2649  */
2650 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2651 {
2652         const __be32 *fw_data;
2653         int i;
2654
2655         if (!rdev->mec_fw)
2656                 return -EINVAL;
2657
2658         cik_cp_compute_enable(rdev, false);
2659
2660         /* MEC1 */
2661         fw_data = (const __be32 *)rdev->mec_fw->data;
2662         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2663         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2664                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2665         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2666
2667         if (rdev->family == CHIP_KAVERI) {
2668                 /* MEC2 */
2669                 fw_data = (const __be32 *)rdev->mec_fw->data;
2670                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2671                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2672                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2673                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2674         }
2675
2676         return 0;
2677 }
2678
2679 /**
2680  * cik_cp_compute_start - start the compute queues
2681  *
2682  * @rdev: radeon_device pointer
2683  *
2684  * Enable the compute queues.
2685  * Returns 0 for success, error for failure.
2686  */
2687 static int cik_cp_compute_start(struct radeon_device *rdev)
2688 {
2689         cik_cp_compute_enable(rdev, true);
2690
2691         return 0;
2692 }
2693
2694 /**
2695  * cik_cp_compute_fini - stop the compute queues
2696  *
2697  * @rdev: radeon_device pointer
2698  *
2699  * Stop the compute queues and tear down the driver queue
2700  * info.
2701  */
2702 static void cik_cp_compute_fini(struct radeon_device *rdev)
2703 {
2704         int i, idx, r;
2705
2706         cik_cp_compute_enable(rdev, false);
2707
2708         for (i = 0; i < 2; i++) {
2709                 if (i == 0)
2710                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2711                 else
2712                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2713
2714                 if (rdev->ring[idx].mqd_obj) {
2715                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2716                         if (unlikely(r != 0))
2717                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2718
2719                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2720                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2721
2722                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2723                         rdev->ring[idx].mqd_obj = NULL;
2724                 }
2725         }
2726 }
2727
2728 static void cik_mec_fini(struct radeon_device *rdev)
2729 {
2730         int r;
2731
2732         if (rdev->mec.hpd_eop_obj) {
2733                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2734                 if (unlikely(r != 0))
2735                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2736                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2737                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2738
2739                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2740                 rdev->mec.hpd_eop_obj = NULL;
2741         }
2742 }
2743
2744 #define MEC_HPD_SIZE 2048
2745
2746 static int cik_mec_init(struct radeon_device *rdev)
2747 {
2748         int r;
2749         u32 *hpd;
2750
2751         /*
2752          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2753          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2754          */
2755         if (rdev->family == CHIP_KAVERI)
2756                 rdev->mec.num_mec = 2;
2757         else
2758                 rdev->mec.num_mec = 1;
2759         rdev->mec.num_pipe = 4;
2760         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2761
2762         if (rdev->mec.hpd_eop_obj == NULL) {
2763                 r = radeon_bo_create(rdev,
2764                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2765                                      PAGE_SIZE, true,
2766                                      RADEON_GEM_DOMAIN_GTT, NULL,
2767                                      &rdev->mec.hpd_eop_obj);
2768                 if (r) {
2769                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2770                         return r;
2771                 }
2772         }
2773
2774         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2775         if (unlikely(r != 0)) {
2776                 cik_mec_fini(rdev);
2777                 return r;
2778         }
2779         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2780                           &rdev->mec.hpd_eop_gpu_addr);
2781         if (r) {
2782                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2783                 cik_mec_fini(rdev);
2784                 return r;
2785         }
2786         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2787         if (r) {
2788                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2789                 cik_mec_fini(rdev);
2790                 return r;
2791         }
2792
2793         /* clear memory.  Not sure if this is required or not */
2794         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2795
2796         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2797         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2798
2799         return 0;
2800 }
2801
2802 struct hqd_registers
2803 {
2804         u32 cp_mqd_base_addr;
2805         u32 cp_mqd_base_addr_hi;
2806         u32 cp_hqd_active;
2807         u32 cp_hqd_vmid;
2808         u32 cp_hqd_persistent_state;
2809         u32 cp_hqd_pipe_priority;
2810         u32 cp_hqd_queue_priority;
2811         u32 cp_hqd_quantum;
2812         u32 cp_hqd_pq_base;
2813         u32 cp_hqd_pq_base_hi;
2814         u32 cp_hqd_pq_rptr;
2815         u32 cp_hqd_pq_rptr_report_addr;
2816         u32 cp_hqd_pq_rptr_report_addr_hi;
2817         u32 cp_hqd_pq_wptr_poll_addr;
2818         u32 cp_hqd_pq_wptr_poll_addr_hi;
2819         u32 cp_hqd_pq_doorbell_control;
2820         u32 cp_hqd_pq_wptr;
2821         u32 cp_hqd_pq_control;
2822         u32 cp_hqd_ib_base_addr;
2823         u32 cp_hqd_ib_base_addr_hi;
2824         u32 cp_hqd_ib_rptr;
2825         u32 cp_hqd_ib_control;
2826         u32 cp_hqd_iq_timer;
2827         u32 cp_hqd_iq_rptr;
2828         u32 cp_hqd_dequeue_request;
2829         u32 cp_hqd_dma_offload;
2830         u32 cp_hqd_sema_cmd;
2831         u32 cp_hqd_msg_type;
2832         u32 cp_hqd_atomic0_preop_lo;
2833         u32 cp_hqd_atomic0_preop_hi;
2834         u32 cp_hqd_atomic1_preop_lo;
2835         u32 cp_hqd_atomic1_preop_hi;
2836         u32 cp_hqd_hq_scheduler0;
2837         u32 cp_hqd_hq_scheduler1;
2838         u32 cp_mqd_control;
2839 };
2840
2841 struct bonaire_mqd
2842 {
2843         u32 header;
2844         u32 dispatch_initiator;
2845         u32 dimensions[3];
2846         u32 start_idx[3];
2847         u32 num_threads[3];
2848         u32 pipeline_stat_enable;
2849         u32 perf_counter_enable;
2850         u32 pgm[2];
2851         u32 tba[2];
2852         u32 tma[2];
2853         u32 pgm_rsrc[2];
2854         u32 vmid;
2855         u32 resource_limits;
2856         u32 static_thread_mgmt01[2];
2857         u32 tmp_ring_size;
2858         u32 static_thread_mgmt23[2];
2859         u32 restart[3];
2860         u32 thread_trace_enable;
2861         u32 reserved1;
2862         u32 user_data[16];
2863         u32 vgtcs_invoke_count[2];
2864         struct hqd_registers queue_state;
2865         u32 dequeue_cntr;
2866         u32 interrupt_queue[64];
2867 };
2868
2869 /**
2870  * cik_cp_compute_resume - setup the compute queue registers
2871  *
2872  * @rdev: radeon_device pointer
2873  *
2874  * Program the compute queues and test them to make sure they
2875  * are working.
2876  * Returns 0 for success, error for failure.
2877  */
2878 static int cik_cp_compute_resume(struct radeon_device *rdev)
2879 {
2880         int r, i, j, idx;
2881         u32 tmp;
2882         bool use_doorbell = true;
2883         u64 hqd_gpu_addr;
2884         u64 mqd_gpu_addr;
2885         u64 eop_gpu_addr;
2886         u64 wb_gpu_addr;
2887         u32 *buf;
2888         struct bonaire_mqd *mqd;
2889
2890         r = cik_cp_compute_start(rdev);
2891         if (r)
2892                 return r;
2893
2894         /* fix up chicken bits */
2895         tmp = RREG32(CP_CPF_DEBUG);
2896         tmp |= (1 << 23);
2897         WREG32(CP_CPF_DEBUG, tmp);
2898
2899         /* init the pipes */
2900         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2901                 int me = (i < 4) ? 1 : 2;
2902                 int pipe = (i < 4) ? i : (i - 4);
2903
2904                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2905
2906                 cik_srbm_select(rdev, me, pipe, 0, 0);
2907
2908                 /* write the EOP addr */
2909                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2910                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2911
2912                 /* set the VMID assigned */
2913                 WREG32(CP_HPD_EOP_VMID, 0);
2914
2915                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2916                 tmp = RREG32(CP_HPD_EOP_CONTROL);
2917                 tmp &= ~EOP_SIZE_MASK;
2918                 tmp |= drm_order(MEC_HPD_SIZE / 8);
2919                 WREG32(CP_HPD_EOP_CONTROL, tmp);
2920         }
2921         cik_srbm_select(rdev, 0, 0, 0, 0);
2922
2923         /* init the queues.  Just two for now. */
2924         for (i = 0; i < 2; i++) {
2925                 if (i == 0)
2926                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2927                 else
2928                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2929
2930                 if (rdev->ring[idx].mqd_obj == NULL) {
2931                         r = radeon_bo_create(rdev,
2932                                              sizeof(struct bonaire_mqd),
2933                                              PAGE_SIZE, true,
2934                                              RADEON_GEM_DOMAIN_GTT, NULL,
2935                                              &rdev->ring[idx].mqd_obj);
2936                         if (r) {
2937                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2938                                 return r;
2939                         }
2940                 }
2941
2942                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2943                 if (unlikely(r != 0)) {
2944                         cik_cp_compute_fini(rdev);
2945                         return r;
2946                 }
2947                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2948                                   &mqd_gpu_addr);
2949                 if (r) {
2950                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2951                         cik_cp_compute_fini(rdev);
2952                         return r;
2953                 }
2954                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2955                 if (r) {
2956                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2957                         cik_cp_compute_fini(rdev);
2958                         return r;
2959                 }
2960
2961                 /* doorbell offset */
2962                 rdev->ring[idx].doorbell_offset =
2963                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2964
2965                 /* init the mqd struct */
2966                 memset(buf, 0, sizeof(struct bonaire_mqd));
2967
2968                 mqd = (struct bonaire_mqd *)buf;
2969                 mqd->header = 0xC0310800;
2970                 mqd->static_thread_mgmt01[0] = 0xffffffff;
2971                 mqd->static_thread_mgmt01[1] = 0xffffffff;
2972                 mqd->static_thread_mgmt23[0] = 0xffffffff;
2973                 mqd->static_thread_mgmt23[1] = 0xffffffff;
2974
2975                 cik_srbm_select(rdev, rdev->ring[idx].me,
2976                                 rdev->ring[idx].pipe,
2977                                 rdev->ring[idx].queue, 0);
2978
2979                 /* disable wptr polling */
2980                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2981                 tmp &= ~WPTR_POLL_EN;
2982                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2983
2984                 /* enable doorbell? */
2985                 mqd->queue_state.cp_hqd_pq_doorbell_control =
2986                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2987                 if (use_doorbell)
2988                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2989                 else
2990                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2991                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2992                        mqd->queue_state.cp_hqd_pq_doorbell_control);
2993
2994                 /* disable the queue if it's active */
2995                 mqd->queue_state.cp_hqd_dequeue_request = 0;
2996                 mqd->queue_state.cp_hqd_pq_rptr = 0;
2997                 mqd->queue_state.cp_hqd_pq_wptr = 0;
2998                 if (RREG32(CP_HQD_ACTIVE) & 1) {
2999                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3000                         for (j = 0; j < rdev->usec_timeout; j++) { /* use j: don't clobber the queue index i */
3001                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3002                                         break;
3003                                 udelay(1);
3004                         }
3005                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3006                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3007                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3008                 }
3009
3010                 /* set the pointer to the MQD */
3011                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3012                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3013                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3014                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3015                 /* set MQD vmid to 0 */
3016                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3017                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3018                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3019
3020                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3021                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3022                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3023                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3024                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3025                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3026
3027                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3028                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3029                 mqd->queue_state.cp_hqd_pq_control &=
3030                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3031
3032                 mqd->queue_state.cp_hqd_pq_control |=
3033                         drm_order(rdev->ring[idx].ring_size / 8);
3034                 mqd->queue_state.cp_hqd_pq_control |=
3035                         (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3036 #ifdef __BIG_ENDIAN
3037                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3038 #endif
3039                 mqd->queue_state.cp_hqd_pq_control &=
3040                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3041                 mqd->queue_state.cp_hqd_pq_control |=
3042                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3043                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3044
3045                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3046                 if (i == 0)
3047                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3048                 else
3049                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3050                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3051                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3052                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3053                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3054                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3055
3056                 /* set the wb address whether it's enabled or not */
3057                 if (i == 0)
3058                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3059                 else
3060                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3061                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3062                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3063                         upper_32_bits(wb_gpu_addr) & 0xffff;
3064                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3065                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3066                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3067                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3068
3069                 /* enable the doorbell if requested */
3070                 if (use_doorbell) {
3071                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3072                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3073                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3074                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3075                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3076                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3077                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3078                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3079
3080                 } else {
3081                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3082                 }
3083                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3084                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3085
3086                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3087                 rdev->ring[idx].wptr = 0;
3088                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3089                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3090                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3091                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3092
3093                 /* set the vmid for the queue */
3094                 mqd->queue_state.cp_hqd_vmid = 0;
3095                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3096
3097                 /* activate the queue */
3098                 mqd->queue_state.cp_hqd_active = 1;
3099                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3100
3101                 cik_srbm_select(rdev, 0, 0, 0, 0);
3102
3103                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3104                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3105
3106                 rdev->ring[idx].ready = true;
3107                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3108                 if (r)
3109                         rdev->ring[idx].ready = false;
3110         }
3111
3112         return 0;
3113 }
3114
3115 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3116 {
3117         cik_cp_gfx_enable(rdev, enable);
3118         cik_cp_compute_enable(rdev, enable);
3119 }
3120
3121 static int cik_cp_load_microcode(struct radeon_device *rdev)
3122 {
3123         int r;
3124
3125         r = cik_cp_gfx_load_microcode(rdev);
3126         if (r)
3127                 return r;
3128         r = cik_cp_compute_load_microcode(rdev);
3129         if (r)
3130                 return r;
3131
3132         return 0;
3133 }
3134
3135 static void cik_cp_fini(struct radeon_device *rdev)
3136 {
3137         cik_cp_gfx_fini(rdev);
3138         cik_cp_compute_fini(rdev);
3139 }
3140
3141 static int cik_cp_resume(struct radeon_device *rdev)
3142 {
3143         int r;
3144
3145         /* Reset all cp blocks */
3146         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3147         RREG32(GRBM_SOFT_RESET);
3148         mdelay(15);
3149         WREG32(GRBM_SOFT_RESET, 0);
3150         RREG32(GRBM_SOFT_RESET);
3151
3152         r = cik_cp_load_microcode(rdev);
3153         if (r)
3154                 return r;
3155
3156         r = cik_cp_gfx_resume(rdev);
3157         if (r)
3158                 return r;
3159         r = cik_cp_compute_resume(rdev);
3160         if (r)
3161                 return r;
3162
3163         return 0;
3164 }
3165
3166 /*
3167  * sDMA - System DMA
3168  * Starting with CIK, the GPU has new asynchronous
3169  * DMA engines.  These engines are used for compute
3170  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
3171  * and each one supports 1 ring buffer used for gfx
3172  * and 2 queues used for compute.
3173  *
3174  * The programming model is very similar to the CP
3175  * (ring buffer, IBs, etc.), but sDMA has its own
3176  * packet format that is different from the PM4 format
3177  * used by the CP. sDMA supports copying data, writing
3178  * embedded data, solid fills, and a number of other
3179  * things.  It also has support for tiling/detiling of
3180  * buffers.
3181  */
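
/* Sketch (example only, not driver code) of the sDMA packet format
 * mentioned above: a linear WRITE packet storing a single dword at a
 * GPU address, mirroring the next_rptr write that
 * cik_sdma_ring_ib_execute() emits below.
 */
#if 0 /* example only */
static void sdma_write_dword_example(struct radeon_ring *ring,
                                     u64 gpu_addr, u32 value)
{
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
                                            SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
        radeon_ring_write(ring, gpu_addr & 0xfffffffc);   /* dst addr lo */
        radeon_ring_write(ring, upper_32_bits(gpu_addr)); /* dst addr hi */
        radeon_ring_write(ring, 1);                       /* number of DWs to follow */
        radeon_ring_write(ring, value);                   /* the data itself */
}
#endif
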
3182 /**
3183  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3184  *
3185  * @rdev: radeon_device pointer
3186  * @ib: IB object to schedule
3187  *
3188  * Schedule an IB in the DMA ring (CIK).
3189  */
3190 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3191                               struct radeon_ib *ib)
3192 {
3193         struct radeon_ring *ring = &rdev->ring[ib->ring];
3194         u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3195
3196         if (rdev->wb.enabled) {
3197                 u32 next_rptr = ring->wptr + 5;
3198                 while ((next_rptr & 7) != 4)
3199                         next_rptr++;
3200                 next_rptr += 4;
3201                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3202                 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3203                 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3204                 radeon_ring_write(ring, 1); /* number of DWs to follow */
3205                 radeon_ring_write(ring, next_rptr);
3206         }
3207
3208         /* IB packet must end on an 8 DW boundary; it is 4 DWs, so pad with NOPs until (wptr & 7) == 4 */
3209         while ((ring->wptr & 7) != 4)
3210                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3211         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3212         radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3213         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3214         radeon_ring_write(ring, ib->length_dw);
3216 }
3217
3218 /**
3219  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3220  *
3221  * @rdev: radeon_device pointer
3222  * @fence: radeon fence object
3223  *
3224  * Add a DMA fence packet to the ring to write
3225  * the fence seq number and a DMA trap packet to generate
3226  * an interrupt if needed (CIK).
3227  */
3228 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3229                               struct radeon_fence *fence)
3230 {
3231         struct radeon_ring *ring = &rdev->ring[fence->ring];
3232         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3233         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3234                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3235         u32 ref_and_mask;
3236
3237         if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3238                 ref_and_mask = SDMA0;
3239         else
3240                 ref_and_mask = SDMA1;
3241
3242         /* write the fence */
3243         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3244         radeon_ring_write(ring, addr & 0xffffffff);
3245         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3246         radeon_ring_write(ring, fence->seq);
3247         /* generate an interrupt */
3248         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3249         /* flush HDP */
3250         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3251         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3252         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3253         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3254         radeon_ring_write(ring, ref_and_mask); /* MASK */
3255         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3256 }
3257
3258 /**
3259  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3260  *
3261  * @rdev: radeon_device pointer
3262  * @ring: radeon_ring structure holding ring information
3263  * @semaphore: radeon semaphore object
3264  * @emit_wait: wait or signal semaphore
3265  *
3266  * Add a DMA semaphore packet to the ring to wait on or signal
3267  * other rings (CIK).
3268  */
3269 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3270                                   struct radeon_ring *ring,
3271                                   struct radeon_semaphore *semaphore,
3272                                   bool emit_wait)
3273 {
3274         u64 addr = semaphore->gpu_addr;
3275         u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3276
3277         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3278         radeon_ring_write(ring, addr & 0xfffffff8);
3279         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3280 }
3281
3282 /**
3283  * cik_sdma_gfx_stop - stop the gfx async dma engines
3284  *
3285  * @rdev: radeon_device pointer
3286  *
3287  * Stop the gfx async dma ring buffers (CIK).
3288  */
3289 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3290 {
3291         u32 rb_cntl, reg_offset;
3292         int i;
3293
3294         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3295
3296         for (i = 0; i < 2; i++) {
3297                 if (i == 0)
3298                         reg_offset = SDMA0_REGISTER_OFFSET;
3299                 else
3300                         reg_offset = SDMA1_REGISTER_OFFSET;
3301                 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3302                 rb_cntl &= ~SDMA_RB_ENABLE;
3303                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3304                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3305         }
3306 }
3307
3308 /**
3309  * cik_sdma_rlc_stop - stop the compute async dma engines
3310  *
3311  * @rdev: radeon_device pointer
3312  *
3313  * Stop the compute async dma queues (CIK).
3314  */
3315 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3316 {
3317         /* XXX todo */
3318 }
3319
3320 /**
3321  * cik_sdma_enable - enable/disable the async dma engines
3322  *
3323  * @rdev: radeon_device pointer
3324  * @enable: enable/disable the DMA MEs.
3325  *
3326  * Halt or unhalt the async dma engines (CIK).
3327  */
3328 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3329 {
3330         u32 me_cntl, reg_offset;
3331         int i;
3332
3333         for (i = 0; i < 2; i++) {
3334                 if (i == 0)
3335                         reg_offset = SDMA0_REGISTER_OFFSET;
3336                 else
3337                         reg_offset = SDMA1_REGISTER_OFFSET;
3338                 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3339                 if (enable)
3340                         me_cntl &= ~SDMA_HALT;
3341                 else
3342                         me_cntl |= SDMA_HALT;
3343                 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3344         }
3345 }
3346
3347 /**
3348  * cik_sdma_gfx_resume - setup and start the async dma engines
3349  *
3350  * @rdev: radeon_device pointer
3351  *
3352  * Set up the gfx DMA ring buffers and enable them (CIK).
3353  * Returns 0 for success, error for failure.
3354  */
3355 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3356 {
3357         struct radeon_ring *ring;
3358         u32 rb_cntl, ib_cntl;
3359         u32 rb_bufsz;
3360         u32 reg_offset, wb_offset;
3361         int i, r;
3362
3363         for (i = 0; i < 2; i++) {
3364                 if (i == 0) {
3365                         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3366                         reg_offset = SDMA0_REGISTER_OFFSET;
3367                         wb_offset = R600_WB_DMA_RPTR_OFFSET;
3368                 } else {
3369                         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3370                         reg_offset = SDMA1_REGISTER_OFFSET;
3371                         wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3372                 }
3373
3374                 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3375                 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3376
3377                 /* Set ring buffer size in dwords */
3378                 rb_bufsz = drm_order(ring->ring_size / 4);
3379                 rb_cntl = rb_bufsz << 1;
3380 #ifdef __BIG_ENDIAN
3381                 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3382 #endif
3383                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3384
3385                 /* Initialize the ring buffer's read and write pointers */
3386                 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3387                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3388
3389                 /* set the wb address whether it's enabled or not */
3390                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3391                        upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3392                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3393                        ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3394
3395                 if (rdev->wb.enabled)
3396                         rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3397
3398                 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3399                 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3400
3401                 ring->wptr = 0;
3402                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3403
3404                 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3405
3406                 /* enable DMA RB */
3407                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3408
3409                 ib_cntl = SDMA_IB_ENABLE;
3410 #ifdef __BIG_ENDIAN
3411                 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3412 #endif
3413                 /* enable DMA IBs */
3414                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3415
3416                 ring->ready = true;
3417
3418                 r = radeon_ring_test(rdev, ring->idx, ring);
3419                 if (r) {
3420                         ring->ready = false;
3421                         return r;
3422                 }
3423         }
3424
3425         radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3426
3427         return 0;
3428 }
3429
3430 /**
3431  * cik_sdma_rlc_resume - setup and start the async dma engines
3432  *
3433  * @rdev: radeon_device pointer
3434  *
3435  * Set up the compute DMA queues and enable them (CIK).
3436  * Returns 0 for success, error for failure.
3437  */
3438 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3439 {
3440         /* XXX todo */
3441         return 0;
3442 }
3443
3444 /**
3445  * cik_sdma_load_microcode - load the sDMA ME ucode
3446  *
3447  * @rdev: radeon_device pointer
3448  *
3449  * Loads the sDMA0/1 ucode.
3450  * Returns 0 for success, -EINVAL if the ucode is not available.
3451  */
3452 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3453 {
3454         const __be32 *fw_data;
3455         int i;
3456
3457         if (!rdev->sdma_fw)
3458                 return -EINVAL;
3459
3460         /* stop the gfx rings and rlc compute queues */
3461         cik_sdma_gfx_stop(rdev);
3462         cik_sdma_rlc_stop(rdev);
3463
3464         /* halt the MEs */
3465         cik_sdma_enable(rdev, false);
3466
3467         /* sdma0 */
3468         fw_data = (const __be32 *)rdev->sdma_fw->data;
3469         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3470         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3471                 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3472         WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3473
3474         /* sdma1 */
3475         fw_data = (const __be32 *)rdev->sdma_fw->data;
3476         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3477         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3478                 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3479         WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3480
3481         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3482         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3483         return 0;
3484 }
3485
3486 /**
3487  * cik_sdma_resume - setup and start the async dma engines
3488  *
3489  * @rdev: radeon_device pointer
3490  *
3491  * Set up the DMA engines and enable them (CIK).
3492  * Returns 0 for success, error for failure.
3493  */
3494 static int cik_sdma_resume(struct radeon_device *rdev)
3495 {
3496         int r;
3497
3498         /* Reset dma */
3499         WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3500         RREG32(SRBM_SOFT_RESET);
3501         udelay(50);
3502         WREG32(SRBM_SOFT_RESET, 0);
3503         RREG32(SRBM_SOFT_RESET);
3504
3505         r = cik_sdma_load_microcode(rdev);
3506         if (r)
3507                 return r;
3508
3509         /* unhalt the MEs */
3510         cik_sdma_enable(rdev, true);
3511
3512         /* start the gfx rings and rlc compute queues */
3513         r = cik_sdma_gfx_resume(rdev);
3514         if (r)
3515                 return r;
3516         r = cik_sdma_rlc_resume(rdev);
3517         if (r)
3518                 return r;
3519
3520         return 0;
3521 }
3522
3523 /**
3524  * cik_sdma_fini - tear down the async dma engines
3525  *
3526  * @rdev: radeon_device pointer
3527  *
3528  * Stop the async dma engines and free the rings (CIK).
3529  */
3530 static void cik_sdma_fini(struct radeon_device *rdev)
3531 {
3532         /* stop the gfx rings and rlc compute queues */
3533         cik_sdma_gfx_stop(rdev);
3534         cik_sdma_rlc_stop(rdev);
3535         /* halt the MEs */
3536         cik_sdma_enable(rdev, false);
3537         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3538         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3539         /* XXX - compute dma queue tear down */
3540 }
3541
3542 /**
3543  * cik_copy_dma - copy pages using the DMA engine
3544  *
3545  * @rdev: radeon_device pointer
3546  * @src_offset: src GPU address
3547  * @dst_offset: dst GPU address
3548  * @num_gpu_pages: number of GPU pages to xfer
3549  * @fence: radeon fence object
3550  *
3551  * Copy GPU pages using the DMA engine (CIK).
3552  * Used by the radeon ttm implementation to move pages if
3553  * registered as the asic copy callback.
3554  */
3555 int cik_copy_dma(struct radeon_device *rdev,
3556                  uint64_t src_offset, uint64_t dst_offset,
3557                  unsigned num_gpu_pages,
3558                  struct radeon_fence **fence)
3559 {
3560         struct radeon_semaphore *sem = NULL;
3561         int ring_index = rdev->asic->copy.dma_ring_index;
3562         struct radeon_ring *ring = &rdev->ring[ring_index];
3563         u32 size_in_bytes, cur_size_in_bytes;
3564         int i, num_loops;
3565         int r = 0;
3566
3567         r = radeon_semaphore_create(rdev, &sem);
3568         if (r) {
3569                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3570                 return r;
3571         }
3572
3573         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3574         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3575         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3576         if (r) {
3577                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3578                 radeon_semaphore_free(rdev, &sem, NULL);
3579                 return r;
3580         }
3581
3582         if (radeon_fence_need_sync(*fence, ring->idx)) {
3583                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3584                                             ring->idx);
3585                 radeon_fence_note_sync(*fence, ring->idx);
3586         } else {
3587                 radeon_semaphore_free(rdev, &sem, NULL);
3588         }
3589
3590         for (i = 0; i < num_loops; i++) {
3591                 cur_size_in_bytes = size_in_bytes;
3592                 if (cur_size_in_bytes > 0x1fffff)
3593                         cur_size_in_bytes = 0x1fffff;
3594                 size_in_bytes -= cur_size_in_bytes;
3595                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3596                 radeon_ring_write(ring, cur_size_in_bytes);
3597                 radeon_ring_write(ring, 0); /* src/dst endian swap */
3598                 radeon_ring_write(ring, src_offset & 0xffffffff);
3599                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3600                 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3601                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3602                 src_offset += cur_size_in_bytes;
3603                 dst_offset += cur_size_in_bytes;
3604         }
3605
3606         r = radeon_fence_emit(rdev, fence, ring->idx);
3607         if (r) {
3608                 radeon_ring_unlock_undo(rdev, ring);
                     radeon_semaphore_free(rdev, &sem, NULL);
3609                 return r;
3610         }
3611
3612         radeon_ring_unlock_commit(rdev, ring);
3613         radeon_semaphore_free(rdev, &sem, *fence);
3614
3615         return r;
3616 }
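
/*
 * Each 0x1fffff-byte chunk costs the seven ring dwords written in the loop
 * above; the extra 14 dwords in the ring lock cover the semaphore sync and
 * the fence.  A minimal usage sketch (hypothetical GPU addresses; assumes
 * both ranges are already accessible to the GPU):
 */
static int example_dma_copy(struct radeon_device *rdev,
                            uint64_t src, uint64_t dst, unsigned npages)
{
        struct radeon_fence *fence = NULL;
        int r;

        r = cik_copy_dma(rdev, src, dst, npages, &fence);
        if (r)
                return r;
        r = radeon_fence_wait(fence, false);    /* block until the copy lands */
        radeon_fence_unref(&fence);
        return r;
}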
3617
3618 /**
3619  * cik_sdma_ring_test - simple async dma engine test
3620  *
3621  * @rdev: radeon_device pointer
3622  * @ring: radeon_ring structure holding ring information
3623  *
3624  * Test the DMA engine by using it to write a value
3625  * to memory (CIK).
3626  * Returns 0 for success, error for failure.
3627  */
3628 int cik_sdma_ring_test(struct radeon_device *rdev,
3629                        struct radeon_ring *ring)
3630 {
3631         unsigned i;
3632         int r;
3633         void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3634         u32 tmp;
3635
3636         if (!ptr) {
3637                 DRM_ERROR("invalid vram scratch pointer\n");
3638                 return -EINVAL;
3639         }
3640
3641         tmp = 0xCAFEDEAD;
3642         writel(tmp, ptr);
3643
3644         r = radeon_ring_lock(rdev, ring, 4);
3645         if (r) {
3646                 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3647                 return r;
3648         }
3649         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3650         radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3651         radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3652         radeon_ring_write(ring, 1); /* number of DWs to follow */
3653         radeon_ring_write(ring, 0xDEADBEEF);
3654         radeon_ring_unlock_commit(rdev, ring);
3655
3656         for (i = 0; i < rdev->usec_timeout; i++) {
3657                 tmp = readl(ptr);
3658                 if (tmp == 0xDEADBEEF)
3659                         break;
3660                 DRM_UDELAY(1);
3661         }
3662
3663         if (i < rdev->usec_timeout) {
3664                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3665         } else {
3666                 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3667                           ring->idx, tmp);
3668                 r = -EINVAL;
3669         }
3670         return r;
3671 }
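
/*
 * The five dwords pushed above form one SDMA linear WRITE packet.  Dword
 * layout as used by this test (inferred from the code; the destination
 * must be dword aligned):
 *
 *     [0] SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *     [1] destination address, low 32 bits (bits 1:0 zero)
 *     [2] destination address, high 32 bits
 *     [3] number of payload dwords that follow (1 here)
 *     [4] payload (0xDEADBEEF)
 */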
3672
3673 /**
3674  * cik_sdma_ib_test - test an IB on the DMA engine
3675  *
3676  * @rdev: radeon_device pointer
3677  * @ring: radeon_ring structure holding ring information
3678  *
3679  * Test a simple IB in the DMA ring (CIK).
3680  * Returns 0 on success, error on failure.
3681  */
3682 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3683 {
3684         struct radeon_ib ib;
3685         unsigned i;
3686         int r;
3687         void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3688         u32 tmp = 0;
3689
3690         if (!ptr) {
3691                 DRM_ERROR("invalid vram scratch pointer\n");
3692                 return -EINVAL;
3693         }
3694
3695         tmp = 0xCAFEDEAD;
3696         writel(tmp, ptr);
3697
3698         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3699         if (r) {
3700                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3701                 return r;
3702         }
3703
3704         ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3705         ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3706         ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3707         ib.ptr[3] = 1;
3708         ib.ptr[4] = 0xDEADBEEF;
3709         ib.length_dw = 5;
3710
3711         r = radeon_ib_schedule(rdev, &ib, NULL);
3712         if (r) {
3713                 radeon_ib_free(rdev, &ib);
3714                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3715                 return r;
3716         }
3717         r = radeon_fence_wait(ib.fence, false);
3718         if (r) {
3719                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                     radeon_ib_free(rdev, &ib);
3720                 return r;
3721         }
3722         for (i = 0; i < rdev->usec_timeout; i++) {
3723                 tmp = readl(ptr);
3724                 if (tmp == 0xDEADBEEF)
3725                         break;
3726                 DRM_UDELAY(1);
3727         }
3728         if (i < rdev->usec_timeout) {
3729                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3730         } else {
3731                 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3732                 r = -EINVAL;
3733         }
3734         radeon_ib_free(rdev, &ib);
3735         return r;
3736 }
3737
3738
3739 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3740 {
3741         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3742                 RREG32(GRBM_STATUS));
3743         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3744                 RREG32(GRBM_STATUS2));
3745         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3746                 RREG32(GRBM_STATUS_SE0));
3747         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3748                 RREG32(GRBM_STATUS_SE1));
3749         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3750                 RREG32(GRBM_STATUS_SE2));
3751         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3752                 RREG32(GRBM_STATUS_SE3));
3753         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3754                 RREG32(SRBM_STATUS));
3755         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3756                 RREG32(SRBM_STATUS2));
3757         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3758                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3759         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3760                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3761         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3762         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3763                  RREG32(CP_STALLED_STAT1));
3764         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3765                  RREG32(CP_STALLED_STAT2));
3766         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3767                  RREG32(CP_STALLED_STAT3));
3768         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3769                  RREG32(CP_CPF_BUSY_STAT));
3770         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3771                  RREG32(CP_CPF_STALLED_STAT1));
3772         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3773         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3774         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3775                  RREG32(CP_CPC_STALLED_STAT1));
3776         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3777 }
3778
3779 /**
3780  * cik_gpu_check_soft_reset - check which blocks are busy
3781  *
3782  * @rdev: radeon_device pointer
3783  *
3784  * Check which blocks are busy and return the relevant reset
3785  * mask to be used by cik_gpu_soft_reset().
3786  * Returns a mask of the blocks to be reset.
3787  */
3788 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3789 {
3790         u32 reset_mask = 0;
3791         u32 tmp;
3792
3793         /* GRBM_STATUS */
3794         tmp = RREG32(GRBM_STATUS);
3795         if (tmp & (PA_BUSY | SC_BUSY |
3796                    BCI_BUSY | SX_BUSY |
3797                    TA_BUSY | VGT_BUSY |
3798                    DB_BUSY | CB_BUSY |
3799                    GDS_BUSY | SPI_BUSY |
3800                    IA_BUSY | IA_BUSY_NO_DMA))
3801                 reset_mask |= RADEON_RESET_GFX;
3802
3803         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3804                 reset_mask |= RADEON_RESET_CP;
3805
3806         /* GRBM_STATUS2 */
3807         tmp = RREG32(GRBM_STATUS2);
3808         if (tmp & RLC_BUSY)
3809                 reset_mask |= RADEON_RESET_RLC;
3810
3811         /* SDMA0_STATUS_REG */
3812         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3813         if (!(tmp & SDMA_IDLE))
3814                 reset_mask |= RADEON_RESET_DMA;
3815
3816         /* SDMA1_STATUS_REG */
3817         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3818         if (!(tmp & SDMA_IDLE))
3819                 reset_mask |= RADEON_RESET_DMA1;
3820
3821         /* SRBM_STATUS2 */
3822         tmp = RREG32(SRBM_STATUS2);
3823         if (tmp & SDMA_BUSY)
3824                 reset_mask |= RADEON_RESET_DMA;
3825
3826         if (tmp & SDMA1_BUSY)
3827                 reset_mask |= RADEON_RESET_DMA1;
3828
3829         /* SRBM_STATUS */
3830         tmp = RREG32(SRBM_STATUS);
3831
3832         if (tmp & IH_BUSY)
3833                 reset_mask |= RADEON_RESET_IH;
3834
3835         if (tmp & SEM_BUSY)
3836                 reset_mask |= RADEON_RESET_SEM;
3837
3838         if (tmp & GRBM_RQ_PENDING)
3839                 reset_mask |= RADEON_RESET_GRBM;
3840
3841         if (tmp & VMC_BUSY)
3842                 reset_mask |= RADEON_RESET_VMC;
3843
3844         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3845                    MCC_BUSY | MCD_BUSY))
3846                 reset_mask |= RADEON_RESET_MC;
3847
3848         if (evergreen_is_display_hung(rdev))
3849                 reset_mask |= RADEON_RESET_DISPLAY;
3850
3851         /* Skip MC reset as it's most likely not hung, just busy */
3852         if (reset_mask & RADEON_RESET_MC) {
3853                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3854                 reset_mask &= ~RADEON_RESET_MC;
3855         }
3856
3857         return reset_mask;
3858 }
3859
3860 /**
3861  * cik_gpu_soft_reset - soft reset GPU
3862  *
3863  * @rdev: radeon_device pointer
3864  * @reset_mask: mask of which blocks to reset
3865  *
3866  * Soft reset the blocks specified in @reset_mask.
3867  */
3868 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3869 {
3870         struct evergreen_mc_save save;
3871         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3872         u32 tmp;
3873
3874         if (reset_mask == 0)
3875                 return;
3876
3877         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3878
3879         cik_print_gpu_status_regs(rdev);
3880         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3881                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3882         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3883                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3884
3885         /* stop the rlc */
3886         cik_rlc_stop(rdev);
3887
3888         /* Disable GFX parsing/prefetching */
3889         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3890
3891         /* Disable MEC parsing/prefetching */
3892         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3893
3894         if (reset_mask & RADEON_RESET_DMA) {
3895                 /* sdma0 */
3896                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3897                 tmp |= SDMA_HALT;
3898                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3899         }
3900         if (reset_mask & RADEON_RESET_DMA1) {
3901                 /* sdma1 */
3902                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3903                 tmp |= SDMA_HALT;
3904                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3905         }
3906
3907         evergreen_mc_stop(rdev, &save);
3908         if (evergreen_mc_wait_for_idle(rdev)) {
3909                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3910         }
3911
3912         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3913                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3914
3915         if (reset_mask & RADEON_RESET_CP) {
3916                 grbm_soft_reset |= SOFT_RESET_CP;
3917
3918                 srbm_soft_reset |= SOFT_RESET_GRBM;
3919         }
3920
3921         if (reset_mask & RADEON_RESET_DMA)
3922                 srbm_soft_reset |= SOFT_RESET_SDMA;
3923
3924         if (reset_mask & RADEON_RESET_DMA1)
3925                 srbm_soft_reset |= SOFT_RESET_SDMA1;
3926
3927         if (reset_mask & RADEON_RESET_DISPLAY)
3928                 srbm_soft_reset |= SOFT_RESET_DC;
3929
3930         if (reset_mask & RADEON_RESET_RLC)
3931                 grbm_soft_reset |= SOFT_RESET_RLC;
3932
3933         if (reset_mask & RADEON_RESET_SEM)
3934                 srbm_soft_reset |= SOFT_RESET_SEM;
3935
3936         if (reset_mask & RADEON_RESET_IH)
3937                 srbm_soft_reset |= SOFT_RESET_IH;
3938
3939         if (reset_mask & RADEON_RESET_GRBM)
3940                 srbm_soft_reset |= SOFT_RESET_GRBM;
3941
3942         if (reset_mask & RADEON_RESET_VMC)
3943                 srbm_soft_reset |= SOFT_RESET_VMC;
3944
3945         if (!(rdev->flags & RADEON_IS_IGP)) {
3946                 if (reset_mask & RADEON_RESET_MC)
3947                         srbm_soft_reset |= SOFT_RESET_MC;
3948         }
3949
3950         if (grbm_soft_reset) {
3951                 tmp = RREG32(GRBM_SOFT_RESET);
3952                 tmp |= grbm_soft_reset;
3953                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3954                 WREG32(GRBM_SOFT_RESET, tmp);
3955                 tmp = RREG32(GRBM_SOFT_RESET);
3956
3957                 udelay(50);
3958
3959                 tmp &= ~grbm_soft_reset;
3960                 WREG32(GRBM_SOFT_RESET, tmp);
3961                 tmp = RREG32(GRBM_SOFT_RESET);
3962         }
3963
3964         if (srbm_soft_reset) {
3965                 tmp = RREG32(SRBM_SOFT_RESET);
3966                 tmp |= srbm_soft_reset;
3967                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3968                 WREG32(SRBM_SOFT_RESET, tmp);
3969                 tmp = RREG32(SRBM_SOFT_RESET);
3970
3971                 udelay(50);
3972
3973                 tmp &= ~srbm_soft_reset;
3974                 WREG32(SRBM_SOFT_RESET, tmp);
3975                 tmp = RREG32(SRBM_SOFT_RESET);
3976         }
3977
3978         /* Wait a little for things to settle down */
3979         udelay(50);
3980
3981         evergreen_mc_resume(rdev, &save);
3982         udelay(50);
3983
3984         cik_print_gpu_status_regs(rdev);
3985 }
3986
3987 /**
3988  * cik_asic_reset - soft reset GPU
3989  *
3990  * @rdev: radeon_device pointer
3991  *
3992  * Look up which blocks are hung and attempt
3993  * to reset them.
3994  * Returns 0 for success.
3995  */
3996 int cik_asic_reset(struct radeon_device *rdev)
3997 {
3998         u32 reset_mask;
3999
4000         reset_mask = cik_gpu_check_soft_reset(rdev);
4001
4002         if (reset_mask)
4003                 r600_set_bios_scratch_engine_hung(rdev, true);
4004
4005         cik_gpu_soft_reset(rdev, reset_mask);
4006
4007         reset_mask = cik_gpu_check_soft_reset(rdev);
4008
4009         if (!reset_mask)
4010                 r600_set_bios_scratch_engine_hung(rdev, false);
4011
4012         return 0;
4013 }
4014
4015 /**
4016  * cik_gfx_is_lockup - check if the 3D engine is locked up
4017  *
4018  * @rdev: radeon_device pointer
4019  * @ring: radeon_ring structure holding ring information
4020  *
4021  * Check if the 3D engine is locked up (CIK).
4022  * Returns true if the engine is locked, false if not.
4023  */
4024 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4025 {
4026         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4027
4028         if (!(reset_mask & (RADEON_RESET_GFX |
4029                             RADEON_RESET_COMPUTE |
4030                             RADEON_RESET_CP))) {
4031                 radeon_ring_lockup_update(ring);
4032                 return false;
4033         }
4034         /* force CP activities */
4035         radeon_ring_force_activity(rdev, ring);
4036         return radeon_ring_test_lockup(rdev, ring);
4037 }
4038
4039 /**
4040  * cik_sdma_is_lockup - Check if the DMA engine is locked up
4041  *
4042  * @rdev: radeon_device pointer
4043  * @ring: radeon_ring structure holding ring information
4044  *
4045  * Check if the async DMA engine is locked up (CIK).
4046  * Returns true if the engine appears to be locked up, false if not.
4047  */
4048 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4049 {
4050         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4051         u32 mask;
4052
4053         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4054                 mask = RADEON_RESET_DMA;
4055         else
4056                 mask = RADEON_RESET_DMA1;
4057
4058         if (!(reset_mask & mask)) {
4059                 radeon_ring_lockup_update(ring);
4060                 return false;
4061         }
4062         /* force ring activities */
4063         radeon_ring_force_activity(rdev, ring);
4064         return radeon_ring_test_lockup(rdev, ring);
4065 }
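
/*
 * cik_gfx_is_lockup() and cik_sdma_is_lockup() share the same shape; a
 * hypothetical refactor that makes the pattern explicit (sketch only):
 */
static bool cik_ring_is_lockup(struct radeon_device *rdev,
                               struct radeon_ring *ring, u32 busy_mask)
{
        if (!(cik_gpu_check_soft_reset(rdev) & busy_mask)) {
                /* the engine is idle as far as the reset logic can tell */
                radeon_ring_lockup_update(ring);
                return false;
        }
        /* the engine looks busy; poke it and see if it makes progress */
        radeon_ring_force_activity(rdev, ring);
        return radeon_ring_test_lockup(rdev, ring);
}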
4066
4067 /* MC */
4068 /**
4069  * cik_mc_program - program the GPU memory controller
4070  *
4071  * @rdev: radeon_device pointer
4072  *
4073  * Set the location of vram, gart, and AGP in the GPU's
4074  * physical address space (CIK).
4075  */
4076 static void cik_mc_program(struct radeon_device *rdev)
4077 {
4078         struct evergreen_mc_save save;
4079         u32 tmp;
4080         int i, j;
4081
4082         /* Initialize HDP */
4083         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4084                 WREG32((0x2c14 + j), 0x00000000);
4085                 WREG32((0x2c18 + j), 0x00000000);
4086                 WREG32((0x2c1c + j), 0x00000000);
4087                 WREG32((0x2c20 + j), 0x00000000);
4088                 WREG32((0x2c24 + j), 0x00000000);
4089         }
4090         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4091
4092         evergreen_mc_stop(rdev, &save);
4093         if (radeon_mc_wait_for_idle(rdev)) {
4094                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4095         }
4096         /* Lockout access through VGA aperture*/
4097         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4098         /* Update configuration */
4099         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4100                rdev->mc.vram_start >> 12);
4101         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4102                rdev->mc.vram_end >> 12);
4103         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4104                rdev->vram_scratch.gpu_addr >> 12);
4105         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4106         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4107         WREG32(MC_VM_FB_LOCATION, tmp);
4108         /* XXX double check these! */
4109         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4110         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4111         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4112         WREG32(MC_VM_AGP_BASE, 0);
4113         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4114         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4115         if (radeon_mc_wait_for_idle(rdev)) {
4116                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4117         }
4118         evergreen_mc_resume(rdev, &save);
4119         /* we need to own VRAM, so turn off the VGA renderer here
4120          * to stop it overwriting our objects */
4121         rv515_vga_render_disable(rdev);
4122 }
4123
4124 /**
4125  * cik_mc_init - initialize the memory controller driver params
4126  *
4127  * @rdev: radeon_device pointer
4128  *
4129  * Look up the amount of vram, vram width, and decide how to place
4130  * vram and gart within the GPU's physical address space (CIK).
4131  * Returns 0 for success.
4132  */
4133 static int cik_mc_init(struct radeon_device *rdev)
4134 {
4135         u32 tmp;
4136         int chansize, numchan;
4137
4138         /* Get VRAM information */
4139         rdev->mc.vram_is_ddr = true;
4140         tmp = RREG32(MC_ARB_RAMCFG);
4141         if (tmp & CHANSIZE_MASK) {
4142                 chansize = 64;
4143         } else {
4144                 chansize = 32;
4145         }
4146         tmp = RREG32(MC_SHARED_CHMAP);
4147         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4148         case 0:
4149         default:
4150                 numchan = 1;
4151                 break;
4152         case 1:
4153                 numchan = 2;
4154                 break;
4155         case 2:
4156                 numchan = 4;
4157                 break;
4158         case 3:
4159                 numchan = 8;
4160                 break;
4161         case 4:
4162                 numchan = 3;
4163                 break;
4164         case 5:
4165                 numchan = 6;
4166                 break;
4167         case 6:
4168                 numchan = 10;
4169                 break;
4170         case 7:
4171                 numchan = 12;
4172                 break;
4173         case 8:
4174                 numchan = 16;
4175                 break;
4176         }
4177         rdev->mc.vram_width = numchan * chansize;
4178         /* Could aper size report 0? */
4179         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4180         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4181         /* size in MB on CIK */
4182         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4183         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4184         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4185         si_vram_gtt_location(rdev, &rdev->mc);
4186         radeon_update_bandwidth_info(rdev);
4187
4188         return 0;
4189 }
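
/*
 * Worked example for the width calculation above (register values assumed
 * for illustration): NOOFCHAN == 3 decodes to numchan = 8 and a set
 * CHANSIZE bit gives chansize = 64, so
 *
 *     rdev->mc.vram_width = 8 * 64 = 512 bits
 *
 * i.e. a 512-bit effective memory interface.
 */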
4190
4191 /*
4192  * GART
4193  * VMID 0 is the physical GPU addresses as used by the kernel.
4194  * VMIDs 1-15 are used for userspace clients and are handled
4195  * by the radeon vm/hsa code.
4196  */
4197 /**
4198  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4199  *
4200  * @rdev: radeon_device pointer
4201  *
4202  * Flush the TLB for the VMID 0 page table (CIK).
4203  */
4204 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4205 {
4206         /* flush hdp cache */
4207         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4208
4209         /* bits 0-15 are the VM contexts0-15 */
4210         WREG32(VM_INVALIDATE_REQUEST, 0x1);
4211 }
4212
4213 /**
4214  * cik_pcie_gart_enable - gart enable
4215  *
4216  * @rdev: radeon_device pointer
4217  *
4218  * This sets up the TLBs, programs the page tables for VMID0,
4219  * sets up the hw for VMIDs 1-15 which are allocated on
4220  * demand, and sets up the global locations for the LDS, GDS,
4221  * and GPUVM for FSA64 clients (CIK).
4222  * Returns 0 for success, errors for failure.
4223  */
4224 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4225 {
4226         int r, i;
4227
4228         if (rdev->gart.robj == NULL) {
4229                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4230                 return -EINVAL;
4231         }
4232         r = radeon_gart_table_vram_pin(rdev);
4233         if (r)
4234                 return r;
4235         radeon_gart_restore(rdev);
4236         /* Setup TLB control */
4237         WREG32(MC_VM_MX_L1_TLB_CNTL,
4238                (0xA << 7) |
4239                ENABLE_L1_TLB |
4240                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4241                ENABLE_ADVANCED_DRIVER_MODEL |
4242                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4243         /* Setup L2 cache */
4244         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4245                ENABLE_L2_FRAGMENT_PROCESSING |
4246                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4247                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4248                EFFECTIVE_L2_QUEUE_SIZE(7) |
4249                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4250         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4251         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4252                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4253         /* setup context0 */
4254         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4255         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4256         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4257         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4258                         (u32)(rdev->dummy_page.addr >> 12));
4259         WREG32(VM_CONTEXT0_CNTL2, 0);
4260         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4261                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4262
4263         WREG32(0x15D4, 0);
4264         WREG32(0x15D8, 0);
4265         WREG32(0x15DC, 0);
4266
4267         /* empty context1-15 */
4268         /* FIXME: start with a 4GB VM space; once 2-level page tables
4269          * are in use, switch to the full VM address space
4270          */
4271         /* set vm size, must be a multiple of 4 */
4272         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4273         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4274         for (i = 1; i < 16; i++) {
4275                 if (i < 8)
4276                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4277                                rdev->gart.table_addr >> 12);
4278                 else
4279                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4280                                rdev->gart.table_addr >> 12);
4281         }
4282
4283         /* enable context1-15 */
4284         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4285                (u32)(rdev->dummy_page.addr >> 12));
4286         WREG32(VM_CONTEXT1_CNTL2, 4);
4287         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4288                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4289                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4290                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4291                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4292                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4293                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4294                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4295                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4296                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4297                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4298                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4299                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4300
4301         /* TC cache setup ??? */
4302         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4303         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4304         WREG32(TC_CFG_L1_STORE_POLICY, 0);
4305
4306         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4307         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4308         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4309         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4310         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4311
4312         WREG32(TC_CFG_L1_VOLATILE, 0);
4313         WREG32(TC_CFG_L2_VOLATILE, 0);
4314
4315         if (rdev->family == CHIP_KAVERI) {
4316                 u32 tmp = RREG32(CHUB_CONTROL);
4317                 tmp &= ~BYPASS_VM;
4318                 WREG32(CHUB_CONTROL, tmp);
4319         }
4320
4321         /* XXX SH_MEM regs */
4322         /* where to put LDS, scratch, GPUVM in FSA64 space */
4323         for (i = 0; i < 16; i++) {
4324                 cik_srbm_select(rdev, 0, 0, 0, i);
4325                 /* CP and shaders */
4326                 WREG32(SH_MEM_CONFIG, 0);
4327                 WREG32(SH_MEM_APE1_BASE, 1);
4328                 WREG32(SH_MEM_APE1_LIMIT, 0);
4329                 WREG32(SH_MEM_BASES, 0);
4330                 /* SDMA GFX */
4331                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4332                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4333                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4334                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4335                 /* XXX SDMA RLC - todo */
4336         }
4337         cik_srbm_select(rdev, 0, 0, 0, 0);
4338
4339         cik_pcie_gart_tlb_flush(rdev);
4340         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4341                  (unsigned)(rdev->mc.gtt_size >> 20),
4342                  (unsigned long long)rdev->gart.table_addr);
4343         rdev->gart.ready = true;
4344         return 0;
4345 }
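
/*
 * The per-VMID page table base registers are split across two banks:
 * contexts 0-7 at VM_CONTEXT0_PAGE_TABLE_BASE_ADDR and contexts 8-15 at
 * VM_CONTEXT8_PAGE_TABLE_BASE_ADDR, four bytes apart within each bank.
 * The same addressing recurs in cik_vm_flush() and cik_dma_vm_flush()
 * below; a hypothetical helper that captures it:
 */
static u32 cik_vm_pt_base_reg(unsigned vmid)
{
        if (vmid < 8)
                return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
        return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}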
4346
4347 /**
4348  * cik_pcie_gart_disable - gart disable
4349  *
4350  * @rdev: radeon_device pointer
4351  *
4352  * This disables all VM page tables (CIK).
4353  */
4354 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4355 {
4356         /* Disable all tables */
4357         WREG32(VM_CONTEXT0_CNTL, 0);
4358         WREG32(VM_CONTEXT1_CNTL, 0);
4359         /* Setup TLB control */
4360         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4361                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4362         /* Setup L2 cache */
4363         WREG32(VM_L2_CNTL,
4364                ENABLE_L2_FRAGMENT_PROCESSING |
4365                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4366                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4367                EFFECTIVE_L2_QUEUE_SIZE(7) |
4368                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4369         WREG32(VM_L2_CNTL2, 0);
4370         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4371                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4372         radeon_gart_table_vram_unpin(rdev);
4373 }
4374
4375 /**
4376  * cik_pcie_gart_fini - vm fini callback
4377  *
4378  * @rdev: radeon_device pointer
4379  *
4380  * Tears down the driver GART/VM setup (CIK).
4381  */
4382 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4383 {
4384         cik_pcie_gart_disable(rdev);
4385         radeon_gart_table_vram_free(rdev);
4386         radeon_gart_fini(rdev);
4387 }
4388
4389 /* vm parser */
4390 /**
4391  * cik_ib_parse - vm ib_parse callback
4392  *
4393  * @rdev: radeon_device pointer
4394  * @ib: indirect buffer pointer
4395  *
4396  * CIK uses hw IB checking so this is a nop (CIK).
4397  */
4398 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4399 {
4400         return 0;
4401 }
4402
4403 /*
4404  * vm
4405  * VMID 0 is the physical GPU addresses as used by the kernel.
4406  * VMIDs 1-15 are used for userspace clients and are handled
4407  * by the radeon vm/hsa code.
4408  */
4409 /**
4410  * cik_vm_init - cik vm init callback
4411  *
4412  * @rdev: radeon_device pointer
4413  *
4414  * Inits cik specific vm parameters (number of VMs, base of vram for
4415  * VMIDs 1-15) (CIK).
4416  * Returns 0 for success.
4417  */
4418 int cik_vm_init(struct radeon_device *rdev)
4419 {
4420         /* number of VMs */
4421         rdev->vm_manager.nvm = 16;
4422         /* base offset of vram pages */
4423         if (rdev->flags & RADEON_IS_IGP) {
4424                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4425                 tmp <<= 22;
4426                 rdev->vm_manager.vram_base_offset = tmp;
4427         } else
4428                 rdev->vm_manager.vram_base_offset = 0;
4429
4430         return 0;
4431 }
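
/*
 * Worked example for the IGP branch above: the 22-bit shift implies
 * MC_VM_FB_OFFSET counts in 4 MiB units, so a (hypothetical) register
 * value of 0x40 places the VRAM base at 0x40 << 22 = 256 MiB in the GPU
 * physical address space.
 */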
4432
4433 /**
4434  * cik_vm_fini - cik vm fini callback
4435  *
4436  * @rdev: radeon_device pointer
4437  *
4438  * Tear down any asic specific VM setup (CIK).
4439  */
4440 void cik_vm_fini(struct radeon_device *rdev)
4441 {
4442 }
4443
4444 /**
4445  * cik_vm_decode_fault - print human readable fault info
4446  *
4447  * @rdev: radeon_device pointer
4448  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4449  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
      * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4450  *
4451  * Print human readable fault information (CIK).
4452  */
4453 static void cik_vm_decode_fault(struct radeon_device *rdev,
4454                                 u32 status, u32 addr, u32 mc_client)
4455 {
4456         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4457         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4458         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4459         char *block = (char *)&mc_client;
4460
4461         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4462                protections, vmid, addr,
4463                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4464                block, mc_id);
4465 }
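
/*
 * The block/mc_client trick above works because the fault MCCLIENT value
 * packs a four-character ASCII tag; casting the u32's address to char *
 * lets printk render those bytes in memory order.  Illustrative value
 * (hypothetical, little-endian host):
 *
 *     u32 mc_client = 0x00424443;        bytes 'C','D','B','\0'
 *     char *block = (char *)&mc_client;  printk("%s", block) -> "CDB"
 */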
4466
4467 /**
4468  * cik_vm_flush - cik vm flush using the CP
4469  *
4470  * @rdev: radeon_device pointer
4471  *
4472  * Update the page table base and flush the VM TLB
4473  * using the CP (CIK).
4474  */
4475 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4476 {
4477         struct radeon_ring *ring = &rdev->ring[ridx];
4478
4479         if (vm == NULL)
4480                 return;
4481
4482         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4483         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4484                                  WRITE_DATA_DST_SEL(0)));
4485         if (vm->id < 8) {
4486                 radeon_ring_write(ring,
4487                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4488         } else {
4489                 radeon_ring_write(ring,
4490                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4491         }
4492         radeon_ring_write(ring, 0);
4493         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4494
4495         /* update SH_MEM_* regs */
4496         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4497         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4498                                  WRITE_DATA_DST_SEL(0)));
4499         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4500         radeon_ring_write(ring, 0);
4501         radeon_ring_write(ring, VMID(vm->id));
4502
4503         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4504         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4505                                  WRITE_DATA_DST_SEL(0)));
4506         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4507         radeon_ring_write(ring, 0);
4508
4509         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4510         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4511         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4512         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4513
4514         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4515         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4516                                  WRITE_DATA_DST_SEL(0)));
4517         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4518         radeon_ring_write(ring, 0);
4519         radeon_ring_write(ring, VMID(0));
4520
4521         /* HDP flush */
4522         /* We should be using the WAIT_REG_MEM packet here like in
4523          * cik_fence_ring_emit(), but it causes the CP to hang in this
4524          * context...
4525          */
4526         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4527         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4528                                  WRITE_DATA_DST_SEL(0)));
4529         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4530         radeon_ring_write(ring, 0);
4531         radeon_ring_write(ring, 0);
4532
4533         /* bits 0-15 are the VM contexts0-15 */
4534         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4535         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4536                                  WRITE_DATA_DST_SEL(0)));
4537         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4538         radeon_ring_write(ring, 0);
4539         radeon_ring_write(ring, 1 << vm->id);
4540
4541         /* compute doesn't have PFP */
4542         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4543                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4544                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4545                 radeon_ring_write(ring, 0x0);
4546         }
4547 }
4548
4549 /**
4550  * cik_vm_set_page - update the page tables using CP or sDMA
4551  *
4552  * @rdev: radeon_device pointer
4553  * @ib: indirect buffer to fill with commands
4554  * @pe: addr of the page entry
4555  * @addr: dst addr to write into pe
4556  * @count: number of page entries to update
4557  * @incr: increase next addr by incr bytes
4558  * @flags: access flags
4559  *
4560  * Update the page tables using CP or sDMA (CIK).
4561  */
4562 void cik_vm_set_page(struct radeon_device *rdev,
4563                      struct radeon_ib *ib,
4564                      uint64_t pe,
4565                      uint64_t addr, unsigned count,
4566                      uint32_t incr, uint32_t flags)
4567 {
4568         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4569         uint64_t value;
4570         unsigned ndw;
4571
4572         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4573                 /* CP */
4574                 while (count) {
4575                         ndw = 2 + count * 2;
4576                         if (ndw > 0x3FFE)
4577                                 ndw = 0x3FFE;
4578
4579                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4580                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4581                                                     WRITE_DATA_DST_SEL(1));
4582                         ib->ptr[ib->length_dw++] = pe;
4583                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4584                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4585                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4586                                         value = radeon_vm_map_gart(rdev, addr);
4587                                         value &= 0xFFFFFFFFFFFFF000ULL;
4588                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4589                                         value = addr;
4590                                 } else {
4591                                         value = 0;
4592                                 }
4593                                 addr += incr;
4594                                 value |= r600_flags;
4595                                 ib->ptr[ib->length_dw++] = value;
4596                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4597                         }
4598                 }
4599         } else {
4600                 /* DMA */
4601                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4602                         while (count) {
4603                                 ndw = count * 2;
4604                                 if (ndw > 0xFFFFE)
4605                                         ndw = 0xFFFFE;
4606
4607                                 /* for non-physically contiguous pages (system) */
4608                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4609                                 ib->ptr[ib->length_dw++] = pe;
4610                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4611                                 ib->ptr[ib->length_dw++] = ndw;
4612                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4613                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4614                                                 value = radeon_vm_map_gart(rdev, addr);
4615                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4616                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4617                                                 value = addr;
4618                                         } else {
4619                                                 value = 0;
4620                                         }
4621                                         addr += incr;
4622                                         value |= r600_flags;
4623                                         ib->ptr[ib->length_dw++] = value;
4624                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4625                                 }
4626                         }
4627                 } else {
4628                         while (count) {
4629                                 ndw = count;
4630                                 if (ndw > 0x7FFFF)
4631                                         ndw = 0x7FFFF;
4632
4633                                 if (flags & RADEON_VM_PAGE_VALID)
4634                                         value = addr;
4635                                 else
4636                                         value = 0;
4637                                 /* for physically contiguous pages (vram) */
4638                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4639                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4640                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4641                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4642                                 ib->ptr[ib->length_dw++] = 0;
4643                                 ib->ptr[ib->length_dw++] = value; /* value */
4644                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4645                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4646                                 ib->ptr[ib->length_dw++] = 0;
4647                                 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4648                                 pe += ndw * 8;
4649                                 addr += ndw * incr;
4650                                 count -= ndw;
4651                         }
4652                 }
4653                 while (ib->length_dw & 0x7)
4654                         ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4655         }
4656 }
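
/*
 * The trailing NOP loop pads the IB to a multiple of 8 dwords, which the
 * sDMA engine appears to require for its fetches (inferred from the
 * alignment mask above).  Worked example: an IB of 13 dwords gets three
 * NOPs appended, ending at length_dw == 16.
 */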
4657
4658 /**
4659  * cik_dma_vm_flush - cik vm flush using sDMA
4660  *
4661  * @rdev: radeon_device pointer
4662  *
4663  * Update the page table base and flush the VM TLB
4664  * using sDMA (CIK).
4665  */
4666 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4667 {
4668         struct radeon_ring *ring = &rdev->ring[ridx];
4669         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4670                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4671         u32 ref_and_mask;
4672
4673         if (vm == NULL)
4674                 return;
4675
4676         if (ridx == R600_RING_TYPE_DMA_INDEX)
4677                 ref_and_mask = SDMA0;
4678         else
4679                 ref_and_mask = SDMA1;
4680
4681         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4682         if (vm->id < 8) {
4683                 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4684         } else {
4685                 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4686         }
4687         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4688
4689         /* update SH_MEM_* regs */
4690         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4691         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4692         radeon_ring_write(ring, VMID(vm->id));
4693
4694         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4696         radeon_ring_write(ring, 0);
4697
4698         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699         radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4700         radeon_ring_write(ring, 0);
4701
4702         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4703         radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4704         radeon_ring_write(ring, 1);
4705
4706         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4707         radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4708         radeon_ring_write(ring, 0);
4709
4710         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4711         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4712         radeon_ring_write(ring, VMID(0));
4713
4714         /* flush HDP */
4715         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4716         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4717         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4718         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4719         radeon_ring_write(ring, ref_and_mask); /* MASK */
4720         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4721
4722         /* flush TLB */
4723         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4724         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4725         radeon_ring_write(ring, 1 << vm->id);
4726 }
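
/*
 * Every register update above is the same three-dword SRBM_WRITE sequence:
 * packet header, dword-aligned register offset, value.  A hypothetical
 * helper that would factor it out (sketch only):
 */
static void cik_sdma_srbm_write(struct radeon_ring *ring, u32 reg, u32 val)
{
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
        radeon_ring_write(ring, reg >> 2);      /* register offset in dwords */
        radeon_ring_write(ring, val);
}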
4727
4728 /*
4729  * RLC
4730  * The RLC is a multi-purpose microengine that handles a
4731  * variety of functions, the most important of which is
4732  * the interrupt controller.
4733  */
4734 /**
4735  * cik_rlc_stop - stop the RLC ME
4736  *
4737  * @rdev: radeon_device pointer
4738  *
4739  * Halt the RLC ME (MicroEngine) (CIK).
4740  */
4741 static void cik_rlc_stop(struct radeon_device *rdev)
4742 {
4743         int i, j, k;
4744         u32 mask, tmp;
4745
4746         tmp = RREG32(CP_INT_CNTL_RING0);
4747         tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4748         WREG32(CP_INT_CNTL_RING0, tmp);
4749
4750         RREG32(CB_CGTT_SCLK_CTRL);
4751         RREG32(CB_CGTT_SCLK_CTRL);
4752         RREG32(CB_CGTT_SCLK_CTRL);
4753         RREG32(CB_CGTT_SCLK_CTRL);
4754
4755         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4756         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4757
4758         WREG32(RLC_CNTL, 0);
4759
4760         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4761                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4762                         cik_select_se_sh(rdev, i, j);
4763                         for (k = 0; k < rdev->usec_timeout; k++) {
4764                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4765                                         break;
4766                                 udelay(1);
4767                         }
4768                 }
4769         }
4770         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4771
4772         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4773         for (k = 0; k < rdev->usec_timeout; k++) {
4774                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4775                         break;
4776                 udelay(1);
4777         }
4778 }
4779
4780 /**
4781  * cik_rlc_start - start the RLC ME
4782  *
4783  * @rdev: radeon_device pointer
4784  *
4785  * Unhalt the RLC ME (MicroEngine) (CIK).
4786  */
4787 static void cik_rlc_start(struct radeon_device *rdev)
4788 {
4789         u32 tmp;
4790
4791         WREG32(RLC_CNTL, RLC_ENABLE);
4792
4793         tmp = RREG32(CP_INT_CNTL_RING0);
4794         tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4795         WREG32(CP_INT_CNTL_RING0, tmp);
4796
4797         udelay(50);
4798 }
4799
4800 /**
4801  * cik_rlc_resume - setup the RLC hw
4802  *
4803  * @rdev: radeon_device pointer
4804  *
4805  * Initialize the RLC registers, load the ucode,
4806  * and start the RLC (CIK).
4807  * Returns 0 for success, -EINVAL if the ucode is not available.
4808  */
4809 static int cik_rlc_resume(struct radeon_device *rdev)
4810 {
4811         u32 i, size;
4812         u32 clear_state_info[3];
4813         const __be32 *fw_data;
4814
4815         if (!rdev->rlc_fw)
4816                 return -EINVAL;
4817
4818         switch (rdev->family) {
4819         case CHIP_BONAIRE:
4820         default:
4821                 size = BONAIRE_RLC_UCODE_SIZE;
4822                 break;
4823         case CHIP_KAVERI:
4824                 size = KV_RLC_UCODE_SIZE;
4825                 break;
4826         case CHIP_KABINI:
4827                 size = KB_RLC_UCODE_SIZE;
4828                 break;
4829         }
4830
4831         cik_rlc_stop(rdev);
4832
4833         WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4834         RREG32(GRBM_SOFT_RESET);
4835         udelay(50);
4836         WREG32(GRBM_SOFT_RESET, 0);
4837         RREG32(GRBM_SOFT_RESET);
4838         udelay(50);
4839
4840         WREG32(RLC_LB_CNTR_INIT, 0);
4841         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4842
4843         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4844         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4845         WREG32(RLC_LB_PARAMS, 0x00600408);
4846         WREG32(RLC_LB_CNTL, 0x80000004);
4847
4848         WREG32(RLC_MC_CNTL, 0);
4849         WREG32(RLC_UCODE_CNTL, 0);
4850
4851         fw_data = (const __be32 *)rdev->rlc_fw->data;
4852         WREG32(RLC_GPM_UCODE_ADDR, 0);
4853         for (i = 0; i < size; i++)
4854                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4855         WREG32(RLC_GPM_UCODE_ADDR, 0);
4856
4857         /* XXX */
4858         clear_state_info[0] = 0; /* upper_32_bits(rdev->rlc.save_restore_gpu_addr) */
4859         clear_state_info[1] = 0; /* rdev->rlc.save_restore_gpu_addr */
4860         clear_state_info[2] = 0; /* cik_default_size */
4861         WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4862         for (i = 0; i < 3; i++)
4863                 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4864         WREG32(RLC_DRIVER_DMA_STATUS, 0);
4865
4866         cik_rlc_start(rdev);
4867
4868         return 0;
4869 }
4870
4871 /*
4872  * Interrupts
4873  * Starting with r6xx, interrupts are handled via a ring buffer.
4874  * Ring buffers are areas of GPU accessible memory that the GPU
4875  * writes interrupt vectors into and the host reads vectors out of.
4876  * There is a rptr (read pointer) that determines where the
4877  * host is currently reading, and a wptr (write pointer)
4878  * which determines where the GPU has written.  When the
4879  * pointers are equal, the ring is idle.  When the GPU
4880  * writes vectors to the ring buffer, it increments the
4881  * wptr.  When there is an interrupt, the host then starts
4882  * fetching vectors and processing them until the pointers are
4883  * equal again, at which point it updates the rptr.
4884  */
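
/*
 * A minimal sketch of the consumer loop described above (names and the
 * 16-byte vector stride are assumptions for illustration; the driver's
 * real IRQ handler also deals with overflow and memory barriers):
 *
 *     while (rptr != wptr) {
 *             u32 src_id = le32_to_cpu(ih_ring[rptr / 4]) & 0xff;
 *             handle_vector(src_id);           decode and dispatch
 *             rptr = (rptr + 16) & ptr_mask;   advance one vector
 *     }
 *     WREG32(IH_RB_RPTR, rptr);                tell the GPU we caught up
 */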
4885
4886 /**
4887  * cik_enable_interrupts - Enable the interrupt ring buffer
4888  *
4889  * @rdev: radeon_device pointer
4890  *
4891  * Enable the interrupt ring buffer (CIK).
4892  */
4893 static void cik_enable_interrupts(struct radeon_device *rdev)
4894 {
4895         u32 ih_cntl = RREG32(IH_CNTL);
4896         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4897
4898         ih_cntl |= ENABLE_INTR;
4899         ih_rb_cntl |= IH_RB_ENABLE;
4900         WREG32(IH_CNTL, ih_cntl);
4901         WREG32(IH_RB_CNTL, ih_rb_cntl);
4902         rdev->ih.enabled = true;
4903 }
4904
4905 /**
4906  * cik_disable_interrupts - Disable the interrupt ring buffer
4907  *
4908  * @rdev: radeon_device pointer
4909  *
4910  * Disable the interrupt ring buffer (CIK).
4911  */
4912 static void cik_disable_interrupts(struct radeon_device *rdev)
4913 {
4914         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4915         u32 ih_cntl = RREG32(IH_CNTL);
4916
4917         ih_rb_cntl &= ~IH_RB_ENABLE;
4918         ih_cntl &= ~ENABLE_INTR;
4919         WREG32(IH_RB_CNTL, ih_rb_cntl);
4920         WREG32(IH_CNTL, ih_cntl);
4921         /* set rptr, wptr to 0 */
4922         WREG32(IH_RB_RPTR, 0);
4923         WREG32(IH_RB_WPTR, 0);
4924         rdev->ih.enabled = false;
4925         rdev->ih.rptr = 0;
4926 }
4927
4928 /**
4929  * cik_disable_interrupt_state - Disable all interrupt sources
4930  *
4931  * @rdev: radeon_device pointer
4932  *
4933  * Clear all interrupt enable bits used by the driver (CIK).
4934  */
4935 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4936 {
4937         u32 tmp;
4938
4939         /* gfx ring */
4940         WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4941         /* sdma */
4942         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4943         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4944         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4945         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4946         /* compute queues */
4947         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4948         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4949         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4950         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4951         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4952         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4953         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4954         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4955         /* grbm */
4956         WREG32(GRBM_INT_CNTL, 0);
4957         /* vline/vblank, etc. */
4958         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4959         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4960         if (rdev->num_crtc >= 4) {
4961                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4962                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4963         }
4964         if (rdev->num_crtc >= 6) {
4965                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4966                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4967         }
4968
4969         /* dac hotplug */
4970         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4971
4972         /* digital hotplug */
4973         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4974         WREG32(DC_HPD1_INT_CONTROL, tmp);
4975         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4976         WREG32(DC_HPD2_INT_CONTROL, tmp);
4977         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4978         WREG32(DC_HPD3_INT_CONTROL, tmp);
4979         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4980         WREG32(DC_HPD4_INT_CONTROL, tmp);
4981         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4982         WREG32(DC_HPD5_INT_CONTROL, tmp);
4983         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4984         WREG32(DC_HPD6_INT_CONTROL, tmp);
4985
4986 }
4987
4988 /**
4989  * cik_irq_init - init and enable the interrupt ring
4990  *
4991  * @rdev: radeon_device pointer
4992  *
4993  * Allocate a ring buffer for the interrupt controller,
4994  * enable the RLC, disable interrupts, enable the IH
4995  * ring buffer and enable it (CIK).
4996  * Called at device load and resume.
4997  * Returns 0 for success, errors for failure.
4998  */
4999 static int cik_irq_init(struct radeon_device *rdev)
5000 {
5001         int ret = 0;
5002         int rb_bufsz;
5003         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5004
5005         /* allocate ring */
5006         ret = r600_ih_ring_alloc(rdev);
5007         if (ret)
5008                 return ret;
5009
5010         /* disable irqs */
5011         cik_disable_interrupts(rdev);
5012
5013         /* init rlc */
5014         ret = cik_rlc_resume(rdev);
5015         if (ret) {
5016                 r600_ih_ring_fini(rdev);
5017                 return ret;
5018         }
5019
5020         /* setup interrupt control */
5021         /* XXX this should actually be a bus address, not an MC address. same on older asics */
5022         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5023         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5024         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5025          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5026          */
5027         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5028         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5029         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5030         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5031
5032         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5033         rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5034
5035         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5036                       IH_WPTR_OVERFLOW_CLEAR |
5037                       (rb_bufsz << 1));
5038
5039         if (rdev->wb.enabled)
5040                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5041
5042         /* set the writeback address whether it's enabled or not */
5043         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5044         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5045
5046         WREG32(IH_RB_CNTL, ih_rb_cntl);
5047
5048         /* set rptr, wptr to 0 */
5049         WREG32(IH_RB_RPTR, 0);
5050         WREG32(IH_RB_WPTR, 0);
5051
5052         /* Default settings for IH_CNTL (disabled at first) */
5053         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5054         /* RPTR_REARM only works if msi's are enabled */
5055         if (rdev->msi_enabled)
5056                 ih_cntl |= RPTR_REARM;
5057         WREG32(IH_CNTL, ih_cntl);
5058
5059         /* force the active interrupt state to all disabled */
5060         cik_disable_interrupt_state(rdev);
5061
5062         pci_set_master(rdev->pdev);
5063
5064         /* enable irqs */
5065         cik_enable_interrupts(rdev);
5066
5067         return ret;
5068 }
5069
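/*
 * Illustration only, not part of the driver: a worked example of the
 * IH_RB_CNTL ring-size field programmed in cik_irq_init() above.  The
 * numbers assume a hypothetical 64KB IH ring; drm_order() returns the
 * log2 order of its argument.
 *
 *   ring_size      = 64 * 1024 bytes
 *   ring_size / 4  = 16384 dwords
 *   rb_bufsz       = drm_order(16384) = 14
 *   IH_RB_CNTL     = IH_WPTR_OVERFLOW_ENABLE | IH_WPTR_OVERFLOW_CLEAR |
 *                    (14 << 1)
 */
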
5070 /**
5071  * cik_irq_set - enable/disable interrupt sources
5072  *
5073  * @rdev: radeon_device pointer
5074  *
5075  * Enable interrupt sources on the GPU (vblanks, hpd,
5076  * etc.) (CIK).
5077  * Returns 0 for success, errors for failure.
5078  */
5079 int cik_irq_set(struct radeon_device *rdev)
5080 {
5081         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5082                 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5083         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5084         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5085         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5086         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5087         u32 grbm_int_cntl = 0;
5088         u32 dma_cntl, dma_cntl1;
5089
5090         if (!rdev->irq.installed) {
5091                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5092                 return -EINVAL;
5093         }
5094         /* don't enable anything if the ih is disabled */
5095         if (!rdev->ih.enabled) {
5096                 cik_disable_interrupts(rdev);
5097                 /* force the active interrupt state to all disabled */
5098                 cik_disable_interrupt_state(rdev);
5099                 return 0;
5100         }
5101
5102         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5103         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5104         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5105         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5106         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5107         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5108
5109         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5110         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5111
5112         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5113         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5114         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5115         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5116         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5117         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5118         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5119         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5120
5121         /* enable CP interrupts on all rings */
5122         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5123                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5124                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5125         }
5126         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5127                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5128                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5129                 if (ring->me == 1) {
5130                         switch (ring->pipe) {
5131                         case 0:
5132                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5133                                 break;
5134                         case 1:
5135                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5136                                 break;
5137                         case 2:
5138                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5139                                 break;
5140                         case 3:
5141                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5142                                 break;
5143                         default:
5144                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5145                                 break;
5146                         }
5147                 } else if (ring->me == 2) {
5148                         switch (ring->pipe) {
5149                         case 0:
5150                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5151                                 break;
5152                         case 1:
5153                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5154                                 break;
5155                         case 2:
5156                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5157                                 break;
5158                         case 3:
5159                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5160                                 break;
5161                         default:
5162                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5163                                 break;
5164                         }
5165                 } else {
5166                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5167                 }
5168         }
5169         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5170                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5171                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5172                 if (ring->me == 1) {
5173                         switch (ring->pipe) {
5174                         case 0:
5175                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5176                                 break;
5177                         case 1:
5178                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5179                                 break;
5180                         case 2:
5181                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5182                                 break;
5183                         case 3:
5184                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5185                                 break;
5186                         default:
5187                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5188                                 break;
5189                         }
5190                 } else if (ring->me == 2) {
5191                         switch (ring->pipe) {
5192                         case 0:
5193                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5194                                 break;
5195                         case 1:
5196                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5197                                 break;
5198                         case 2:
5199                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5200                                 break;
5201                         case 3:
5202                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5203                                 break;
5204                         default:
5205                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5206                                 break;
5207                         }
5208                 } else {
5209                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5210                 }
5211         }
5212
5213         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5214                 DRM_DEBUG("cik_irq_set: sw int dma\n");
5215                 dma_cntl |= TRAP_ENABLE;
5216         }
5217
5218         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5219                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5220                 dma_cntl1 |= TRAP_ENABLE;
5221         }
5222
5223         if (rdev->irq.crtc_vblank_int[0] ||
5224             atomic_read(&rdev->irq.pflip[0])) {
5225                 DRM_DEBUG("cik_irq_set: vblank 0\n");
5226                 crtc1 |= VBLANK_INTERRUPT_MASK;
5227         }
5228         if (rdev->irq.crtc_vblank_int[1] ||
5229             atomic_read(&rdev->irq.pflip[1])) {
5230                 DRM_DEBUG("cik_irq_set: vblank 1\n");
5231                 crtc2 |= VBLANK_INTERRUPT_MASK;
5232         }
5233         if (rdev->irq.crtc_vblank_int[2] ||
5234             atomic_read(&rdev->irq.pflip[2])) {
5235                 DRM_DEBUG("cik_irq_set: vblank 2\n");
5236                 crtc3 |= VBLANK_INTERRUPT_MASK;
5237         }
5238         if (rdev->irq.crtc_vblank_int[3] ||
5239             atomic_read(&rdev->irq.pflip[3])) {
5240                 DRM_DEBUG("cik_irq_set: vblank 3\n");
5241                 crtc4 |= VBLANK_INTERRUPT_MASK;
5242         }
5243         if (rdev->irq.crtc_vblank_int[4] ||
5244             atomic_read(&rdev->irq.pflip[4])) {
5245                 DRM_DEBUG("cik_irq_set: vblank 4\n");
5246                 crtc5 |= VBLANK_INTERRUPT_MASK;
5247         }
5248         if (rdev->irq.crtc_vblank_int[5] ||
5249             atomic_read(&rdev->irq.pflip[5])) {
5250                 DRM_DEBUG("cik_irq_set: vblank 5\n");
5251                 crtc6 |= VBLANK_INTERRUPT_MASK;
5252         }
5253         if (rdev->irq.hpd[0]) {
5254                 DRM_DEBUG("cik_irq_set: hpd 1\n");
5255                 hpd1 |= DC_HPDx_INT_EN;
5256         }
5257         if (rdev->irq.hpd[1]) {
5258                 DRM_DEBUG("cik_irq_set: hpd 2\n");
5259                 hpd2 |= DC_HPDx_INT_EN;
5260         }
5261         if (rdev->irq.hpd[2]) {
5262                 DRM_DEBUG("cik_irq_set: hpd 3\n");
5263                 hpd3 |= DC_HPDx_INT_EN;
5264         }
5265         if (rdev->irq.hpd[3]) {
5266                 DRM_DEBUG("cik_irq_set: hpd 4\n");
5267                 hpd4 |= DC_HPDx_INT_EN;
5268         }
5269         if (rdev->irq.hpd[4]) {
5270                 DRM_DEBUG("cik_irq_set: hpd 5\n");
5271                 hpd5 |= DC_HPDx_INT_EN;
5272         }
5273         if (rdev->irq.hpd[5]) {
5274                 DRM_DEBUG("cik_irq_set: hpd 6\n");
5275                 hpd6 |= DC_HPDx_INT_EN;
5276         }
5277
5278         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5279
5280         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5281         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5282
5283         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5284         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5285         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5286         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5287         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5288         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5289         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5290         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5291
5292         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5293
5294         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5295         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5296         if (rdev->num_crtc >= 4) {
5297                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5298                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5299         }
5300         if (rdev->num_crtc >= 6) {
5301                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5302                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5303         }
5304
5305         WREG32(DC_HPD1_INT_CONTROL, hpd1);
5306         WREG32(DC_HPD2_INT_CONTROL, hpd2);
5307         WREG32(DC_HPD3_INT_CONTROL, hpd3);
5308         WREG32(DC_HPD4_INT_CONTROL, hpd4);
5309         WREG32(DC_HPD5_INT_CONTROL, hpd5);
5310         WREG32(DC_HPD6_INT_CONTROL, hpd6);
5311
5312         return 0;
5313 }
5314
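/*
 * Illustration only: the me/pipe routing used in cik_irq_set() above
 * for the compute rings, condensed.  A compute ring on ME1 pipe 2, for
 * example, gets TIME_STAMP_INT_ENABLE set in CP_ME1_PIPE2_INT_CNTL and
 * in no other per-pipe interrupt control register.
 */
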
5315 /**
5316  * cik_irq_ack - ack interrupt sources
5317  *
5318  * @rdev: radeon_device pointer
5319  *
5320  * Ack interrupt sources on the GPU (vblanks, hpd,
5321  * etc.) (CIK).  Certain interrupt sources are sw
5322  * generated and do not require an explicit ack.
5323  */
5324 static inline void cik_irq_ack(struct radeon_device *rdev)
5325 {
5326         u32 tmp;
5327
5328         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5329         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5330         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5331         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5332         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5333         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5334         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5335
5336         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5337                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5338         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5339                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5340         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5341                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5342         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5343                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5344
5345         if (rdev->num_crtc >= 4) {
5346                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5347                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5348                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5349                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5350                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5351                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5352                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5353                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5354         }
5355
5356         if (rdev->num_crtc >= 6) {
5357                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5358                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5359                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5360                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5361                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5362                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5363                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5364                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5365         }
5366
5367         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5368                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5369                 tmp |= DC_HPDx_INT_ACK;
5370                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5371         }
5372         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5373                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5374                 tmp |= DC_HPDx_INT_ACK;
5375                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5376         }
5377         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5378                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5379                 tmp |= DC_HPDx_INT_ACK;
5380                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5381         }
5382         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5383                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5384                 tmp |= DC_HPDx_INT_ACK;
5385                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5386         }
5387         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5388                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5389                 tmp |= DC_HPDx_INT_ACK;
5390                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5391         }
5392         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5393                 tmp = RREG32(DC_HPD6_INT_CONTROL);
5394                 tmp |= DC_HPDx_INT_ACK;
5395                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5396         }
5397 }
5398
5399 /**
5400  * cik_irq_disable - disable interrupts
5401  *
5402  * @rdev: radeon_device pointer
5403  *
5404  * Disable interrupts on the hw (CIK).
5405  */
5406 static void cik_irq_disable(struct radeon_device *rdev)
5407 {
5408         cik_disable_interrupts(rdev);
5409         /* Wait and acknowledge irq */
5410         mdelay(1);
5411         cik_irq_ack(rdev);
5412         cik_disable_interrupt_state(rdev);
5413 }
5414
5415 /**
5416  * cik_irq_suspend - disable interrupts for suspend
5417  *
5418  * @rdev: radeon_device pointer
5419  *
5420  * Disable interrupts and stop the RLC (CIK).
5421  * Used for suspend.
5422  */
5423 static void cik_irq_suspend(struct radeon_device *rdev)
5424 {
5425         cik_irq_disable(rdev);
5426         cik_rlc_stop(rdev);
5427 }
5428
5429 /**
5430  * cik_irq_fini - tear down interrupt support
5431  *
5432  * @rdev: radeon_device pointer
5433  *
5434  * Disable interrupts on the hw and free the IH ring
5435  * buffer (CIK).
5436  * Used for driver unload.
5437  */
5438 static void cik_irq_fini(struct radeon_device *rdev)
5439 {
5440         cik_irq_suspend(rdev);
5441         r600_ih_ring_fini(rdev);
5442 }
5443
5444 /**
5445  * cik_get_ih_wptr - get the IH ring buffer wptr
5446  *
5447  * @rdev: radeon_device pointer
5448  *
5449  * Get the IH ring buffer wptr from either the register
5450  * or the writeback memory buffer (CIK).  Also check for
5451  * ring buffer overflow and deal with it.
5452  * Used by cik_irq_process().
5453  * Returns the value of the wptr.
5454  */
5455 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5456 {
5457         u32 wptr, tmp;
5458
5459         if (rdev->wb.enabled)
5460                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5461         else
5462                 wptr = RREG32(IH_RB_WPTR);
5463
5464         if (wptr & RB_OVERFLOW) {
5465                 /* When a ring buffer overflow happens, start parsing interrupts
5466                  * from the last vector not overwritten (wptr + 16). Hopefully
5467                  * this should allow us to catch up.
5468                  */
5469                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5470                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5471                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5472                 tmp = RREG32(IH_RB_CNTL);
5473                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5474                 WREG32(IH_RB_CNTL, tmp);
5475         }
5476         return (wptr & rdev->ih.ptr_mask);
5477 }
5478
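/*
 * Illustration only: IV ring pointers are byte offsets, each ring
 * entry is 16 bytes, and ptr_mask is the ring size in bytes minus one
 * (a power of two minus one).  With a hypothetical 64KB ring,
 * ptr_mask = 0xffff, so an rptr of 0xfff0 advances past the last entry
 * and wraps back to the start:
 *
 *   rptr = (0xfff0 + 16) & 0xffff = 0
 */
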
5479 /*        CIK IV Ring
5480  * Each IV ring entry is 128 bits:
5481  * [7:0]    - interrupt source id
5482  * [31:8]   - reserved
5483  * [59:32]  - interrupt source data
5484  * [63:60]  - reserved
5485  * [71:64]  - RINGID
5486  *            CP:
5487  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5488  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5489  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5490  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5491  *            PIPE_ID - ME0 0=3D
5492  *                    - ME1&2 compute dispatcher (4 pipes each)
5493  *            SDMA:
5494  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
5495  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
5496  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5497  * [79:72]  - VMID
5498  * [95:80]  - PASID
5499  * [127:96] - reserved
5500  */
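/*
 * Illustration only, not part of the driver: a sketch of pulling the
 * fields described above out of one 128-bit IV entry, mirroring the
 * masks used in cik_irq_process() below.  "entry" is assumed to point
 * at the four little-endian dwords of a single ring entry.
 */
static inline void cik_decode_iv_entry_example(const __le32 *entry)
{
	u32 src_id   = le32_to_cpu(entry[0]) & 0xff;           /* [7:0] */
	u32 src_data = le32_to_cpu(entry[1]) & 0xfffffff;      /* [59:32] */
	u32 ring_id  = le32_to_cpu(entry[2]) & 0xff;           /* [71:64] */
	u32 vm_id    = (le32_to_cpu(entry[2]) >> 8) & 0xff;    /* [79:72] */
	u32 pasid    = (le32_to_cpu(entry[2]) >> 16) & 0xffff; /* [95:80] */

	DRM_DEBUG("IV: src %u data %u ring %u vm %u pasid %u\n",
		  src_id, src_data, ring_id, vm_id, pasid);
}
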
5501 /**
5502  * cik_irq_process - interrupt handler
5503  *
5504  * @rdev: radeon_device pointer
5505  *
5506  * Interrupt handler (CIK).  Walk the IH ring,
5507  * ack interrupts and schedule work to handle
5508  * interrupt events.
5509  * Returns irq process return code.
5510  */
5511 int cik_irq_process(struct radeon_device *rdev)
5512 {
5513         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5514         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5515         u32 wptr;
5516         u32 rptr;
5517         u32 src_id, src_data, ring_id;
5518         u8 me_id, pipe_id, queue_id;
5519         u32 ring_index;
5520         bool queue_hotplug = false;
5521         bool queue_reset = false;
5522         u32 addr, status, mc_client;
5523
5524         if (!rdev->ih.enabled || rdev->shutdown)
5525                 return IRQ_NONE;
5526
5527         wptr = cik_get_ih_wptr(rdev);
5528
5529 restart_ih:
5530         /* is somebody else already processing irqs? */
5531         if (atomic_xchg(&rdev->ih.lock, 1))
5532                 return IRQ_NONE;
5533
5534         rptr = rdev->ih.rptr;
5535         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5536
5537         /* Order reading of wptr vs. reading of IH ring data */
5538         rmb();
5539
5540         /* display interrupts */
5541         cik_irq_ack(rdev);
5542
5543         while (rptr != wptr) {
5544                 /* wptr/rptr are in bytes! */
5545                 ring_index = rptr / 4;
5546                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5547                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5548                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5549
5550                 switch (src_id) {
5551                 case 1: /* D1 vblank/vline */
5552                         switch (src_data) {
5553                         case 0: /* D1 vblank */
5554                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5555                                         if (rdev->irq.crtc_vblank_int[0]) {
5556                                                 drm_handle_vblank(rdev->ddev, 0);
5557                                                 rdev->pm.vblank_sync = true;
5558                                                 wake_up(&rdev->irq.vblank_queue);
5559                                         }
5560                                         if (atomic_read(&rdev->irq.pflip[0]))
5561                                                 radeon_crtc_handle_flip(rdev, 0);
5562                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5563                                         DRM_DEBUG("IH: D1 vblank\n");
5564                                 }
5565                                 break;
5566                         case 1: /* D1 vline */
5567                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5568                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5569                                         DRM_DEBUG("IH: D1 vline\n");
5570                                 }
5571                                 break;
5572                         default:
5573                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5574                                 break;
5575                         }
5576                         break;
5577                 case 2: /* D2 vblank/vline */
5578                         switch (src_data) {
5579                         case 0: /* D2 vblank */
5580                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5581                                         if (rdev->irq.crtc_vblank_int[1]) {
5582                                                 drm_handle_vblank(rdev->ddev, 1);
5583                                                 rdev->pm.vblank_sync = true;
5584                                                 wake_up(&rdev->irq.vblank_queue);
5585                                         }
5586                                         if (atomic_read(&rdev->irq.pflip[1]))
5587                                                 radeon_crtc_handle_flip(rdev, 1);
5588                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5589                                         DRM_DEBUG("IH: D2 vblank\n");
5590                                 }
5591                                 break;
5592                         case 1: /* D2 vline */
5593                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5594                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5595                                         DRM_DEBUG("IH: D2 vline\n");
5596                                 }
5597                                 break;
5598                         default:
5599                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5600                                 break;
5601                         }
5602                         break;
5603                 case 3: /* D3 vblank/vline */
5604                         switch (src_data) {
5605                         case 0: /* D3 vblank */
5606                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5607                                         if (rdev->irq.crtc_vblank_int[2]) {
5608                                                 drm_handle_vblank(rdev->ddev, 2);
5609                                                 rdev->pm.vblank_sync = true;
5610                                                 wake_up(&rdev->irq.vblank_queue);
5611                                         }
5612                                         if (atomic_read(&rdev->irq.pflip[2]))
5613                                                 radeon_crtc_handle_flip(rdev, 2);
5614                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5615                                         DRM_DEBUG("IH: D3 vblank\n");
5616                                 }
5617                                 break;
5618                         case 1: /* D3 vline */
5619                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5620                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5621                                         DRM_DEBUG("IH: D3 vline\n");
5622                                 }
5623                                 break;
5624                         default:
5625                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5626                                 break;
5627                         }
5628                         break;
5629                 case 4: /* D4 vblank/vline */
5630                         switch (src_data) {
5631                         case 0: /* D4 vblank */
5632                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5633                                         if (rdev->irq.crtc_vblank_int[3]) {
5634                                                 drm_handle_vblank(rdev->ddev, 3);
5635                                                 rdev->pm.vblank_sync = true;
5636                                                 wake_up(&rdev->irq.vblank_queue);
5637                                         }
5638                                         if (atomic_read(&rdev->irq.pflip[3]))
5639                                                 radeon_crtc_handle_flip(rdev, 3);
5640                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5641                                         DRM_DEBUG("IH: D4 vblank\n");
5642                                 }
5643                                 break;
5644                         case 1: /* D4 vline */
5645                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5646                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5647                                         DRM_DEBUG("IH: D4 vline\n");
5648                                 }
5649                                 break;
5650                         default:
5651                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5652                                 break;
5653                         }
5654                         break;
5655                 case 5: /* D5 vblank/vline */
5656                         switch (src_data) {
5657                         case 0: /* D5 vblank */
5658                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5659                                         if (rdev->irq.crtc_vblank_int[4]) {
5660                                                 drm_handle_vblank(rdev->ddev, 4);
5661                                                 rdev->pm.vblank_sync = true;
5662                                                 wake_up(&rdev->irq.vblank_queue);
5663                                         }
5664                                         if (atomic_read(&rdev->irq.pflip[4]))
5665                                                 radeon_crtc_handle_flip(rdev, 4);
5666                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5667                                         DRM_DEBUG("IH: D5 vblank\n");
5668                                 }
5669                                 break;
5670                         case 1: /* D5 vline */
5671                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5672                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5673                                         DRM_DEBUG("IH: D5 vline\n");
5674                                 }
5675                                 break;
5676                         default:
5677                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5678                                 break;
5679                         }
5680                         break;
5681                 case 6: /* D6 vblank/vline */
5682                         switch (src_data) {
5683                         case 0: /* D6 vblank */
5684                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5685                                         if (rdev->irq.crtc_vblank_int[5]) {
5686                                                 drm_handle_vblank(rdev->ddev, 5);
5687                                                 rdev->pm.vblank_sync = true;
5688                                                 wake_up(&rdev->irq.vblank_queue);
5689                                         }
5690                                         if (atomic_read(&rdev->irq.pflip[5]))
5691                                                 radeon_crtc_handle_flip(rdev, 5);
5692                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5693                                         DRM_DEBUG("IH: D6 vblank\n");
5694                                 }
5695                                 break;
5696                         case 1: /* D6 vline */
5697                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5698                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5699                                         DRM_DEBUG("IH: D6 vline\n");
5700                                 }
5701                                 break;
5702                         default:
5703                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5704                                 break;
5705                         }
5706                         break;
5707                 case 42: /* HPD hotplug */
5708                         switch (src_data) {
5709                         case 0:
5710                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5711                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5712                                         queue_hotplug = true;
5713                                         DRM_DEBUG("IH: HPD1\n");
5714                                 }
5715                                 break;
5716                         case 1:
5717                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5718                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5719                                         queue_hotplug = true;
5720                                         DRM_DEBUG("IH: HPD2\n");
5721                                 }
5722                                 break;
5723                         case 2:
5724                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5725                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5726                                         queue_hotplug = true;
5727                                         DRM_DEBUG("IH: HPD3\n");
5728                                 }
5729                                 break;
5730                         case 3:
5731                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5732                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5733                                         queue_hotplug = true;
5734                                         DRM_DEBUG("IH: HPD4\n");
5735                                 }
5736                                 break;
5737                         case 4:
5738                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5739                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5740                                         queue_hotplug = true;
5741                                         DRM_DEBUG("IH: HPD5\n");
5742                                 }
5743                                 break;
5744                         case 5:
5745                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5746                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5747                                         queue_hotplug = true;
5748                                         DRM_DEBUG("IH: HPD6\n");
5749                                 }
5750                                 break;
5751                         default:
5752                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5753                                 break;
5754                         }
5755                         break;
5756                 case 146:
5757                 case 147:
5758                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5759                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5760                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5761                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5762                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5763                                 addr);
5764                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5765                                 status);
5766                         cik_vm_decode_fault(rdev, status, addr, mc_client);
5767                         /* reset addr and status */
5768                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5769                         break;
5770                 case 176: /* GFX RB CP_INT */
5771                 case 177: /* GFX IB CP_INT */
5772                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5773                         break;
5774                 case 181: /* CP EOP event */
5775                         DRM_DEBUG("IH: CP EOP\n");
5776                         /* XXX check the bitfield order! */
5777                         me_id = (ring_id & 0x60) >> 5;
5778                         pipe_id = (ring_id & 0x18) >> 3;
5779                         queue_id = (ring_id & 0x7) >> 0;
5780                         switch (me_id) {
5781                         case 0:
5782                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5783                                 break;
5784                         case 1:
5785                         case 2:
5786                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5787                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5788                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5789                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5790                                 break;
5791                         }
5792                         break;
5793                 case 184: /* CP Privileged reg access */
5794                         DRM_ERROR("Illegal register access in command stream\n");
5795                         /* XXX check the bitfield order! */
5796                         me_id = (ring_id & 0x60) >> 5;
5797                         pipe_id = (ring_id & 0x18) >> 3;
5798                         queue_id = (ring_id & 0x7) >> 0;
5799                         switch (me_id) {
5800                         case 0:
5801                                 /* This results in a full GPU reset, but all we need to do is soft
5802                                  * reset the CP for gfx
5803                                  */
5804                                 queue_reset = true;
5805                                 break;
5806                         case 1:
5807                                 /* XXX compute */
5808                                 queue_reset = true;
5809                                 break;
5810                         case 2:
5811                                 /* XXX compute */
5812                                 queue_reset = true;
5813                                 break;
5814                         }
5815                         break;
5816                 case 185: /* CP Privileged inst */
5817                         DRM_ERROR("Illegal instruction in command stream\n");
5818                         /* XXX check the bitfield order! */
5819                         me_id = (ring_id & 0x60) >> 5;
5820                         pipe_id = (ring_id & 0x18) >> 3;
5821                         queue_id = (ring_id & 0x7) >> 0;
5822                         switch (me_id) {
5823                         case 0:
5824                                 /* This results in a full GPU reset, but all we need to do is soft
5825                                  * reset the CP for gfx
5826                                  */
5827                                 queue_reset = true;
5828                                 break;
5829                         case 1:
5830                                 /* XXX compute */
5831                                 queue_reset = true;
5832                                 break;
5833                         case 2:
5834                                 /* XXX compute */
5835                                 queue_reset = true;
5836                                 break;
5837                         }
5838                         break;
5839                 case 224: /* SDMA trap event */
5840                         /* XXX check the bitfield order! */
5841                         me_id = (ring_id & 0x3) >> 0;
5842                         queue_id = (ring_id & 0xc) >> 2;
5843                         DRM_DEBUG("IH: SDMA trap\n");
5844                         switch (me_id) {
5845                         case 0:
5846                                 switch (queue_id) {
5847                                 case 0:
5848                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5849                                         break;
5850                                 case 1:
5851                                         /* XXX compute */
5852                                         break;
5853                                 case 2:
5854                                         /* XXX compute */
5855                                         break;
5856                                 }
5857                                 break;
5858                         case 1:
5859                                 switch (queue_id) {
5860                                 case 0:
5861                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5862                                         break;
5863                                 case 1:
5864                                         /* XXX compute */
5865                                         break;
5866                                 case 2:
5867                                         /* XXX compute */
5868                                         break;
5869                                 }
5870                                 break;
5871                         }
5872                         break;
5873                 case 241: /* SDMA Privileged inst */
5874                 case 247: /* SDMA Privileged inst */
5875                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
5876                         /* XXX check the bitfield order! */
5877                         me_id = (ring_id & 0x3) >> 0;
5878                         queue_id = (ring_id & 0xc) >> 2;
5879                         switch (me_id) {
5880                         case 0:
5881                                 switch (queue_id) {
5882                                 case 0:
5883                                         queue_reset = true;
5884                                         break;
5885                                 case 1:
5886                                         /* XXX compute */
5887                                         queue_reset = true;
5888                                         break;
5889                                 case 2:
5890                                         /* XXX compute */
5891                                         queue_reset = true;
5892                                         break;
5893                                 }
5894                                 break;
5895                         case 1:
5896                                 switch (queue_id) {
5897                                 case 0:
5898                                         queue_reset = true;
5899                                         break;
5900                                 case 1:
5901                                         /* XXX compute */
5902                                         queue_reset = true;
5903                                         break;
5904                                 case 2:
5905                                         /* XXX compute */
5906                                         queue_reset = true;
5907                                         break;
5908                                 }
5909                                 break;
5910                         }
5911                         break;
5912                 case 233: /* GUI IDLE */
5913                         DRM_DEBUG("IH: GUI idle\n");
5914                         break;
5915                 default:
5916                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5917                         break;
5918                 }
5919
5920                 /* wptr/rptr are in bytes! */
5921                 rptr += 16;
5922                 rptr &= rdev->ih.ptr_mask;
5923         }
5924         if (queue_hotplug)
5925                 schedule_work(&rdev->hotplug_work);
5926         if (queue_reset)
5927                 schedule_work(&rdev->reset_work);
5928         rdev->ih.rptr = rptr;
5929         WREG32(IH_RB_RPTR, rdev->ih.rptr);
5930         atomic_set(&rdev->ih.lock, 0);
5931
5932         /* make sure wptr hasn't changed while processing */
5933         wptr = cik_get_ih_wptr(rdev);
5934         if (wptr != rptr)
5935                 goto restart_ih;
5936
5937         return IRQ_HANDLED;
5938 }
5939
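/*
 * Illustration only: the CP RINGID decode repeated for the EOP and
 * privileged reg/inst cases in cik_irq_process() above, written out
 * once as a helper.  The bit positions follow the "XXX check the
 * bitfield order" comments there and should be treated as unverified.
 */
static inline void cik_cp_ring_id_decode_example(u32 ring_id, u8 *me_id,
						 u8 *pipe_id, u8 *queue_id)
{
	*me_id = (ring_id & 0x60) >> 5;   /* ME_ID    [6:5] */
	*pipe_id = (ring_id & 0x18) >> 3; /* PIPE_ID  [4:3] */
	*queue_id = ring_id & 0x7;        /* QUEUE_ID [2:0] */
}
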
5940 /*
5941  * startup/shutdown callbacks
5942  */
5943 /**
5944  * cik_startup - program the asic to a functional state
5945  *
5946  * @rdev: radeon_device pointer
5947  *
5948  * Programs the asic to a functional state (CIK).
5949  * Called by cik_init() and cik_resume().
5950  * Returns 0 for success, error for failure.
5951  */
5952 static int cik_startup(struct radeon_device *rdev)
5953 {
5954         struct radeon_ring *ring;
5955         int r;
5956
5957         if (rdev->flags & RADEON_IS_IGP) {
5958                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5959                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5960                         r = cik_init_microcode(rdev);
5961                         if (r) {
5962                                 DRM_ERROR("Failed to load firmware!\n");
5963                                 return r;
5964                         }
5965                 }
5966         } else {
5967                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5968                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5969                     !rdev->mc_fw) {
5970                         r = cik_init_microcode(rdev);
5971                         if (r) {
5972                                 DRM_ERROR("Failed to load firmware!\n");
5973                                 return r;
5974                         }
5975                 }
5976
5977                 r = ci_mc_load_microcode(rdev);
5978                 if (r) {
5979                         DRM_ERROR("Failed to load MC firmware!\n");
5980                         return r;
5981                 }
5982         }
5983
5984         r = r600_vram_scratch_init(rdev);
5985         if (r)
5986                 return r;
5987
5988         cik_mc_program(rdev);
5989         r = cik_pcie_gart_enable(rdev);
5990         if (r)
5991                 return r;
5992         cik_gpu_init(rdev);
5993
5994         /* allocate rlc buffers */
5995         r = si_rlc_init(rdev);
5996         if (r) {
5997                 DRM_ERROR("Failed to init rlc BOs!\n");
5998                 return r;
5999         }
6000
6001         /* allocate wb buffer */
6002         r = radeon_wb_init(rdev);
6003         if (r)
6004                 return r;
6005
6006         /* allocate mec buffers */
6007         r = cik_mec_init(rdev);
6008         if (r) {
6009                 DRM_ERROR("Failed to init MEC BOs!\n");
6010                 return r;
6011         }
6012
6013         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6014         if (r) {
6015                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6016                 return r;
6017         }
6018
6019         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6020         if (r) {
6021                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6022                 return r;
6023         }
6024
6025         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6026         if (r) {
6027                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6028                 return r;
6029         }
6030
6031         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6032         if (r) {
6033                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6034                 return r;
6035         }
6036
6037         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6038         if (r) {
6039                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6040                 return r;
6041         }
6042
6043         r = cik_uvd_resume(rdev);
6044         if (!r) {
6045                 r = radeon_fence_driver_start_ring(rdev,
6046                                                    R600_RING_TYPE_UVD_INDEX);
6047                 if (r)
6048                         dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6049         }
6050         if (r)
6051                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6052
6053         /* Enable IRQ */
6054         if (!rdev->irq.installed) {
6055                 r = radeon_irq_kms_init(rdev);
6056                 if (r)
6057                         return r;
6058         }
6059
6060         r = cik_irq_init(rdev);
6061         if (r) {
6062                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6063                 radeon_irq_kms_fini(rdev);
6064                 return r;
6065         }
6066         cik_irq_set(rdev);
6067
6068         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6069         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6070                              CP_RB0_RPTR, CP_RB0_WPTR,
6071                              0, 0xfffff, RADEON_CP_PACKET2);
6072         if (r)
6073                 return r;
6074
6075         /* set up the compute queues */
6076         /* type-2 packets are deprecated on MEC, use type-3 instead */
6077         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6078         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6079                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6080                              0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6081         if (r)
6082                 return r;
6083         ring->me = 1; /* first MEC */
6084         ring->pipe = 0; /* first pipe */
6085         ring->queue = 0; /* first queue */
6086         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6087
6088         /* type-2 packets are deprecated on MEC, use type-3 instead */
6089         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6090         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6091                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6092                              0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6093         if (r)
6094                 return r;
6095         /* dGPUs only have 1 MEC */
6096         ring->me = 1; /* first MEC */
6097         ring->pipe = 0; /* first pipe */
6098         ring->queue = 1; /* second queue */
6099         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6100
6101         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6102         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6103                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6104                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6105                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6106         if (r)
6107                 return r;
6108
6109         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6110         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6111                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6112                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6113                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6114         if (r)
6115                 return r;
6116
6117         r = cik_cp_resume(rdev);
6118         if (r)
6119                 return r;
6120
6121         r = cik_sdma_resume(rdev);
6122         if (r)
6123                 return r;
6124
6125         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6126         if (ring->ring_size) {
6127                 r = radeon_ring_init(rdev, ring, ring->ring_size,
6128                                      R600_WB_UVD_RPTR_OFFSET,
6129                                      UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6130                                      0, 0xfffff, RADEON_CP_PACKET2);
6131                 if (!r)
6132                         r = r600_uvd_init(rdev);
6133                 if (r)
6134                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6135         }
6136
6137         r = radeon_ib_pool_init(rdev);
6138         if (r) {
6139                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6140                 return r;
6141         }
6142
6143         r = radeon_vm_manager_init(rdev);
6144         if (r) {
6145                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6146                 return r;
6147         }
6148
6149         return 0;
6150 }
6151
6152 /**
6153  * cik_resume - resume the asic to a functional state
6154  *
6155  * @rdev: radeon_device pointer
6156  *
6157  * Programs the asic to a functional state (CIK).
6158  * Called at resume.
6159  * Returns 0 for success, error for failure.
6160  */
6161 int cik_resume(struct radeon_device *rdev)
6162 {
6163         int r;
6164
6165         /* post card */
6166         atom_asic_init(rdev->mode_info.atom_context);
6167
6168         /* init golden registers */
6169         cik_init_golden_registers(rdev);
6170
6171         rdev->accel_working = true;
6172         r = cik_startup(rdev);
6173         if (r) {
6174                 DRM_ERROR("cik startup failed on resume\n");
6175                 rdev->accel_working = false;
6176                 return r;
6177         }
6178
6179         return r;
6181 }
6182
6183 /**
6184  * cik_suspend - suspend the asic
6185  *
6186  * @rdev: radeon_device pointer
6187  *
6188  * Bring the chip into a state suitable for suspend (CIK).
6189  * Called at suspend.
6190  * Returns 0 for success.
6191  */
6192 int cik_suspend(struct radeon_device *rdev)
6193 {
6194         radeon_vm_manager_fini(rdev);
6195         cik_cp_enable(rdev, false);
6196         cik_sdma_enable(rdev, false);
6197         r600_uvd_stop(rdev);
6198         radeon_uvd_suspend(rdev);
6199         cik_irq_suspend(rdev);
6200         radeon_wb_disable(rdev);
6201         cik_pcie_gart_disable(rdev);
6202         return 0;
6203 }
6204
6205 /* The plan is to move initialization into this function and
6206  * use helper functions so that radeon_device_init does little
6207  * more than call asic specific functions. This should also
6208  * allow us to remove a bunch of callback functions like
6209  * vram_info.
6210  */
6211 /**
6212  * cik_init - asic specific driver and hw init
6213  *
6214  * @rdev: radeon_device pointer
6215  *
6216  * Setup asic specific driver variables and program the hw
6217  * to a functional state (CIK).
6218  * Called at driver startup.
6219  * Returns 0 for success, errors for failure.
6220  */
6221 int cik_init(struct radeon_device *rdev)
6222 {
6223         struct radeon_ring *ring;
6224         int r;
6225
6226         /* Read BIOS */
6227         if (!radeon_get_bios(rdev)) {
6228                 if (ASIC_IS_AVIVO(rdev))
6229                         return -EINVAL;
6230         }
6231         /* Must be an ATOMBIOS */
6232         if (!rdev->is_atom_bios) {
6233                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
6234                 return -EINVAL;
6235         }
6236         r = radeon_atombios_init(rdev);
6237         if (r)
6238                 return r;
6239
6240         /* Post card if necessary */
6241         if (!radeon_card_posted(rdev)) {
6242                 if (!rdev->bios) {
6243                         dev_err(rdev->dev, "Card not posted and no BIOS - aborting\n");
6244                         return -EINVAL;
6245                 }
6246                 DRM_INFO("GPU not posted. posting now...\n");
6247                 atom_asic_init(rdev->mode_info.atom_context);
6248         }
6249         /* init golden registers */
6250         cik_init_golden_registers(rdev);
6251         /* Initialize scratch registers */
6252         cik_scratch_init(rdev);
6253         /* Initialize surface registers */
6254         radeon_surface_init(rdev);
6255         /* Initialize clocks */
6256         radeon_get_clock_info(rdev->ddev);
6257
6258         /* Fence driver */
6259         r = radeon_fence_driver_init(rdev);
6260         if (r)
6261                 return r;
6262
6263         /* initialize memory controller */
6264         r = cik_mc_init(rdev);
6265         if (r)
6266                 return r;
6267         /* Memory manager */
6268         r = radeon_bo_init(rdev);
6269         if (r)
6270                 return r;
6271
6272         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6273         ring->ring_obj = NULL;
6274         r600_ring_init(rdev, ring, 1024 * 1024);
6275
6276         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6277         ring->ring_obj = NULL;
6278         r600_ring_init(rdev, ring, 1024 * 1024);
6279         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6280         if (r)
6281                 return r;
6282
6283         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6284         ring->ring_obj = NULL;
6285         r600_ring_init(rdev, ring, 1024 * 1024);
6286         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6287         if (r)
6288                 return r;
6289
6290         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6291         ring->ring_obj = NULL;
6292         r600_ring_init(rdev, ring, 256 * 1024);
6293
6294         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6295         ring->ring_obj = NULL;
6296         r600_ring_init(rdev, ring, 256 * 1024);
6297
6298         r = radeon_uvd_init(rdev);
6299         if (!r) {
6300                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6301                 ring->ring_obj = NULL;
6302                 r600_ring_init(rdev, ring, 4096);
6303         }
6304
6305         rdev->ih.ring_obj = NULL;
6306         r600_ih_ring_init(rdev, 64 * 1024);
6307
6308         r = r600_pcie_gart_init(rdev);
6309         if (r)
6310                 return r;
6311
6312         rdev->accel_working = true;
6313         r = cik_startup(rdev);
6314         if (r) {
6315                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6316                 cik_cp_fini(rdev);
6317                 cik_sdma_fini(rdev);
6318                 cik_irq_fini(rdev);
6319                 si_rlc_fini(rdev);
6320                 cik_mec_fini(rdev);
6321                 radeon_wb_fini(rdev);
6322                 radeon_ib_pool_fini(rdev);
6323                 radeon_vm_manager_fini(rdev);
6324                 radeon_irq_kms_fini(rdev);
6325                 cik_pcie_gart_fini(rdev);
6326                 rdev->accel_working = false;
6327         }
6328
6329         /* Don't start up if the MC ucode is missing.
6330          * The default clocks and voltages before the MC ucode
6331          * is loaded are not sufficient for advanced operations.
6332          */
6333         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6334                 DRM_ERROR("radeon: MC ucode required for CIK.\n");
6335                 return -EINVAL;
6336         }
6337
6338         return 0;
6339 }
6340
6341 /**
6342  * cik_fini - asic specific driver and hw fini
6343  *
6344  * @rdev: radeon_device pointer
6345  *
6346  * Tear down the asic specific driver variables and program the hw
6347  * to an idle state (CIK).
6348  * Called at driver unload.
6349  */
6350 void cik_fini(struct radeon_device *rdev)
6351 {
6352         cik_cp_fini(rdev);
6353         cik_sdma_fini(rdev);
6354         cik_irq_fini(rdev);
6355         si_rlc_fini(rdev);
6356         cik_mec_fini(rdev);
6357         radeon_wb_fini(rdev);
6358         radeon_vm_manager_fini(rdev);
6359         radeon_ib_pool_fini(rdev);
6360         radeon_irq_kms_fini(rdev);
6361         r600_uvd_stop(rdev);
6362         radeon_uvd_fini(rdev);
6363         cik_pcie_gart_fini(rdev);
6364         r600_vram_scratch_fini(rdev);
6365         radeon_gem_fini(rdev);
6366         radeon_fence_driver_fini(rdev);
6367         radeon_bo_fini(rdev);
6368         radeon_atombios_fini(rdev);
6369         kfree(rdev->bios);
6370         rdev->bios = NULL;
6371 }
6372
6373 /* display watermark setup */
6374 /**
6375  * dce8_line_buffer_adjust - Set up the line buffer
6376  *
6377  * @rdev: radeon_device pointer
6378  * @radeon_crtc: the selected display controller
6379  * @mode: the current display mode on the selected display
6380  * controller
6381  *
6382  * Set up the line buffer allocation for
6383  * the selected display controller (CIK).
6384  * Returns the line buffer size in pixels.
6385  */
6386 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6387                                    struct radeon_crtc *radeon_crtc,
6388                                    struct drm_display_mode *mode)
6389 {
6390         u32 tmp;
6391
6392         /*
6393          * Line Buffer Setup
6394          * There are 6 line buffers, one per display controller.
6395          * There are 3 partitions per LB. Select the number of partitions
6396          * to enable based on the display width.  For display widths larger
6397          * than 4096, you need to use 2 display controllers and combine
6398          * them using the stereo blender.
6399          */
6400         if (radeon_crtc->base.enabled && mode) {
6401                 if (mode->crtc_hdisplay < 1920)
6402                         tmp = 1;
6403                 else if (mode->crtc_hdisplay < 2560)
6404                         tmp = 2;
6405                 else if (mode->crtc_hdisplay < 4096)
6406                         tmp = 0;
6407                 else {
6408                         DRM_DEBUG_KMS("Mode too big for LB!\n");
6409                         tmp = 0;
6410                 }
6411         } else
6412                 tmp = 1;
6413
6414         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6415                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6416
6417         if (radeon_crtc->base.enabled && mode) {
6418                 switch (tmp) {
6419                 case 0:
6420                 default:
6421                         return 4096 * 2;
6422                 case 1:
6423                         return 1920 * 2;
6424                 case 2:
6425                         return 2560 * 2;
6426                 }
6427         }
6428
6429         /* controller not enabled, so no lb used */
6430         return 0;
6431 }
6432
6433 /**
6434  * cik_get_number_of_dram_channels - get the number of dram channels
6435  *
6436  * @rdev: radeon_device pointer
6437  *
6438  * Look up the number of video ram channels (CIK).
6439  * Used for display watermark bandwidth calculations
6440  * Returns the number of dram channels
6441  */
6442 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6443 {
6444         u32 tmp = RREG32(MC_SHARED_CHMAP);
6445
6446         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6447         case 0:
6448         default:
6449                 return 1;
6450         case 1:
6451                 return 2;
6452         case 2:
6453                 return 4;
6454         case 3:
6455                 return 8;
6456         case 4:
6457                 return 3;
6458         case 5:
6459                 return 6;
6460         case 6:
6461                 return 10;
6462         case 7:
6463                 return 12;
6464         case 8:
6465                 return 16;
6466         }
6467 }
6468
6469 struct dce8_wm_params {
6470         u32 dram_channels; /* number of dram channels */
6471         u32 yclk;          /* bandwidth per dram data pin in kHz */
6472         u32 sclk;          /* engine clock in kHz */
6473         u32 disp_clk;      /* display clock in kHz */
6474         u32 src_width;     /* viewport width */
6475         u32 active_time;   /* active display time in ns */
6476         u32 blank_time;    /* blank time in ns */
6477         bool interlaced;    /* mode is interlaced */
6478         fixed20_12 vsc;    /* vertical scale ratio */
6479         u32 num_heads;     /* number of active crtcs */
6480         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6481         u32 lb_size;       /* line buffer allocated to pipe */
6482         u32 vtaps;         /* vertical scaler taps */
6483 };
6484
6485 /**
6486  * dce8_dram_bandwidth - get the dram bandwidth
6487  *
6488  * @wm: watermark calculation data
6489  *
6490  * Calculate the raw dram bandwidth (CIK).
6491  * Used for display watermark bandwidth calculations
6492  * Returns the dram bandwidth in MBytes/s
6493  */
6494 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6495 {
6496         /* Calculate raw DRAM Bandwidth */
6497         fixed20_12 dram_efficiency; /* 0.7 */
6498         fixed20_12 yclk, dram_channels, bandwidth;
6499         fixed20_12 a;
6500
6501         a.full = dfixed_const(1000);
6502         yclk.full = dfixed_const(wm->yclk);
6503         yclk.full = dfixed_div(yclk, a);
6504         dram_channels.full = dfixed_const(wm->dram_channels * 4);
6505         a.full = dfixed_const(10);
6506         dram_efficiency.full = dfixed_const(7);
6507         dram_efficiency.full = dfixed_div(dram_efficiency, a);
6508         bandwidth.full = dfixed_mul(dram_channels, yclk);
6509         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
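        /*
         * e.g. with yclk = 1,000,000 kHz (1 GHz effective) and 2 channels:
         * 1000 * (2 * 4) * 0.7 ~= 5600 MBytes/s of raw DRAM bandwidth.
         */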
6510
6511         return dfixed_trunc(bandwidth);
6512 }
6513
6514 /**
6515  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6516  *
6517  * @wm: watermark calculation data
6518  *
6519  * Calculate the dram bandwidth used for display (CIK).
6520  * Used for display watermark bandwidth calculations
6521  * Returns the dram bandwidth for display in MBytes/s
6522  */
6523 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6524 {
6525         /* Calculate DRAM Bandwidth and the part allocated to display. */
6526         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6527         fixed20_12 yclk, dram_channels, bandwidth;
6528         fixed20_12 a;
6529
6530         a.full = dfixed_const(1000);
6531         yclk.full = dfixed_const(wm->yclk);
6532         yclk.full = dfixed_div(yclk, a);
6533         dram_channels.full = dfixed_const(wm->dram_channels * 4);
6534         a.full = dfixed_const(10);
6535         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6536         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6537         bandwidth.full = dfixed_mul(dram_channels, yclk);
6538         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
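        /*
         * e.g. the same 1 GHz, 2 channel case with the 0.3 worst case
         * allocation: 1000 * (2 * 4) * 0.3 = 2400 MBytes/s for display.
         */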
6539
6540         return dfixed_trunc(bandwidth);
6541 }
6542
6543 /**
6544  * dce8_data_return_bandwidth - get the data return bandwidth
6545  *
6546  * @wm: watermark calculation data
6547  *
6548  * Calculate the data return bandwidth used for display (CIK).
6549  * Used for display watermark bandwidth calculations
6550  * Returns the data return bandwidth in MBytes/s
6551  */
6552 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6553 {
6554         /* Calculate the display Data return Bandwidth */
6555         fixed20_12 return_efficiency; /* 0.8 */
6556         fixed20_12 sclk, bandwidth;
6557         fixed20_12 a;
6558
6559         a.full = dfixed_const(1000);
6560         sclk.full = dfixed_const(wm->sclk);
6561         sclk.full = dfixed_div(sclk, a);
6562         a.full = dfixed_const(10);
6563         return_efficiency.full = dfixed_const(8);
6564         return_efficiency.full = dfixed_div(return_efficiency, a);
6565         a.full = dfixed_const(32);
6566         bandwidth.full = dfixed_mul(a, sclk);
6567         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
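        /*
         * e.g. with sclk = 800,000 kHz (800 MHz): 800 * 32 * 0.8 = 20480
         * MBytes/s of display data return bandwidth.
         */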
6568
6569         return dfixed_trunc(bandwidth);
6570 }
6571
6572 /**
6573  * dce8_dmif_request_bandwidth - get the dmif bandwidth
6574  *
6575  * @wm: watermark calculation data
6576  *
6577  * Calculate the dmif bandwidth used for display (CIK).
6578  * Used for display watermark bandwidth calculations
6579  * Returns the dmif bandwidth in MBytes/s
6580  */
6581 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6582 {
6583         /* Calculate the DMIF Request Bandwidth */
6584         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6585         fixed20_12 disp_clk, bandwidth;
6586         fixed20_12 a, b;
6587
6588         a.full = dfixed_const(1000);
6589         disp_clk.full = dfixed_const(wm->disp_clk);
6590         disp_clk.full = dfixed_div(disp_clk, a);
6591         a.full = dfixed_const(32);
6592         b.full = dfixed_mul(a, disp_clk);
6593
6594         a.full = dfixed_const(10);
6595         disp_clk_request_efficiency.full = dfixed_const(8);
6596         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6597
6598         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
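        /*
         * e.g. with disp_clk = 600,000 kHz (600 MHz): 600 * 32 * 0.8 = 15360
         * MBytes/s of DMIF request bandwidth.
         */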
6599
6600         return dfixed_trunc(bandwidth);
6601 }
6602
6603 /**
6604  * dce8_available_bandwidth - get the min available bandwidth
6605  *
6606  * @wm: watermark calculation data
6607  *
6608  * Calculate the min available bandwidth used for display (CIK).
6609  * Used for display watermark bandwidth calculations
6610  * Returns the min available bandwidth in MBytes/s
6611  */
6612 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6613 {
6614         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
6615         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6616         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6617         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6618
6619         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6620 }
6621
6622 /**
6623  * dce8_average_bandwidth - get the average available bandwidth
6624  *
6625  * @wm: watermark calculation data
6626  *
6627  * Calculate the average available bandwidth used for display (CIK).
6628  * Used for display watermark bandwidth calculations
6629  * Returns the average available bandwidth in MBytes/s
6630  */
6631 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6632 {
6633         /* Calculate the display mode Average Bandwidth
6634          * DisplayMode should contain the source and destination dimensions,
6635          * timing, etc.
6636          */
6637         fixed20_12 bpp;
6638         fixed20_12 line_time;
6639         fixed20_12 src_width;
6640         fixed20_12 bandwidth;
6641         fixed20_12 a;
6642
6643         a.full = dfixed_const(1000);
6644         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6645         line_time.full = dfixed_div(line_time, a);
6646         bpp.full = dfixed_const(wm->bytes_per_pixel);
6647         src_width.full = dfixed_const(wm->src_width);
6648         bandwidth.full = dfixed_mul(src_width, bpp);
6649         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6650         bandwidth.full = dfixed_div(bandwidth, line_time);
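        /*
         * e.g. a 1920 pixel wide source at 4 bytes per pixel, vsc = 1 and a
         * 12500 ns line time: (1920 * 4) / 12.5 us ~= 614 MBytes/s average.
         */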
6651
6652         return dfixed_trunc(bandwidth);
6653 }
6654
6655 /**
6656  * dce8_latency_watermark - get the latency watermark
6657  *
6658  * @wm: watermark calculation data
6659  *
6660  * Calculate the latency watermark (CIK).
6661  * Used for display watermark bandwidth calculations
6662  * Returns the latency watermark in ns
6663  */
6664 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6665 {
6666         /* First calculate the latency in ns */
6667         u32 mc_latency = 2000; /* 2000 ns. */
6668         u32 available_bandwidth = dce8_available_bandwidth(wm);
6669         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6670         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6671         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6672         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6673                 (wm->num_heads * cursor_line_pair_return_time);
6674         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6675         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6676         u32 tmp, dmif_size = 12288;
6677         fixed20_12 a, b, c;
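        /* Reading the formulas above (1 MByte/s == 1 byte/us):
         * worst_chunk_return_time is the ns needed to return eight 512 byte
         * chunks, cursor_line_pair_return_time the ns for what looks like a
         * 128 pixel x 4 byte cursor line, and dc_latency is 40 display clock
         * cycles expressed in ns.
         */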
6678
6679         if (wm->num_heads == 0)
6680                 return 0;
6681
6682         a.full = dfixed_const(2);
6683         b.full = dfixed_const(1);
6684         if ((wm->vsc.full > a.full) ||
6685             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6686             (wm->vtaps >= 5) ||
6687             ((wm->vsc.full >= a.full) && wm->interlaced))
6688                 max_src_lines_per_dst_line = 4;
6689         else
6690                 max_src_lines_per_dst_line = 2;
6691
6692         a.full = dfixed_const(available_bandwidth);
6693         b.full = dfixed_const(wm->num_heads);
6694         a.full = dfixed_div(a, b);
6695
6696         b.full = dfixed_const(mc_latency + 512);
6697         c.full = dfixed_const(wm->disp_clk);
6698         b.full = dfixed_div(b, c);
6699
6700         c.full = dfixed_const(dmif_size);
6701         b.full = dfixed_div(c, b);
6702
6703         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6704
6705         b.full = dfixed_const(1000);
6706         c.full = dfixed_const(wm->disp_clk);
6707         b.full = dfixed_div(c, b);
6708         c.full = dfixed_const(wm->bytes_per_pixel);
6709         b.full = dfixed_mul(b, c);
6710
6711         lb_fill_bw = min(tmp, dfixed_trunc(b));
6712
6713         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6714         b.full = dfixed_const(1000);
6715         c.full = dfixed_const(lb_fill_bw);
6716         b.full = dfixed_div(c, b);
6717         a.full = dfixed_div(a, b);
6718         line_fill_time = dfixed_trunc(a);
6719
6720         if (line_fill_time < wm->active_time)
6721                 return latency;
6722         else
6723                 return latency + (line_fill_time - wm->active_time);
6724
6725 }
6726
6727 /**
6728  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6729  * average bandwidth against the display dram bandwidth
6730  *
6731  * @wm: watermark calculation data
6732  *
6733  * Check if the display average bandwidth fits in the display
6734  * dram bandwidth (CIK).
6735  * Used for display watermark bandwidth calculations
6736  * Returns true if the display fits, false if not.
6737  */
6738 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6739 {
6740         if (dce8_average_bandwidth(wm) <=
6741             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6742                 return true;
6743         else
6744                 return false;
6745 }
6746
6747 /**
6748  * dce8_average_bandwidth_vs_available_bandwidth - check
6749  * average and available bandwidth
6750  *
6751  * @wm: watermark calculation data
6752  *
6753  * Check if the display average bandwidth fits in the display
6754  * available bandwidth (CIK).
6755  * Used for display watermark bandwidth calculations
6756  * Returns true if the display fits, false if not.
6757  */
6758 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6759 {
6760         if (dce8_average_bandwidth(wm) <=
6761             (dce8_available_bandwidth(wm) / wm->num_heads))
6762                 return true;
6763         else
6764                 return false;
6765 }
6766
6767 /**
6768  * dce8_check_latency_hiding - check latency hiding
6769  *
6770  * @wm: watermark calculation data
6771  *
6772  * Check latency hiding (CIK).
6773  * Used for display watermark bandwidth calculations
6774  * Returns true if the display fits, false if not.
6775  */
6776 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6777 {
6778         u32 lb_partitions = wm->lb_size / wm->src_width;
6779         u32 line_time = wm->active_time + wm->blank_time;
6780         u32 latency_tolerant_lines;
6781         u32 latency_hiding;
6782         fixed20_12 a;
6783
6784         a.full = dfixed_const(1);
6785         if (wm->vsc.full > a.full)
6786                 latency_tolerant_lines = 1;
6787         else {
6788                 if (lb_partitions <= (wm->vtaps + 1))
6789                         latency_tolerant_lines = 1;
6790                 else
6791                         latency_tolerant_lines = 2;
6792         }
6793
6794         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6795
6796         if (dce8_latency_watermark(wm) <= latency_hiding)
6797                 return true;
6798         else
6799                 return false;
6800 }
6801
6802 /**
6803  * dce8_program_watermarks - program display watermarks
6804  *
6805  * @rdev: radeon_device pointer
6806  * @radeon_crtc: the selected display controller
6807  * @lb_size: line buffer size
6808  * @num_heads: number of display controllers in use
6809  *
6810  * Calculate and program the display watermarks for the
6811  * selected display controller (CIK).
6812  */
6813 static void dce8_program_watermarks(struct radeon_device *rdev,
6814                                     struct radeon_crtc *radeon_crtc,
6815                                     u32 lb_size, u32 num_heads)
6816 {
6817         struct drm_display_mode *mode = &radeon_crtc->base.mode;
6818         struct dce8_wm_params wm;
6819         u32 pixel_period;
6820         u32 line_time = 0;
6821         u32 latency_watermark_a = 0, latency_watermark_b = 0;
6822         u32 tmp, wm_mask;
6823
6824         if (radeon_crtc->base.enabled && num_heads && mode) {
6825                 pixel_period = 1000000 / (u32)mode->clock;
6826                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6827
6828                 wm.yclk = rdev->pm.current_mclk * 10;
6829                 wm.sclk = rdev->pm.current_sclk * 10;
6830                 wm.disp_clk = mode->clock;
6831                 wm.src_width = mode->crtc_hdisplay;
6832                 wm.active_time = mode->crtc_hdisplay * pixel_period;
6833                 wm.blank_time = line_time - wm.active_time;
6834                 wm.interlaced = false;
6835                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6836                         wm.interlaced = true;
6837                 wm.vsc = radeon_crtc->vsc;
6838                 wm.vtaps = 1;
6839                 if (radeon_crtc->rmx_type != RMX_OFF)
6840                         wm.vtaps = 2;
6841                 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6842                 wm.lb_size = lb_size;
6843                 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6844                 wm.num_heads = num_heads;
6845
6846                 /* set for high clocks */
6847                 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6848                 /* set for low clocks */
6849                 /* wm.yclk = low clk; wm.sclk = low clk */
6850                 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6851
6852                 /* possibly force display priority to high */
6853                 /* should really do this at mode validation time... */
6854                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6855                     !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6856                     !dce8_check_latency_hiding(&wm) ||
6857                     (rdev->disp_priority == 2)) {
6858                         DRM_DEBUG_KMS("force priority to high\n");
6859                 }
6860         }
6861
6862         /* select wm A */
6863         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6864         tmp = wm_mask;
6865         tmp &= ~LATENCY_WATERMARK_MASK(3);
6866         tmp |= LATENCY_WATERMARK_MASK(1);
6867         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6868         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6869                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6870                 LATENCY_HIGH_WATERMARK(line_time)));
6871         /* select wm B */
6872         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6873         tmp &= ~LATENCY_WATERMARK_MASK(3);
6874         tmp |= LATENCY_WATERMARK_MASK(2);
6875         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6876         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6877                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6878                 LATENCY_HIGH_WATERMARK(line_time)));
6879         /* restore original selection */
6880         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6881 }
6882
6883 /**
6884  * dce8_bandwidth_update - program display watermarks
6885  *
6886  * @rdev: radeon_device pointer
6887  *
6888  * Calculate and program the display watermarks and line
6889  * buffer allocation (CIK).
6890  */
6891 void dce8_bandwidth_update(struct radeon_device *rdev)
6892 {
6893         struct drm_display_mode *mode = NULL;
6894         u32 num_heads = 0, lb_size;
6895         int i;
6896
6897         radeon_update_display_priority(rdev);
6898
6899         for (i = 0; i < rdev->num_crtc; i++) {
6900                 if (rdev->mode_info.crtcs[i]->base.enabled)
6901                         num_heads++;
6902         }
6903         for (i = 0; i < rdev->num_crtc; i++) {
6904                 mode = &rdev->mode_info.crtcs[i]->base.mode;
6905                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6906                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6907         }
6908 }
6909
6910 /**
6911  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6912  *
6913  * @rdev: radeon_device pointer
6914  *
6915  * Fetches a GPU clock counter snapshot (CIK).
6916  * Returns the 64 bit clock counter snapshot.
6917  */
6918 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6919 {
6920         uint64_t clock;
6921
6922         mutex_lock(&rdev->gpu_clock_mutex);
6923         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
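        /* writing 1 here presumably latches the full 64 bit counter so that
         * the two 32 bit halves below read as one consistent snapshot; the
         * mutex keeps concurrent captures from racing.
         */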
6924         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6925                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6926         mutex_unlock(&rdev->gpu_clock_mutex);
6927         return clock;
6928 }
6929
6930 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6931                               u32 cntl_reg, u32 status_reg)
6932 {
6933         int r, i;
6934         struct atom_clock_dividers dividers;
6935         uint32_t tmp;
6936
6937         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6938                                            clock, false, &dividers);
6939         if (r)
6940                 return r;
6941
6942         tmp = RREG32_SMC(cntl_reg);
6943         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6944         tmp |= dividers.post_divider;
6945         WREG32_SMC(cntl_reg, tmp);
6946
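        /* wait for the new divider to take effect: poll DCLK_STATUS up to
         * 100 times with a 10 ms delay, i.e. a timeout of roughly 1 second
         */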
6947         for (i = 0; i < 100; i++) {
6948                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6949                         break;
6950                 mdelay(10);
6951         }
6952         if (i == 100)
6953                 return -ETIMEDOUT;
6954
6955         return 0;
6956 }
6957
6958 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6959 {
6960         int r = 0;
6961
6962         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6963         if (r)
6964                 return r;
6965
6966         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6967         return r;
6968 }
6969
6970 int cik_uvd_resume(struct radeon_device *rdev)
6971 {
6972         uint64_t addr;
6973         uint32_t size;
6974         int r;
6975
6976         r = radeon_uvd_resume(rdev);
6977         if (r)
6978                 return r;
6979
6980         /* program the VCPU memory controller bits 0-27 */
6981         addr = rdev->uvd.gpu_addr >> 3;
6982         size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3;
6983         WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6984         WREG32(UVD_VCPU_CACHE_SIZE0, size);
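        /* the cache OFFSET/SIZE registers are programmed in 8 byte units
         * (hence the >> 3); the layout is firmware image first, then stack,
         * then heap, placed back to back.
         */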
6985
6986         addr += size;
6987         size = RADEON_UVD_STACK_SIZE >> 3;
6988         WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6989         WREG32(UVD_VCPU_CACHE_SIZE1, size);
6990
6991         addr += size;
6992         size = RADEON_UVD_HEAP_SIZE >> 3;
6993         WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6994         WREG32(UVD_VCPU_CACHE_SIZE2, size);
6995
6996         /* bits 28-31 */
6997         addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6998         WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6999
7000         /* bits 32-39 */
7001         addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7002         WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7003
7004         return 0;
7005 }