]> rtime.felk.cvut.cz Git - linux-imx.git/blob - drivers/gpu/drm/radeon/si.c
242c1ac83e2327a0ce6b32b720c9b208d54bc741
[linux-imx.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
/*
 * Firmware images declared by this file through MODULE_FIRMWARE().
 * For each of TAHITI, PITCAIRN, VERDE, OLAND and HAINAN six files are
 * listed (pfp, me, ce, mc, rlc, smc), all under the "radeon/" prefix.
 */
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
50 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
51 MODULE_FIRMWARE("radeon/VERDE_me.bin");
52 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
53 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
54 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
56 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
57 MODULE_FIRMWARE("radeon/OLAND_me.bin");
58 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
59 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
60 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
62 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68
/*
 * Forward declarations.
 * The two static prototypes have internal linkage, so their definitions
 * must live in this translation unit.  The extern r600_* and evergreen_*
 * helpers are defined elsewhere (NOTE(review): presumably r600.c and
 * evergreen.c -- confirm; a shared header would be the usual home for
 * these prototypes).
 */
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72 extern void r600_ih_ring_fini(struct radeon_device *rdev);
73 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
74 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
75 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
76 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
77 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
78 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
79
/*
 * VERDE register list whose name marks it as an RLC save/restore list.
 *
 * Layout as written below:
 *   - most entries are pairs: a word built as (selector << 16) | (x >> 2),
 *     followed by a 0x00000000 word;
 *   - the selector is one of 0x8000, 0x8001, 0x8040, 0x8041 or 0x9c00;
 *   - a lone 0x3 word separates a first group of pairs from the rest;
 *   - a single 0x00000000 word closes the list.
 *
 * NOTE(review): x looks like a register byte offset turned into a dword
 * index, the selector looks like an instance/broadcast select, and the
 * zero words look like value slots -- none of this is established by the
 * code visible here; confirm against the consumer of this table before
 * editing any entry.
 */
80 static const u32 verde_rlc_save_restore_register_list[] =
81 {
82         (0x8000 << 16) | (0x98f4 >> 2),
83         0x00000000,
84         (0x8040 << 16) | (0x98f4 >> 2),
85         0x00000000,
86         (0x8000 << 16) | (0xe80 >> 2),
87         0x00000000,
88         (0x8040 << 16) | (0xe80 >> 2),
89         0x00000000,
90         (0x8000 << 16) | (0x89bc >> 2),
91         0x00000000,
92         (0x8040 << 16) | (0x89bc >> 2),
93         0x00000000,
94         (0x8000 << 16) | (0x8c1c >> 2),
95         0x00000000,
96         (0x8040 << 16) | (0x8c1c >> 2),
97         0x00000000,
98         (0x9c00 << 16) | (0x98f0 >> 2),
99         0x00000000,
100         (0x9c00 << 16) | (0xe7c >> 2),
101         0x00000000,
102         (0x8000 << 16) | (0x9148 >> 2),
103         0x00000000,
104         (0x8040 << 16) | (0x9148 >> 2),
105         0x00000000,
106         (0x9c00 << 16) | (0x9150 >> 2),
107         0x00000000,
108         (0x9c00 << 16) | (0x897c >> 2),
109         0x00000000,
110         (0x9c00 << 16) | (0x8d8c >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0xac54 >> 2),
113         0X00000000,
114         0x3,
115         (0x9c00 << 16) | (0x98f8 >> 2),
116         0x00000000,
117         (0x9c00 << 16) | (0x9910 >> 2),
118         0x00000000,
119         (0x9c00 << 16) | (0x9914 >> 2),
120         0x00000000,
121         (0x9c00 << 16) | (0x9918 >> 2),
122         0x00000000,
123         (0x9c00 << 16) | (0x991c >> 2),
124         0x00000000,
125         (0x9c00 << 16) | (0x9920 >> 2),
126         0x00000000,
127         (0x9c00 << 16) | (0x9924 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x9928 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x992c >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x9930 >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x9934 >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x9938 >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x993c >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x9940 >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x9944 >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x9948 >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x994c >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x9950 >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x9954 >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x9958 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x995c >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x9960 >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x9964 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9968 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x996c >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x9970 >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x9974 >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x9978 >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x997c >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x9980 >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x9984 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9988 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x998c >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x8c00 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x8c14 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x8c04 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x8c08 >> 2),
188         0x00000000,
189         (0x8000 << 16) | (0x9b7c >> 2),
190         0x00000000,
191         (0x8040 << 16) | (0x9b7c >> 2),
192         0x00000000,
193         (0x8000 << 16) | (0xe84 >> 2),
194         0x00000000,
195         (0x8040 << 16) | (0xe84 >> 2),
196         0x00000000,
197         (0x8000 << 16) | (0x89c0 >> 2),
198         0x00000000,
199         (0x8040 << 16) | (0x89c0 >> 2),
200         0x00000000,
201         (0x8000 << 16) | (0x914c >> 2),
202         0x00000000,
203         (0x8040 << 16) | (0x914c >> 2),
204         0x00000000,
205         (0x8000 << 16) | (0x8c20 >> 2),
206         0x00000000,
207         (0x8040 << 16) | (0x8c20 >> 2),
208         0x00000000,
209         (0x8000 << 16) | (0x9354 >> 2),
210         0x00000000,
211         (0x8040 << 16) | (0x9354 >> 2),
212         0x00000000,
213         (0x9c00 << 16) | (0x9060 >> 2),
214         0x00000000,
215         (0x9c00 << 16) | (0x9364 >> 2),
216         0x00000000,
217         (0x9c00 << 16) | (0x9100 >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x913c >> 2),
220         0x00000000,
221         (0x8000 << 16) | (0x90e0 >> 2),
222         0x00000000,
223         (0x8000 << 16) | (0x90e4 >> 2),
224         0x00000000,
225         (0x8000 << 16) | (0x90e8 >> 2),
226         0x00000000,
227         (0x8040 << 16) | (0x90e0 >> 2),
228         0x00000000,
229         (0x8040 << 16) | (0x90e4 >> 2),
230         0x00000000,
231         (0x8040 << 16) | (0x90e8 >> 2),
232         0x00000000,
233         (0x9c00 << 16) | (0x8bcc >> 2),
234         0x00000000,
235         (0x9c00 << 16) | (0x8b24 >> 2),
236         0x00000000,
237         (0x9c00 << 16) | (0x88c4 >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x8e50 >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x8c0c >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x8e58 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8e5c >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x9508 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x950c >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x9494 >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0xac0c >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0xac10 >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0xac14 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0xae00 >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0xac08 >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0x88d4 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0x88c8 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0x88cc >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0x89b0 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0x8b10 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x8a14 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x9830 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x9834 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x9838 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x9a10 >> 2),
282         0x00000000,
283         (0x8000 << 16) | (0x9870 >> 2),
284         0x00000000,
285         (0x8000 << 16) | (0x9874 >> 2),
286         0x00000000,
287         (0x8001 << 16) | (0x9870 >> 2),
288         0x00000000,
289         (0x8001 << 16) | (0x9874 >> 2),
290         0x00000000,
291         (0x8040 << 16) | (0x9870 >> 2),
292         0x00000000,
293         (0x8040 << 16) | (0x9874 >> 2),
294         0x00000000,
295         (0x8041 << 16) | (0x9870 >> 2),
296         0x00000000,
297         (0x8041 << 16) | (0x9874 >> 2),
298         0x00000000,
299         0x00000000
300 };
301
/*
 * TAHITI "golden" RLC register table: six rows of three u32 words.
 * NOTE(review): rows are presumably { register offset, mask, value } --
 * the code that walks this table is not visible here; confirm before
 * changing the row layout.
 */
302 static const u32 tahiti_golden_rlc_registers[] =
303 {
304         0xc424, 0xffffffff, 0x00601005,
305         0xc47c, 0xffffffff, 0x10104040,
306         0xc488, 0xffffffff, 0x0100000a,
307         0xc314, 0xffffffff, 0x00000800,
308         0xc30c, 0xffffffff, 0x800000f4,
309         0xf4a8, 0xffffffff, 0x00000000
310 };
311
/*
 * TAHITI "golden" register table, written as rows of three u32 words.
 * NOTE(review): rows are presumably { register offset, mask, value };
 * in some rows the third word has bits outside the second word (e.g. the
 * 0x277c and 0x1410 rows), so how the two are combined matters -- confirm
 * against the consumer of this table before editing.
 */
312 static const u32 tahiti_golden_registers[] =
313 {
314         0x9a10, 0x00010000, 0x00018208,
315         0x9830, 0xffffffff, 0x00000000,
316         0x9834, 0xf00fffff, 0x00000400,
317         0x9838, 0x0002021c, 0x00020200,
318         0xc78, 0x00000080, 0x00000000,
319         0xd030, 0x000300c0, 0x00800040,
320         0xd830, 0x000300c0, 0x00800040,
321         0x5bb0, 0x000000f0, 0x00000070,
322         0x5bc0, 0x00200000, 0x50100000,
323         0x7030, 0x31000311, 0x00000011,
324         0x277c, 0x00000003, 0x000007ff,
325         0x240c, 0x000007ff, 0x00000000,
326         0x8a14, 0xf000001f, 0x00000007,
327         0x8b24, 0xffffffff, 0x00ffffff,
328         0x8b10, 0x0000ff0f, 0x00000000,
329         0x28a4c, 0x07ffffff, 0x4e000000,
330         0x28350, 0x3f3f3fff, 0x2a00126a,
331         0x30, 0x000000ff, 0x0040,
332         0x34, 0x00000040, 0x00004040,
333         0x9100, 0x07ffffff, 0x03000000,
334         0x8e88, 0x01ff1f3f, 0x00000000,
335         0x8e84, 0x01ff1f3f, 0x00000000,
336         0x9060, 0x0000007f, 0x00000020,
337         0x9508, 0x00010000, 0x00010000,
338         0xac14, 0x00000200, 0x000002fb,
339         0xac10, 0xffffffff, 0x0000543b,
340         0xac0c, 0xffffffff, 0xa9210876,
341         0x88d0, 0xffffffff, 0x000fff40,
342         0x88d4, 0x0000001f, 0x00000010,
343         0x1410, 0x20000000, 0x20fffed8,
344         0x15c0, 0x000c0fc0, 0x000c0400
345 };
346
/*
 * Second TAHITI "golden" register table: a single three-word row.
 * NOTE(review): presumably { register offset, mask, value }; why this row
 * is kept apart from tahiti_golden_registers is not visible here.
 */
347 static const u32 tahiti_golden_registers2[] =
348 {
349         0xc64, 0x00000001, 0x00000001
350 };
351
/*
 * PITCAIRN "golden" RLC register table: five rows of three u32 words.
 * NOTE(review): rows are presumably { register offset, mask, value } --
 * confirm against the code that consumes this table.
 */
352 static const u32 pitcairn_golden_rlc_registers[] =
353 {
354         0xc424, 0xffffffff, 0x00601004,
355         0xc47c, 0xffffffff, 0x10102020,
356         0xc488, 0xffffffff, 0x01000020,
357         0xc314, 0xffffffff, 0x00000800,
358         0xc30c, 0xffffffff, 0x800000a4
359 };
360
/*
 * PITCAIRN "golden" register table, written as rows of three u32 words.
 * NOTE(review): rows are presumably { register offset, mask, value } --
 * confirm against the code that consumes this table before editing.
 */
361 static const u32 pitcairn_golden_registers[] =
362 {
363         0x9a10, 0x00010000, 0x00018208,
364         0x9830, 0xffffffff, 0x00000000,
365         0x9834, 0xf00fffff, 0x00000400,
366         0x9838, 0x0002021c, 0x00020200,
367         0xc78, 0x00000080, 0x00000000,
368         0xd030, 0x000300c0, 0x00800040,
369         0xd830, 0x000300c0, 0x00800040,
370         0x5bb0, 0x000000f0, 0x00000070,
371         0x5bc0, 0x00200000, 0x50100000,
372         0x7030, 0x31000311, 0x00000011,
373         0x2ae4, 0x00073ffe, 0x000022a2,
374         0x240c, 0x000007ff, 0x00000000,
375         0x8a14, 0xf000001f, 0x00000007,
376         0x8b24, 0xffffffff, 0x00ffffff,
377         0x8b10, 0x0000ff0f, 0x00000000,
378         0x28a4c, 0x07ffffff, 0x4e000000,
379         0x28350, 0x3f3f3fff, 0x2a00126a,
380         0x30, 0x000000ff, 0x0040,
381         0x34, 0x00000040, 0x00004040,
382         0x9100, 0x07ffffff, 0x03000000,
383         0x9060, 0x0000007f, 0x00000020,
384         0x9508, 0x00010000, 0x00010000,
385         0xac14, 0x000003ff, 0x000000f7,
386         0xac10, 0xffffffff, 0x00000000,
387         0xac0c, 0xffffffff, 0x32761054,
388         0x88d4, 0x0000001f, 0x00000010,
389         0x15c0, 0x000c0fc0, 0x000c0400
390 };
391
/*
 * VERDE "golden" RLC register table: five rows of three u32 words.
 * NOTE(review): rows are presumably { register offset, mask, value } --
 * confirm against the code that consumes this table.
 */
392 static const u32 verde_golden_rlc_registers[] =
393 {
394         0xc424, 0xffffffff, 0x033f1005,
395         0xc47c, 0xffffffff, 0x10808020,
396         0xc488, 0xffffffff, 0x00800008,
397         0xc314, 0xffffffff, 0x00001000,
398         0xc30c, 0xffffffff, 0x80010014
399 };
400
/*
 * VERDE "golden" register table, written as rows of three u32 words.
 * NOTE(review): rows are presumably { register offset, mask, value } --
 * confirm against the code that consumes this table.
 *
 * Many rows appear two or three times with identical contents (e.g. the
 * 0xd030, 0x2ae4, 0x240c, 0x8a14, 0x28350, 0x8e88, 0xac14 rows).
 * NOTE(review): whether the repetition is required cannot be told from
 * this file section; do not deduplicate without confirming.
 */
401 static const u32 verde_golden_registers[] =
402 {
403         0x9a10, 0x00010000, 0x00018208,
404         0x9830, 0xffffffff, 0x00000000,
405         0x9834, 0xf00fffff, 0x00000400,
406         0x9838, 0x0002021c, 0x00020200,
407         0xc78, 0x00000080, 0x00000000,
408         0xd030, 0x000300c0, 0x00800040,
409         0xd030, 0x000300c0, 0x00800040,
410         0xd830, 0x000300c0, 0x00800040,
411         0xd830, 0x000300c0, 0x00800040,
412         0x5bb0, 0x000000f0, 0x00000070,
413         0x5bc0, 0x00200000, 0x50100000,
414         0x7030, 0x31000311, 0x00000011,
415         0x2ae4, 0x00073ffe, 0x000022a2,
416         0x2ae4, 0x00073ffe, 0x000022a2,
417         0x2ae4, 0x00073ffe, 0x000022a2,
418         0x240c, 0x000007ff, 0x00000000,
419         0x240c, 0x000007ff, 0x00000000,
420         0x240c, 0x000007ff, 0x00000000,
421         0x8a14, 0xf000001f, 0x00000007,
422         0x8a14, 0xf000001f, 0x00000007,
423         0x8a14, 0xf000001f, 0x00000007,
424         0x8b24, 0xffffffff, 0x00ffffff,
425         0x8b10, 0x0000ff0f, 0x00000000,
426         0x28a4c, 0x07ffffff, 0x4e000000,
427         0x28350, 0x3f3f3fff, 0x0000124a,
428         0x28350, 0x3f3f3fff, 0x0000124a,
429         0x28350, 0x3f3f3fff, 0x0000124a,
430         0x30, 0x000000ff, 0x0040,
431         0x34, 0x00000040, 0x00004040,
432         0x9100, 0x07ffffff, 0x03000000,
433         0x9100, 0x07ffffff, 0x03000000,
434         0x8e88, 0x01ff1f3f, 0x00000000,
435         0x8e88, 0x01ff1f3f, 0x00000000,
436         0x8e88, 0x01ff1f3f, 0x00000000,
437         0x8e84, 0x01ff1f3f, 0x00000000,
438         0x8e84, 0x01ff1f3f, 0x00000000,
439         0x8e84, 0x01ff1f3f, 0x00000000,
440         0x9060, 0x0000007f, 0x00000020,
441         0x9508, 0x00010000, 0x00010000,
442         0xac14, 0x000003ff, 0x00000003,
443         0xac14, 0x000003ff, 0x00000003,
444         0xac14, 0x000003ff, 0x00000003,
445         0xac10, 0xffffffff, 0x00000000,
446         0xac10, 0xffffffff, 0x00000000,
447         0xac10, 0xffffffff, 0x00000000,
448         0xac0c, 0xffffffff, 0x00001032,
449         0xac0c, 0xffffffff, 0x00001032,
450         0xac0c, 0xffffffff, 0x00001032,
451         0x88d4, 0x0000001f, 0x00000010,
452         0x88d4, 0x0000001f, 0x00000010,
453         0x88d4, 0x0000001f, 0x00000010,
454         0x15c0, 0x000c0fc0, 0x000c0400
455 };
456
/*
 * OLAND "golden" RLC register table: five rows of three u32 words.
 * The five rows are identical to the first five rows of
 * tahiti_golden_rlc_registers.
 * NOTE(review): rows are presumably { register offset, mask, value } --
 * confirm against the code that consumes this table.
 */
457 static const u32 oland_golden_rlc_registers[] =
458 {
459         0xc424, 0xffffffff, 0x00601005,
460         0xc47c, 0xffffffff, 0x10104040,
461         0xc488, 0xffffffff, 0x0100000a,
462         0xc314, 0xffffffff, 0x00000800,
463         0xc30c, 0xffffffff, 0x800000f4
464 };
465
/*
 * OLAND "golden" register table, written as rows of three u32 words.
 * NOTE(review): rows are presumably { register offset, mask, value } --
 * confirm against the code that consumes this table before editing.
 */
466 static const u32 oland_golden_registers[] =
467 {
468         0x9a10, 0x00010000, 0x00018208,
469         0x9830, 0xffffffff, 0x00000000,
470         0x9834, 0xf00fffff, 0x00000400,
471         0x9838, 0x0002021c, 0x00020200,
472         0xc78, 0x00000080, 0x00000000,
473         0xd030, 0x000300c0, 0x00800040,
474         0xd830, 0x000300c0, 0x00800040,
475         0x5bb0, 0x000000f0, 0x00000070,
476         0x5bc0, 0x00200000, 0x50100000,
477         0x7030, 0x31000311, 0x00000011,
478         0x2ae4, 0x00073ffe, 0x000022a2,
479         0x240c, 0x000007ff, 0x00000000,
480         0x8a14, 0xf000001f, 0x00000007,
481         0x8b24, 0xffffffff, 0x00ffffff,
482         0x8b10, 0x0000ff0f, 0x00000000,
483         0x28a4c, 0x07ffffff, 0x4e000000,
484         0x28350, 0x3f3f3fff, 0x00000082,
485         0x30, 0x000000ff, 0x0040,
486         0x34, 0x00000040, 0x00004040,
487         0x9100, 0x07ffffff, 0x03000000,
488         0x9060, 0x0000007f, 0x00000020,
489         0x9508, 0x00010000, 0x00010000,
490         0xac14, 0x000003ff, 0x000000f3,
491         0xac10, 0xffffffff, 0x00000000,
492         0xac0c, 0xffffffff, 0x00003210,
493         0x88d4, 0x0000001f, 0x00000010,
494         0x15c0, 0x000c0fc0, 0x000c0400
495 };
496
/*
 * HAINAN "golden" register table, written as rows of three u32 words.
 * Unlike the other golden tables in this section it has 0xd0c0 / 0xd8c0
 * rows and no 0xc78, 0x5bb0, 0x5bc0 or 0x7030 rows.
 * NOTE(review): rows are presumably { register offset, mask, value } --
 * confirm against the code that consumes this table before editing.
 */
497 static const u32 hainan_golden_registers[] =
498 {
499         0x9a10, 0x00010000, 0x00018208,
500         0x9830, 0xffffffff, 0x00000000,
501         0x9834, 0xf00fffff, 0x00000400,
502         0x9838, 0x0002021c, 0x00020200,
503         0xd0c0, 0xff000fff, 0x00000100,
504         0xd030, 0x000300c0, 0x00800040,
505         0xd8c0, 0xff000fff, 0x00000100,
506         0xd830, 0x000300c0, 0x00800040,
507         0x2ae4, 0x00073ffe, 0x000022a2,
508         0x240c, 0x000007ff, 0x00000000,
509         0x8a14, 0xf000001f, 0x00000007,
510         0x8b24, 0xffffffff, 0x00ffffff,
511         0x8b10, 0x0000ff0f, 0x00000000,
512         0x28a4c, 0x07ffffff, 0x4e000000,
513         0x28350, 0x3f3f3fff, 0x00000000,
514         0x30, 0x000000ff, 0x0040,
515         0x34, 0x00000040, 0x00004040,
516         0x9100, 0x03e00000, 0x03600000,
517         0x9060, 0x0000007f, 0x00000020,
518         0x9508, 0x00010000, 0x00010000,
519         0xac14, 0x000003ff, 0x000000f1,
520         0xac10, 0xffffffff, 0x00000000,
521         0xac0c, 0xffffffff, 0x00003210,
522         0x88d4, 0x0000001f, 0x00000010,
523         0x15c0, 0x000c0fc0, 0x000c0400
524 };
525
/*
 * Second HAINAN "golden" register table: a single three-word row.
 * NOTE(review): presumably { register offset, mask, value }; why this row
 * is kept apart from hainan_golden_registers is not visible here.
 */
526 static const u32 hainan_golden_registers2[] =
527 {
528         0x98f8, 0xffffffff, 0x02010001
529 };
530
/*
 * TAHITI "mgcg_cgcg" init table, written as rows of three u32 words.
 * The 0x802c row appears twice with identical contents, and the rows for
 * 0x9160 through 0x929c cover every 4-byte step in that range.
 * NOTE(review): rows are presumably { register offset, mask, value } and
 * the name suggests clock-gating setup -- neither is established by the
 * code visible here; confirm against the consumer of this table.
 */
531 static const u32 tahiti_mgcg_cgcg_init[] =
532 {
533         0xc400, 0xffffffff, 0xfffffffc,
534         0x802c, 0xffffffff, 0xe0000000,
535         0x9a60, 0xffffffff, 0x00000100,
536         0x92a4, 0xffffffff, 0x00000100,
537         0xc164, 0xffffffff, 0x00000100,
538         0x9774, 0xffffffff, 0x00000100,
539         0x8984, 0xffffffff, 0x06000100,
540         0x8a18, 0xffffffff, 0x00000100,
541         0x92a0, 0xffffffff, 0x00000100,
542         0xc380, 0xffffffff, 0x00000100,
543         0x8b28, 0xffffffff, 0x00000100,
544         0x9144, 0xffffffff, 0x00000100,
545         0x8d88, 0xffffffff, 0x00000100,
546         0x8d8c, 0xffffffff, 0x00000100,
547         0x9030, 0xffffffff, 0x00000100,
548         0x9034, 0xffffffff, 0x00000100,
549         0x9038, 0xffffffff, 0x00000100,
550         0x903c, 0xffffffff, 0x00000100,
551         0xad80, 0xffffffff, 0x00000100,
552         0xac54, 0xffffffff, 0x00000100,
553         0x897c, 0xffffffff, 0x06000100,
554         0x9868, 0xffffffff, 0x00000100,
555         0x9510, 0xffffffff, 0x00000100,
556         0xaf04, 0xffffffff, 0x00000100,
557         0xae04, 0xffffffff, 0x00000100,
558         0x949c, 0xffffffff, 0x00000100,
559         0x802c, 0xffffffff, 0xe0000000,
560         0x9160, 0xffffffff, 0x00010000,
561         0x9164, 0xffffffff, 0x00030002,
562         0x9168, 0xffffffff, 0x00040007,
563         0x916c, 0xffffffff, 0x00060005,
564         0x9170, 0xffffffff, 0x00090008,
565         0x9174, 0xffffffff, 0x00020001,
566         0x9178, 0xffffffff, 0x00040003,
567         0x917c, 0xffffffff, 0x00000007,
568         0x9180, 0xffffffff, 0x00060005,
569         0x9184, 0xffffffff, 0x00090008,
570         0x9188, 0xffffffff, 0x00030002,
571         0x918c, 0xffffffff, 0x00050004,
572         0x9190, 0xffffffff, 0x00000008,
573         0x9194, 0xffffffff, 0x00070006,
574         0x9198, 0xffffffff, 0x000a0009,
575         0x919c, 0xffffffff, 0x00040003,
576         0x91a0, 0xffffffff, 0x00060005,
577         0x91a4, 0xffffffff, 0x00000009,
578         0x91a8, 0xffffffff, 0x00080007,
579         0x91ac, 0xffffffff, 0x000b000a,
580         0x91b0, 0xffffffff, 0x00050004,
581         0x91b4, 0xffffffff, 0x00070006,
582         0x91b8, 0xffffffff, 0x0008000b,
583         0x91bc, 0xffffffff, 0x000a0009,
584         0x91c0, 0xffffffff, 0x000d000c,
585         0x91c4, 0xffffffff, 0x00060005,
586         0x91c8, 0xffffffff, 0x00080007,
587         0x91cc, 0xffffffff, 0x0000000b,
588         0x91d0, 0xffffffff, 0x000a0009,
589         0x91d4, 0xffffffff, 0x000d000c,
590         0x91d8, 0xffffffff, 0x00070006,
591         0x91dc, 0xffffffff, 0x00090008,
592         0x91e0, 0xffffffff, 0x0000000c,
593         0x91e4, 0xffffffff, 0x000b000a,
594         0x91e8, 0xffffffff, 0x000e000d,
595         0x91ec, 0xffffffff, 0x00080007,
596         0x91f0, 0xffffffff, 0x000a0009,
597         0x91f4, 0xffffffff, 0x0000000d,
598         0x91f8, 0xffffffff, 0x000c000b,
599         0x91fc, 0xffffffff, 0x000f000e,
600         0x9200, 0xffffffff, 0x00090008,
601         0x9204, 0xffffffff, 0x000b000a,
602         0x9208, 0xffffffff, 0x000c000f,
603         0x920c, 0xffffffff, 0x000e000d,
604         0x9210, 0xffffffff, 0x00110010,
605         0x9214, 0xffffffff, 0x000a0009,
606         0x9218, 0xffffffff, 0x000c000b,
607         0x921c, 0xffffffff, 0x0000000f,
608         0x9220, 0xffffffff, 0x000e000d,
609         0x9224, 0xffffffff, 0x00110010,
610         0x9228, 0xffffffff, 0x000b000a,
611         0x922c, 0xffffffff, 0x000d000c,
612         0x9230, 0xffffffff, 0x00000010,
613         0x9234, 0xffffffff, 0x000f000e,
614         0x9238, 0xffffffff, 0x00120011,
615         0x923c, 0xffffffff, 0x000c000b,
616         0x9240, 0xffffffff, 0x000e000d,
617         0x9244, 0xffffffff, 0x00000011,
618         0x9248, 0xffffffff, 0x0010000f,
619         0x924c, 0xffffffff, 0x00130012,
620         0x9250, 0xffffffff, 0x000d000c,
621         0x9254, 0xffffffff, 0x000f000e,
622         0x9258, 0xffffffff, 0x00100013,
623         0x925c, 0xffffffff, 0x00120011,
624         0x9260, 0xffffffff, 0x00150014,
625         0x9264, 0xffffffff, 0x000e000d,
626         0x9268, 0xffffffff, 0x0010000f,
627         0x926c, 0xffffffff, 0x00000013,
628         0x9270, 0xffffffff, 0x00120011,
629         0x9274, 0xffffffff, 0x00150014,
630         0x9278, 0xffffffff, 0x000f000e,
631         0x927c, 0xffffffff, 0x00110010,
632         0x9280, 0xffffffff, 0x00000014,
633         0x9284, 0xffffffff, 0x00130012,
634         0x9288, 0xffffffff, 0x00160015,
635         0x928c, 0xffffffff, 0x0010000f,
636         0x9290, 0xffffffff, 0x00120011,
637         0x9294, 0xffffffff, 0x00000015,
638         0x9298, 0xffffffff, 0x00140013,
639         0x929c, 0xffffffff, 0x00170016,
640         0x9150, 0xffffffff, 0x96940200,
641         0x8708, 0xffffffff, 0x00900100,
642         0xc478, 0xffffffff, 0x00000080,
643         0xc404, 0xffffffff, 0x0020003f,
644         0x30, 0xffffffff, 0x0000001c,
645         0x34, 0x000f0000, 0x000f0000,
646         0x160c, 0xffffffff, 0x00000100,
647         0x1024, 0xffffffff, 0x00000100,
648         0x102c, 0x00000101, 0x00000000,
649         0x20a8, 0xffffffff, 0x00000104,
650         0x264c, 0x000c0000, 0x000c0000,
651         0x2648, 0x000c0000, 0x000c0000,
652         0x55e4, 0xff000fff, 0x00000100,
653         0x55e8, 0x00000001, 0x00000001,
654         0x2f50, 0x00000001, 0x00000001,
655         0x30cc, 0xc0000fff, 0x00000104,
656         0xc1e4, 0x00000001, 0x00000001,
657         0xd0c0, 0xfffffff0, 0x00000100,
658         0xd8c0, 0xfffffff0, 0x00000100
659 };
660
/*
 * PITCAIRN "mgcg_cgcg" init table, written as rows of three u32 words.
 * The 0x802c row appears twice with identical contents.  Compared with
 * tahiti_mgcg_cgcg_init this table has no rows for 0x91c4-0x91fc,
 * 0x9264-0x929c, 0x264c or 0x2648; the rows it does have carry the same
 * words as the matching TAHITI rows.
 * NOTE(review): rows are presumably { register offset, mask, value } and
 * the name suggests clock-gating setup -- neither is established by the
 * code visible here; confirm against the consumer of this table.
 */
661 static const u32 pitcairn_mgcg_cgcg_init[] =
662 {
663         0xc400, 0xffffffff, 0xfffffffc,
664         0x802c, 0xffffffff, 0xe0000000,
665         0x9a60, 0xffffffff, 0x00000100,
666         0x92a4, 0xffffffff, 0x00000100,
667         0xc164, 0xffffffff, 0x00000100,
668         0x9774, 0xffffffff, 0x00000100,
669         0x8984, 0xffffffff, 0x06000100,
670         0x8a18, 0xffffffff, 0x00000100,
671         0x92a0, 0xffffffff, 0x00000100,
672         0xc380, 0xffffffff, 0x00000100,
673         0x8b28, 0xffffffff, 0x00000100,
674         0x9144, 0xffffffff, 0x00000100,
675         0x8d88, 0xffffffff, 0x00000100,
676         0x8d8c, 0xffffffff, 0x00000100,
677         0x9030, 0xffffffff, 0x00000100,
678         0x9034, 0xffffffff, 0x00000100,
679         0x9038, 0xffffffff, 0x00000100,
680         0x903c, 0xffffffff, 0x00000100,
681         0xad80, 0xffffffff, 0x00000100,
682         0xac54, 0xffffffff, 0x00000100,
683         0x897c, 0xffffffff, 0x06000100,
684         0x9868, 0xffffffff, 0x00000100,
685         0x9510, 0xffffffff, 0x00000100,
686         0xaf04, 0xffffffff, 0x00000100,
687         0xae04, 0xffffffff, 0x00000100,
688         0x949c, 0xffffffff, 0x00000100,
689         0x802c, 0xffffffff, 0xe0000000,
690         0x9160, 0xffffffff, 0x00010000,
691         0x9164, 0xffffffff, 0x00030002,
692         0x9168, 0xffffffff, 0x00040007,
693         0x916c, 0xffffffff, 0x00060005,
694         0x9170, 0xffffffff, 0x00090008,
695         0x9174, 0xffffffff, 0x00020001,
696         0x9178, 0xffffffff, 0x00040003,
697         0x917c, 0xffffffff, 0x00000007,
698         0x9180, 0xffffffff, 0x00060005,
699         0x9184, 0xffffffff, 0x00090008,
700         0x9188, 0xffffffff, 0x00030002,
701         0x918c, 0xffffffff, 0x00050004,
702         0x9190, 0xffffffff, 0x00000008,
703         0x9194, 0xffffffff, 0x00070006,
704         0x9198, 0xffffffff, 0x000a0009,
705         0x919c, 0xffffffff, 0x00040003,
706         0x91a0, 0xffffffff, 0x00060005,
707         0x91a4, 0xffffffff, 0x00000009,
708         0x91a8, 0xffffffff, 0x00080007,
709         0x91ac, 0xffffffff, 0x000b000a,
710         0x91b0, 0xffffffff, 0x00050004,
711         0x91b4, 0xffffffff, 0x00070006,
712         0x91b8, 0xffffffff, 0x0008000b,
713         0x91bc, 0xffffffff, 0x000a0009,
714         0x91c0, 0xffffffff, 0x000d000c,
715         0x9200, 0xffffffff, 0x00090008,
716         0x9204, 0xffffffff, 0x000b000a,
717         0x9208, 0xffffffff, 0x000c000f,
718         0x920c, 0xffffffff, 0x000e000d,
719         0x9210, 0xffffffff, 0x00110010,
720         0x9214, 0xffffffff, 0x000a0009,
721         0x9218, 0xffffffff, 0x000c000b,
722         0x921c, 0xffffffff, 0x0000000f,
723         0x9220, 0xffffffff, 0x000e000d,
724         0x9224, 0xffffffff, 0x00110010,
725         0x9228, 0xffffffff, 0x000b000a,
726         0x922c, 0xffffffff, 0x000d000c,
727         0x9230, 0xffffffff, 0x00000010,
728         0x9234, 0xffffffff, 0x000f000e,
729         0x9238, 0xffffffff, 0x00120011,
730         0x923c, 0xffffffff, 0x000c000b,
731         0x9240, 0xffffffff, 0x000e000d,
732         0x9244, 0xffffffff, 0x00000011,
733         0x9248, 0xffffffff, 0x0010000f,
734         0x924c, 0xffffffff, 0x00130012,
735         0x9250, 0xffffffff, 0x000d000c,
736         0x9254, 0xffffffff, 0x000f000e,
737         0x9258, 0xffffffff, 0x00100013,
738         0x925c, 0xffffffff, 0x00120011,
739         0x9260, 0xffffffff, 0x00150014,
740         0x9150, 0xffffffff, 0x96940200,
741         0x8708, 0xffffffff, 0x00900100,
742         0xc478, 0xffffffff, 0x00000080,
743         0xc404, 0xffffffff, 0x0020003f,
744         0x30, 0xffffffff, 0x0000001c,
745         0x34, 0x000f0000, 0x000f0000,
746         0x160c, 0xffffffff, 0x00000100,
747         0x1024, 0xffffffff, 0x00000100,
748         0x102c, 0x00000101, 0x00000000,
749         0x20a8, 0xffffffff, 0x00000104,
750         0x55e4, 0xff000fff, 0x00000100,
751         0x55e8, 0x00000001, 0x00000001,
752         0x2f50, 0x00000001, 0x00000001,
753         0x30cc, 0xc0000fff, 0x00000104,
754         0xc1e4, 0x00000001, 0x00000001,
755         0xd0c0, 0xfffffff0, 0x00000100,
756         0xd8c0, 0xfffffff0, 0x00000100
757 };
758
759 static const u32 verde_mgcg_cgcg_init[] =
760 {
761         0xc400, 0xffffffff, 0xfffffffc,
762         0x802c, 0xffffffff, 0xe0000000,
763         0x9a60, 0xffffffff, 0x00000100,
764         0x92a4, 0xffffffff, 0x00000100,
765         0xc164, 0xffffffff, 0x00000100,
766         0x9774, 0xffffffff, 0x00000100,
767         0x8984, 0xffffffff, 0x06000100,
768         0x8a18, 0xffffffff, 0x00000100,
769         0x92a0, 0xffffffff, 0x00000100,
770         0xc380, 0xffffffff, 0x00000100,
771         0x8b28, 0xffffffff, 0x00000100,
772         0x9144, 0xffffffff, 0x00000100,
773         0x8d88, 0xffffffff, 0x00000100,
774         0x8d8c, 0xffffffff, 0x00000100,
775         0x9030, 0xffffffff, 0x00000100,
776         0x9034, 0xffffffff, 0x00000100,
777         0x9038, 0xffffffff, 0x00000100,
778         0x903c, 0xffffffff, 0x00000100,
779         0xad80, 0xffffffff, 0x00000100,
780         0xac54, 0xffffffff, 0x00000100,
781         0x897c, 0xffffffff, 0x06000100,
782         0x9868, 0xffffffff, 0x00000100,
783         0x9510, 0xffffffff, 0x00000100,
784         0xaf04, 0xffffffff, 0x00000100,
785         0xae04, 0xffffffff, 0x00000100,
786         0x949c, 0xffffffff, 0x00000100,
787         0x802c, 0xffffffff, 0xe0000000,
788         0x9160, 0xffffffff, 0x00010000,
789         0x9164, 0xffffffff, 0x00030002,
790         0x9168, 0xffffffff, 0x00040007,
791         0x916c, 0xffffffff, 0x00060005,
792         0x9170, 0xffffffff, 0x00090008,
793         0x9174, 0xffffffff, 0x00020001,
794         0x9178, 0xffffffff, 0x00040003,
795         0x917c, 0xffffffff, 0x00000007,
796         0x9180, 0xffffffff, 0x00060005,
797         0x9184, 0xffffffff, 0x00090008,
798         0x9188, 0xffffffff, 0x00030002,
799         0x918c, 0xffffffff, 0x00050004,
800         0x9190, 0xffffffff, 0x00000008,
801         0x9194, 0xffffffff, 0x00070006,
802         0x9198, 0xffffffff, 0x000a0009,
803         0x919c, 0xffffffff, 0x00040003,
804         0x91a0, 0xffffffff, 0x00060005,
805         0x91a4, 0xffffffff, 0x00000009,
806         0x91a8, 0xffffffff, 0x00080007,
807         0x91ac, 0xffffffff, 0x000b000a,
808         0x91b0, 0xffffffff, 0x00050004,
809         0x91b4, 0xffffffff, 0x00070006,
810         0x91b8, 0xffffffff, 0x0008000b,
811         0x91bc, 0xffffffff, 0x000a0009,
812         0x91c0, 0xffffffff, 0x000d000c,
813         0x9200, 0xffffffff, 0x00090008,
814         0x9204, 0xffffffff, 0x000b000a,
815         0x9208, 0xffffffff, 0x000c000f,
816         0x920c, 0xffffffff, 0x000e000d,
817         0x9210, 0xffffffff, 0x00110010,
818         0x9214, 0xffffffff, 0x000a0009,
819         0x9218, 0xffffffff, 0x000c000b,
820         0x921c, 0xffffffff, 0x0000000f,
821         0x9220, 0xffffffff, 0x000e000d,
822         0x9224, 0xffffffff, 0x00110010,
823         0x9228, 0xffffffff, 0x000b000a,
824         0x922c, 0xffffffff, 0x000d000c,
825         0x9230, 0xffffffff, 0x00000010,
826         0x9234, 0xffffffff, 0x000f000e,
827         0x9238, 0xffffffff, 0x00120011,
828         0x923c, 0xffffffff, 0x000c000b,
829         0x9240, 0xffffffff, 0x000e000d,
830         0x9244, 0xffffffff, 0x00000011,
831         0x9248, 0xffffffff, 0x0010000f,
832         0x924c, 0xffffffff, 0x00130012,
833         0x9250, 0xffffffff, 0x000d000c,
834         0x9254, 0xffffffff, 0x000f000e,
835         0x9258, 0xffffffff, 0x00100013,
836         0x925c, 0xffffffff, 0x00120011,
837         0x9260, 0xffffffff, 0x00150014,
838         0x9150, 0xffffffff, 0x96940200,
839         0x8708, 0xffffffff, 0x00900100,
840         0xc478, 0xffffffff, 0x00000080,
841         0xc404, 0xffffffff, 0x0020003f,
842         0x30, 0xffffffff, 0x0000001c,
843         0x34, 0x000f0000, 0x000f0000,
844         0x160c, 0xffffffff, 0x00000100,
845         0x1024, 0xffffffff, 0x00000100,
846         0x102c, 0x00000101, 0x00000000,
847         0x20a8, 0xffffffff, 0x00000104,
848         0x264c, 0x000c0000, 0x000c0000,
849         0x2648, 0x000c0000, 0x000c0000,
850         0x55e4, 0xff000fff, 0x00000100,
851         0x55e8, 0x00000001, 0x00000001,
852         0x2f50, 0x00000001, 0x00000001,
853         0x30cc, 0xc0000fff, 0x00000104,
854         0xc1e4, 0x00000001, 0x00000001,
855         0xd0c0, 0xfffffff0, 0x00000100,
856         0xd8c0, 0xfffffff0, 0x00000100
857 };
858
/*
 * Register table programmed for CHIP_OLAND by si_init_golden_registers()
 * through radeon_program_register_sequence(), after the Oland golden and
 * golden RLC tables.
 *
 * The array is a flat list of three-word rows.
 * NOTE(review): the columns are presumably (register offset, mask, value)
 * -- confirm against radeon_program_register_sequence().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
938
/*
 * Register table programmed for CHIP_HAINAN by si_init_golden_registers()
 * through radeon_program_register_sequence(), after the two Hainan golden
 * register tables.
 *
 * The rows match oland_mgcg_cgcg_init except that the 0x102c, 0x55e4 and
 * 0x55e8 entries are not present here.
 * NOTE(review): the three columns of each row are presumably
 * (register offset, mask, value) -- confirm against
 * radeon_program_register_sequence().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
1015
1016 static u32 verde_pg_init[] =
1017 {
1018         0x353c, 0xffffffff, 0x40000,
1019         0x3538, 0xffffffff, 0x200010ff,
1020         0x353c, 0xffffffff, 0x0,
1021         0x353c, 0xffffffff, 0x0,
1022         0x353c, 0xffffffff, 0x0,
1023         0x353c, 0xffffffff, 0x0,
1024         0x353c, 0xffffffff, 0x0,
1025         0x353c, 0xffffffff, 0x7007,
1026         0x3538, 0xffffffff, 0x300010ff,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x400000,
1033         0x3538, 0xffffffff, 0x100010ff,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x120200,
1040         0x3538, 0xffffffff, 0x500010ff,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x1e1e16,
1047         0x3538, 0xffffffff, 0x600010ff,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x171f1e,
1054         0x3538, 0xffffffff, 0x700010ff,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x3538, 0xffffffff, 0x9ff,
1062         0x3500, 0xffffffff, 0x0,
1063         0x3504, 0xffffffff, 0x10000800,
1064         0x3504, 0xffffffff, 0xf,
1065         0x3504, 0xffffffff, 0xf,
1066         0x3500, 0xffffffff, 0x4,
1067         0x3504, 0xffffffff, 0x1000051e,
1068         0x3504, 0xffffffff, 0xffff,
1069         0x3504, 0xffffffff, 0xffff,
1070         0x3500, 0xffffffff, 0x8,
1071         0x3504, 0xffffffff, 0x80500,
1072         0x3500, 0xffffffff, 0x12,
1073         0x3504, 0xffffffff, 0x9050c,
1074         0x3500, 0xffffffff, 0x1d,
1075         0x3504, 0xffffffff, 0xb052c,
1076         0x3500, 0xffffffff, 0x2a,
1077         0x3504, 0xffffffff, 0x1053e,
1078         0x3500, 0xffffffff, 0x2d,
1079         0x3504, 0xffffffff, 0x10546,
1080         0x3500, 0xffffffff, 0x30,
1081         0x3504, 0xffffffff, 0xa054e,
1082         0x3500, 0xffffffff, 0x3c,
1083         0x3504, 0xffffffff, 0x1055f,
1084         0x3500, 0xffffffff, 0x3f,
1085         0x3504, 0xffffffff, 0x10567,
1086         0x3500, 0xffffffff, 0x42,
1087         0x3504, 0xffffffff, 0x1056f,
1088         0x3500, 0xffffffff, 0x45,
1089         0x3504, 0xffffffff, 0x10572,
1090         0x3500, 0xffffffff, 0x48,
1091         0x3504, 0xffffffff, 0x20575,
1092         0x3500, 0xffffffff, 0x4c,
1093         0x3504, 0xffffffff, 0x190801,
1094         0x3500, 0xffffffff, 0x67,
1095         0x3504, 0xffffffff, 0x1082a,
1096         0x3500, 0xffffffff, 0x6a,
1097         0x3504, 0xffffffff, 0x1b082d,
1098         0x3500, 0xffffffff, 0x87,
1099         0x3504, 0xffffffff, 0x310851,
1100         0x3500, 0xffffffff, 0xba,
1101         0x3504, 0xffffffff, 0x891,
1102         0x3500, 0xffffffff, 0xbc,
1103         0x3504, 0xffffffff, 0x893,
1104         0x3500, 0xffffffff, 0xbe,
1105         0x3504, 0xffffffff, 0x20895,
1106         0x3500, 0xffffffff, 0xc2,
1107         0x3504, 0xffffffff, 0x20899,
1108         0x3500, 0xffffffff, 0xc6,
1109         0x3504, 0xffffffff, 0x2089d,
1110         0x3500, 0xffffffff, 0xca,
1111         0x3504, 0xffffffff, 0x8a1,
1112         0x3500, 0xffffffff, 0xcc,
1113         0x3504, 0xffffffff, 0x8a3,
1114         0x3500, 0xffffffff, 0xce,
1115         0x3504, 0xffffffff, 0x308a5,
1116         0x3500, 0xffffffff, 0xd3,
1117         0x3504, 0xffffffff, 0x6d08cd,
1118         0x3500, 0xffffffff, 0x142,
1119         0x3504, 0xffffffff, 0x2000095a,
1120         0x3504, 0xffffffff, 0x1,
1121         0x3500, 0xffffffff, 0x144,
1122         0x3504, 0xffffffff, 0x301f095b,
1123         0x3500, 0xffffffff, 0x165,
1124         0x3504, 0xffffffff, 0xc094d,
1125         0x3500, 0xffffffff, 0x173,
1126         0x3504, 0xffffffff, 0xf096d,
1127         0x3500, 0xffffffff, 0x184,
1128         0x3504, 0xffffffff, 0x15097f,
1129         0x3500, 0xffffffff, 0x19b,
1130         0x3504, 0xffffffff, 0xc0998,
1131         0x3500, 0xffffffff, 0x1a9,
1132         0x3504, 0xffffffff, 0x409a7,
1133         0x3500, 0xffffffff, 0x1af,
1134         0x3504, 0xffffffff, 0xcdc,
1135         0x3500, 0xffffffff, 0x1b1,
1136         0x3504, 0xffffffff, 0x800,
1137         0x3508, 0xffffffff, 0x6c9b2000,
1138         0x3510, 0xfc00, 0x2000,
1139         0x3544, 0xffffffff, 0xfc0,
1140         0x28d4, 0x00000100, 0x100
1141 };
1142
1143 static void si_init_golden_registers(struct radeon_device *rdev)
1144 {
1145         switch (rdev->family) {
1146         case CHIP_TAHITI:
1147                 radeon_program_register_sequence(rdev,
1148                                                  tahiti_golden_registers,
1149                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1150                 radeon_program_register_sequence(rdev,
1151                                                  tahiti_golden_rlc_registers,
1152                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1153                 radeon_program_register_sequence(rdev,
1154                                                  tahiti_mgcg_cgcg_init,
1155                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1156                 radeon_program_register_sequence(rdev,
1157                                                  tahiti_golden_registers2,
1158                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1159                 break;
1160         case CHIP_PITCAIRN:
1161                 radeon_program_register_sequence(rdev,
1162                                                  pitcairn_golden_registers,
1163                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1164                 radeon_program_register_sequence(rdev,
1165                                                  pitcairn_golden_rlc_registers,
1166                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1167                 radeon_program_register_sequence(rdev,
1168                                                  pitcairn_mgcg_cgcg_init,
1169                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1170                 break;
1171         case CHIP_VERDE:
1172                 radeon_program_register_sequence(rdev,
1173                                                  verde_golden_registers,
1174                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1175                 radeon_program_register_sequence(rdev,
1176                                                  verde_golden_rlc_registers,
1177                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1178                 radeon_program_register_sequence(rdev,
1179                                                  verde_mgcg_cgcg_init,
1180                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1181                 radeon_program_register_sequence(rdev,
1182                                                  verde_pg_init,
1183                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1184                 break;
1185         case CHIP_OLAND:
1186                 radeon_program_register_sequence(rdev,
1187                                                  oland_golden_registers,
1188                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1189                 radeon_program_register_sequence(rdev,
1190                                                  oland_golden_rlc_registers,
1191                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1192                 radeon_program_register_sequence(rdev,
1193                                                  oland_mgcg_cgcg_init,
1194                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1195                 break;
1196         case CHIP_HAINAN:
1197                 radeon_program_register_sequence(rdev,
1198                                                  hainan_golden_registers,
1199                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1200                 radeon_program_register_sequence(rdev,
1201                                                  hainan_golden_registers2,
1202                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1203                 radeon_program_register_sequence(rdev,
1204                                                  hainan_mgcg_cgcg_init,
1205                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1206                 break;
1207         default:
1208                 break;
1209         }
1210 }
1211
/*
 * Fixed clock constants used by si_get_xclk(): TCLK is the value it
 * returns when MUX_TCLK_TO_XCLK is set in CG_CLKPIN_CNTL_2.
 * NOTE(review): the units presumably match
 * rdev->clock.spll.reference_freq, which the same function returns on
 * its other paths -- confirm.
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1214
1215 /**
1216  * si_get_xclk - get the xclk
1217  *
1218  * @rdev: radeon_device pointer
1219  *
1220  * Returns the reference clock used by the gfx engine
1221  * (SI).
1222  */
1223 u32 si_get_xclk(struct radeon_device *rdev)
1224 {
1225         u32 reference_clock = rdev->clock.spll.reference_freq;
1226         u32 tmp;
1227
1228         tmp = RREG32(CG_CLKPIN_CNTL_2);
1229         if (tmp & MUX_TCLK_TO_XCLK)
1230                 return TCLK;
1231
1232         tmp = RREG32(CG_CLKPIN_CNTL);
1233         if (tmp & XTALIN_DIVIDE)
1234                 return reference_clock / 4;
1235
1236         return reference_clock;
1237 }
1238
1239 /* get temperature in millidegrees */
1240 int si_get_temp(struct radeon_device *rdev)
1241 {
1242         u32 temp;
1243         int actual_temp = 0;
1244
1245         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1246                 CTF_TEMP_SHIFT;
1247
1248         if (temp & 0x200)
1249                 actual_temp = 255;
1250         else
1251                 actual_temp = temp & 0x1ff;
1252
1253         actual_temp = (actual_temp * 1000);
1254
1255         return actual_temp;
1256 }
1257
/*
 * Number of {index, data} pairs in each *_io_mc_regs table below; the
 * same size is used for every SI variant.
 */
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * MC io register pairs for CHIP_TAHITI.  si_mc_load_microcode() writes
 * the first word of each pair to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA before uploading the MC ucode.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a77400}
};
1298
/*
 * MC io register pairs for CHIP_PITCAIRN.  si_mc_load_microcode() writes
 * the first word of each pair to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA before uploading the MC ucode.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a47400}
};
1337
/*
 * MC io register pairs for CHIP_VERDE; si_mc_load_microcode() also uses
 * this table for any family it has no explicit case for.  The first word
 * of each pair is written to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA before the MC ucode is uploaded.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a37400}
};
1376
/*
 * MC io register pairs for CHIP_OLAND.  si_mc_load_microcode() writes
 * the first word of each pair to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA before uploading the MC ucode.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a17730}
};
1415
/*
 * MC io register pairs for CHIP_HAINAN.  si_mc_load_microcode() writes
 * the first word of each pair to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA before uploading the MC ucode.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a07730}
};
1454
1455 /* ucode loading */
1456 static int si_mc_load_microcode(struct radeon_device *rdev)
1457 {
1458         const __be32 *fw_data;
1459         u32 running, blackout = 0;
1460         u32 *io_mc_regs;
1461         int i, ucode_size, regs_size;
1462
1463         if (!rdev->mc_fw)
1464                 return -EINVAL;
1465
1466         switch (rdev->family) {
1467         case CHIP_TAHITI:
1468                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1469                 ucode_size = SI_MC_UCODE_SIZE;
1470                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1471                 break;
1472         case CHIP_PITCAIRN:
1473                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1474                 ucode_size = SI_MC_UCODE_SIZE;
1475                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1476                 break;
1477         case CHIP_VERDE:
1478         default:
1479                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1480                 ucode_size = SI_MC_UCODE_SIZE;
1481                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1482                 break;
1483         case CHIP_OLAND:
1484                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1485                 ucode_size = OLAND_MC_UCODE_SIZE;
1486                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1487                 break;
1488         case CHIP_HAINAN:
1489                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1490                 ucode_size = OLAND_MC_UCODE_SIZE;
1491                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1492                 break;
1493         }
1494
1495         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1496
1497         if (running == 0) {
1498                 if (running) {
1499                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1500                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1501                 }
1502
1503                 /* reset the engine and set to writable */
1504                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1505                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1506
1507                 /* load mc io regs */
1508                 for (i = 0; i < regs_size; i++) {
1509                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1510                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1511                 }
1512                 /* load the MC ucode */
1513                 fw_data = (const __be32 *)rdev->mc_fw->data;
1514                 for (i = 0; i < ucode_size; i++)
1515                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1516
1517                 /* put the engine back into the active state */
1518                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1519                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1520                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1521
1522                 /* wait for training to complete */
1523                 for (i = 0; i < rdev->usec_timeout; i++) {
1524                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1525                                 break;
1526                         udelay(1);
1527                 }
1528                 for (i = 0; i < rdev->usec_timeout; i++) {
1529                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1530                                 break;
1531                         udelay(1);
1532                 }
1533
1534                 if (running)
1535                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1536         }
1537
1538         return 0;
1539 }
1540
1541 static int si_init_microcode(struct radeon_device *rdev)
1542 {
1543         const char *chip_name;
1544         const char *rlc_chip_name;
1545         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1546         size_t smc_req_size;
1547         char fw_name[30];
1548         int err;
1549
1550         DRM_DEBUG("\n");
1551
1552         switch (rdev->family) {
1553         case CHIP_TAHITI:
1554                 chip_name = "TAHITI";
1555                 rlc_chip_name = "TAHITI";
1556                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1557                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1558                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1559                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1560                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1561                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1562                 break;
1563         case CHIP_PITCAIRN:
1564                 chip_name = "PITCAIRN";
1565                 rlc_chip_name = "PITCAIRN";
1566                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1567                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1568                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1569                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1570                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1571                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1572                 break;
1573         case CHIP_VERDE:
1574                 chip_name = "VERDE";
1575                 rlc_chip_name = "VERDE";
1576                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1577                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1578                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1579                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1580                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1581                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1582                 break;
1583         case CHIP_OLAND:
1584                 chip_name = "OLAND";
1585                 rlc_chip_name = "OLAND";
1586                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1587                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1588                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1589                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1590                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1591                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1592                 break;
1593         case CHIP_HAINAN:
1594                 chip_name = "HAINAN";
1595                 rlc_chip_name = "HAINAN";
1596                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1597                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1598                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1599                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1600                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1601                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1602                 break;
1603         default: BUG();
1604         }
1605
1606         DRM_INFO("Loading %s Microcode\n", chip_name);
1607
1608         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1609         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1610         if (err)
1611                 goto out;
1612         if (rdev->pfp_fw->size != pfp_req_size) {
1613                 printk(KERN_ERR
1614                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1615                        rdev->pfp_fw->size, fw_name);
1616                 err = -EINVAL;
1617                 goto out;
1618         }
1619
1620         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1621         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1622         if (err)
1623                 goto out;
1624         if (rdev->me_fw->size != me_req_size) {
1625                 printk(KERN_ERR
1626                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627                        rdev->me_fw->size, fw_name);
1628                 err = -EINVAL;
1629         }
1630
1631         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1632         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1633         if (err)
1634                 goto out;
1635         if (rdev->ce_fw->size != ce_req_size) {
1636                 printk(KERN_ERR
1637                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1638                        rdev->ce_fw->size, fw_name);
1639                 err = -EINVAL;
1640         }
1641
1642         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1643         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1644         if (err)
1645                 goto out;
1646         if (rdev->rlc_fw->size != rlc_req_size) {
1647                 printk(KERN_ERR
1648                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1649                        rdev->rlc_fw->size, fw_name);
1650                 err = -EINVAL;
1651         }
1652
1653         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1654         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1655         if (err)
1656                 goto out;
1657         if (rdev->mc_fw->size != mc_req_size) {
1658                 printk(KERN_ERR
1659                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1660                        rdev->mc_fw->size, fw_name);
1661                 err = -EINVAL;
1662         }
1663
1664         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1665         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1666         if (err)
1667                 goto out;
1668         if (rdev->smc_fw->size != smc_req_size) {
1669                 printk(KERN_ERR
1670                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1671                        rdev->smc_fw->size, fw_name);
1672                 err = -EINVAL;
1673         }
1674
1675 out:
1676         if (err) {
1677                 if (err != -EINVAL)
1678                         printk(KERN_ERR
1679                                "si_cp: Failed to load firmware \"%s\"\n",
1680                                fw_name);
1681                 release_firmware(rdev->pfp_fw);
1682                 rdev->pfp_fw = NULL;
1683                 release_firmware(rdev->me_fw);
1684                 rdev->me_fw = NULL;
1685                 release_firmware(rdev->ce_fw);
1686                 rdev->ce_fw = NULL;
1687                 release_firmware(rdev->rlc_fw);
1688                 rdev->rlc_fw = NULL;
1689                 release_firmware(rdev->mc_fw);
1690                 rdev->mc_fw = NULL;
1691                 release_firmware(rdev->smc_fw);
1692                 rdev->smc_fw = NULL;
1693         }
1694         return err;
1695 }
1696
1697 /* watermark setup */
1698 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1699                                    struct radeon_crtc *radeon_crtc,
1700                                    struct drm_display_mode *mode,
1701                                    struct drm_display_mode *other_mode)
1702 {
1703         u32 tmp;
1704         /*
1705          * Line Buffer Setup
1706          * There are 3 line buffers, each one shared by 2 display controllers.
1707          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1708          * the display controllers.  The paritioning is done via one of four
1709          * preset allocations specified in bits 21:20:
1710          *  0 - half lb
1711          *  2 - whole lb, other crtc must be disabled
1712          */
1713         /* this can get tricky if we have two large displays on a paired group
1714          * of crtcs.  Ideally for multiple large displays we'd assign them to
1715          * non-linked crtcs for maximum line buffer allocation.
1716          */
1717         if (radeon_crtc->base.enabled && mode) {
1718                 if (other_mode)
1719                         tmp = 0; /* 1/2 */
1720                 else
1721                         tmp = 2; /* whole */
1722         } else
1723                 tmp = 0;
1724
1725         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1726                DC_LB_MEMORY_CONFIG(tmp));
1727
1728         if (radeon_crtc->base.enabled && mode) {
1729                 switch (tmp) {
1730                 case 0:
1731                 default:
1732                         return 4096 * 2;
1733                 case 2:
1734                         return 8192 * 2;
1735                 }
1736         }
1737
1738         /* controller not enabled, so no lb used */
1739         return 0;
1740 }
1741
1742 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1743 {
1744         u32 tmp = RREG32(MC_SHARED_CHMAP);
1745
1746         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1747         case 0:
1748         default:
1749                 return 1;
1750         case 1:
1751                 return 2;
1752         case 2:
1753                 return 4;
1754         case 3:
1755                 return 8;
1756         case 4:
1757                 return 3;
1758         case 5:
1759                 return 6;
1760         case 6:
1761                 return 10;
1762         case 7:
1763                 return 12;
1764         case 8:
1765                 return 16;
1766         }
1767 }
1768
1769 struct dce6_wm_params {
1770         u32 dram_channels; /* number of dram channels */
1771         u32 yclk;          /* bandwidth per dram data pin in kHz */
1772         u32 sclk;          /* engine clock in kHz */
1773         u32 disp_clk;      /* display clock in kHz */
1774         u32 src_width;     /* viewport width */
1775         u32 active_time;   /* active display time in ns */
1776         u32 blank_time;    /* blank time in ns */
1777         bool interlaced;    /* mode is interlaced */
1778         fixed20_12 vsc;    /* vertical scale ratio */
1779         u32 num_heads;     /* number of active crtcs */
1780         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1781         u32 lb_size;       /* line buffer allocated to pipe */
1782         u32 vtaps;         /* vertical scaler taps */
1783 };
1784
1785 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1786 {
1787         /* Calculate raw DRAM Bandwidth */
1788         fixed20_12 dram_efficiency; /* 0.7 */
1789         fixed20_12 yclk, dram_channels, bandwidth;
1790         fixed20_12 a;
1791
1792         a.full = dfixed_const(1000);
1793         yclk.full = dfixed_const(wm->yclk);
1794         yclk.full = dfixed_div(yclk, a);
1795         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1796         a.full = dfixed_const(10);
1797         dram_efficiency.full = dfixed_const(7);
1798         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1799         bandwidth.full = dfixed_mul(dram_channels, yclk);
1800         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1801
1802         return dfixed_trunc(bandwidth);
1803 }
1804
1805 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1806 {
1807         /* Calculate DRAM Bandwidth and the part allocated to display. */
1808         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1809         fixed20_12 yclk, dram_channels, bandwidth;
1810         fixed20_12 a;
1811
1812         a.full = dfixed_const(1000);
1813         yclk.full = dfixed_const(wm->yclk);
1814         yclk.full = dfixed_div(yclk, a);
1815         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1816         a.full = dfixed_const(10);
1817         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1818         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1819         bandwidth.full = dfixed_mul(dram_channels, yclk);
1820         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1821
1822         return dfixed_trunc(bandwidth);
1823 }
1824
1825 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1826 {
1827         /* Calculate the display Data return Bandwidth */
1828         fixed20_12 return_efficiency; /* 0.8 */
1829         fixed20_12 sclk, bandwidth;
1830         fixed20_12 a;
1831
1832         a.full = dfixed_const(1000);
1833         sclk.full = dfixed_const(wm->sclk);
1834         sclk.full = dfixed_div(sclk, a);
1835         a.full = dfixed_const(10);
1836         return_efficiency.full = dfixed_const(8);
1837         return_efficiency.full = dfixed_div(return_efficiency, a);
1838         a.full = dfixed_const(32);
1839         bandwidth.full = dfixed_mul(a, sclk);
1840         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1841
1842         return dfixed_trunc(bandwidth);
1843 }
1844
1845 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1846 {
1847         return 32;
1848 }
1849
1850 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1851 {
1852         /* Calculate the DMIF Request Bandwidth */
1853         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1854         fixed20_12 disp_clk, sclk, bandwidth;
1855         fixed20_12 a, b1, b2;
1856         u32 min_bandwidth;
1857
1858         a.full = dfixed_const(1000);
1859         disp_clk.full = dfixed_const(wm->disp_clk);
1860         disp_clk.full = dfixed_div(disp_clk, a);
1861         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1862         b1.full = dfixed_mul(a, disp_clk);
1863
1864         a.full = dfixed_const(1000);
1865         sclk.full = dfixed_const(wm->sclk);
1866         sclk.full = dfixed_div(sclk, a);
1867         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1868         b2.full = dfixed_mul(a, sclk);
1869
1870         a.full = dfixed_const(10);
1871         disp_clk_request_efficiency.full = dfixed_const(8);
1872         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1873
1874         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1875
1876         a.full = dfixed_const(min_bandwidth);
1877         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1878
1879         return dfixed_trunc(bandwidth);
1880 }
1881
1882 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1883 {
1884         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1885         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1886         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1887         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1888
1889         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1890 }
1891
1892 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1893 {
1894         /* Calculate the display mode Average Bandwidth
1895          * DisplayMode should contain the source and destination dimensions,
1896          * timing, etc.
1897          */
1898         fixed20_12 bpp;
1899         fixed20_12 line_time;
1900         fixed20_12 src_width;
1901         fixed20_12 bandwidth;
1902         fixed20_12 a;
1903
1904         a.full = dfixed_const(1000);
1905         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1906         line_time.full = dfixed_div(line_time, a);
1907         bpp.full = dfixed_const(wm->bytes_per_pixel);
1908         src_width.full = dfixed_const(wm->src_width);
1909         bandwidth.full = dfixed_mul(src_width, bpp);
1910         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1911         bandwidth.full = dfixed_div(bandwidth, line_time);
1912
1913         return dfixed_trunc(bandwidth);
1914 }
1915
1916 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1917 {
1918         /* First calcualte the latency in ns */
1919         u32 mc_latency = 2000; /* 2000 ns. */
1920         u32 available_bandwidth = dce6_available_bandwidth(wm);
1921         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1922         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1923         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1924         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1925                 (wm->num_heads * cursor_line_pair_return_time);
1926         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1927         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1928         u32 tmp, dmif_size = 12288;
1929         fixed20_12 a, b, c;
1930
1931         if (wm->num_heads == 0)
1932                 return 0;
1933
1934         a.full = dfixed_const(2);
1935         b.full = dfixed_const(1);
1936         if ((wm->vsc.full > a.full) ||
1937             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1938             (wm->vtaps >= 5) ||
1939             ((wm->vsc.full >= a.full) && wm->interlaced))
1940                 max_src_lines_per_dst_line = 4;
1941         else
1942                 max_src_lines_per_dst_line = 2;
1943
1944         a.full = dfixed_const(available_bandwidth);
1945         b.full = dfixed_const(wm->num_heads);
1946         a.full = dfixed_div(a, b);
1947
1948         b.full = dfixed_const(mc_latency + 512);
1949         c.full = dfixed_const(wm->disp_clk);
1950         b.full = dfixed_div(b, c);
1951
1952         c.full = dfixed_const(dmif_size);
1953         b.full = dfixed_div(c, b);
1954
1955         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1956
1957         b.full = dfixed_const(1000);
1958         c.full = dfixed_const(wm->disp_clk);
1959         b.full = dfixed_div(c, b);
1960         c.full = dfixed_const(wm->bytes_per_pixel);
1961         b.full = dfixed_mul(b, c);
1962
1963         lb_fill_bw = min(tmp, dfixed_trunc(b));
1964
1965         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1966         b.full = dfixed_const(1000);
1967         c.full = dfixed_const(lb_fill_bw);
1968         b.full = dfixed_div(c, b);
1969         a.full = dfixed_div(a, b);
1970         line_fill_time = dfixed_trunc(a);
1971
1972         if (line_fill_time < wm->active_time)
1973                 return latency;
1974         else
1975                 return latency + (line_fill_time - wm->active_time);
1976
1977 }
1978
1979 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1980 {
1981         if (dce6_average_bandwidth(wm) <=
1982             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1983                 return true;
1984         else
1985                 return false;
1986 };
1987
1988 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1989 {
1990         if (dce6_average_bandwidth(wm) <=
1991             (dce6_available_bandwidth(wm) / wm->num_heads))
1992                 return true;
1993         else
1994                 return false;
1995 };
1996
1997 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1998 {
1999         u32 lb_partitions = wm->lb_size / wm->src_width;
2000         u32 line_time = wm->active_time + wm->blank_time;
2001         u32 latency_tolerant_lines;
2002         u32 latency_hiding;
2003         fixed20_12 a;
2004
2005         a.full = dfixed_const(1);
2006         if (wm->vsc.full > a.full)
2007                 latency_tolerant_lines = 1;
2008         else {
2009                 if (lb_partitions <= (wm->vtaps + 1))
2010                         latency_tolerant_lines = 1;
2011                 else
2012                         latency_tolerant_lines = 2;
2013         }
2014
2015         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2016
2017         if (dce6_latency_watermark(wm) <= latency_hiding)
2018                 return true;
2019         else
2020                 return false;
2021 }
2022
2023 static void dce6_program_watermarks(struct radeon_device *rdev,
2024                                          struct radeon_crtc *radeon_crtc,
2025                                          u32 lb_size, u32 num_heads)
2026 {
2027         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2028         struct dce6_wm_params wm_low, wm_high;
2029         u32 dram_channels;
2030         u32 pixel_period;
2031         u32 line_time = 0;
2032         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2033         u32 priority_a_mark = 0, priority_b_mark = 0;
2034         u32 priority_a_cnt = PRIORITY_OFF;
2035         u32 priority_b_cnt = PRIORITY_OFF;
2036         u32 tmp, arb_control3;
2037         fixed20_12 a, b, c;
2038
2039         if (radeon_crtc->base.enabled && num_heads && mode) {
2040                 pixel_period = 1000000 / (u32)mode->clock;
2041                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2042                 priority_a_cnt = 0;
2043                 priority_b_cnt = 0;
2044
2045                 if (rdev->family == CHIP_ARUBA)
2046                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2047                 else
2048                         dram_channels = si_get_number_of_dram_channels(rdev);
2049
2050                 /* watermark for high clocks */
2051                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2052                         wm_high.yclk =
2053                                 radeon_dpm_get_mclk(rdev, false) * 10;
2054                         wm_high.sclk =
2055                                 radeon_dpm_get_sclk(rdev, false) * 10;
2056                 } else {
2057                         wm_high.yclk = rdev->pm.current_mclk * 10;
2058                         wm_high.sclk = rdev->pm.current_sclk * 10;
2059                 }
2060
2061                 wm_high.disp_clk = mode->clock;
2062                 wm_high.src_width = mode->crtc_hdisplay;
2063                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2064                 wm_high.blank_time = line_time - wm_high.active_time;
2065                 wm_high.interlaced = false;
2066                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2067                         wm_high.interlaced = true;
2068                 wm_high.vsc = radeon_crtc->vsc;
2069                 wm_high.vtaps = 1;
2070                 if (radeon_crtc->rmx_type != RMX_OFF)
2071                         wm_high.vtaps = 2;
2072                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2073                 wm_high.lb_size = lb_size;
2074                 wm_high.dram_channels = dram_channels;
2075                 wm_high.num_heads = num_heads;
2076
2077                 /* watermark for low clocks */
2078                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2079                         wm_low.yclk =
2080                                 radeon_dpm_get_mclk(rdev, true) * 10;
2081                         wm_low.sclk =
2082                                 radeon_dpm_get_sclk(rdev, true) * 10;
2083                 } else {
2084                         wm_low.yclk = rdev->pm.current_mclk * 10;
2085                         wm_low.sclk = rdev->pm.current_sclk * 10;
2086                 }
2087
2088                 wm_low.disp_clk = mode->clock;
2089                 wm_low.src_width = mode->crtc_hdisplay;
2090                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2091                 wm_low.blank_time = line_time - wm_low.active_time;
2092                 wm_low.interlaced = false;
2093                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2094                         wm_low.interlaced = true;
2095                 wm_low.vsc = radeon_crtc->vsc;
2096                 wm_low.vtaps = 1;
2097                 if (radeon_crtc->rmx_type != RMX_OFF)
2098                         wm_low.vtaps = 2;
2099                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2100                 wm_low.lb_size = lb_size;
2101                 wm_low.dram_channels = dram_channels;
2102                 wm_low.num_heads = num_heads;
2103
2104                 /* set for high clocks */
2105                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2106                 /* set for low clocks */
2107                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2108
2109                 /* possibly force display priority to high */
2110                 /* should really do this at mode validation time... */
2111                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2112                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2113                     !dce6_check_latency_hiding(&wm_high) ||
2114                     (rdev->disp_priority == 2)) {
2115                         DRM_DEBUG_KMS("force priority to high\n");
2116                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2117                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2118                 }
2119                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2120                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2121                     !dce6_check_latency_hiding(&wm_low) ||
2122                     (rdev->disp_priority == 2)) {
2123                         DRM_DEBUG_KMS("force priority to high\n");
2124                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2125                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2126                 }
2127
2128                 a.full = dfixed_const(1000);
2129                 b.full = dfixed_const(mode->clock);
2130                 b.full = dfixed_div(b, a);
2131                 c.full = dfixed_const(latency_watermark_a);
2132                 c.full = dfixed_mul(c, b);
2133                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2134                 c.full = dfixed_div(c, a);
2135                 a.full = dfixed_const(16);
2136                 c.full = dfixed_div(c, a);
2137                 priority_a_mark = dfixed_trunc(c);
2138                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2139
2140                 a.full = dfixed_const(1000);
2141                 b.full = dfixed_const(mode->clock);
2142                 b.full = dfixed_div(b, a);
2143                 c.full = dfixed_const(latency_watermark_b);
2144                 c.full = dfixed_mul(c, b);
2145                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2146                 c.full = dfixed_div(c, a);
2147                 a.full = dfixed_const(16);
2148                 c.full = dfixed_div(c, a);
2149                 priority_b_mark = dfixed_trunc(c);
2150                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2151         }
2152
2153         /* select wm A */
2154         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2155         tmp = arb_control3;
2156         tmp &= ~LATENCY_WATERMARK_MASK(3);
2157         tmp |= LATENCY_WATERMARK_MASK(1);
2158         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2159         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2160                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2161                 LATENCY_HIGH_WATERMARK(line_time)));
2162         /* select wm B */
2163         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2164         tmp &= ~LATENCY_WATERMARK_MASK(3);
2165         tmp |= LATENCY_WATERMARK_MASK(2);
2166         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2167         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2168                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2169                 LATENCY_HIGH_WATERMARK(line_time)));
2170         /* restore original selection */
2171         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2172
2173         /* write the priority marks */
2174         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2175         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2176
2177         /* save values for DPM */
2178         radeon_crtc->line_time = line_time;
2179         radeon_crtc->wm_high = latency_watermark_a;
2180         radeon_crtc->wm_low = latency_watermark_b;
2181 }
2182
2183 void dce6_bandwidth_update(struct radeon_device *rdev)
2184 {
2185         struct drm_display_mode *mode0 = NULL;
2186         struct drm_display_mode *mode1 = NULL;
2187         u32 num_heads = 0, lb_size;
2188         int i;
2189
2190         radeon_update_display_priority(rdev);
2191
2192         for (i = 0; i < rdev->num_crtc; i++) {
2193                 if (rdev->mode_info.crtcs[i]->base.enabled)
2194                         num_heads++;
2195         }
2196         for (i = 0; i < rdev->num_crtc; i += 2) {
2197                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2198                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2199                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2200                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2201                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2202                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2203         }
2204 }
2205
2206 /*
2207  * Core functions
2208  */
2209 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2210 {
2211         const u32 num_tile_mode_states = 32;
2212         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2213
2214         switch (rdev->config.si.mem_row_size_in_kb) {
2215         case 1:
2216                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2217                 break;
2218         case 2:
2219         default:
2220                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2221                 break;
2222         case 4:
2223                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2224                 break;
2225         }
2226
2227         if ((rdev->family == CHIP_TAHITI) ||
2228             (rdev->family == CHIP_PITCAIRN)) {
2229                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2230                         switch (reg_offset) {
2231                         case 0:  /* non-AA compressed depth or any compressed stencil */
2232                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2234                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2235                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2236                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2237                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2240                                 break;
2241                         case 1:  /* 2xAA/4xAA compressed depth only */
2242                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2244                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2245                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2246                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2247                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2250                                 break;
2251                         case 2:  /* 8xAA compressed depth only */
2252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2255                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2256                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2257                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2260                                 break;
2261                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2262                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2264                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2265                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2266                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2267                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2270                                 break;
2271                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2275                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2276                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2277                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2279                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2280                                 break;
2281                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2282                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2284                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2285                                                  TILE_SPLIT(split_equal_to_row_size) |
2286                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2287                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2289                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2290                                 break;
2291                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2292                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2293                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2294                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2295                                                  TILE_SPLIT(split_equal_to_row_size) |
2296                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2297                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2300                                 break;
2301                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2302                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2304                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2305                                                  TILE_SPLIT(split_equal_to_row_size) |
2306                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2307                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2309                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2310                                 break;
2311                         case 8:  /* 1D and 1D Array Surfaces */
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2313                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2314                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2315                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2316                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2317                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2318                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2319                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2320                                 break;
2321                         case 9:  /* Displayable maps. */
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2326                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2327                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2329                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2330                                 break;
2331                         case 10:  /* Display 8bpp. */
2332                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2334                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2335                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2336                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2337                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2338                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2339                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2340                                 break;
2341                         case 11:  /* Display 16bpp. */
2342                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2344                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2345                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2347                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2349                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2350                                 break;
2351                         case 12:  /* Display 32bpp. */
2352                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2355                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2356                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2357                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2360                                 break;
2361                         case 13:  /* Thin. */
2362                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2363                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2364                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2365                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2366                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2367                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2370                                 break;
2371                         case 14:  /* Thin 8 bpp. */
2372                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2374                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2375                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2376                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2377                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2378                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2380                                 break;
2381                         case 15:  /* Thin 16 bpp. */
2382                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2384                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2385                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2386                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2387                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2390                                 break;
2391                         case 16:  /* Thin 32 bpp. */
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2395                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2396                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2397                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2400                                 break;
2401                         case 17:  /* Thin 64 bpp. */
2402                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2404                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2405                                                  TILE_SPLIT(split_equal_to_row_size) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2407                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2410                                 break;
2411                         case 21:  /* 8 bpp PRT. */
2412                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2414                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2415                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2416                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2417                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2418                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2420                                 break;
2421                         case 22:  /* 16 bpp PRT */
2422                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2424                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2425                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2426                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2427                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2429                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2430                                 break;
2431                         case 23:  /* 32 bpp PRT */
2432                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2434                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2435                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2436                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2437                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2440                                 break;
2441                         case 24:  /* 64 bpp PRT */
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2445                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2446                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2447                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2449                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2450                                 break;
2451                         case 25:  /* 128 bpp PRT */
2452                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2454                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2455                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2456                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2457                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2460                                 break;
2461                         default:
2462                                 gb_tile_moden = 0;
2463                                 break;
2464                         }
2465                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2466                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2467                 }
2468         } else if ((rdev->family == CHIP_VERDE) ||
2469                    (rdev->family == CHIP_OLAND) ||
2470                    (rdev->family == CHIP_HAINAN)) {
2471                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2472                         switch (reg_offset) {
2473                         case 0:  /* non-AA compressed depth or any compressed stencil */
2474                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2475                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2476                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2477                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2478                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2479                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2482                                 break;
2483                         case 1:  /* 2xAA/4xAA compressed depth only */
2484                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2486                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2487                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2488                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2489                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2491                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2492                                 break;
2493                         case 2:  /* 8xAA compressed depth only */
2494                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2496                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2497                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2498                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2499                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2501                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2502                                 break;
2503                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2504                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2506                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2507                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2508                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2509                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2512                                 break;
2513                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2514                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2517                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2518                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2519                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2521                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2522                                 break;
2523                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2524                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2527                                                  TILE_SPLIT(split_equal_to_row_size) |
2528                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2529                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2531                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532                                 break;
2533                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2534                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2536                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2537                                                  TILE_SPLIT(split_equal_to_row_size) |
2538                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2539                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2542                                 break;
2543                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2544                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2547                                                  TILE_SPLIT(split_equal_to_row_size) |
2548                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2549                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2552                                 break;
2553                         case 8:  /* 1D and 1D Array Surfaces */
2554                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2555                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2557                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2558                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2559                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2562                                 break;
2563                         case 9:  /* Displayable maps. */
2564                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2565                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2568                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2569                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2572                                 break;
2573                         case 10:  /* Display 8bpp. */
2574                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2576                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2577                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2578                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2579                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2582                                 break;
2583                         case 11:  /* Display 16bpp. */
2584                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2587                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2588                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2589                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2592                                 break;
2593                         case 12:  /* Display 32bpp. */
2594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2597                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2598                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2599                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2602                                 break;
2603                         case 13:  /* Thin. */
2604                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2605                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2606                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2609                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2612                                 break;
2613                         case 14:  /* Thin 8 bpp. */
2614                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2616                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2617                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2619                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2622                                 break;
2623                         case 15:  /* Thin 16 bpp. */
2624                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2626                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2628                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2629                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2631                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2632                                 break;
2633                         case 16:  /* Thin 32 bpp. */
2634                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2635                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2637                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2638                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2639                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2642                                 break;
2643                         case 17:  /* Thin 64 bpp. */
2644                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2646                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2647                                                  TILE_SPLIT(split_equal_to_row_size) |
2648                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2649                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2651                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2652                                 break;
2653                         case 21:  /* 8 bpp PRT. */
2654                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2656                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2657                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2658                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2659                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2660                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2662                                 break;
2663                         case 22:  /* 16 bpp PRT */
2664                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2667                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2668                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2669                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2671                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2672                                 break;
2673                         case 23:  /* 32 bpp PRT */
2674                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2676                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2677                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2678                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2679                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2682                                 break;
2683                         case 24:  /* 64 bpp PRT */
2684                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2686                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2687                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2689                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2691                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2692                                 break;
2693                         case 25:  /* 128 bpp PRT */
2694                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2698                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2699                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2701                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2702                                 break;
2703                         default:
2704                                 gb_tile_moden = 0;
2705                                 break;
2706                         }
2707                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2708                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2709                 }
2710         } else
2711                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2712 }
2713
2714 static void si_select_se_sh(struct radeon_device *rdev,
2715                             u32 se_num, u32 sh_num)
2716 {
2717         u32 data = INSTANCE_BROADCAST_WRITES;
2718
2719         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2720                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2721         else if (se_num == 0xffffffff)
2722                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2723         else if (sh_num == 0xffffffff)
2724                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2725         else
2726                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2727         WREG32(GRBM_GFX_INDEX, data);
2728 }
2729
2730 static u32 si_create_bitmask(u32 bit_width)
2731 {
2732         u32 i, mask = 0;
2733
2734         for (i = 0; i < bit_width; i++) {
2735                 mask <<= 1;
2736                 mask |= 1;
2737         }
2738         return mask;
2739 }
2740
2741 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2742 {
2743         u32 data, mask;
2744
2745         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2746         if (data & 1)
2747                 data &= INACTIVE_CUS_MASK;
2748         else
2749                 data = 0;
2750         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2751
2752         data >>= INACTIVE_CUS_SHIFT;
2753
2754         mask = si_create_bitmask(cu_per_sh);
2755
2756         return ~data & mask;
2757 }
2758
2759 static void si_setup_spi(struct radeon_device *rdev,
2760                          u32 se_num, u32 sh_per_se,
2761                          u32 cu_per_sh)
2762 {
2763         int i, j, k;
2764         u32 data, mask, active_cu;
2765
2766         for (i = 0; i < se_num; i++) {
2767                 for (j = 0; j < sh_per_se; j++) {
2768                         si_select_se_sh(rdev, i, j);
2769                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2770                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2771
2772                         mask = 1;
2773                         for (k = 0; k < 16; k++) {
2774                                 mask <<= k;
2775                                 if (active_cu & mask) {
2776                                         data &= ~mask;
2777                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2778                                         break;
2779                                 }
2780                         }
2781                 }
2782         }
2783         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2784 }
2785
2786 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2787                               u32 max_rb_num, u32 se_num,
2788                               u32 sh_per_se)
2789 {
2790         u32 data, mask;
2791
2792         data = RREG32(CC_RB_BACKEND_DISABLE);
2793         if (data & 1)
2794                 data &= BACKEND_DISABLE_MASK;
2795         else
2796                 data = 0;
2797         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2798
2799         data >>= BACKEND_DISABLE_SHIFT;
2800
2801         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2802
2803         return data & mask;
2804 }
2805
2806 static void si_setup_rb(struct radeon_device *rdev,
2807                         u32 se_num, u32 sh_per_se,
2808                         u32 max_rb_num)
2809 {
2810         int i, j;
2811         u32 data, mask;
2812         u32 disabled_rbs = 0;
2813         u32 enabled_rbs = 0;
2814
2815         for (i = 0; i < se_num; i++) {
2816                 for (j = 0; j < sh_per_se; j++) {
2817                         si_select_se_sh(rdev, i, j);
2818                         data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2819                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2820                 }
2821         }
2822         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2823
2824         mask = 1;
2825         for (i = 0; i < max_rb_num; i++) {
2826                 if (!(disabled_rbs & mask))
2827                         enabled_rbs |= mask;
2828                 mask <<= 1;
2829         }
2830
2831         for (i = 0; i < se_num; i++) {
2832                 si_select_se_sh(rdev, i, 0xffffffff);
2833                 data = 0;
2834                 for (j = 0; j < sh_per_se; j++) {
2835                         switch (enabled_rbs & 3) {
2836                         case 1:
2837                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2838                                 break;
2839                         case 2:
2840                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2841                                 break;
2842                         case 3:
2843                         default:
2844                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2845                                 break;
2846                         }
2847                         enabled_rbs >>= 2;
2848                 }
2849                 WREG32(PA_SC_RASTER_CONFIG, data);
2850         }
2851         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2852 }
2853
/*
 * si_gpu_init - graphics block setup for SI parts.
 *
 * Fills rdev->config.si with per-family limits, derives the memory row size
 * and the tile_config dword from MC_ARB_RAMCFG, writes the resulting address
 * config to the GB, DMIF, HDP and DMA registers (and the UVD ones when
 * rdev->has_uvd is set), runs the tiling table, RB and SPI setup helpers,
 * and finally programs a set of 3D engine default values.
 */
2854 static void si_gpu_init(struct radeon_device *rdev)
2855 {
2856         u32 gb_addr_config = 0;
2857         u32 mc_shared_chmap, mc_arb_ramcfg;
2858         u32 sx_debug_1;
2859         u32 hdp_host_path_cntl;
2860         u32 tmp;
2861         int i, j;
2862
/*
 * Per-family limits, FIFO sizes and the golden GB_ADDR_CONFIG value.
 * "default" shares the CHIP_VERDE case, so any family not listed here
 * gets the Verde settings.
 */
2863         switch (rdev->family) {
2864         case CHIP_TAHITI:
2865                 rdev->config.si.max_shader_engines = 2;
2866                 rdev->config.si.max_tile_pipes = 12;
2867                 rdev->config.si.max_cu_per_sh = 8;
2868                 rdev->config.si.max_sh_per_se = 2;
2869                 rdev->config.si.max_backends_per_se = 4;
2870                 rdev->config.si.max_texture_channel_caches = 12;
2871                 rdev->config.si.max_gprs = 256;
2872                 rdev->config.si.max_gs_threads = 32;
2873                 rdev->config.si.max_hw_contexts = 8;
2874
2875                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2876                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2877                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2878                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2879                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2880                 break;
2881         case CHIP_PITCAIRN:
2882                 rdev->config.si.max_shader_engines = 2;
2883                 rdev->config.si.max_tile_pipes = 8;
2884                 rdev->config.si.max_cu_per_sh = 5;
2885                 rdev->config.si.max_sh_per_se = 2;
2886                 rdev->config.si.max_backends_per_se = 4;
2887                 rdev->config.si.max_texture_channel_caches = 8;
2888                 rdev->config.si.max_gprs = 256;
2889                 rdev->config.si.max_gs_threads = 32;
2890                 rdev->config.si.max_hw_contexts = 8;
2891
2892                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2893                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2894                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2895                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2896                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2897                 break;
2898         case CHIP_VERDE:
2899         default:
2900                 rdev->config.si.max_shader_engines = 1;
2901                 rdev->config.si.max_tile_pipes = 4;
2902                 rdev->config.si.max_cu_per_sh = 5;
2903                 rdev->config.si.max_sh_per_se = 2;
2904                 rdev->config.si.max_backends_per_se = 4;
2905                 rdev->config.si.max_texture_channel_caches = 4;
2906                 rdev->config.si.max_gprs = 256;
2907                 rdev->config.si.max_gs_threads = 32;
2908                 rdev->config.si.max_hw_contexts = 8;
2909
2910                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2911                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2912                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2913                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2914                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2915                 break;
2916         case CHIP_OLAND:
2917                 rdev->config.si.max_shader_engines = 1;
2918                 rdev->config.si.max_tile_pipes = 4;
2919                 rdev->config.si.max_cu_per_sh = 6;
2920                 rdev->config.si.max_sh_per_se = 1;
2921                 rdev->config.si.max_backends_per_se = 2;
2922                 rdev->config.si.max_texture_channel_caches = 4;
2923                 rdev->config.si.max_gprs = 256;
2924                 rdev->config.si.max_gs_threads = 16;
2925                 rdev->config.si.max_hw_contexts = 8;
2926
2927                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2928                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2929                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2930                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2931                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2932                 break;
2933         case CHIP_HAINAN:
2934                 rdev->config.si.max_shader_engines = 1;
2935                 rdev->config.si.max_tile_pipes = 4;
2936                 rdev->config.si.max_cu_per_sh = 5;
2937                 rdev->config.si.max_sh_per_se = 1;
2938                 rdev->config.si.max_backends_per_se = 1;
2939                 rdev->config.si.max_texture_channel_caches = 2;
2940                 rdev->config.si.max_gprs = 256;
2941                 rdev->config.si.max_gs_threads = 16;
2942                 rdev->config.si.max_hw_contexts = 8;
2943
2944                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2945                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2946                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2947                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2948                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2949                 break;
2950         }
2951
/* zero 32 groups of five registers, 0x18 bytes apart, starting at 0x2c14 */
2952         /* Initialize HDP */
2953         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2954                 WREG32((0x2c14 + j), 0x00000000);
2955                 WREG32((0x2c18 + j), 0x00000000);
2956                 WREG32((0x2c1c + j), 0x00000000);
2957                 WREG32((0x2c20 + j), 0x00000000);
2958                 WREG32((0x2c24 + j), 0x00000000);
2959         }
2960
2961         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2962
2963         evergreen_fix_pci_max_read_req_size(rdev);
2964
2965         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2966
/* NOTE(review): mc_shared_chmap is read here but not used again in this function */
2967         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2968         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2969
2970         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2971         rdev->config.si.mem_max_burst_length_bytes = 256;
/* row size in KB is computed from the NOOFCOLS field and capped at 4 */
2972         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2973         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2974         if (rdev->config.si.mem_row_size_in_kb > 4)
2975                 rdev->config.si.mem_row_size_in_kb = 4;
2976         /* XXX use MC settings? */
2977         rdev->config.si.shader_engine_tile_size = 32;
2978         rdev->config.si.num_gpus = 1;
2979         rdev->config.si.multi_gpu_tile_size = 64;
2980
/* replace the ROW_SIZE field of the golden value with the computed row size */
2981         /* fix up row size */
2982         gb_addr_config &= ~ROW_SIZE_MASK;
2983         switch (rdev->config.si.mem_row_size_in_kb) {
2984         case 1:
2985         default:
2986                 gb_addr_config |= ROW_SIZE(0);
2987                 break;
2988         case 2:
2989                 gb_addr_config |= ROW_SIZE(1);
2990                 break;
2991         case 4:
2992                 gb_addr_config |= ROW_SIZE(2);
2993                 break;
2994         }
2995
2996         /* setup tiling info dword.  gb_addr_config is not adequate since it does
2997          * not have bank info, so create a custom tiling dword.
2998          * bits 3:0   num_pipes
2999          * bits 7:4   num_banks
3000          * bits 11:8  group_size
3001          * bits 15:12 row_size
3002          */
3003         rdev->config.si.tile_config = 0;
/* pipe counts other than 1, 2 and 4 (including 8 and 12) are encoded as 3 */
3004         switch (rdev->config.si.num_tile_pipes) {
3005         case 1:
3006                 rdev->config.si.tile_config |= (0 << 0);
3007                 break;
3008         case 2:
3009                 rdev->config.si.tile_config |= (1 << 0);
3010                 break;
3011         case 4:
3012                 rdev->config.si.tile_config |= (2 << 0);
3013                 break;
3014         case 8:
3015         default:
3016                 /* XXX what about 12? */
3017                 rdev->config.si.tile_config |= (3 << 0);
3018                 break;
3019         }       
/* bank count comes from the NOOFBANK field of MC_ARB_RAMCFG */
3020         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3021         case 0: /* four banks */
3022                 rdev->config.si.tile_config |= 0 << 4;
3023                 break;
3024         case 1: /* eight banks */
3025                 rdev->config.si.tile_config |= 1 << 4;
3026                 break;
3027         case 2: /* sixteen banks */
3028         default:
3029                 rdev->config.si.tile_config |= 2 << 4;
3030                 break;
3031         }
3032         rdev->config.si.tile_config |=
3033                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3034         rdev->config.si.tile_config |=
3035                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3036
/* the same address config value is written to every block listed below */
3037         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3038         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3039         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3040         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3041         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3042         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3043         if (rdev->has_uvd) {
3044                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3045                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3046                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3047         }
3048
3049         si_tiling_mode_table_init(rdev);
3050
/* note: the last argument passed to si_setup_rb() is the per-SE backend count */
3051         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3052                     rdev->config.si.max_sh_per_se,
3053                     rdev->config.si.max_backends_per_se);
3054
3055         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3056                      rdev->config.si.max_sh_per_se,
3057                      rdev->config.si.max_cu_per_sh);
3058
3059
3060         /* set HW defaults for 3D engine */
3061         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3062                                      ROQ_IB2_START(0x2b)));
3063         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3064
/* SX_DEBUG_1 is read and written back unmodified */
3065         sx_debug_1 = RREG32(SX_DEBUG_1);
3066         WREG32(SX_DEBUG_1, sx_debug_1);
3067
3068         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3069
/* FIFO sizes come from the per-family values chosen in the switch above */
3070         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3071                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3072                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3073                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3074
3075         WREG32(VGT_NUM_INSTANCES, 1);
3076
3077         WREG32(CP_PERFMON_CNTL, 0);
3078
3079         WREG32(SQ_CONFIG, 0);
3080
3081         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3082                                           FORCE_EOV_MAX_REZ_CNT(255)));
3083
3084         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3085                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3086
3087         WREG32(VGT_GS_VERTEX_REUSE, 16);
3088         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3089
/* all CB perf counter select registers are cleared */
3090         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3091         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3092         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3093         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3094         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3095         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3096         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3097         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3098
/* read-modify-write: only HDP_FLUSH_INVALIDATE_CACHE is added */
3099         tmp = RREG32(HDP_MISC_CNTL);
3100         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3101         WREG32(HDP_MISC_CNTL, tmp);
3102
/* HDP_HOST_PATH_CNTL is read and written back unmodified */
3103         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3104         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3105
3106         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3107
3108         udelay(50);
3109 }
3110
3111 /*
3112  * GPU scratch register helper functions.
3113  */
3114 static void si_scratch_init(struct radeon_device *rdev)
3115 {
3116         int i;
3117
3118         rdev->scratch.num_reg = 7;
3119         rdev->scratch.reg_base = SCRATCH_REG0;
3120         for (i = 0; i < rdev->scratch.num_reg; i++) {
3121                 rdev->scratch.free[i] = true;
3122                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3123         }
3124 }
3125
3126 void si_fence_ring_emit(struct radeon_device *rdev,
3127                         struct radeon_fence *fence)
3128 {
3129         struct radeon_ring *ring = &rdev->ring[fence->ring];
3130         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3131
3132         /* flush read cache over gart */
3133         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3134         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3135         radeon_ring_write(ring, 0);
3136         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3137         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3138                           PACKET3_TC_ACTION_ENA |
3139                           PACKET3_SH_KCACHE_ACTION_ENA |
3140                           PACKET3_SH_ICACHE_ACTION_ENA);
3141         radeon_ring_write(ring, 0xFFFFFFFF);
3142         radeon_ring_write(ring, 0);
3143         radeon_ring_write(ring, 10); /* poll interval */
3144         /* EVENT_WRITE_EOP - flush caches, send int */
3145         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3146         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3147         radeon_ring_write(ring, addr & 0xffffffff);
3148         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3149         radeon_ring_write(ring, fence->seq);
3150         radeon_ring_write(ring, 0);
3151 }
3152
3153 /*
3154  * IB stuff
3155  */
3156 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3157 {
3158         struct radeon_ring *ring = &rdev->ring[ib->ring];
3159         u32 header;
3160
3161         if (ib->is_const_ib) {
3162                 /* set switch buffer packet before const IB */
3163                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3164                 radeon_ring_write(ring, 0);
3165
3166                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3167         } else {
3168                 u32 next_rptr;
3169                 if (ring->rptr_save_reg) {
3170                         next_rptr = ring->wptr + 3 + 4 + 8;
3171                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3172                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3173                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3174                         radeon_ring_write(ring, next_rptr);
3175                 } else if (rdev->wb.enabled) {
3176                         next_rptr = ring->wptr + 5 + 4 + 8;
3177                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3178                         radeon_ring_write(ring, (1 << 8));
3179                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3180                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3181                         radeon_ring_write(ring, next_rptr);
3182                 }
3183
3184                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3185         }
3186
3187         radeon_ring_write(ring, header);
3188         radeon_ring_write(ring,
3189 #ifdef __BIG_ENDIAN
3190                           (2 << 0) |
3191 #endif
3192                           (ib->gpu_addr & 0xFFFFFFFC));
3193         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3194         radeon_ring_write(ring, ib->length_dw |
3195                           (ib->vm ? (ib->vm->id << 24) : 0));
3196
3197         if (!ib->is_const_ib) {
3198                 /* flush read cache over gart for this vmid */
3199                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3200                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3201                 radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3202                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3203                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3204                                   PACKET3_TC_ACTION_ENA |
3205                                   PACKET3_SH_KCACHE_ACTION_ENA |
3206                                   PACKET3_SH_ICACHE_ACTION_ENA);
3207                 radeon_ring_write(ring, 0xFFFFFFFF);
3208                 radeon_ring_write(ring, 0);
3209                 radeon_ring_write(ring, 10); /* poll interval */
3210         }
3211 }
3212
3213 /*
3214  * CP.
3215  */
3216 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3217 {
3218         if (enable)
3219                 WREG32(CP_ME_CNTL, 0);
3220         else {
3221                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3222                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3223                 WREG32(SCRATCH_UMSK, 0);
3224                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3225                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3226                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3227         }
3228         udelay(50);
3229 }
3230
3231 static int si_cp_load_microcode(struct radeon_device *rdev)
3232 {
3233         const __be32 *fw_data;
3234         int i;
3235
3236         if (!rdev->me_fw || !rdev->pfp_fw)
3237                 return -EINVAL;
3238
3239         si_cp_enable(rdev, false);
3240
3241         /* PFP */
3242         fw_data = (const __be32 *)rdev->pfp_fw->data;
3243         WREG32(CP_PFP_UCODE_ADDR, 0);
3244         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3245                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3246         WREG32(CP_PFP_UCODE_ADDR, 0);
3247
3248         /* CE */
3249         fw_data = (const __be32 *)rdev->ce_fw->data;
3250         WREG32(CP_CE_UCODE_ADDR, 0);
3251         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3252                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3253         WREG32(CP_CE_UCODE_ADDR, 0);
3254
3255         /* ME */
3256         fw_data = (const __be32 *)rdev->me_fw->data;
3257         WREG32(CP_ME_RAM_WADDR, 0);
3258         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3259                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3260         WREG32(CP_ME_RAM_WADDR, 0);
3261
3262         WREG32(CP_PFP_UCODE_ADDR, 0);
3263         WREG32(CP_CE_UCODE_ADDR, 0);
3264         WREG32(CP_ME_RAM_WADDR, 0);
3265         WREG32(CP_ME_RAM_RADDR, 0);
3266         return 0;
3267 }
3268
3269 static int si_cp_start(struct radeon_device *rdev)
3270 {
3271         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3272         int r, i;
3273
3274         r = radeon_ring_lock(rdev, ring, 7 + 4);
3275         if (r) {
3276                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3277                 return r;
3278         }
3279         /* init the CP */
3280         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3281         radeon_ring_write(ring, 0x1);
3282         radeon_ring_write(ring, 0x0);
3283         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3284         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3285         radeon_ring_write(ring, 0);
3286         radeon_ring_write(ring, 0);
3287
3288         /* init the CE partitions */
3289         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3290         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3291         radeon_ring_write(ring, 0xc000);
3292         radeon_ring_write(ring, 0xe000);
3293         radeon_ring_unlock_commit(rdev, ring);
3294
3295         si_cp_enable(rdev, true);
3296
3297         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3298         if (r) {
3299                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3300                 return r;
3301         }
3302
3303         /* setup clear context state */
3304         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3305         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3306
3307         for (i = 0; i < si_default_size; i++)
3308                 radeon_ring_write(ring, si_default_state[i]);
3309
3310         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3311         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3312
3313         /* set clear context state */
3314         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3315         radeon_ring_write(ring, 0);
3316
3317         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3318         radeon_ring_write(ring, 0x00000316);
3319         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3320         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3321
3322         radeon_ring_unlock_commit(rdev, ring);
3323
3324         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3325                 ring = &rdev->ring[i];
3326                 r = radeon_ring_lock(rdev, ring, 2);
3327
3328                 /* clear the compute context state */
3329                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3330                 radeon_ring_write(ring, 0);
3331
3332                 radeon_ring_unlock_commit(rdev, ring);
3333         }
3334
3335         return 0;
3336 }
3337
3338 static void si_cp_fini(struct radeon_device *rdev)
3339 {
3340         struct radeon_ring *ring;
3341         si_cp_enable(rdev, false);
3342
3343         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3344         radeon_ring_fini(rdev, ring);
3345         radeon_scratch_free(rdev, ring->rptr_save_reg);
3346
3347         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3348         radeon_ring_fini(rdev, ring);
3349         radeon_scratch_free(rdev, ring->rptr_save_reg);
3350
3351         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3352         radeon_ring_fini(rdev, ring);
3353         radeon_scratch_free(rdev, ring->rptr_save_reg);
3354 }
3355
3356 static int si_cp_resume(struct radeon_device *rdev)
3357 {
3358         struct radeon_ring *ring;
3359         u32 tmp;
3360         u32 rb_bufsz;
3361         int r;
3362
3363         /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
3364         WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
3365                                  SOFT_RESET_PA |
3366                                  SOFT_RESET_VGT |
3367                                  SOFT_RESET_SPI |
3368                                  SOFT_RESET_SX));
3369         RREG32(GRBM_SOFT_RESET);
3370         mdelay(15);
3371         WREG32(GRBM_SOFT_RESET, 0);
3372         RREG32(GRBM_SOFT_RESET);
3373
3374         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3375         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3376
3377         /* Set the write pointer delay */
3378         WREG32(CP_RB_WPTR_DELAY, 0);
3379
3380         WREG32(CP_DEBUG, 0);
3381         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3382
3383         /* ring 0 - compute and gfx */
3384         /* Set ring buffer size */
3385         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3386         rb_bufsz = drm_order(ring->ring_size / 8);
3387         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3388 #ifdef __BIG_ENDIAN
3389         tmp |= BUF_SWAP_32BIT;
3390 #endif
3391         WREG32(CP_RB0_CNTL, tmp);
3392
3393         /* Initialize the ring buffer's read and write pointers */
3394         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3395         ring->wptr = 0;
3396         WREG32(CP_RB0_WPTR, ring->wptr);
3397
3398         /* set the wb address whether it's enabled or not */
3399         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3400         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3401
3402         if (rdev->wb.enabled)
3403                 WREG32(SCRATCH_UMSK, 0xff);
3404         else {
3405                 tmp |= RB_NO_UPDATE;
3406                 WREG32(SCRATCH_UMSK, 0);
3407         }
3408
3409         mdelay(1);
3410         WREG32(CP_RB0_CNTL, tmp);
3411
3412         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3413
3414         ring->rptr = RREG32(CP_RB0_RPTR);
3415
3416         /* ring1  - compute only */
3417         /* Set ring buffer size */
3418         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3419         rb_bufsz = drm_order(ring->ring_size / 8);
3420         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3421 #ifdef __BIG_ENDIAN
3422         tmp |= BUF_SWAP_32BIT;
3423 #endif
3424         WREG32(CP_RB1_CNTL, tmp);
3425
3426         /* Initialize the ring buffer's read and write pointers */
3427         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3428         ring->wptr = 0;
3429         WREG32(CP_RB1_WPTR, ring->wptr);
3430
3431         /* set the wb address whether it's enabled or not */
3432         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3433         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3434
3435         mdelay(1);
3436         WREG32(CP_RB1_CNTL, tmp);
3437
3438         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3439
3440         ring->rptr = RREG32(CP_RB1_RPTR);
3441
3442         /* ring2 - compute only */
3443         /* Set ring buffer size */
3444         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3445         rb_bufsz = drm_order(ring->ring_size / 8);
3446         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3447 #ifdef __BIG_ENDIAN
3448         tmp |= BUF_SWAP_32BIT;
3449 #endif
3450         WREG32(CP_RB2_CNTL, tmp);
3451
3452         /* Initialize the ring buffer's read and write pointers */
3453         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3454         ring->wptr = 0;
3455         WREG32(CP_RB2_WPTR, ring->wptr);
3456
3457         /* set the wb address whether it's enabled or not */
3458         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3459         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3460
3461         mdelay(1);
3462         WREG32(CP_RB2_CNTL, tmp);
3463
3464         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3465
3466         ring->rptr = RREG32(CP_RB2_RPTR);
3467
3468         /* start the rings */
3469         si_cp_start(rdev);
3470         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3471         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3472         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3473         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3474         if (r) {
3475                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3476                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3477                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3478                 return r;
3479         }
3480         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3481         if (r) {
3482                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3483         }
3484         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3485         if (r) {
3486                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3487         }
3488
3489         return 0;
3490 }
3491
3492 static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3493 {
3494         u32 reset_mask = 0;
3495         u32 tmp;
3496
3497         /* GRBM_STATUS */
3498         tmp = RREG32(GRBM_STATUS);
3499         if (tmp & (PA_BUSY | SC_BUSY |
3500                    BCI_BUSY | SX_BUSY |
3501                    TA_BUSY | VGT_BUSY |
3502                    DB_BUSY | CB_BUSY |
3503                    GDS_BUSY | SPI_BUSY |
3504                    IA_BUSY | IA_BUSY_NO_DMA))
3505                 reset_mask |= RADEON_RESET_GFX;
3506
3507         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3508                    CP_BUSY | CP_COHERENCY_BUSY))
3509                 reset_mask |= RADEON_RESET_CP;
3510
3511         if (tmp & GRBM_EE_BUSY)
3512                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3513
3514         /* GRBM_STATUS2 */
3515         tmp = RREG32(GRBM_STATUS2);
3516         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3517                 reset_mask |= RADEON_RESET_RLC;
3518
3519         /* DMA_STATUS_REG 0 */
3520         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3521         if (!(tmp & DMA_IDLE))
3522                 reset_mask |= RADEON_RESET_DMA;
3523
3524         /* DMA_STATUS_REG 1 */
3525         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3526         if (!(tmp & DMA_IDLE))
3527                 reset_mask |= RADEON_RESET_DMA1;
3528
3529         /* SRBM_STATUS2 */
3530         tmp = RREG32(SRBM_STATUS2);
3531         if (tmp & DMA_BUSY)
3532                 reset_mask |= RADEON_RESET_DMA;
3533
3534         if (tmp & DMA1_BUSY)
3535                 reset_mask |= RADEON_RESET_DMA1;
3536
3537         /* SRBM_STATUS */
3538         tmp = RREG32(SRBM_STATUS);
3539
3540         if (tmp & IH_BUSY)
3541                 reset_mask |= RADEON_RESET_IH;
3542
3543         if (tmp & SEM_BUSY)
3544                 reset_mask |= RADEON_RESET_SEM;
3545
3546         if (tmp & GRBM_RQ_PENDING)
3547                 reset_mask |= RADEON_RESET_GRBM;
3548
3549         if (tmp & VMC_BUSY)
3550                 reset_mask |= RADEON_RESET_VMC;
3551
3552         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3553                    MCC_BUSY | MCD_BUSY))
3554                 reset_mask |= RADEON_RESET_MC;
3555
3556         if (evergreen_is_display_hung(rdev))
3557                 reset_mask |= RADEON_RESET_DISPLAY;
3558
3559         /* VM_L2_STATUS */
3560         tmp = RREG32(VM_L2_STATUS);
3561         if (tmp & L2_BUSY)
3562                 reset_mask |= RADEON_RESET_VMC;
3563
3564         /* Skip MC reset as it's mostly likely not hung, just busy */
3565         if (reset_mask & RADEON_RESET_MC) {
3566                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3567                 reset_mask &= ~RADEON_RESET_MC;
3568         }
3569
3570         return reset_mask;
3571 }
3572
3573 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3574 {
3575         struct evergreen_mc_save save;
3576         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3577         u32 tmp;
3578
3579         if (reset_mask == 0)
3580                 return;
3581
3582         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3583
3584         evergreen_print_gpu_status_regs(rdev);
3585         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3586                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3587         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3588                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3589
3590         /* Disable CP parsing/prefetching */
3591         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3592
3593         if (reset_mask & RADEON_RESET_DMA) {
3594                 /* dma0 */
3595                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3596                 tmp &= ~DMA_RB_ENABLE;
3597                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3598         }
3599         if (reset_mask & RADEON_RESET_DMA1) {
3600                 /* dma1 */
3601                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3602                 tmp &= ~DMA_RB_ENABLE;
3603                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3604         }
3605
3606         udelay(50);
3607
3608         evergreen_mc_stop(rdev, &save);
3609         if (evergreen_mc_wait_for_idle(rdev)) {
3610                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3611         }
3612
3613         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3614                 grbm_soft_reset = SOFT_RESET_CB |
3615                         SOFT_RESET_DB |
3616                         SOFT_RESET_GDS |
3617                         SOFT_RESET_PA |
3618                         SOFT_RESET_SC |
3619                         SOFT_RESET_BCI |
3620                         SOFT_RESET_SPI |
3621                         SOFT_RESET_SX |
3622                         SOFT_RESET_TC |
3623                         SOFT_RESET_TA |
3624                         SOFT_RESET_VGT |
3625                         SOFT_RESET_IA;
3626         }
3627
3628         if (reset_mask & RADEON_RESET_CP) {
3629                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3630
3631                 srbm_soft_reset |= SOFT_RESET_GRBM;
3632         }
3633
3634         if (reset_mask & RADEON_RESET_DMA)
3635                 srbm_soft_reset |= SOFT_RESET_DMA;
3636
3637         if (reset_mask & RADEON_RESET_DMA1)
3638                 srbm_soft_reset |= SOFT_RESET_DMA1;
3639
3640         if (reset_mask & RADEON_RESET_DISPLAY)
3641                 srbm_soft_reset |= SOFT_RESET_DC;
3642
3643         if (reset_mask & RADEON_RESET_RLC)
3644                 grbm_soft_reset |= SOFT_RESET_RLC;
3645
3646         if (reset_mask & RADEON_RESET_SEM)
3647                 srbm_soft_reset |= SOFT_RESET_SEM;
3648
3649         if (reset_mask & RADEON_RESET_IH)
3650                 srbm_soft_reset |= SOFT_RESET_IH;
3651
3652         if (reset_mask & RADEON_RESET_GRBM)
3653                 srbm_soft_reset |= SOFT_RESET_GRBM;
3654
3655         if (reset_mask & RADEON_RESET_VMC)
3656                 srbm_soft_reset |= SOFT_RESET_VMC;
3657
3658         if (reset_mask & RADEON_RESET_MC)
3659                 srbm_soft_reset |= SOFT_RESET_MC;
3660
3661         if (grbm_soft_reset) {
3662                 tmp = RREG32(GRBM_SOFT_RESET);
3663                 tmp |= grbm_soft_reset;
3664                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3665                 WREG32(GRBM_SOFT_RESET, tmp);
3666                 tmp = RREG32(GRBM_SOFT_RESET);
3667
3668                 udelay(50);
3669
3670                 tmp &= ~grbm_soft_reset;
3671                 WREG32(GRBM_SOFT_RESET, tmp);
3672                 tmp = RREG32(GRBM_SOFT_RESET);
3673         }
3674
3675         if (srbm_soft_reset) {
3676                 tmp = RREG32(SRBM_SOFT_RESET);
3677                 tmp |= srbm_soft_reset;
3678                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3679                 WREG32(SRBM_SOFT_RESET, tmp);
3680                 tmp = RREG32(SRBM_SOFT_RESET);
3681
3682                 udelay(50);
3683
3684                 tmp &= ~srbm_soft_reset;
3685                 WREG32(SRBM_SOFT_RESET, tmp);
3686                 tmp = RREG32(SRBM_SOFT_RESET);
3687         }
3688
3689         /* Wait a little for things to settle down */
3690         udelay(50);
3691
3692         evergreen_mc_resume(rdev, &save);
3693         udelay(50);
3694
3695         evergreen_print_gpu_status_regs(rdev);
3696 }
3697
3698 int si_asic_reset(struct radeon_device *rdev)
3699 {
3700         u32 reset_mask;
3701
3702         reset_mask = si_gpu_check_soft_reset(rdev);
3703
3704         if (reset_mask)
3705                 r600_set_bios_scratch_engine_hung(rdev, true);
3706
3707         si_gpu_soft_reset(rdev, reset_mask);
3708
3709         reset_mask = si_gpu_check_soft_reset(rdev);
3710
3711         if (!reset_mask)
3712                 r600_set_bios_scratch_engine_hung(rdev, false);
3713
3714         return 0;
3715 }
3716
3717 /**
3718  * si_gfx_is_lockup - Check if the GFX engine is locked up
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ring: radeon_ring structure holding ring information
3722  *
3723  * Check if the GFX engine is locked up.
3724  * Returns true if the engine appears to be locked up, false if not.
3725  */
3726 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3727 {
3728         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3729
3730         if (!(reset_mask & (RADEON_RESET_GFX |
3731                             RADEON_RESET_COMPUTE |
3732                             RADEON_RESET_CP))) {
3733                 radeon_ring_lockup_update(ring);
3734                 return false;
3735         }
3736         /* force CP activities */
3737         radeon_ring_force_activity(rdev, ring);
3738         return radeon_ring_test_lockup(rdev, ring);
3739 }
3740
3741 /**
3742  * si_dma_is_lockup - Check if the DMA engine is locked up
3743  *
3744  * @rdev: radeon_device pointer
3745  * @ring: radeon_ring structure holding ring information
3746  *
3747  * Check if the async DMA engine is locked up.
3748  * Returns true if the engine appears to be locked up, false if not.
3749  */
3750 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3751 {
3752         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3753         u32 mask;
3754
3755         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3756                 mask = RADEON_RESET_DMA;
3757         else
3758                 mask = RADEON_RESET_DMA1;
3759
3760         if (!(reset_mask & mask)) {
3761                 radeon_ring_lockup_update(ring);
3762                 return false;
3763         }
3764         /* force ring activities */
3765         radeon_ring_force_activity(rdev, ring);
3766         return radeon_ring_test_lockup(rdev, ring);
3767 }
3768
3769 /* MC */
3770 static void si_mc_program(struct radeon_device *rdev)
3771 {
3772         struct evergreen_mc_save save;
3773         u32 tmp;
3774         int i, j;
3775
3776         /* Initialize HDP */
3777         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3778                 WREG32((0x2c14 + j), 0x00000000);
3779                 WREG32((0x2c18 + j), 0x00000000);
3780                 WREG32((0x2c1c + j), 0x00000000);
3781                 WREG32((0x2c20 + j), 0x00000000);
3782                 WREG32((0x2c24 + j), 0x00000000);
3783         }
3784         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3785
3786         evergreen_mc_stop(rdev, &save);
3787         if (radeon_mc_wait_for_idle(rdev)) {
3788                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3789         }
3790         if (!ASIC_IS_NODCE(rdev))
3791                 /* Lockout access through VGA aperture*/
3792                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3793         /* Update configuration */
3794         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3795                rdev->mc.vram_start >> 12);
3796         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3797                rdev->mc.vram_end >> 12);
3798         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3799                rdev->vram_scratch.gpu_addr >> 12);
3800         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3801         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3802         WREG32(MC_VM_FB_LOCATION, tmp);
3803         /* XXX double check these! */
3804         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3805         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3806         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3807         WREG32(MC_VM_AGP_BASE, 0);
3808         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3809         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3810         if (radeon_mc_wait_for_idle(rdev)) {
3811                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3812         }
3813         evergreen_mc_resume(rdev, &save);
3814         if (!ASIC_IS_NODCE(rdev)) {
3815                 /* we need to own VRAM, so turn off the VGA renderer here
3816                  * to stop it overwriting our objects */
3817                 rv515_vga_render_disable(rdev);
3818         }
3819 }
3820
3821 void si_vram_gtt_location(struct radeon_device *rdev,
3822                           struct radeon_mc *mc)
3823 {
3824         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3825                 /* leave room for at least 1024M GTT */
3826                 dev_warn(rdev->dev, "limiting VRAM\n");
3827                 mc->real_vram_size = 0xFFC0000000ULL;
3828                 mc->mc_vram_size = 0xFFC0000000ULL;
3829         }
3830         radeon_vram_location(rdev, &rdev->mc, 0);
3831         rdev->mc.gtt_base_align = 0;
3832         radeon_gtt_location(rdev, mc);
3833 }
3834
3835 static int si_mc_init(struct radeon_device *rdev)
3836 {
3837         u32 tmp;
3838         int chansize, numchan;
3839
3840         /* Get VRAM informations */
3841         rdev->mc.vram_is_ddr = true;
3842         tmp = RREG32(MC_ARB_RAMCFG);
3843         if (tmp & CHANSIZE_OVERRIDE) {
3844                 chansize = 16;
3845         } else if (tmp & CHANSIZE_MASK) {
3846                 chansize = 64;
3847         } else {
3848                 chansize = 32;
3849         }
3850         tmp = RREG32(MC_SHARED_CHMAP);
3851         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3852         case 0:
3853         default:
3854                 numchan = 1;
3855                 break;
3856         case 1:
3857                 numchan = 2;
3858                 break;
3859         case 2:
3860                 numchan = 4;
3861                 break;
3862         case 3:
3863                 numchan = 8;
3864                 break;
3865         case 4:
3866                 numchan = 3;
3867                 break;
3868         case 5:
3869                 numchan = 6;
3870                 break;
3871         case 6:
3872                 numchan = 10;
3873                 break;
3874         case 7:
3875                 numchan = 12;
3876                 break;
3877         case 8:
3878                 numchan = 16;
3879                 break;
3880         }
3881         rdev->mc.vram_width = numchan * chansize;
3882         /* Could aper size report 0 ? */
3883         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3884         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3885         /* size in MB on si */
3886         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3887         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3888         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3889         si_vram_gtt_location(rdev, &rdev->mc);
3890         radeon_update_bandwidth_info(rdev);
3891
3892         return 0;
3893 }
3894
3895 /*
3896  * GART
3897  */
3898 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3899 {
3900         /* flush hdp cache */
3901         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3902
3903         /* bits 0-15 are the VM contexts0-15 */
3904         WREG32(VM_INVALIDATE_REQUEST, 1);
3905 }
3906
3907 static int si_pcie_gart_enable(struct radeon_device *rdev)
3908 {
3909         int r, i;
3910
3911         if (rdev->gart.robj == NULL) {
3912                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3913                 return -EINVAL;
3914         }
3915         r = radeon_gart_table_vram_pin(rdev);
3916         if (r)
3917                 return r;
3918         radeon_gart_restore(rdev);
3919         /* Setup TLB control */
3920         WREG32(MC_VM_MX_L1_TLB_CNTL,
3921                (0xA << 7) |
3922                ENABLE_L1_TLB |
3923                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3924                ENABLE_ADVANCED_DRIVER_MODEL |
3925                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3926         /* Setup L2 cache */
3927         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3928                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3929                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3930                EFFECTIVE_L2_QUEUE_SIZE(7) |
3931                CONTEXT1_IDENTITY_ACCESS_MODE(1));
3932         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3933         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3934                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3935         /* setup context0 */
3936         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3937         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3938         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3939         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3940                         (u32)(rdev->dummy_page.addr >> 12));
3941         WREG32(VM_CONTEXT0_CNTL2, 0);
3942         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3943                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3944
3945         WREG32(0x15D4, 0);
3946         WREG32(0x15D8, 0);
3947         WREG32(0x15DC, 0);
3948
3949         /* empty context1-15 */
3950         /* set vm size, must be a multiple of 4 */
3951         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3952         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3953         /* Assign the pt base to something valid for now; the pts used for
3954          * the VMs are determined by the application and setup and assigned
3955          * on the fly in the vm part of radeon_gart.c
3956          */
3957         for (i = 1; i < 16; i++) {
3958                 if (i < 8)
3959                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3960                                rdev->gart.table_addr >> 12);
3961                 else
3962                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3963                                rdev->gart.table_addr >> 12);
3964         }
3965
3966         /* enable context1-15 */
3967         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3968                (u32)(rdev->dummy_page.addr >> 12));
3969         WREG32(VM_CONTEXT1_CNTL2, 4);
3970         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3971                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3972                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3973                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3974                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3975                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3976                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3977                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3978                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3979                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3980                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3981                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3982                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3983
3984         si_pcie_gart_tlb_flush(rdev);
3985         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3986                  (unsigned)(rdev->mc.gtt_size >> 20),
3987                  (unsigned long long)rdev->gart.table_addr);
3988         rdev->gart.ready = true;
3989         return 0;
3990 }
3991
3992 static void si_pcie_gart_disable(struct radeon_device *rdev)
3993 {
3994         /* Disable all tables */
3995         WREG32(VM_CONTEXT0_CNTL, 0);
3996         WREG32(VM_CONTEXT1_CNTL, 0);
3997         /* Setup TLB control */
3998         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3999                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4000         /* Setup L2 cache */
4001         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4002                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4003                EFFECTIVE_L2_QUEUE_SIZE(7) |
4004                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4005         WREG32(VM_L2_CNTL2, 0);
4006         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4007                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4008         radeon_gart_table_vram_unpin(rdev);
4009 }
4010
/*
 * Final GART teardown: disable the hardware first, then free the VRAM
 * table and finish the common GART state.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4017
4018 /* vm parser */
4019 static bool si_vm_reg_valid(u32 reg)
4020 {
4021         /* context regs are fine */
4022         if (reg >= 0x28000)
4023                 return true;
4024
4025         /* check config regs */
4026         switch (reg) {
4027         case GRBM_GFX_INDEX:
4028         case CP_STRMOUT_CNTL:
4029         case VGT_VTX_VECT_EJECT_REG:
4030         case VGT_CACHE_INVALIDATION:
4031         case VGT_ESGS_RING_SIZE:
4032         case VGT_GSVS_RING_SIZE:
4033         case VGT_GS_VERTEX_REUSE:
4034         case VGT_PRIMITIVE_TYPE:
4035         case VGT_INDEX_TYPE:
4036         case VGT_NUM_INDICES:
4037         case VGT_NUM_INSTANCES:
4038         case VGT_TF_RING_SIZE:
4039         case VGT_HS_OFFCHIP_PARAM:
4040         case VGT_TF_MEMORY_BASE:
4041         case PA_CL_ENHANCE:
4042         case PA_SU_LINE_STIPPLE_VALUE:
4043         case PA_SC_LINE_STIPPLE_STATE:
4044         case PA_SC_ENHANCE:
4045         case SQC_CACHES:
4046         case SPI_STATIC_THREAD_MGMT_1:
4047         case SPI_STATIC_THREAD_MGMT_2:
4048         case SPI_STATIC_THREAD_MGMT_3:
4049         case SPI_PS_MAX_WAVE_ID:
4050         case SPI_CONFIG_CNTL:
4051         case SPI_CONFIG_CNTL_1:
4052         case TA_CNTL_AUX:
4053                 return true;
4054         default:
4055                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4056                 return false;
4057         }
4058 }
4059
4060 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4061                                   u32 *ib, struct radeon_cs_packet *pkt)
4062 {
4063         switch (pkt->opcode) {
4064         case PACKET3_NOP:
4065         case PACKET3_SET_BASE:
4066         case PACKET3_SET_CE_DE_COUNTERS:
4067         case PACKET3_LOAD_CONST_RAM:
4068         case PACKET3_WRITE_CONST_RAM:
4069         case PACKET3_WRITE_CONST_RAM_OFFSET:
4070         case PACKET3_DUMP_CONST_RAM:
4071         case PACKET3_INCREMENT_CE_COUNTER:
4072         case PACKET3_WAIT_ON_DE_COUNTER:
4073         case PACKET3_CE_WRITE:
4074                 break;
4075         default:
4076                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4077                 return -EINVAL;
4078         }
4079         return 0;
4080 }
4081
4082 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4083                                    u32 *ib, struct radeon_cs_packet *pkt)
4084 {
4085         u32 idx = pkt->idx + 1;
4086         u32 idx_value = ib[idx];
4087         u32 start_reg, end_reg, reg, i;
4088         u32 command, info;
4089
4090         switch (pkt->opcode) {
4091         case PACKET3_NOP:
4092         case PACKET3_SET_BASE:
4093         case PACKET3_CLEAR_STATE:
4094         case PACKET3_INDEX_BUFFER_SIZE:
4095         case PACKET3_DISPATCH_DIRECT:
4096         case PACKET3_DISPATCH_INDIRECT:
4097         case PACKET3_ALLOC_GDS:
4098         case PACKET3_WRITE_GDS_RAM:
4099         case PACKET3_ATOMIC_GDS:
4100         case PACKET3_ATOMIC:
4101         case PACKET3_OCCLUSION_QUERY:
4102         case PACKET3_SET_PREDICATION:
4103         case PACKET3_COND_EXEC:
4104         case PACKET3_PRED_EXEC:
4105         case PACKET3_DRAW_INDIRECT:
4106         case PACKET3_DRAW_INDEX_INDIRECT:
4107         case PACKET3_INDEX_BASE:
4108         case PACKET3_DRAW_INDEX_2:
4109         case PACKET3_CONTEXT_CONTROL:
4110         case PACKET3_INDEX_TYPE:
4111         case PACKET3_DRAW_INDIRECT_MULTI:
4112         case PACKET3_DRAW_INDEX_AUTO:
4113         case PACKET3_DRAW_INDEX_IMMD:
4114         case PACKET3_NUM_INSTANCES:
4115         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4116         case PACKET3_STRMOUT_BUFFER_UPDATE:
4117         case PACKET3_DRAW_INDEX_OFFSET_2:
4118         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4119         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4120         case PACKET3_MPEG_INDEX:
4121         case PACKET3_WAIT_REG_MEM:
4122         case PACKET3_MEM_WRITE:
4123         case PACKET3_PFP_SYNC_ME:
4124         case PACKET3_SURFACE_SYNC:
4125         case PACKET3_EVENT_WRITE:
4126         case PACKET3_EVENT_WRITE_EOP:
4127         case PACKET3_EVENT_WRITE_EOS:
4128         case PACKET3_SET_CONTEXT_REG:
4129         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4130         case PACKET3_SET_SH_REG:
4131         case PACKET3_SET_SH_REG_OFFSET:
4132         case PACKET3_INCREMENT_DE_COUNTER:
4133         case PACKET3_WAIT_ON_CE_COUNTER:
4134         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4135         case PACKET3_ME_WRITE:
4136                 break;
4137         case PACKET3_COPY_DATA:
4138                 if ((idx_value & 0xf00) == 0) {
4139                         reg = ib[idx + 3] * 4;
4140                         if (!si_vm_reg_valid(reg))
4141                                 return -EINVAL;
4142                 }
4143                 break;
4144         case PACKET3_WRITE_DATA:
4145                 if ((idx_value & 0xf00) == 0) {
4146                         start_reg = ib[idx + 1] * 4;
4147                         if (idx_value & 0x10000) {
4148                                 if (!si_vm_reg_valid(start_reg))
4149                                         return -EINVAL;
4150                         } else {
4151                                 for (i = 0; i < (pkt->count - 2); i++) {
4152                                         reg = start_reg + (4 * i);
4153                                         if (!si_vm_reg_valid(reg))
4154                                                 return -EINVAL;
4155                                 }
4156                         }
4157                 }
4158                 break;
4159         case PACKET3_COND_WRITE:
4160                 if (idx_value & 0x100) {
4161                         reg = ib[idx + 5] * 4;
4162                         if (!si_vm_reg_valid(reg))
4163                                 return -EINVAL;
4164                 }
4165                 break;
4166         case PACKET3_COPY_DW:
4167                 if (idx_value & 0x2) {
4168                         reg = ib[idx + 3] * 4;
4169                         if (!si_vm_reg_valid(reg))
4170                                 return -EINVAL;
4171                 }
4172                 break;
4173         case PACKET3_SET_CONFIG_REG:
4174                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4175                 end_reg = 4 * pkt->count + start_reg - 4;
4176                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4177                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4178                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4179                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4180                         return -EINVAL;
4181                 }
4182                 for (i = 0; i < pkt->count; i++) {
4183                         reg = start_reg + (4 * i);
4184                         if (!si_vm_reg_valid(reg))
4185                                 return -EINVAL;
4186                 }
4187                 break;
4188         case PACKET3_CP_DMA:
4189                 command = ib[idx + 4];
4190                 info = ib[idx + 1];
4191                 if (command & PACKET3_CP_DMA_CMD_SAS) {
4192                         /* src address space is register */
4193                         if (((info & 0x60000000) >> 29) == 0) {
4194                                 start_reg = idx_value << 2;
4195                                 if (command & PACKET3_CP_DMA_CMD_SAIC) {
4196                                         reg = start_reg;
4197                                         if (!si_vm_reg_valid(reg)) {
4198                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4199                                                 return -EINVAL;
4200                                         }
4201                                 } else {
4202                                         for (i = 0; i < (command & 0x1fffff); i++) {
4203                                                 reg = start_reg + (4 * i);
4204                                                 if (!si_vm_reg_valid(reg)) {
4205                                                         DRM_ERROR("CP DMA Bad SRC register\n");
4206                                                         return -EINVAL;
4207                                                 }
4208                                         }
4209                                 }
4210                         }
4211                 }
4212                 if (command & PACKET3_CP_DMA_CMD_DAS) {
4213                         /* dst address space is register */
4214                         if (((info & 0x00300000) >> 20) == 0) {
4215                                 start_reg = ib[idx + 2];
4216                                 if (command & PACKET3_CP_DMA_CMD_DAIC) {
4217                                         reg = start_reg;
4218                                         if (!si_vm_reg_valid(reg)) {
4219                                                 DRM_ERROR("CP DMA Bad DST register\n");
4220                                                 return -EINVAL;
4221                                         }
4222                                 } else {
4223                                         for (i = 0; i < (command & 0x1fffff); i++) {
4224                                                 reg = start_reg + (4 * i);
4225                                                 if (!si_vm_reg_valid(reg)) {
4226                                                         DRM_ERROR("CP DMA Bad DST register\n");
4227                                                         return -EINVAL;
4228                                                 }
4229                                         }
4230                                 }
4231                         }
4232                 }
4233                 break;
4234         default:
4235                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4236                 return -EINVAL;
4237         }
4238         return 0;
4239 }
4240
4241 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4242                                        u32 *ib, struct radeon_cs_packet *pkt)
4243 {
4244         u32 idx = pkt->idx + 1;
4245         u32 idx_value = ib[idx];
4246         u32 start_reg, reg, i;
4247
4248         switch (pkt->opcode) {
4249         case PACKET3_NOP:
4250         case PACKET3_SET_BASE:
4251         case PACKET3_CLEAR_STATE:
4252         case PACKET3_DISPATCH_DIRECT:
4253         case PACKET3_DISPATCH_INDIRECT:
4254         case PACKET3_ALLOC_GDS:
4255         case PACKET3_WRITE_GDS_RAM:
4256         case PACKET3_ATOMIC_GDS:
4257         case PACKET3_ATOMIC:
4258         case PACKET3_OCCLUSION_QUERY:
4259         case PACKET3_SET_PREDICATION:
4260         case PACKET3_COND_EXEC:
4261         case PACKET3_PRED_EXEC:
4262         case PACKET3_CONTEXT_CONTROL:
4263         case PACKET3_STRMOUT_BUFFER_UPDATE:
4264         case PACKET3_WAIT_REG_MEM:
4265         case PACKET3_MEM_WRITE:
4266         case PACKET3_PFP_SYNC_ME:
4267         case PACKET3_SURFACE_SYNC:
4268         case PACKET3_EVENT_WRITE:
4269         case PACKET3_EVENT_WRITE_EOP:
4270         case PACKET3_EVENT_WRITE_EOS:
4271         case PACKET3_SET_CONTEXT_REG:
4272         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4273         case PACKET3_SET_SH_REG:
4274         case PACKET3_SET_SH_REG_OFFSET:
4275         case PACKET3_INCREMENT_DE_COUNTER:
4276         case PACKET3_WAIT_ON_CE_COUNTER:
4277         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4278         case PACKET3_ME_WRITE:
4279                 break;
4280         case PACKET3_COPY_DATA:
4281                 if ((idx_value & 0xf00) == 0) {
4282                         reg = ib[idx + 3] * 4;
4283                         if (!si_vm_reg_valid(reg))
4284                                 return -EINVAL;
4285                 }
4286                 break;
4287         case PACKET3_WRITE_DATA:
4288                 if ((idx_value & 0xf00) == 0) {
4289                         start_reg = ib[idx + 1] * 4;
4290                         if (idx_value & 0x10000) {
4291                                 if (!si_vm_reg_valid(start_reg))
4292                                         return -EINVAL;
4293                         } else {
4294                                 for (i = 0; i < (pkt->count - 2); i++) {
4295                                         reg = start_reg + (4 * i);
4296                                         if (!si_vm_reg_valid(reg))
4297                                                 return -EINVAL;
4298                                 }
4299                         }
4300                 }
4301                 break;
4302         case PACKET3_COND_WRITE:
4303                 if (idx_value & 0x100) {
4304                         reg = ib[idx + 5] * 4;
4305                         if (!si_vm_reg_valid(reg))
4306                                 return -EINVAL;
4307                 }
4308                 break;
4309         case PACKET3_COPY_DW:
4310                 if (idx_value & 0x2) {
4311                         reg = ib[idx + 3] * 4;
4312                         if (!si_vm_reg_valid(reg))
4313                                 return -EINVAL;
4314                 }
4315                 break;
4316         default:
4317                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4318                 return -EINVAL;
4319         }
4320         return 0;
4321 }
4322
4323 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4324 {
4325         int ret = 0;
4326         u32 idx = 0;
4327         struct radeon_cs_packet pkt;
4328
4329         do {
4330                 pkt.idx = idx;
4331                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4332                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4333                 pkt.one_reg_wr = 0;
4334                 switch (pkt.type) {
4335                 case RADEON_PACKET_TYPE0:
4336                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4337                         ret = -EINVAL;
4338                         break;
4339                 case RADEON_PACKET_TYPE2:
4340                         idx += 1;
4341                         break;
4342                 case RADEON_PACKET_TYPE3:
4343                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4344                         if (ib->is_const_ib)
4345                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4346                         else {
4347                                 switch (ib->ring) {
4348                                 case RADEON_RING_TYPE_GFX_INDEX:
4349                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4350                                         break;
4351                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4352                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4353                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4354                                         break;
4355                                 default:
4356                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4357                                         ret = -EINVAL;
4358                                         break;
4359                                 }
4360                         }
4361                         idx += pkt.count + 2;
4362                         break;
4363                 default:
4364                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4365                         ret = -EINVAL;
4366                         break;
4367                 }
4368                 if (ret)
4369                         break;
4370         } while (idx < ib->length_dw);
4371
4372         return ret;
4373 }
4374
4375 /*
4376  * vm
4377  */
4378 int si_vm_init(struct radeon_device *rdev)
4379 {
4380         /* number of VMs */
4381         rdev->vm_manager.nvm = 16;
4382         /* base offset of vram pages */
4383         rdev->vm_manager.vram_base_offset = 0;
4384
4385         return 0;
4386 }
4387
void si_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty: si_vm_init() only stores two constants */
}
4391
4392 /**
4393  * si_vm_decode_fault - print human readable fault info
4394  *
4395  * @rdev: radeon_device pointer
4396  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4397  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4398  *
4399  * Print human readable fault information (SI).
4400  */
4401 static void si_vm_decode_fault(struct radeon_device *rdev,
4402                                u32 status, u32 addr)
4403 {
4404         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4405         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4406         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4407         char *block;
4408
4409         if (rdev->family == CHIP_TAHITI) {
4410                 switch (mc_id) {
4411                 case 160:
4412                 case 144:
4413                 case 96:
4414                 case 80:
4415                 case 224:
4416                 case 208:
4417                 case 32:
4418                 case 16:
4419                         block = "CB";
4420                         break;
4421                 case 161:
4422                 case 145:
4423                 case 97:
4424                 case 81:
4425                 case 225:
4426                 case 209:
4427                 case 33:
4428                 case 17:
4429                         block = "CB_FMASK";
4430                         break;
4431                 case 162:
4432                 case 146:
4433                 case 98:
4434                 case 82:
4435                 case 226:
4436                 case 210:
4437                 case 34:
4438                 case 18:
4439                         block = "CB_CMASK";
4440                         break;
4441                 case 163:
4442                 case 147:
4443                 case 99:
4444                 case 83:
4445                 case 227:
4446                 case 211:
4447                 case 35:
4448                 case 19:
4449                         block = "CB_IMMED";
4450                         break;
4451                 case 164:
4452                 case 148:
4453                 case 100:
4454                 case 84:
4455                 case 228:
4456                 case 212:
4457                 case 36:
4458                 case 20:
4459                         block = "DB";
4460                         break;
4461                 case 165:
4462                 case 149:
4463                 case 101:
4464                 case 85:
4465                 case 229:
4466                 case 213:
4467                 case 37:
4468                 case 21:
4469                         block = "DB_HTILE";
4470                         break;
4471                 case 167:
4472                 case 151:
4473                 case 103:
4474                 case 87:
4475                 case 231:
4476                 case 215:
4477                 case 39:
4478                 case 23:
4479                         block = "DB_STEN";
4480                         break;
4481                 case 72:
4482                 case 68:
4483                 case 64:
4484                 case 8:
4485                 case 4:
4486                 case 0:
4487                 case 136:
4488                 case 132:
4489                 case 128:
4490                 case 200:
4491                 case 196:
4492                 case 192:
4493                         block = "TC";
4494                         break;
4495                 case 112:
4496                 case 48:
4497                         block = "CP";
4498                         break;
4499                 case 49:
4500                 case 177:
4501                 case 50:
4502                 case 178:
4503                         block = "SH";
4504                         break;
4505                 case 53:
4506                 case 190:
4507                         block = "VGT";
4508                         break;
4509                 case 117:
4510                         block = "IH";
4511                         break;
4512                 case 51:
4513                 case 115:
4514                         block = "RLC";
4515                         break;
4516                 case 119:
4517                 case 183:
4518                         block = "DMA0";
4519                         break;
4520                 case 61:
4521                         block = "DMA1";
4522                         break;
4523                 case 248:
4524                 case 120:
4525                         block = "HDP";
4526                         break;
4527                 default:
4528                         block = "unknown";
4529                         break;
4530                 }
4531         } else {
4532                 switch (mc_id) {
4533                 case 32:
4534                 case 16:
4535                 case 96:
4536                 case 80:
4537                 case 160:
4538                 case 144:
4539                 case 224:
4540                 case 208:
4541                         block = "CB";
4542                         break;
4543                 case 33:
4544                 case 17:
4545                 case 97:
4546                 case 81:
4547                 case 161:
4548                 case 145:
4549                 case 225:
4550                 case 209:
4551                         block = "CB_FMASK";
4552                         break;
4553                 case 34:
4554                 case 18:
4555                 case 98:
4556                 case 82:
4557                 case 162:
4558                 case 146:
4559                 case 226:
4560                 case 210:
4561                         block = "CB_CMASK";
4562                         break;
4563                 case 35:
4564                 case 19:
4565                 case 99:
4566                 case 83:
4567                 case 163:
4568                 case 147:
4569                 case 227:
4570                 case 211:
4571                         block = "CB_IMMED";
4572                         break;
4573                 case 36:
4574                 case 20:
4575                 case 100:
4576                 case 84:
4577                 case 164:
4578                 case 148:
4579                 case 228:
4580                 case 212:
4581                         block = "DB";
4582                         break;
4583                 case 37:
4584                 case 21:
4585                 case 101:
4586                 case 85:
4587                 case 165:
4588                 case 149:
4589                 case 229:
4590                 case 213:
4591                         block = "DB_HTILE";
4592                         break;
4593                 case 39:
4594                 case 23:
4595                 case 103:
4596                 case 87:
4597                 case 167:
4598                 case 151:
4599                 case 231:
4600                 case 215:
4601                         block = "DB_STEN";
4602                         break;
4603                 case 72:
4604                 case 68:
4605                 case 8:
4606                 case 4:
4607                 case 136:
4608                 case 132:
4609                 case 200:
4610                 case 196:
4611                         block = "TC";
4612                         break;
4613                 case 112:
4614                 case 48:
4615                         block = "CP";
4616                         break;
4617                 case 49:
4618                 case 177:
4619                 case 50:
4620                 case 178:
4621                         block = "SH";
4622                         break;
4623                 case 53:
4624                         block = "VGT";
4625                         break;
4626                 case 117:
4627                         block = "IH";
4628                         break;
4629                 case 51:
4630                 case 115:
4631                         block = "RLC";
4632                         break;
4633                 case 119:
4634                 case 183:
4635                         block = "DMA0";
4636                         break;
4637                 case 61:
4638                         block = "DMA1";
4639                         break;
4640                 case 248:
4641                 case 120:
4642                         block = "HDP";
4643                         break;
4644                 default:
4645                         block = "unknown";
4646                         break;
4647                 }
4648         }
4649
4650         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4651                protections, vmid, addr,
4652                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4653                block, mc_id);
4654 }
4655
4656 /**
4657  * si_vm_set_page - update the page tables using the CP
4658  *
4659  * @rdev: radeon_device pointer
4660  * @ib: indirect buffer to fill with commands
4661  * @pe: addr of the page entry
4662  * @addr: dst addr to write into pe
4663  * @count: number of page entries to update
4664  * @incr: increase next addr by incr bytes
4665  * @flags: access flags
4666  *
4667  * Update the page tables using the CP (SI).
4668  */
4669 void si_vm_set_page(struct radeon_device *rdev,
4670                     struct radeon_ib *ib,
4671                     uint64_t pe,
4672                     uint64_t addr, unsigned count,
4673                     uint32_t incr, uint32_t flags)
4674 {
4675         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4676         uint64_t value;
4677         unsigned ndw;
4678
4679         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4680                 while (count) {
4681                         ndw = 2 + count * 2;
4682                         if (ndw > 0x3FFE)
4683                                 ndw = 0x3FFE;
4684
4685                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4686                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4687                                         WRITE_DATA_DST_SEL(1));
4688                         ib->ptr[ib->length_dw++] = pe;
4689                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4690                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4691                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4692                                         value = radeon_vm_map_gart(rdev, addr);
4693                                         value &= 0xFFFFFFFFFFFFF000ULL;
4694                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4695                                         value = addr;
4696                                 } else {
4697                                         value = 0;
4698                                 }
4699                                 addr += incr;
4700                                 value |= r600_flags;
4701                                 ib->ptr[ib->length_dw++] = value;
4702                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4703                         }
4704                 }
4705         } else {
4706                 /* DMA */
4707                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4708                         while (count) {
4709                                 ndw = count * 2;
4710                                 if (ndw > 0xFFFFE)
4711                                         ndw = 0xFFFFE;
4712
4713                                 /* for non-physically contiguous pages (system) */
4714                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4715                                 ib->ptr[ib->length_dw++] = pe;
4716                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4717                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4718                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4719                                                 value = radeon_vm_map_gart(rdev, addr);
4720                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4721                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4722                                                 value = addr;
4723                                         } else {
4724                                                 value = 0;
4725                                         }
4726                                         addr += incr;
4727                                         value |= r600_flags;
4728                                         ib->ptr[ib->length_dw++] = value;
4729                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4730                                 }
4731                         }
4732                 } else {
4733                         while (count) {
4734                                 ndw = count * 2;
4735                                 if (ndw > 0xFFFFE)
4736                                         ndw = 0xFFFFE;
4737
4738                                 if (flags & RADEON_VM_PAGE_VALID)
4739                                         value = addr;
4740                                 else
4741                                         value = 0;
4742                                 /* for physically contiguous pages (vram) */
4743                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4744                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4745                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4746                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4747                                 ib->ptr[ib->length_dw++] = 0;
4748                                 ib->ptr[ib->length_dw++] = value; /* value */
4749                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4750                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4751                                 ib->ptr[ib->length_dw++] = 0;
4752                                 pe += ndw * 4;
4753                                 addr += (ndw / 2) * incr;
4754                                 count -= ndw / 2;
4755                         }
4756                 }
4757                 while (ib->length_dw & 0x7)
4758                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4759         }
4760 }
4761
4762 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4763 {
4764         struct radeon_ring *ring = &rdev->ring[ridx];
4765
4766         if (vm == NULL)
4767                 return;
4768
4769         /* write new base address */
4770         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4771         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4772                                  WRITE_DATA_DST_SEL(0)));
4773
4774         if (vm->id < 8) {
4775                 radeon_ring_write(ring,
4776                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4777         } else {
4778                 radeon_ring_write(ring,
4779                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4780         }
4781         radeon_ring_write(ring, 0);
4782         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4783
4784         /* flush hdp cache */
4785         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4786         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4787                                  WRITE_DATA_DST_SEL(0)));
4788         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4789         radeon_ring_write(ring, 0);
4790         radeon_ring_write(ring, 0x1);
4791
4792         /* bits 0-15 are the VM contexts0-15 */
4793         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4794         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4795                                  WRITE_DATA_DST_SEL(0)));
4796         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4797         radeon_ring_write(ring, 0);
4798         radeon_ring_write(ring, 1 << vm->id);
4799
4800         /* sync PFP to ME, otherwise we might get invalid PFP reads */
4801         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4802         radeon_ring_write(ring, 0x0);
4803 }
4804
4805 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4806 {
4807         struct radeon_ring *ring = &rdev->ring[ridx];
4808
4809         if (vm == NULL)
4810                 return;
4811
4812         radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4813         if (vm->id < 8) {
4814                 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
4815         } else {
4816                 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
4817         }
4818         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4819
4820         /* flush hdp cache */
4821         radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4822         radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
4823         radeon_ring_write(ring, 1);
4824
4825         /* bits 0-7 are the VM contexts0-7 */
4826         radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4827         radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
4828         radeon_ring_write(ring, 1 << vm->id);
4829 }
4830
4831 /*
4832  *  Power and clock gating
4833  */
4834 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4835 {
4836         int i;
4837
4838         for (i = 0; i < rdev->usec_timeout; i++) {
4839                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4840                         break;
4841                 udelay(1);
4842         }
4843
4844         for (i = 0; i < rdev->usec_timeout; i++) {
4845                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4846                         break;
4847                 udelay(1);
4848         }
4849 }
4850
4851 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4852                                          bool enable)
4853 {
4854         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4855         u32 mask;
4856         int i;
4857
4858         if (enable)
4859                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4860         else
4861                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4862         WREG32(CP_INT_CNTL_RING0, tmp);
4863
4864         if (!enable) {
4865                 /* read a gfx register */
4866                 tmp = RREG32(DB_DEPTH_INFO);
4867
4868                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4869                 for (i = 0; i < rdev->usec_timeout; i++) {
4870                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4871                                 break;
4872                         udelay(1);
4873                 }
4874         }
4875 }
4876
4877 static void si_set_uvd_dcm(struct radeon_device *rdev,
4878                            bool sw_mode)
4879 {
4880         u32 tmp, tmp2;
4881
4882         tmp = RREG32(UVD_CGC_CTRL);
4883         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4884         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4885
4886         if (sw_mode) {
4887                 tmp &= ~0x7ffff800;
4888                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4889         } else {
4890                 tmp |= 0x7ffff800;
4891                 tmp2 = 0;
4892         }
4893
4894         WREG32(UVD_CGC_CTRL, tmp);
4895         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4896 }
4897
4898 static void si_init_uvd_internal_cg(struct radeon_device *rdev)
4899 {
4900         bool hw_mode = true;
4901
4902         if (hw_mode) {
4903                 si_set_uvd_dcm(rdev, false);
4904         } else {
4905                 u32 tmp = RREG32(UVD_CGC_CTRL);
4906                 tmp &= ~DCM;
4907                 WREG32(UVD_CGC_CTRL, tmp);
4908         }
4909 }
4910
4911 static u32 si_halt_rlc(struct radeon_device *rdev)
4912 {
4913         u32 data, orig;
4914
4915         orig = data = RREG32(RLC_CNTL);
4916
4917         if (data & RLC_ENABLE) {
4918                 data &= ~RLC_ENABLE;
4919                 WREG32(RLC_CNTL, data);
4920
4921                 si_wait_for_rlc_serdes(rdev);
4922         }
4923
4924         return orig;
4925 }
4926
4927 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4928 {
4929         u32 tmp;
4930
4931         tmp = RREG32(RLC_CNTL);
4932         if (tmp != rlc)
4933                 WREG32(RLC_CNTL, rlc);
4934 }
4935
4936 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4937 {
4938         u32 data, orig;
4939
4940         orig = data = RREG32(DMA_PG);
4941         if (enable)
4942                 data |= PG_CNTL_ENABLE;
4943         else
4944                 data &= ~PG_CNTL_ENABLE;
4945         if (orig != data)
4946                 WREG32(DMA_PG, data);
4947 }
4948
4949 static void si_init_dma_pg(struct radeon_device *rdev)
4950 {
4951         u32 tmp;
4952
4953         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4954         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4955
4956         for (tmp = 0; tmp < 5; tmp++)
4957                 WREG32(DMA_PGFSM_WRITE, 0);
4958 }
4959
4960 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4961                                bool enable)
4962 {
4963         u32 tmp;
4964
4965         if (enable) {
4966                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4967                 WREG32(RLC_TTOP_D, tmp);
4968
4969                 tmp = RREG32(RLC_PG_CNTL);
4970                 tmp |= GFX_PG_ENABLE;
4971                 WREG32(RLC_PG_CNTL, tmp);
4972
4973                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4974                 tmp |= AUTO_PG_EN;
4975                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4976         } else {
4977                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4978                 tmp &= ~AUTO_PG_EN;
4979                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4980
4981                 tmp = RREG32(DB_RENDER_CONTROL);
4982         }
4983 }
4984
4985 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4986 {
4987         u32 tmp;
4988
4989         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4990
4991         tmp = RREG32(RLC_PG_CNTL);
4992         tmp |= GFX_PG_SRC;
4993         WREG32(RLC_PG_CNTL, tmp);
4994
4995         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
4996
4997         tmp = RREG32(RLC_AUTO_PG_CTRL);
4998
4999         tmp &= ~GRBM_REG_SGIT_MASK;
5000         tmp |= GRBM_REG_SGIT(0x700);
5001         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5002         WREG32(RLC_AUTO_PG_CTRL, tmp);
5003 }
5004
5005 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5006 {
5007         u32 mask = 0, tmp, tmp1;
5008         int i;
5009
5010         si_select_se_sh(rdev, se, sh);
5011         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5012         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5013         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5014
5015         tmp &= 0xffff0000;
5016
5017         tmp |= tmp1;
5018         tmp >>= 16;
5019
5020         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5021                 mask <<= 1;
5022                 mask |= 1;
5023         }
5024
5025         return (~tmp) & mask;
5026 }
5027
5028 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5029 {
5030         u32 i, j, k, active_cu_number = 0;
5031         u32 mask, counter, cu_bitmap;
5032         u32 tmp = 0;
5033
5034         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5035                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5036                         mask = 1;
5037                         cu_bitmap = 0;
5038                         counter  = 0;
5039                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5040                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5041                                         if (counter < 2)
5042                                                 cu_bitmap |= mask;
5043                                         counter++;
5044                                 }
5045                                 mask <<= 1;
5046                         }
5047
5048                         active_cu_number += counter;
5049                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5050                 }
5051         }
5052
5053         WREG32(RLC_PG_AO_CU_MASK, tmp);
5054
5055         tmp = RREG32(RLC_MAX_PG_CU);
5056         tmp &= ~MAX_PU_CU_MASK;
5057         tmp |= MAX_PU_CU(active_cu_number);
5058         WREG32(RLC_MAX_PG_CU, tmp);
5059 }
5060
5061 static void si_enable_cgcg(struct radeon_device *rdev,
5062                            bool enable)
5063 {
5064         u32 data, orig, tmp;
5065
5066         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5067
5068         si_enable_gui_idle_interrupt(rdev, enable);
5069
5070         if (enable) {
5071                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5072
5073                 tmp = si_halt_rlc(rdev);
5074
5075                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5076                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5077                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5078
5079                 si_wait_for_rlc_serdes(rdev);
5080
5081                 si_update_rlc(rdev, tmp);
5082
5083                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5084
5085                 data |= CGCG_EN | CGLS_EN;
5086         } else {
5087                 RREG32(CB_CGTT_SCLK_CTRL);
5088                 RREG32(CB_CGTT_SCLK_CTRL);
5089                 RREG32(CB_CGTT_SCLK_CTRL);
5090                 RREG32(CB_CGTT_SCLK_CTRL);
5091
5092                 data &= ~(CGCG_EN | CGLS_EN);
5093         }
5094
5095         if (orig != data)
5096                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5097 }
5098
5099 static void si_enable_mgcg(struct radeon_device *rdev,
5100                            bool enable)
5101 {
5102         u32 data, orig, tmp = 0;
5103
5104         if (enable) {
5105                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5106                 data = 0x96940200;
5107                 if (orig != data)
5108                         WREG32(CGTS_SM_CTRL_REG, data);
5109
5110                 orig = data = RREG32(CP_MEM_SLP_CNTL);
5111                 data |= CP_MEM_LS_EN;
5112                 if (orig != data)
5113                         WREG32(CP_MEM_SLP_CNTL, data);
5114
5115                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5116                 data &= 0xffffffc0;
5117                 if (orig != data)
5118                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5119
5120                 tmp = si_halt_rlc(rdev);
5121
5122                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5123                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5124                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5125
5126                 si_update_rlc(rdev, tmp);
5127         } else {
5128                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5129                 data |= 0x00000003;
5130                 if (orig != data)
5131                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5132
5133                 data = RREG32(CP_MEM_SLP_CNTL);
5134                 if (data & CP_MEM_LS_EN) {
5135                         data &= ~CP_MEM_LS_EN;
5136                         WREG32(CP_MEM_SLP_CNTL, data);
5137                 }
5138                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5139                 data |= LS_OVERRIDE | OVERRIDE;
5140                 if (orig != data)
5141                         WREG32(CGTS_SM_CTRL_REG, data);
5142
5143                 tmp = si_halt_rlc(rdev);
5144
5145                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5146                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5147                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5148
5149                 si_update_rlc(rdev, tmp);
5150         }
5151 }
5152
5153 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5154                                bool enable)
5155 {
5156         u32 orig, data, tmp;
5157
5158         if (enable) {
5159                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5160                 tmp |= 0x3fff;
5161                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5162
5163                 orig = data = RREG32(UVD_CGC_CTRL);
5164                 data |= DCM;
5165                 if (orig != data)
5166                         WREG32(UVD_CGC_CTRL, data);
5167
5168                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5169                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5170         } else {
5171                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5172                 tmp &= ~0x3fff;
5173                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5174
5175                 orig = data = RREG32(UVD_CGC_CTRL);
5176                 data &= ~DCM;
5177                 if (orig != data)
5178                         WREG32(UVD_CGC_CTRL, data);
5179
5180                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5181                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5182         }
5183 }
5184
5185 static const u32 mc_cg_registers[] =
5186 {
5187         MC_HUB_MISC_HUB_CG,
5188         MC_HUB_MISC_SIP_CG,
5189         MC_HUB_MISC_VM_CG,
5190         MC_XPB_CLK_GAT,
5191         ATC_MISC_CG,
5192         MC_CITF_MISC_WR_CG,
5193         MC_CITF_MISC_RD_CG,
5194         MC_CITF_MISC_VM_CG,
5195         VM_L2_CG,
5196 };
5197
5198 static void si_enable_mc_ls(struct radeon_device *rdev,
5199                             bool enable)
5200 {
5201         int i;
5202         u32 orig, data;
5203
5204         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5205                 orig = data = RREG32(mc_cg_registers[i]);
5206                 if (enable)
5207                         data |= MC_LS_ENABLE;
5208                 else
5209                         data &= ~MC_LS_ENABLE;
5210                 if (data != orig)
5211                         WREG32(mc_cg_registers[i], data);
5212         }
5213 }
5214
5215
5216 static void si_init_cg(struct radeon_device *rdev)
5217 {
5218         si_enable_mgcg(rdev, true);
5219         si_enable_cgcg(rdev, false);
5220         /* disable MC LS on Tahiti */
5221         if (rdev->family == CHIP_TAHITI)
5222                 si_enable_mc_ls(rdev, false);
5223         if (rdev->has_uvd) {
5224                 si_enable_uvd_mgcg(rdev, true);
5225                 si_init_uvd_internal_cg(rdev);
5226         }
5227 }
5228
5229 static void si_fini_cg(struct radeon_device *rdev)
5230 {
5231         if (rdev->has_uvd)
5232                 si_enable_uvd_mgcg(rdev, false);
5233         si_enable_cgcg(rdev, false);
5234         si_enable_mgcg(rdev, false);
5235 }
5236
5237 static void si_init_pg(struct radeon_device *rdev)
5238 {
5239         bool has_pg = false;
5240 #if 0
5241         /* only cape verde supports PG */
5242         if (rdev->family == CHIP_VERDE)
5243                 has_pg = true;
5244 #endif
5245         if (has_pg) {
5246                 si_init_ao_cu_mask(rdev);
5247                 si_init_dma_pg(rdev);
5248                 si_enable_dma_pg(rdev, true);
5249                 si_init_gfx_cgpg(rdev);
5250                 si_enable_gfx_cgpg(rdev, true);
5251         } else {
5252                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5253                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5254         }
5255 }
5256
5257 static void si_fini_pg(struct radeon_device *rdev)
5258 {
5259         bool has_pg = false;
5260
5261         /* only cape verde supports PG */
5262         if (rdev->family == CHIP_VERDE)
5263                 has_pg = true;
5264
5265         if (has_pg) {
5266                 si_enable_dma_pg(rdev, false);
5267                 si_enable_gfx_cgpg(rdev, false);
5268         }
5269 }
5270
5271 /*
5272  * RLC
5273  */
5274 void si_rlc_fini(struct radeon_device *rdev)
5275 {
5276         int r;
5277
5278         /* save restore block */
5279         if (rdev->rlc.save_restore_obj) {
5280                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5281                 if (unlikely(r != 0))
5282                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
5283                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
5284                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5285
5286                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
5287                 rdev->rlc.save_restore_obj = NULL;
5288         }
5289
5290         /* clear state block */
5291         if (rdev->rlc.clear_state_obj) {
5292                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5293                 if (unlikely(r != 0))
5294                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
5295                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
5296                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5297
5298                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
5299                 rdev->rlc.clear_state_obj = NULL;
5300         }
5301 }
5302
5303 #define RLC_CLEAR_STATE_END_MARKER          0x00000001
5304
5305 int si_rlc_init(struct radeon_device *rdev)
5306 {
5307         volatile u32 *dst_ptr;
5308         u32 dws, data, i, j, k, reg_num;
5309         u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
5310         u64 reg_list_mc_addr;
5311         const struct cs_section_def *cs_data = si_cs_data;
5312         int r;
5313
5314         /* save restore block */
5315         if (rdev->rlc.save_restore_obj == NULL) {
5316                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
5317                                      RADEON_GEM_DOMAIN_VRAM, NULL,
5318                                      &rdev->rlc.save_restore_obj);
5319                 if (r) {
5320                         dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
5321                         return r;
5322                 }
5323         }
5324
5325         r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5326         if (unlikely(r != 0)) {
5327                 si_rlc_fini(rdev);
5328                 return r;
5329         }
5330         r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
5331                           &rdev->rlc.save_restore_gpu_addr);
5332         if (r) {
5333                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5334                 dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
5335                 si_rlc_fini(rdev);
5336                 return r;
5337         }
5338
5339         if (rdev->family == CHIP_VERDE) {
5340                 r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
5341                 if (r) {
5342                         dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
5343                         si_rlc_fini(rdev);
5344                 return r;
5345                 }
5346                 /* write the sr buffer */
5347                 dst_ptr = rdev->rlc.sr_ptr;
5348                 for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
5349                         dst_ptr[i] = verde_rlc_save_restore_register_list[i];
5350                 }
5351                 radeon_bo_kunmap(rdev->rlc.save_restore_obj);
5352         }
5353         radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5354
5355         /* clear state block */
5356         reg_list_num = 0;
5357         dws = 0;
5358         for (i = 0; cs_data[i].section != NULL; i++) {
5359                 for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5360                         reg_list_num++;
5361                         dws += cs_data[i].section[j].reg_count;
5362                 }
5363         }
5364         reg_list_blk_index = (3 * reg_list_num + 2);
5365         dws += reg_list_blk_index;
5366
5367         if (rdev->rlc.clear_state_obj == NULL) {
5368                 r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
5369                                      RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
5370                 if (r) {
5371                         dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
5372                         si_rlc_fini(rdev);
5373                         return r;
5374                 }
5375         }
5376         r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5377         if (unlikely(r != 0)) {
5378                 si_rlc_fini(rdev);
5379                 return r;
5380         }
5381         r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
5382                           &rdev->rlc.clear_state_gpu_addr);
5383         if (r) {
5384
5385                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5386                 dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
5387                 si_rlc_fini(rdev);
5388                 return r;
5389         }
5390         r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
5391         if (r) {
5392                 dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
5393                 si_rlc_fini(rdev);
5394                 return r;
5395         }
5396         /* set up the cs buffer */
5397         dst_ptr = rdev->rlc.cs_ptr;
5398         reg_list_hdr_blk_index = 0;
5399         reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
5400         data = upper_32_bits(reg_list_mc_addr);
5401         dst_ptr[reg_list_hdr_blk_index] = data;
5402         reg_list_hdr_blk_index++;
5403         for (i = 0; cs_data[i].section != NULL; i++) {
5404                 for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5405                         reg_num = cs_data[i].section[j].reg_count;
5406                         data = reg_list_mc_addr & 0xffffffff;
5407                         dst_ptr[reg_list_hdr_blk_index] = data;
5408                         reg_list_hdr_blk_index++;
5409
5410                         data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
5411                         dst_ptr[reg_list_hdr_blk_index] = data;
5412                         reg_list_hdr_blk_index++;
5413
5414                         data = 0x08000000 | (reg_num * 4);
5415                         dst_ptr[reg_list_hdr_blk_index] = data;
5416                         reg_list_hdr_blk_index++;
5417
5418                         for (k = 0; k < reg_num; k++) {
5419                                 data = cs_data[i].section[j].extent[k];
5420                                 dst_ptr[reg_list_blk_index + k] = data;
5421                         }
5422                         reg_list_mc_addr += reg_num * 4;
5423                         reg_list_blk_index += reg_num;
5424                 }
5425         }
5426         dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
5427
5428         radeon_bo_kunmap(rdev->rlc.clear_state_obj);
5429         radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5430
5431         return 0;
5432 }
5433
5434 static void si_rlc_reset(struct radeon_device *rdev)
5435 {
5436         u32 tmp = RREG32(GRBM_SOFT_RESET);
5437
5438         tmp |= SOFT_RESET_RLC;
5439         WREG32(GRBM_SOFT_RESET, tmp);
5440         udelay(50);
5441         tmp &= ~SOFT_RESET_RLC;
5442         WREG32(GRBM_SOFT_RESET, tmp);
5443         udelay(50);
5444 }
5445
5446 static void si_rlc_stop(struct radeon_device *rdev)
5447 {
5448         WREG32(RLC_CNTL, 0);
5449
5450         si_enable_gui_idle_interrupt(rdev, false);
5451
5452         si_wait_for_rlc_serdes(rdev);
5453 }
5454
5455 static void si_rlc_start(struct radeon_device *rdev)
5456 {
5457         WREG32(RLC_CNTL, RLC_ENABLE);
5458
5459         si_enable_gui_idle_interrupt(rdev, true);
5460
5461         udelay(50);
5462 }
5463
5464 static bool si_lbpw_supported(struct radeon_device *rdev)
5465 {
5466         u32 tmp;
5467
5468         /* Enable LBPW only for DDR3 */
5469         tmp = RREG32(MC_SEQ_MISC0);
5470         if ((tmp & 0xF0000000) == 0xB0000000)
5471                 return true;
5472         return false;
5473 }
5474
5475 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5476 {
5477         u32 tmp;
5478
5479         tmp = RREG32(RLC_LB_CNTL);
5480         if (enable)
5481                 tmp |= LOAD_BALANCE_ENABLE;
5482         else
5483                 tmp &= ~LOAD_BALANCE_ENABLE;
5484         WREG32(RLC_LB_CNTL, tmp);
5485
5486         if (!enable) {
5487                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5488                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5489         }
5490 }
5491
5492 static int si_rlc_resume(struct radeon_device *rdev)
5493 {
5494         u32 i;
5495         const __be32 *fw_data;
5496
5497         if (!rdev->rlc_fw)
5498                 return -EINVAL;
5499
5500         si_rlc_stop(rdev);
5501
5502         si_rlc_reset(rdev);
5503
5504         si_init_pg(rdev);
5505
5506         si_init_cg(rdev);
5507
5508         WREG32(RLC_RL_BASE, 0);
5509         WREG32(RLC_RL_SIZE, 0);
5510         WREG32(RLC_LB_CNTL, 0);
5511         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5512         WREG32(RLC_LB_CNTR_INIT, 0);
5513         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5514
5515         WREG32(RLC_MC_CNTL, 0);
5516         WREG32(RLC_UCODE_CNTL, 0);
5517
5518         fw_data = (const __be32 *)rdev->rlc_fw->data;
5519         for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5520                 WREG32(RLC_UCODE_ADDR, i);
5521                 WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5522         }
5523         WREG32(RLC_UCODE_ADDR, 0);
5524
5525         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5526
5527         si_rlc_start(rdev);
5528
5529         return 0;
5530 }
5531
5532 static void si_enable_interrupts(struct radeon_device *rdev)
5533 {
5534         u32 ih_cntl = RREG32(IH_CNTL);
5535         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5536
5537         ih_cntl |= ENABLE_INTR;
5538         ih_rb_cntl |= IH_RB_ENABLE;
5539         WREG32(IH_CNTL, ih_cntl);
5540         WREG32(IH_RB_CNTL, ih_rb_cntl);
5541         rdev->ih.enabled = true;
5542 }
5543
5544 static void si_disable_interrupts(struct radeon_device *rdev)
5545 {
5546         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5547         u32 ih_cntl = RREG32(IH_CNTL);
5548
5549         ih_rb_cntl &= ~IH_RB_ENABLE;
5550         ih_cntl &= ~ENABLE_INTR;
5551         WREG32(IH_RB_CNTL, ih_rb_cntl);
5552         WREG32(IH_CNTL, ih_cntl);
5553         /* set rptr, wptr to 0 */
5554         WREG32(IH_RB_RPTR, 0);
5555         WREG32(IH_RB_WPTR, 0);
5556         rdev->ih.enabled = false;
5557         rdev->ih.rptr = 0;
5558 }
5559
5560 static void si_disable_interrupt_state(struct radeon_device *rdev)
5561 {
5562         u32 tmp;
5563
5564         WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5565         WREG32(CP_INT_CNTL_RING1, 0);
5566         WREG32(CP_INT_CNTL_RING2, 0);
5567         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5568         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5569         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5570         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5571         WREG32(GRBM_INT_CNTL, 0);
5572         if (rdev->num_crtc >= 2) {
5573                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5574                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5575         }
5576         if (rdev->num_crtc >= 4) {
5577                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5578                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5579         }
5580         if (rdev->num_crtc >= 6) {
5581                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5582                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5583         }
5584
5585         if (rdev->num_crtc >= 2) {
5586                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5587                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5588         }
5589         if (rdev->num_crtc >= 4) {
5590                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5591                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5592         }
5593         if (rdev->num_crtc >= 6) {
5594                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5595                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5596         }
5597
5598         if (!ASIC_IS_NODCE(rdev)) {
5599                 WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
5600
5601                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5602                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5603                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5604                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5605                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5606                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5607                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5608                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5609                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5610                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5611                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5612                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5613         }
5614 }
5615
5616 static int si_irq_init(struct radeon_device *rdev)
5617 {
5618         int ret = 0;
5619         int rb_bufsz;
5620         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5621
5622         /* allocate ring */
5623         ret = r600_ih_ring_alloc(rdev);
5624         if (ret)
5625                 return ret;
5626
5627         /* disable irqs */
5628         si_disable_interrupts(rdev);
5629
5630         /* init rlc */
5631         ret = si_rlc_resume(rdev);
5632         if (ret) {
5633                 r600_ih_ring_fini(rdev);
5634                 return ret;
5635         }
5636
5637         /* setup interrupt control */
5638         /* set dummy read address to ring address */
5639         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5640         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5641         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5642          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5643          */
5644         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5645         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5646         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5647         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5648
5649         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5650         rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5651
5652         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5653                       IH_WPTR_OVERFLOW_CLEAR |
5654                       (rb_bufsz << 1));
5655
5656         if (rdev->wb.enabled)
5657                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5658
5659         /* set the writeback address whether it's enabled or not */
5660         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5661         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5662
5663         WREG32(IH_RB_CNTL, ih_rb_cntl);
5664
5665         /* set rptr, wptr to 0 */
5666         WREG32(IH_RB_RPTR, 0);
5667         WREG32(IH_RB_WPTR, 0);
5668
5669         /* Default settings for IH_CNTL (disabled at first) */
5670         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5671         /* RPTR_REARM only works if msi's are enabled */
5672         if (rdev->msi_enabled)
5673                 ih_cntl |= RPTR_REARM;
5674         WREG32(IH_CNTL, ih_cntl);
5675
5676         /* force the active interrupt state to all disabled */
5677         si_disable_interrupt_state(rdev);
5678
5679         pci_set_master(rdev->pdev);
5680
5681         /* enable irqs */
5682         si_enable_interrupts(rdev);
5683
5684         return ret;
5685 }
5686
5687 int si_irq_set(struct radeon_device *rdev)
5688 {
5689         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
5690         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5691         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5692         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5693         u32 grbm_int_cntl = 0;
5694         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5695         u32 dma_cntl, dma_cntl1;
5696         u32 thermal_int = 0;
5697
5698         if (!rdev->irq.installed) {
5699                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5700                 return -EINVAL;
5701         }
5702         /* don't enable anything if the ih is disabled */
5703         if (!rdev->ih.enabled) {
5704                 si_disable_interrupts(rdev);
5705                 /* force the active interrupt state to all disabled */
5706                 si_disable_interrupt_state(rdev);
5707                 return 0;
5708         }
5709
5710         if (!ASIC_IS_NODCE(rdev)) {
5711                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5712                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5713                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5714                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5715                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5716                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5717         }
5718
5719         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5720         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5721
5722         thermal_int = RREG32(CG_THERMAL_INT) &
5723                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5724
5725         /* enable CP interrupts on all rings */
5726         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5727                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5728                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5729         }
5730         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5731                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5732                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5733         }
5734         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5735                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5736                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5737         }
5738         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5739                 DRM_DEBUG("si_irq_set: sw int dma\n");
5740                 dma_cntl |= TRAP_ENABLE;
5741         }
5742
5743         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5744                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5745                 dma_cntl1 |= TRAP_ENABLE;
5746         }
5747         if (rdev->irq.crtc_vblank_int[0] ||
5748             atomic_read(&rdev->irq.pflip[0])) {
5749                 DRM_DEBUG("si_irq_set: vblank 0\n");
5750                 crtc1 |= VBLANK_INT_MASK;
5751         }
5752         if (rdev->irq.crtc_vblank_int[1] ||
5753             atomic_read(&rdev->irq.pflip[1])) {
5754                 DRM_DEBUG("si_irq_set: vblank 1\n");
5755                 crtc2 |= VBLANK_INT_MASK;
5756         }
5757         if (rdev->irq.crtc_vblank_int[2] ||
5758             atomic_read(&rdev->irq.pflip[2])) {
5759                 DRM_DEBUG("si_irq_set: vblank 2\n");
5760                 crtc3 |= VBLANK_INT_MASK;
5761         }
5762         if (rdev->irq.crtc_vblank_int[3] ||
5763             atomic_read(&rdev->irq.pflip[3])) {
5764                 DRM_DEBUG("si_irq_set: vblank 3\n");
5765                 crtc4 |= VBLANK_INT_MASK;
5766         }
5767         if (rdev->irq.crtc_vblank_int[4] ||
5768             atomic_read(&rdev->irq.pflip[4])) {
5769                 DRM_DEBUG("si_irq_set: vblank 4\n");
5770                 crtc5 |= VBLANK_INT_MASK;
5771         }
5772         if (rdev->irq.crtc_vblank_int[5] ||
5773             atomic_read(&rdev->irq.pflip[5])) {
5774                 DRM_DEBUG("si_irq_set: vblank 5\n");
5775                 crtc6 |= VBLANK_INT_MASK;
5776         }
5777         if (rdev->irq.hpd[0]) {
5778                 DRM_DEBUG("si_irq_set: hpd 1\n");
5779                 hpd1 |= DC_HPDx_INT_EN;
5780         }
5781         if (rdev->irq.hpd[1]) {
5782                 DRM_DEBUG("si_irq_set: hpd 2\n");
5783                 hpd2 |= DC_HPDx_INT_EN;
5784         }
5785         if (rdev->irq.hpd[2]) {
5786                 DRM_DEBUG("si_irq_set: hpd 3\n");
5787                 hpd3 |= DC_HPDx_INT_EN;
5788         }
5789         if (rdev->irq.hpd[3]) {
5790                 DRM_DEBUG("si_irq_set: hpd 4\n");
5791                 hpd4 |= DC_HPDx_INT_EN;
5792         }
5793         if (rdev->irq.hpd[4]) {
5794                 DRM_DEBUG("si_irq_set: hpd 5\n");
5795                 hpd5 |= DC_HPDx_INT_EN;
5796         }
5797         if (rdev->irq.hpd[5]) {
5798                 DRM_DEBUG("si_irq_set: hpd 6\n");
5799                 hpd6 |= DC_HPDx_INT_EN;
5800         }
5801
5802         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5803         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5804         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5805
5806         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5807         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5808
5809         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5810
5811         if (rdev->irq.dpm_thermal) {
5812                 DRM_DEBUG("dpm thermal\n");
5813                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5814         }
5815
5816         if (rdev->num_crtc >= 2) {
5817                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5818                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5819         }
5820         if (rdev->num_crtc >= 4) {
5821                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5822                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5823         }
5824         if (rdev->num_crtc >= 6) {
5825                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5826                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5827         }
5828
5829         if (rdev->num_crtc >= 2) {
5830                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5831                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5832         }
5833         if (rdev->num_crtc >= 4) {
5834                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5835                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5836         }
5837         if (rdev->num_crtc >= 6) {
5838                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5839                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5840         }
5841
5842         if (!ASIC_IS_NODCE(rdev)) {
5843                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5844                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5845                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5846                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5847                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5848                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5849         }
5850
5851         WREG32(CG_THERMAL_INT, thermal_int);
5852
5853         return 0;
5854 }
5855
5856 static inline void si_irq_ack(struct radeon_device *rdev)
5857 {
5858         u32 tmp;
5859
5860         if (ASIC_IS_NODCE(rdev))
5861                 return;
5862
5863         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5864         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5865         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5866         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5867         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5868         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5869         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5870         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5871         if (rdev->num_crtc >= 4) {
5872                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5873                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5874         }
5875         if (rdev->num_crtc >= 6) {
5876                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5877                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5878         }
5879
5880         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5881                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5882         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5883                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5884         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5885                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5886         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5887                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5888         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5889                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5890         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5891                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5892
5893         if (rdev->num_crtc >= 4) {
5894                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5895                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5896                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5897                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5898                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5899                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5900                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5901                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5902                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5903                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5904                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5905                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5906         }
5907
5908         if (rdev->num_crtc >= 6) {
5909                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5910                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5911                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5912                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5913                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5914                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5915                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5916                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5917                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5918                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5919                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5920                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5921         }
5922
5923         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5924                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5925                 tmp |= DC_HPDx_INT_ACK;
5926                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5927         }
5928         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5929                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5930                 tmp |= DC_HPDx_INT_ACK;
5931                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5932         }
5933         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5934                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5935                 tmp |= DC_HPDx_INT_ACK;
5936                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5937         }
5938         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5939                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5940                 tmp |= DC_HPDx_INT_ACK;
5941                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5942         }
5943         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5944                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5945                 tmp |= DC_HPDx_INT_ACK;
5946                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5947         }
5948         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5949                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5950                 tmp |= DC_HPDx_INT_ACK;
5951                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5952         }
5953 }
5954
/**
 * si_irq_disable - turn interrupt delivery off and drain what is pending
 *
 * @rdev: radeon_device pointer
 *
 * Calls si_disable_interrupts(), waits one millisecond, acknowledges the
 * display interrupts via si_irq_ack() and finally calls
 * si_disable_interrupt_state().
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);

	/* give in-flight interrupts time to land before acknowledging them */
	mdelay(1);
	si_irq_ack(rdev);

	si_disable_interrupt_state(rdev);
}
5963
/**
 * si_irq_suspend - quiesce interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disables interrupts with si_irq_disable() and then stops the RLC with
 * si_rlc_stop().
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5969
/**
 * si_irq_fini - tear down interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Runs the suspend sequence (si_irq_suspend()) and then releases the IH
 * ring through r600_ih_ring_fini().
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5975
5976 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5977 {
5978         u32 wptr, tmp;
5979
5980         if (rdev->wb.enabled)
5981                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5982         else
5983                 wptr = RREG32(IH_RB_WPTR);
5984
5985         if (wptr & RB_OVERFLOW) {
5986                 /* When a ring buffer overflow happen start parsing interrupt
5987                  * from the last not overwritten vector (wptr + 16). Hopefully
5988                  * this should allow us to catchup.
5989                  */
5990                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5991                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5992                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5993                 tmp = RREG32(IH_RB_CNTL);
5994                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5995                 WREG32(IH_RB_CNTL, tmp);
5996         }
5997         return (wptr & rdev->ih.ptr_mask);
5998 }
5999
6000 /*        SI IV Ring
6001  * Each IV ring entry is 128 bits:
6002  * [7:0]    - interrupt source id
6003  * [31:8]   - reserved
6004  * [59:32]  - interrupt source data
6005  * [63:60]  - reserved
6006  * [71:64]  - RINGID
6007  * [79:72]  - VMID
6008  * [127:80] - reserved
6009  */
6010 int si_irq_process(struct radeon_device *rdev)
6011 {
6012         u32 wptr;
6013         u32 rptr;
6014         u32 src_id, src_data, ring_id;
6015         u32 ring_index;
6016         bool queue_hotplug = false;
6017         bool queue_thermal = false;
6018         u32 status, addr;
6019
6020         if (!rdev->ih.enabled || rdev->shutdown)
6021                 return IRQ_NONE;
6022
6023         wptr = si_get_ih_wptr(rdev);
6024
6025 restart_ih:
6026         /* is somebody else already processing irqs? */
6027         if (atomic_xchg(&rdev->ih.lock, 1))
6028                 return IRQ_NONE;
6029
6030         rptr = rdev->ih.rptr;
6031         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6032
6033         /* Order reading of wptr vs. reading of IH ring data */
6034         rmb();
6035
6036         /* display interrupts */
6037         si_irq_ack(rdev);
6038
6039         while (rptr != wptr) {
6040                 /* wptr/rptr are in bytes! */
6041                 ring_index = rptr / 4;
6042                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6043                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6044                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6045
6046                 switch (src_id) {
6047                 case 1: /* D1 vblank/vline */
6048                         switch (src_data) {
6049                         case 0: /* D1 vblank */
6050                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6051                                         if (rdev->irq.crtc_vblank_int[0]) {
6052                                                 drm_handle_vblank(rdev->ddev, 0);
6053                                                 rdev->pm.vblank_sync = true;
6054                                                 wake_up(&rdev->irq.vblank_queue);
6055                                         }
6056                                         if (atomic_read(&rdev->irq.pflip[0]))
6057                                                 radeon_crtc_handle_flip(rdev, 0);
6058                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6059                                         DRM_DEBUG("IH: D1 vblank\n");
6060                                 }
6061                                 break;
6062                         case 1: /* D1 vline */
6063                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6064                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6065                                         DRM_DEBUG("IH: D1 vline\n");
6066                                 }
6067                                 break;
6068                         default:
6069                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6070                                 break;
6071                         }
6072                         break;
6073                 case 2: /* D2 vblank/vline */
6074                         switch (src_data) {
6075                         case 0: /* D2 vblank */
6076                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6077                                         if (rdev->irq.crtc_vblank_int[1]) {
6078                                                 drm_handle_vblank(rdev->ddev, 1);
6079                                                 rdev->pm.vblank_sync = true;
6080                                                 wake_up(&rdev->irq.vblank_queue);
6081                                         }
6082                                         if (atomic_read(&rdev->irq.pflip[1]))
6083                                                 radeon_crtc_handle_flip(rdev, 1);
6084                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6085                                         DRM_DEBUG("IH: D2 vblank\n");
6086                                 }
6087                                 break;
6088                         case 1: /* D2 vline */
6089                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6090                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6091                                         DRM_DEBUG("IH: D2 vline\n");
6092                                 }
6093                                 break;
6094                         default:
6095                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6096                                 break;
6097                         }
6098                         break;
6099                 case 3: /* D3 vblank/vline */
6100                         switch (src_data) {
6101                         case 0: /* D3 vblank */
6102                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6103                                         if (rdev->irq.crtc_vblank_int[2]) {
6104                                                 drm_handle_vblank(rdev->ddev, 2);
6105                                                 rdev->pm.vblank_sync = true;
6106                                                 wake_up(&rdev->irq.vblank_queue);
6107                                         }
6108                                         if (atomic_read(&rdev->irq.pflip[2]))
6109                                                 radeon_crtc_handle_flip(rdev, 2);
6110                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6111                                         DRM_DEBUG("IH: D3 vblank\n");
6112                                 }
6113                                 break;
6114                         case 1: /* D3 vline */
6115                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6116                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6117                                         DRM_DEBUG("IH: D3 vline\n");
6118                                 }
6119                                 break;
6120                         default:
6121                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6122                                 break;
6123                         }
6124                         break;
6125                 case 4: /* D4 vblank/vline */
6126                         switch (src_data) {
6127                         case 0: /* D4 vblank */
6128                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6129                                         if (rdev->irq.crtc_vblank_int[3]) {
6130                                                 drm_handle_vblank(rdev->ddev, 3);
6131                                                 rdev->pm.vblank_sync = true;
6132                                                 wake_up(&rdev->irq.vblank_queue);
6133                                         }
6134                                         if (atomic_read(&rdev->irq.pflip[3]))
6135                                                 radeon_crtc_handle_flip(rdev, 3);
6136                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6137                                         DRM_DEBUG("IH: D4 vblank\n");
6138                                 }
6139                                 break;
6140                         case 1: /* D4 vline */
6141                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6142                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6143                                         DRM_DEBUG("IH: D4 vline\n");
6144                                 }
6145                                 break;
6146                         default:
6147                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6148                                 break;
6149                         }
6150                         break;
6151                 case 5: /* D5 vblank/vline */
6152                         switch (src_data) {
6153                         case 0: /* D5 vblank */
6154                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6155                                         if (rdev->irq.crtc_vblank_int[4]) {
6156                                                 drm_handle_vblank(rdev->ddev, 4);
6157                                                 rdev->pm.vblank_sync = true;
6158                                                 wake_up(&rdev->irq.vblank_queue);
6159                                         }
6160                                         if (atomic_read(&rdev->irq.pflip[4]))
6161                                                 radeon_crtc_handle_flip(rdev, 4);
6162                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6163                                         DRM_DEBUG("IH: D5 vblank\n");
6164                                 }
6165                                 break;
6166                         case 1: /* D5 vline */
6167                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6168                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6169                                         DRM_DEBUG("IH: D5 vline\n");
6170                                 }
6171                                 break;
6172                         default:
6173                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6174                                 break;
6175                         }
6176                         break;
6177                 case 6: /* D6 vblank/vline */
6178                         switch (src_data) {
6179                         case 0: /* D6 vblank */
6180                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6181                                         if (rdev->irq.crtc_vblank_int[5]) {
6182                                                 drm_handle_vblank(rdev->ddev, 5);
6183                                                 rdev->pm.vblank_sync = true;
6184                                                 wake_up(&rdev->irq.vblank_queue);
6185                                         }
6186                                         if (atomic_read(&rdev->irq.pflip[5]))
6187                                                 radeon_crtc_handle_flip(rdev, 5);
6188                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6189                                         DRM_DEBUG("IH: D6 vblank\n");
6190                                 }
6191                                 break;
6192                         case 1: /* D6 vline */
6193                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6194                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6195                                         DRM_DEBUG("IH: D6 vline\n");
6196                                 }
6197                                 break;
6198                         default:
6199                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6200                                 break;
6201                         }
6202                         break;
6203                 case 42: /* HPD hotplug */
6204                         switch (src_data) {
6205                         case 0:
6206                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6207                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6208                                         queue_hotplug = true;
6209                                         DRM_DEBUG("IH: HPD1\n");
6210                                 }
6211                                 break;
6212                         case 1:
6213                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6214                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6215                                         queue_hotplug = true;
6216                                         DRM_DEBUG("IH: HPD2\n");
6217                                 }
6218                                 break;
6219                         case 2:
6220                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6221                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6222                                         queue_hotplug = true;
6223                                         DRM_DEBUG("IH: HPD3\n");
6224                                 }
6225                                 break;
6226                         case 3:
6227                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6228                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6229                                         queue_hotplug = true;
6230                                         DRM_DEBUG("IH: HPD4\n");
6231                                 }
6232                                 break;
6233                         case 4:
6234                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6235                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6236                                         queue_hotplug = true;
6237                                         DRM_DEBUG("IH: HPD5\n");
6238                                 }
6239                                 break;
6240                         case 5:
6241                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6242                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6243                                         queue_hotplug = true;
6244                                         DRM_DEBUG("IH: HPD6\n");
6245                                 }
6246                                 break;
6247                         default:
6248                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6249                                 break;
6250                         }
6251                         break;
6252                 case 146:
6253                 case 147:
6254                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6255                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6256                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6257                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6258                                 addr);
6259                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6260                                 status);
6261                         si_vm_decode_fault(rdev, status, addr);
6262                         /* reset addr and status */
6263                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6264                         break;
6265                 case 176: /* RINGID0 CP_INT */
6266                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6267                         break;
6268                 case 177: /* RINGID1 CP_INT */
6269                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6270                         break;
6271                 case 178: /* RINGID2 CP_INT */
6272                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6273                         break;
6274                 case 181: /* CP EOP event */
6275                         DRM_DEBUG("IH: CP EOP\n");
6276                         switch (ring_id) {
6277                         case 0:
6278                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6279                                 break;
6280                         case 1:
6281                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6282                                 break;
6283                         case 2:
6284                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6285                                 break;
6286                         }
6287                         break;
6288                 case 224: /* DMA trap event */
6289                         DRM_DEBUG("IH: DMA trap\n");
6290                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6291                         break;
6292                 case 230: /* thermal low to high */
6293                         DRM_DEBUG("IH: thermal low to high\n");
6294                         rdev->pm.dpm.thermal.high_to_low = false;
6295                         queue_thermal = true;
6296                         break;
6297                 case 231: /* thermal high to low */
6298                         DRM_DEBUG("IH: thermal high to low\n");
6299                         rdev->pm.dpm.thermal.high_to_low = true;
6300                         queue_thermal = true;
6301                         break;
6302                 case 233: /* GUI IDLE */
6303                         DRM_DEBUG("IH: GUI idle\n");
6304                         break;
6305                 case 244: /* DMA trap event */
6306                         DRM_DEBUG("IH: DMA1 trap\n");
6307                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6308                         break;
6309                 default:
6310                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6311                         break;
6312                 }
6313
6314                 /* wptr/rptr are in bytes! */
6315                 rptr += 16;
6316                 rptr &= rdev->ih.ptr_mask;
6317         }
6318         if (queue_hotplug)
6319                 schedule_work(&rdev->hotplug_work);
6320         if (queue_thermal && rdev->pm.dpm_enabled)
6321                 schedule_work(&rdev->pm.dpm.thermal.work);
6322         rdev->ih.rptr = rptr;
6323         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6324         atomic_set(&rdev->ih.lock, 0);
6325
6326         /* make sure wptr hasn't changed while processing */
6327         wptr = si_get_ih_wptr(rdev);
6328         if (wptr != rptr)
6329                 goto restart_ih;
6330
6331         return IRQ_HANDLED;
6332 }
6333
6334 /**
6335  * si_copy_dma - copy pages using the DMA engine
6336  *
6337  * @rdev: radeon_device pointer
6338  * @src_offset: src GPU address
6339  * @dst_offset: dst GPU address
6340  * @num_gpu_pages: number of GPU pages to xfer
6341  * @fence: radeon fence object
6342  *
6343  * Copy GPU paging using the DMA engine (SI).
6344  * Used by the radeon ttm implementation to move pages if
6345  * registered as the asic copy callback.
      *
      * The transfer is split into DMA_PACKET_COPY packets of at most 0xFFFFF
      * bytes each.  If *@fence needs to be synchronised with the DMA ring a
      * semaphore sync is emitted in front of the copy, and @fence is handed
      * to radeon_fence_emit() once all copy packets have been written.
      *
      * Returns 0 on success or the error code of the helper that failed; on
      * every error path the semaphore is released and nothing is committed
      * to the ring.
6346  */
6347 int si_copy_dma(struct radeon_device *rdev,
6348                 uint64_t src_offset, uint64_t dst_offset,
6349                 unsigned num_gpu_pages,
6350                 struct radeon_fence **fence)
6351 {
6352         struct radeon_semaphore *sem = NULL;
6353         int ring_index = rdev->asic->copy.dma_ring_index;
6354         struct radeon_ring *ring = &rdev->ring[ring_index];
6355         u32 size_in_bytes, cur_size_in_bytes;
6356         int i, num_loops;
6357         int r = 0;
6358
6359         r = radeon_semaphore_create(rdev, &sem);
6360         if (r) {
6361                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6362                 return r;
6363         }
6364
6365         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6366         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
             /* 5 dwords per copy packet (written below) plus 11 reserved for sync and fence */
6367         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6368         if (r) {
6369                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6370                 radeon_semaphore_free(rdev, &sem, NULL);
6371                 return r;
6372         }
6373
6374         if (radeon_fence_need_sync(*fence, ring->idx)) {
6375                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6376                                             ring->idx);
6377                 radeon_fence_note_sync(*fence, ring->idx);
6378         } else {
                     /* no sync needed, the semaphore is not going to be used */
6379                 radeon_semaphore_free(rdev, &sem, NULL);
6380         }
6381
6382         for (i = 0; i < num_loops; i++) {
6383                 cur_size_in_bytes = size_in_bytes;
6384                 if (cur_size_in_bytes > 0xFFFFF)
6385                         cur_size_in_bytes = 0xFFFFF;
6386                 size_in_bytes -= cur_size_in_bytes;
6387                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6388                 radeon_ring_write(ring, dst_offset & 0xffffffff);
6389                 radeon_ring_write(ring, src_offset & 0xffffffff);
6390                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6391                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6392                 src_offset += cur_size_in_bytes;
6393                 dst_offset += cur_size_in_bytes;
6394         }
6395
6396         r = radeon_fence_emit(rdev, fence, ring->idx);
6397         if (r) {
6398                 radeon_ring_unlock_undo(rdev, ring);
                     /*
                      * Release the semaphore that was kept for the ring sync,
                      * otherwise it is leaked.  Calling this again after the
                      * no-sync branch already freed it mirrors what the
                      * success path below does.
                      */
                     radeon_semaphore_free(rdev, &sem, NULL);
6399                 return r;
6400         }
6401
6402         radeon_ring_unlock_commit(rdev, ring);
6403         radeon_semaphore_free(rdev, &sem, *fence);
6404
6405         return r;
6406 }
6407
6408 /*
6409  * startup/shutdown callbacks
6410  */
/**
 * si_startup - bring the asic into a working state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIE link speed and ASPM, loads the microcode if any of the
 * firmware images is still missing, sets up the MC, GART, RLC and writeback
 * buffers, starts the fence driver on every ring, initializes the interrupt
 * handling, the three CP rings, the two DMA rings and (optionally) the UVD
 * ring, and finally the IB pool and the VM manager.
 * Called from si_init() and si_resume().
 *
 * Returns 0 on success or the error code of the first mandatory step that
 * failed.  UVD failures are not treated as fatal.
 */
6411 static int si_startup(struct radeon_device *rdev)
6412 {
6413         struct radeon_ring *ring;
6414         int r;
6415
6416         /* enable pcie gen2/3 link */
6417         si_pcie_gen3_enable(rdev);
6418         /* enable aspm */
6419         si_program_aspm(rdev);
6420
             /* only (re)load the firmware images if one of them is missing */
6421         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6422             !rdev->rlc_fw || !rdev->mc_fw) {
6423                 r = si_init_microcode(rdev);
6424                 if (r) {
6425                         DRM_ERROR("Failed to load firmware!\n");
6426                         return r;
6427                 }
6428         }
6429
6430         r = si_mc_load_microcode(rdev);
6431         if (r) {
6432                 DRM_ERROR("Failed to load MC firmware!\n");
6433                 return r;
6434         }
6435
6436         r = r600_vram_scratch_init(rdev);
6437         if (r)
6438                 return r;
6439
6440         si_mc_program(rdev);
6441         r = si_pcie_gart_enable(rdev);
6442         if (r)
6443                 return r;
6444         si_gpu_init(rdev);
6445
6446         /* allocate rlc buffers */
6447         r = si_rlc_init(rdev);
6448         if (r) {
6449                 DRM_ERROR("Failed to init rlc BOs!\n");
6450                 return r;
6451         }
6452
6453         /* allocate wb buffer */
6454         r = radeon_wb_init(rdev);
6455         if (r)
6456                 return r;
6457
             /* start the fence driver on the three CP rings and the two DMA rings */
6458         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6459         if (r) {
6460                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6461                 return r;
6462         }
6463
6464         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6465         if (r) {
6466                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6467                 return r;
6468         }
6469
6470         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6471         if (r) {
6472                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6473                 return r;
6474         }
6475
6476         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6477         if (r) {
6478                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6479                 return r;
6480         }
6481
6482         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6483         if (r) {
6484                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6485                 return r;
6486         }
6487
             /*
              * UVD is optional: on any failure here the UVD ring size is
              * zeroed, which makes the UVD ring setup further down be skipped,
              * and startup carries on.
              */
6488         if (rdev->has_uvd) {
6489                 r = rv770_uvd_resume(rdev);
6490                 if (!r) {
6491                         r = radeon_fence_driver_start_ring(rdev,
6492                                                            R600_RING_TYPE_UVD_INDEX);
6493                         if (r)
6494                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6495                 }
6496                 if (r)
6497                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6498         }
6499
6500         /* Enable IRQ */
6501         if (!rdev->irq.installed) {
6502                 r = radeon_irq_kms_init(rdev);
6503                 if (r)
6504                         return r;
6505         }
6506
6507         r = si_irq_init(rdev);
6508         if (r) {
6509                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6510                 radeon_irq_kms_fini(rdev);
6511                 return r;
6512         }
6513         si_irq_set(rdev);
6514
             /* the three CP rings share the same pointer mask and NOP packet */
6515         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6516         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6517                              CP_RB0_RPTR, CP_RB0_WPTR,
6518                              0, 0xfffff, RADEON_CP_PACKET2);
6519         if (r)
6520                 return r;
6521
6522         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6523         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6524                              CP_RB1_RPTR, CP_RB1_WPTR,
6525                              0, 0xfffff, RADEON_CP_PACKET2);
6526         if (r)
6527                 return r;
6528
6529         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6530         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6531                              CP_RB2_RPTR, CP_RB2_WPTR,
6532                              0, 0xfffff, RADEON_CP_PACKET2);
6533         if (r)
6534                 return r;
6535
             /* the two DMA rings use per-engine register offsets and a DMA NOP packet */
6536         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6537         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6538                              DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
6539                              DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
6540                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6541         if (r)
6542                 return r;
6543
6544         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6545         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6546                              DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
6547                              DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
6548                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6549         if (r)
6550                 return r;
6551
6552         r = si_cp_load_microcode(rdev);
6553         if (r)
6554                 return r;
6555         r = si_cp_resume(rdev);
6556         if (r)
6557                 return r;
6558
6559         r = cayman_dma_resume(rdev);
6560         if (r)
6561                 return r;
6562
             /*
              * A failure to set up the UVD ring is only logged: r is
              * overwritten by radeon_ib_pool_init() right below.
              */
6563         if (rdev->has_uvd) {
6564                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6565                 if (ring->ring_size) {
6566                         r = radeon_ring_init(rdev, ring, ring->ring_size,
6567                                              R600_WB_UVD_RPTR_OFFSET,
6568                                              UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6569                                              0, 0xfffff, RADEON_CP_PACKET2);
6570                         if (!r)
6571                                 r = r600_uvd_init(rdev);
6572                         if (r)
6573                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6574                 }
6575         }
6576
6577         r = radeon_ib_pool_init(rdev);
6578         if (r) {
6579                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6580                 return r;
6581         }
6582
6583         r = radeon_vm_manager_init(rdev);
6584         if (r) {
6585                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6586                 return r;
6587         }
6588
6589         return 0;
6590 }
6591
/**
 * si_resume - resume the asic
 *
 * @rdev: radeon_device pointer
 *
 * Posts the card through atom_asic_init(), reapplies the golden registers
 * and runs si_startup() again.
 *
 * Returns 0 on success.  If si_startup() fails its error code is returned
 * and rdev->accel_working is cleared.
 */
6592 int si_resume(struct radeon_device *rdev)
6593 {
6594         int r;
6595
6596         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6597          * posting will perform necessary task to bring back GPU into good
6598          * shape.
6599          */
6600         /* post card */
6601         atom_asic_init(rdev->mode_info.atom_context);
6602
6603         /* init golden registers */
6604         si_init_golden_registers(rdev);
6605
             /* set before si_startup() runs, cleared again if it fails */
6606         rdev->accel_working = true;
6607         r = si_startup(rdev);
6608         if (r) {
6609                 DRM_ERROR("si startup failed on resume\n");
6610                 rdev->accel_working = false;
6611                 return r;
6612         }
6613
6614         return r;
6615
6616 }
6617
/**
 * si_suspend - quiesce the asic for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Shuts down, in this order: the VM manager, the CP, the DMA engines, UVD
 * (only when present), interrupts, writeback and the PCIE GART.
 *
 * Always returns 0.
 */
6618 int si_suspend(struct radeon_device *rdev)
6619 {
6620         radeon_vm_manager_fini(rdev);
6621         si_cp_enable(rdev, false);
6622         cayman_dma_stop(rdev);
6623         if (rdev->has_uvd) {
6624                 r600_uvd_stop(rdev);
6625                 radeon_uvd_suspend(rdev);
6626         }
6627         si_irq_suspend(rdev);
6628         radeon_wb_disable(rdev);
6629         si_pcie_gart_disable(rdev);
6630         return 0;
6631 }
6632
6633 /* Plan is to move initialization in that function and use
6634  * helper function so that radeon_device_init pretty much
6635  * do nothing more than calling asic specific function. This
6636  * should also allow to remove a bunch of callback function
6637  * like vram_info.
6638  */
/**
 * si_init - one time asic initialization
 *
 * @rdev: radeon_device pointer
 *
 * Reads and validates the BIOS, posts the card if necessary, initializes
 * the golden/scratch/surface registers, clocks, fence driver, memory
 * controller and memory manager, picks the size of every ring and of the IH
 * ring, sets up the GART and finally calls si_startup().
 *
 * A failing si_startup() is not fatal: the acceleration related state is
 * torn down again and rdev->accel_working is cleared.
 *
 * Returns 0 on success (possibly without acceleration), -EINVAL when the
 * BIOS or the MC microcode is missing or unsuitable, or the error code of
 * the init helper that failed.
 */
6639 int si_init(struct radeon_device *rdev)
6640 {
6641         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6642         int r;
6643
6644         /* Read BIOS */
6645         if (!radeon_get_bios(rdev)) {
6646                 if (ASIC_IS_AVIVO(rdev))
6647                         return -EINVAL;
6648         }
6649         /* Must be an ATOMBIOS */
6650         if (!rdev->is_atom_bios) {
                     /* NOTE(review): message names cayman although this is the SI init path */
6651                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6652                 return -EINVAL;
6653         }
6654         r = radeon_atombios_init(rdev);
6655         if (r)
6656                 return r;
6657
6658         /* Post card if necessary */
6659         if (!radeon_card_posted(rdev)) {
6660                 if (!rdev->bios) {
6661                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6662                         return -EINVAL;
6663                 }
6664                 DRM_INFO("GPU not posted. posting now...\n");
6665                 atom_asic_init(rdev->mode_info.atom_context);
6666         }
6667         /* init golden registers */
6668         si_init_golden_registers(rdev);
6669         /* Initialize scratch registers */
6670         si_scratch_init(rdev);
6671         /* Initialize surface registers */
6672         radeon_surface_init(rdev);
6673         /* Initialize clocks */
6674         radeon_get_clock_info(rdev->ddev);
6675
6676         /* Fence driver */
6677         r = radeon_fence_driver_init(rdev);
6678         if (r)
6679                 return r;
6680
6681         /* initialize memory controller */
6682         r = si_mc_init(rdev);
6683         if (r)
6684                 return r;
6685         /* Memory manager */
6686         r = radeon_bo_init(rdev);
6687         if (r)
6688                 return r;
6689
             /* 1 MiB for each of the three CP rings, 64 KiB for each DMA ring */
6690         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6691         ring->ring_obj = NULL;
6692         r600_ring_init(rdev, ring, 1024 * 1024);
6693
6694         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6695         ring->ring_obj = NULL;
6696         r600_ring_init(rdev, ring, 1024 * 1024);
6697
6698         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6699         ring->ring_obj = NULL;
6700         r600_ring_init(rdev, ring, 1024 * 1024);
6701
6702         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6703         ring->ring_obj = NULL;
6704         r600_ring_init(rdev, ring, 64 * 1024);
6705
6706         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6707         ring->ring_obj = NULL;
6708         r600_ring_init(rdev, ring, 64 * 1024);
6709
             /* the UVD ring is only set up when radeon_uvd_init() succeeds */
6710         if (rdev->has_uvd) {
6711                 r = radeon_uvd_init(rdev);
6712                 if (!r) {
6713                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6714                         ring->ring_obj = NULL;
6715                         r600_ring_init(rdev, ring, 4096);
6716                 }
6717         }
6718
6719         rdev->ih.ring_obj = NULL;
6720         r600_ih_ring_init(rdev, 64 * 1024);
6721
6722         r = r600_pcie_gart_init(rdev);
6723         if (r)
6724                 return r;
6725
6726         rdev->accel_working = true;
6727         r = si_startup(rdev);
6728         if (r) {
                     /* not fatal: undo the acceleration setup and carry on without it */
6729                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6730                 si_cp_fini(rdev);
6731                 cayman_dma_fini(rdev);
6732                 si_irq_fini(rdev);
6733                 si_rlc_fini(rdev);
6734                 radeon_wb_fini(rdev);
6735                 radeon_ib_pool_fini(rdev);
6736                 radeon_vm_manager_fini(rdev);
6737                 radeon_irq_kms_fini(rdev);
6738                 si_pcie_gart_fini(rdev);
6739                 rdev->accel_working = false;
6740         }
6741
6742         /* Don't start up if the MC ucode is missing.
6743          * The default clocks and voltages before the MC ucode
6744          * is loaded are not sufficient for advanced operations.
6745          */
6746         if (!rdev->mc_fw) {
                     /* NOTE(review): message says NI+ although this is the SI init path */
6747                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6748                 return -EINVAL;
6749         }
6750
6751         return 0;
6752 }
6753
/**
 * si_fini - tear down the asic state
 *
 * @rdev: radeon_device pointer
 *
 * Calls the fini helper of every subsystem (CP, DMA, interrupts, RLC,
 * clock/power gating, writeback, VM manager, IB pool, UVD when present,
 * GART, VRAM scratch, GEM, fence driver, memory manager, atombios) and
 * finally frees rdev->bios.
 */
6754 void si_fini(struct radeon_device *rdev)
6755 {
6756         si_cp_fini(rdev);
6757         cayman_dma_fini(rdev);
6758         si_irq_fini(rdev);
6759         si_rlc_fini(rdev);
6760         si_fini_cg(rdev);
6761         si_fini_pg(rdev);
6762         radeon_wb_fini(rdev);
6763         radeon_vm_manager_fini(rdev);
6764         radeon_ib_pool_fini(rdev);
6765         radeon_irq_kms_fini(rdev);
6766         if (rdev->has_uvd) {
6767                 r600_uvd_stop(rdev);
6768                 radeon_uvd_fini(rdev);
6769         }
6770         si_pcie_gart_fini(rdev);
6771         r600_vram_scratch_fini(rdev);
6772         radeon_gem_fini(rdev);
6773         radeon_fence_driver_fini(rdev);
6774         radeon_bo_fini(rdev);
6775         radeon_atombios_fini(rdev);
             /* clear the pointer so a stale BIOS image cannot be used or freed again */
6776         kfree(rdev->bios);
6777         rdev->bios = NULL;
6778 }
6779
6780 /**
6781  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6782  *
6783  * @rdev: radeon_device pointer
6784  *
6785  * Fetches a GPU clock counter snapshot (SI).
      * The capture request and both register reads are done with
      * rdev->gpu_clock_mutex held.
6786  * Returns the 64 bit clock counter snapshot.
6787  */
6788 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6789 {
6790         uint64_t clock;
6791
6792         mutex_lock(&rdev->gpu_clock_mutex);
             /* presumably latches the counter into the LSB/MSB registers - TODO confirm */
6793         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
             /* combine the two 32 bit halves; MSB goes into bits 63:32 */
6794         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6795                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6796         mutex_unlock(&rdev->gpu_clock_mutex);
6797         return clock;
6798 }
6799
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK, 0 to put the PLL to sleep
 * @dclk: requested DCLK, 0 to put the PLL to sleep
 *
 * Switches VCLK and DCLK to the bypass clock, puts the PLL into bypass
 * mode, reprograms it with the dividers computed by
 * radeon_uvd_calc_upll_dividers() and then switches both clocks back to
 * the PLL output.  If either clock is 0 the PLL is put to sleep and left
 * in bypass mode instead.
 *
 * Returns 0 on success, or the error returned by the divider calculation
 * or by radeon_uvd_send_upll_ctlreq().  On those error paths the bypass
 * settings written at the start are not undone.
 */
6800 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6801 {
6802         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6803         int r;
6804
6805         /* bypass vclk and dclk with bclk */
6806         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6807                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6808                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6809
6810         /* put PLL in bypass mode */
6811         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6812
6813         if (!vclk || !dclk) {
6814                 /* keep the Bypass mode, put PLL to sleep */
6815                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6816                 return 0;
6817         }
6818
6819         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6820                                           16384, 0x03FFFFFF, 0, 128, 5,
6821                                           &fb_div, &vclk_div, &dclk_div);
6822         if (r)
6823                 return r;
6824
6825         /* set RESET_ANTI_MUX to 0 */
6826         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
6827
6828         /* set VCO_MODE to 1 */
6829         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
6830
6831         /* toggle UPLL_SLEEP to 1 then back to 0 */
6832         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6833         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
6834
6835         /* deassert UPLL_RESET */
6836         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6837
6838         mdelay(1);
6839
6840         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6841         if (r)
6842                 return r;
6843
6844         /* assert UPLL_RESET again */
6845         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
6846
6847         /* disable spread spectrum. */
6848         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
6849
6850         /* set feedback divider */
6851         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
6852
6853         /* set ref divider to 0 */
6854         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
6855
             /* ISPARE9 is set only for feedback dividers of 307200 and above */
6856         if (fb_div < 307200)
6857                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
6858         else
6859                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
6860
6861         /* set PDIV_A and PDIV_B */
6862         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6863                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
6864                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
6865
6866         /* give the PLL some time to settle */
6867         mdelay(15);
6868
6869         /* deassert PLL_RESET */
6870         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6871
6872         mdelay(15);
6873
6874         /* switch from bypass mode to normal mode */
6875         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
6876
6877         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6878         if (r)
6879                 return r;
6880
6881         /* switch VCLK and DCLK selection */
6882         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6883                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
6884                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6885
6886         mdelay(100);
6887
6888         return 0;
6889 }
6890
/**
 * si_pcie_gen3_enable - raise the PCIE link speed to gen2/gen3
 *
 * @rdev: radeon_device pointer
 *
 * Returns without touching the link when the GPU has no upstream bridge,
 * when the feature is disabled with radeon.pcie_gen2=0, on IGP or non-PCIE
 * parts, when drm_pcie_get_speed_cap_mask() fails or reports neither
 * DRM_PCIE_SPEED_50 nor DRM_PCIE_SPEED_80, when the link already runs at
 * the best rate in the mask, or when either end lacks a PCIE capability.
 *
 * Otherwise, for gen3, link equalization is retried up to 10 times; then
 * the target link speed is written to the GPU's LNKCTL2 register and a
 * software initiated speed change is requested and polled for completion.
 */
6891 static void si_pcie_gen3_enable(struct radeon_device *rdev)
6892 {
6893         struct pci_dev *root = rdev->pdev->bus->self;
6894         int bridge_pos, gpu_pos;
6895         u32 speed_cntl, mask, current_data_rate;
6896         int ret, i;
6897         u16 tmp16;
6898
             /*
              * No upstream bridge (GPU directly on a root bus): there is no
              * second link partner to program and root must not be
              * dereferenced below.
              */
             if (!root)
                     return;

6899         if (radeon_pcie_gen2 == 0)
6900                 return;
6901
6902         if (rdev->flags & RADEON_IS_IGP)
6903                 return;
6904
6905         if (!(rdev->flags & RADEON_IS_PCIE))
6906                 return;
6907
6908         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
6909         if (ret != 0)
6910                 return;
6911
6912         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
6913                 return;
6914
6915         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6916         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
6917                 LC_CURRENT_DATA_RATE_SHIFT;
             /* current_data_rate: 1 is treated as gen2, 2 as gen3 */
6918         if (mask & DRM_PCIE_SPEED_80) {
6919                 if (current_data_rate == 2) {
6920                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
6921                         return;
6922                 }
6923                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
6924         } else if (mask & DRM_PCIE_SPEED_50) {
6925                 if (current_data_rate == 1) {
6926                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
6927                         return;
6928                 }
6929                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
6930         }
6931
6932         bridge_pos = pci_pcie_cap(root);
6933         if (!bridge_pos)
6934                 return;
6935
6936         gpu_pos = pci_pcie_cap(rdev->pdev);
6937         if (!gpu_pos)
6938                 return;
6939
6940         if (mask & DRM_PCIE_SPEED_80) {
6941                 /* re-try equalization if gen3 is not already enabled */
6942                 if (current_data_rate != 2) {
6943                         u16 bridge_cfg, gpu_cfg;
6944                         u16 bridge_cfg2, gpu_cfg2;
6945                         u32 max_lw, current_lw, tmp;
6946
                             /* force PCI_EXP_LNKCTL_HAWD on at both ends of the link */
6947                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6948                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6949
6950                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
6951                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6952
6953                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
6954                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
6955
6956                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
6957                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
6958                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
6959
                             /* running narrower than detected: request the detected width */
6960                         if (current_lw < max_lw) {
6961                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
6962                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
6963                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
6964                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
6965                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
6966                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
6967                                 }
6968                         }
6969
6970                         for (i = 0; i < 10; i++) {
6971                                 /* check status */
6972                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
6973                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
6974                                         break;
6975
                                     /*
                                      * NOTE(review): this re-read replaces the
                                      * LNKCTL values saved before HAWD was
                                      * forced on, so the "restore" below writes
                                      * back the HAWD bit as read here.
                                      */
6976                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6977                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6978
6979                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
6980                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
6981
                                     /* quiesce the link, then request a new equalization */
6982                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6983                                 tmp |= LC_SET_QUIESCE;
6984                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6985
6986                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6987                                 tmp |= LC_REDO_EQ;
6988                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6989
6990                                 mdelay(100);
6991
6992                                 /* linkctl */
6993                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
6994                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
6995                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
6996                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6997
6998                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
6999                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7000                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7001                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7002
7003                                 /* linkctl2 */
                                     /* put back bit 4 and bits 11:9 as saved at the top of this iteration */
7004                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7005                                 tmp16 &= ~((1 << 4) | (7 << 9));
7006                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7007                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7008
7009                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7010                                 tmp16 &= ~((1 << 4) | (7 << 9));
7011                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7012                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7013
7014                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7015                                 tmp &= ~LC_SET_QUIESCE;
7016                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7017                         }
7018                 }
7019         }
7020
7021         /* set the link speed */
7022         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7023         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7024         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7025
             /* the low 4 bits of the GPU's LNKCTL2 select the speed requested below */
7026         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7027         tmp16 &= ~0xf;
7028         if (mask & DRM_PCIE_SPEED_80)
7029                 tmp16 |= 3; /* gen3 */
7030         else if (mask & DRM_PCIE_SPEED_50)
7031                 tmp16 |= 2; /* gen2 */
7032         else
7033                 tmp16 |= 1; /* gen1 */
7034         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7035
7036         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7037         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7038         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7039
             /* poll, 1 us per step, until the initiate bit reads back as 0 or the timeout expires */
7040         for (i = 0; i < rdev->usec_timeout; i++) {
7041                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7042                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7043                         break;
7044                 udelay(1);
7045         }
7046 }
7047
7048 static void si_program_aspm(struct radeon_device *rdev)
7049 {
7050         u32 data, orig;
7051         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7052         bool disable_clkreq = false;
7053
7054         if (radeon_aspm == 0)
7055                 return;
7056
7057         if (!(rdev->flags & RADEON_IS_PCIE))
7058                 return;
7059
7060         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7061         data &= ~LC_XMIT_N_FTS_MASK;
7062         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7063         if (orig != data)
7064                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7065
7066         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7067         data |= LC_GO_TO_RECOVERY;
7068         if (orig != data)
7069                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7070
7071         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7072         data |= P_IGNORE_EDB_ERR;
7073         if (orig != data)
7074                 WREG32_PCIE(PCIE_P_CNTL, data);
7075
7076         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7077         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7078         data |= LC_PMI_TO_L1_DIS;
7079         if (!disable_l0s)
7080                 data |= LC_L0S_INACTIVITY(7);
7081
7082         if (!disable_l1) {
7083                 data |= LC_L1_INACTIVITY(7);
7084                 data &= ~LC_PMI_TO_L1_DIS;
7085                 if (orig != data)
7086                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7087
7088                 if (!disable_plloff_in_l1) {
7089                         bool clk_req_support;
7090
7091                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7092                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7093                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7094                         if (orig != data)
7095                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7096
7097                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7098                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7099                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7100                         if (orig != data)
7101                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7102
7103                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7104                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7105                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7106                         if (orig != data)
7107                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7108
7109                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7110                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7111                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7112                         if (orig != data)
7113                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7114
7115                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7116                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7117                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7118                                 if (orig != data)
7119                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7120
7121                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7122                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7123                                 if (orig != data)
7124                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7125
7126                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7127                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7128                                 if (orig != data)
7129                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7130
7131                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7132                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7133                                 if (orig != data)
7134                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7135
7136                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7137                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7138                                 if (orig != data)
7139                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7140
7141                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7142                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7143                                 if (orig != data)
7144                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7145
7146                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7147                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7148                                 if (orig != data)
7149                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7150
7151                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7152                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7153                                 if (orig != data)
7154                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7155                         }
7156                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7157                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7158                         data |= LC_DYN_LANES_PWR_STATE(3);
7159                         if (orig != data)
7160                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7161
7162                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7163                         data &= ~LS2_EXIT_TIME_MASK;
7164                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7165                                 data |= LS2_EXIT_TIME(5);
7166                         if (orig != data)
7167                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7168
7169                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7170                         data &= ~LS2_EXIT_TIME_MASK;
7171                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7172                                 data |= LS2_EXIT_TIME(5);
7173                         if (orig != data)
7174                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7175
7176                         if (!disable_clkreq) {
7177                                 struct pci_dev *root = rdev->pdev->bus->self;
7178                                 u32 lnkcap;
7179
7180                                 clk_req_support = false;
7181                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7182                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7183                                         clk_req_support = true;
7184                         } else {
7185                                 clk_req_support = false;
7186                         }
7187
7188                         if (clk_req_support) {
7189                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7190                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7191                                 if (orig != data)
7192                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7193
7194                                 orig = data = RREG32(THM_CLK_CNTL);
7195                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7196                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7197                                 if (orig != data)
7198                                         WREG32(THM_CLK_CNTL, data);
7199
7200                                 orig = data = RREG32(MISC_CLK_CNTL);
7201                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7202                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7203                                 if (orig != data)
7204                                         WREG32(MISC_CLK_CNTL, data);
7205
7206                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7207                                 data &= ~BCLK_AS_XCLK;
7208                                 if (orig != data)
7209                                         WREG32(CG_CLKPIN_CNTL, data);
7210
7211                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7212                                 data &= ~FORCE_BIF_REFCLK_EN;
7213                                 if (orig != data)
7214                                         WREG32(CG_CLKPIN_CNTL_2, data);
7215
7216                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7217                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7218                                 data |= MPLL_CLKOUT_SEL(4);
7219                                 if (orig != data)
7220                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7221
7222                                 orig = data = RREG32(SPLL_CNTL_MODE);
7223                                 data &= ~SPLL_REFCLK_SEL_MASK;
7224                                 if (orig != data)
7225                                         WREG32(SPLL_CNTL_MODE, data);
7226                         }
7227                 }
7228         } else {
7229                 if (orig != data)
7230                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7231         }
7232
7233         orig = data = RREG32_PCIE(PCIE_CNTL2);
7234         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7235         if (orig != data)
7236                 WREG32_PCIE(PCIE_CNTL2, data);
7237
7238         if (!disable_l0s) {
7239                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7240                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7241                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7242                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7243                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7244                                 data &= ~LC_L0S_INACTIVITY_MASK;
7245                                 if (orig != data)
7246                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7247                         }
7248                 }
7249         }
7250 }