]> rtime.felk.cvut.cz Git - linux-imx.git/blob - drivers/gpu/drm/radeon/si.c
drm/radeon: use radeon device for request firmware
[linux-imx.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
/*
 * Declare the external firmware blobs needed by the supported Southern
 * Islands chips (Tahiti, Pitcairn, Verde, Oland, Hainan) so that module
 * tooling (e.g. initramfs generators) can pick them up.
 * NOTE(review): the suffixes (pfp/me/ce/mc/rlc/smc) presumably name the
 * per-engine microcode images loaded elsewhere in this file via
 * request_firmware() -- confirm the paths stay in sync with that code.
 */
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
50 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
51 MODULE_FIRMWARE("radeon/VERDE_me.bin");
52 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
53 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
54 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
56 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
57 MODULE_FIRMWARE("radeon/OLAND_me.bin");
58 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
59 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
60 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
62 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68
/* Forward declarations for static helpers defined later in this file. */
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
/*
 * Cross-file helpers shared with SI.  The r600_/evergreen_ prefixes
 * suggest they are implemented in r600.c / evergreen.c -- they are only
 * declared extern here, not defined in this file.
 */
71 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72 extern void r600_ih_ring_fini(struct radeon_device *rdev);
73 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
74 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
75 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
76 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
77 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
78 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
79
/*
 * RLC save/restore register list for VERDE.
 * Entries come in pairs: the first word packs a 16-bit selector in the
 * upper half and a dword register index (byte offset >> 2) in the lower
 * half; the second word is the associated value (all zero here).
 * NOTE(review): the selector values (0x8000/0x8040/0x8001/0x8041/0x9c00)
 * presumably encode per-shader-engine vs. broadcast addressing for the
 * RLC microcode, and the trailing lone 0x00000000 looks like a list
 * terminator -- confirm against the RLC setup code before editing.
 *
 * Fix vs. previous revision: one entry used an uppercase hex prefix
 * ("0X00000000"); normalized to lowercase "0x" to match every other
 * literal in this file (identical value, no behavior change).
 */
80 static const u32 verde_rlc_save_restore_register_list[] =
81 {
82         (0x8000 << 16) | (0x98f4 >> 2),
83         0x00000000,
84         (0x8040 << 16) | (0x98f4 >> 2),
85         0x00000000,
86         (0x8000 << 16) | (0xe80 >> 2),
87         0x00000000,
88         (0x8040 << 16) | (0xe80 >> 2),
89         0x00000000,
90         (0x8000 << 16) | (0x89bc >> 2),
91         0x00000000,
92         (0x8040 << 16) | (0x89bc >> 2),
93         0x00000000,
94         (0x8000 << 16) | (0x8c1c >> 2),
95         0x00000000,
96         (0x8040 << 16) | (0x8c1c >> 2),
97         0x00000000,
98         (0x9c00 << 16) | (0x98f0 >> 2),
99         0x00000000,
100         (0x9c00 << 16) | (0xe7c >> 2),
101         0x00000000,
102         (0x8000 << 16) | (0x9148 >> 2),
103         0x00000000,
104         (0x8040 << 16) | (0x9148 >> 2),
105         0x00000000,
106         (0x9c00 << 16) | (0x9150 >> 2),
107         0x00000000,
108         (0x9c00 << 16) | (0x897c >> 2),
109         0x00000000,
110         (0x9c00 << 16) | (0x8d8c >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0xac54 >> 2),
113         0x00000000,
/* NOTE(review): lone 0x3 breaks the pair pattern -- meaning unknown from
 * this file alone; do not "fix" without checking the RLC ucode format. */
114         0x3,
115         (0x9c00 << 16) | (0x98f8 >> 2),
116         0x00000000,
117         (0x9c00 << 16) | (0x9910 >> 2),
118         0x00000000,
119         (0x9c00 << 16) | (0x9914 >> 2),
120         0x00000000,
121         (0x9c00 << 16) | (0x9918 >> 2),
122         0x00000000,
123         (0x9c00 << 16) | (0x991c >> 2),
124         0x00000000,
125         (0x9c00 << 16) | (0x9920 >> 2),
126         0x00000000,
127         (0x9c00 << 16) | (0x9924 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x9928 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x992c >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x9930 >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x9934 >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x9938 >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x993c >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x9940 >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x9944 >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x9948 >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x994c >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x9950 >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x9954 >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x9958 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x995c >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x9960 >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x9964 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9968 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x996c >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x9970 >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x9974 >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x9978 >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x997c >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x9980 >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x9984 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9988 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x998c >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x8c00 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x8c14 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x8c04 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x8c08 >> 2),
188         0x00000000,
189         (0x8000 << 16) | (0x9b7c >> 2),
190         0x00000000,
191         (0x8040 << 16) | (0x9b7c >> 2),
192         0x00000000,
193         (0x8000 << 16) | (0xe84 >> 2),
194         0x00000000,
195         (0x8040 << 16) | (0xe84 >> 2),
196         0x00000000,
197         (0x8000 << 16) | (0x89c0 >> 2),
198         0x00000000,
199         (0x8040 << 16) | (0x89c0 >> 2),
200         0x00000000,
201         (0x8000 << 16) | (0x914c >> 2),
202         0x00000000,
203         (0x8040 << 16) | (0x914c >> 2),
204         0x00000000,
205         (0x8000 << 16) | (0x8c20 >> 2),
206         0x00000000,
207         (0x8040 << 16) | (0x8c20 >> 2),
208         0x00000000,
209         (0x8000 << 16) | (0x9354 >> 2),
210         0x00000000,
211         (0x8040 << 16) | (0x9354 >> 2),
212         0x00000000,
213         (0x9c00 << 16) | (0x9060 >> 2),
214         0x00000000,
215         (0x9c00 << 16) | (0x9364 >> 2),
216         0x00000000,
217         (0x9c00 << 16) | (0x9100 >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x913c >> 2),
220         0x00000000,
221         (0x8000 << 16) | (0x90e0 >> 2),
222         0x00000000,
223         (0x8000 << 16) | (0x90e4 >> 2),
224         0x00000000,
225         (0x8000 << 16) | (0x90e8 >> 2),
226         0x00000000,
227         (0x8040 << 16) | (0x90e0 >> 2),
228         0x00000000,
229         (0x8040 << 16) | (0x90e4 >> 2),
230         0x00000000,
231         (0x8040 << 16) | (0x90e8 >> 2),
232         0x00000000,
233         (0x9c00 << 16) | (0x8bcc >> 2),
234         0x00000000,
235         (0x9c00 << 16) | (0x8b24 >> 2),
236         0x00000000,
237         (0x9c00 << 16) | (0x88c4 >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x8e50 >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x8c0c >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x8e58 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8e5c >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x9508 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x950c >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x9494 >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0xac0c >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0xac10 >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0xac14 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0xae00 >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0xac08 >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0x88d4 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0x88c8 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0x88cc >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0x89b0 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0x8b10 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x8a14 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x9830 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x9834 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x9838 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x9a10 >> 2),
282         0x00000000,
283         (0x8000 << 16) | (0x9870 >> 2),
284         0x00000000,
285         (0x8000 << 16) | (0x9874 >> 2),
286         0x00000000,
287         (0x8001 << 16) | (0x9870 >> 2),
288         0x00000000,
289         (0x8001 << 16) | (0x9874 >> 2),
290         0x00000000,
291         (0x8040 << 16) | (0x9870 >> 2),
292         0x00000000,
293         (0x8040 << 16) | (0x9874 >> 2),
294         0x00000000,
295         (0x8041 << 16) | (0x9870 >> 2),
296         0x00000000,
297         (0x8041 << 16) | (0x9874 >> 2),
298         0x00000000,
299         0x00000000
300 };
301
/*
 * "Golden" RLC register settings for TAHITI: rows of
 * { register offset, and-mask, or-value }.
 * NOTE(review): presumably consumed by radeon_program_register_sequence()
 * from code outside this chunk -- confirm before relying on mask semantics.
 */
302 static const u32 tahiti_golden_rlc_registers[] =
303 {
304         0xc424, 0xffffffff, 0x00601005,
305         0xc47c, 0xffffffff, 0x10104040,
306         0xc488, 0xffffffff, 0x0100000a,
307         0xc314, 0xffffffff, 0x00000800,
308         0xc30c, 0xffffffff, 0x800000f4,
309         0xf4a8, 0xffffffff, 0x00000000
310 };
311
/*
 * "Golden" register settings for TAHITI: rows of
 * { register offset, and-mask, or-value }.  Values are AMD-provided
 * hardware init magic; do not alter individual entries without a
 * vendor reference.
 */
312 static const u32 tahiti_golden_registers[] =
313 {
314         0x9a10, 0x00010000, 0x00018208,
315         0x9830, 0xffffffff, 0x00000000,
316         0x9834, 0xf00fffff, 0x00000400,
317         0x9838, 0x0002021c, 0x00020200,
318         0xc78, 0x00000080, 0x00000000,
319         0xd030, 0x000300c0, 0x00800040,
320         0xd830, 0x000300c0, 0x00800040,
321         0x5bb0, 0x000000f0, 0x00000070,
322         0x5bc0, 0x00200000, 0x50100000,
323         0x7030, 0x31000311, 0x00000011,
324         0x277c, 0x00000003, 0x000007ff,
325         0x240c, 0x000007ff, 0x00000000,
326         0x8a14, 0xf000001f, 0x00000007,
327         0x8b24, 0xffffffff, 0x00ffffff,
328         0x8b10, 0x0000ff0f, 0x00000000,
329         0x28a4c, 0x07ffffff, 0x4e000000,
330         0x28350, 0x3f3f3fff, 0x2a00126a,
331         0x30, 0x000000ff, 0x0040,
332         0x34, 0x00000040, 0x00004040,
333         0x9100, 0x07ffffff, 0x03000000,
334         0x8e88, 0x01ff1f3f, 0x00000000,
335         0x8e84, 0x01ff1f3f, 0x00000000,
336         0x9060, 0x0000007f, 0x00000020,
337         0x9508, 0x00010000, 0x00010000,
338         0xac14, 0x00000200, 0x000002fb,
339         0xac10, 0xffffffff, 0x0000543b,
340         0xac0c, 0xffffffff, 0xa9210876,
341         0x88d0, 0xffffffff, 0x000fff40,
342         0x88d4, 0x0000001f, 0x00000010,
343         0x1410, 0x20000000, 0x20fffed8,
344         0x15c0, 0x000c0fc0, 0x000c0400
345 };
346
/*
 * Second golden-register table for TAHITI (single
 * { offset, mask, value } row); TAHITI is the only chip here with a
 * dedicated "2" table besides HAINAN.
 */
347 static const u32 tahiti_golden_registers2[] =
348 {
349         0xc64, 0x00000001, 0x00000001
350 };
351
/*
 * "Golden" RLC register settings for PITCAIRN:
 * { register offset, and-mask, or-value } triples.
 */
352 static const u32 pitcairn_golden_rlc_registers[] =
353 {
354         0xc424, 0xffffffff, 0x00601004,
355         0xc47c, 0xffffffff, 0x10102020,
356         0xc488, 0xffffffff, 0x01000020,
357         0xc314, 0xffffffff, 0x00000800,
358         0xc30c, 0xffffffff, 0x800000a4
359 };
360
/*
 * "Golden" register settings for PITCAIRN:
 * { register offset, and-mask, or-value } triples (AMD hardware init
 * magic -- do not alter individual entries without a vendor reference).
 */
361 static const u32 pitcairn_golden_registers[] =
362 {
363         0x9a10, 0x00010000, 0x00018208,
364         0x9830, 0xffffffff, 0x00000000,
365         0x9834, 0xf00fffff, 0x00000400,
366         0x9838, 0x0002021c, 0x00020200,
367         0xc78, 0x00000080, 0x00000000,
368         0xd030, 0x000300c0, 0x00800040,
369         0xd830, 0x000300c0, 0x00800040,
370         0x5bb0, 0x000000f0, 0x00000070,
371         0x5bc0, 0x00200000, 0x50100000,
372         0x7030, 0x31000311, 0x00000011,
373         0x2ae4, 0x00073ffe, 0x000022a2,
374         0x240c, 0x000007ff, 0x00000000,
375         0x8a14, 0xf000001f, 0x00000007,
376         0x8b24, 0xffffffff, 0x00ffffff,
377         0x8b10, 0x0000ff0f, 0x00000000,
378         0x28a4c, 0x07ffffff, 0x4e000000,
379         0x28350, 0x3f3f3fff, 0x2a00126a,
380         0x30, 0x000000ff, 0x0040,
381         0x34, 0x00000040, 0x00004040,
382         0x9100, 0x07ffffff, 0x03000000,
383         0x9060, 0x0000007f, 0x00000020,
384         0x9508, 0x00010000, 0x00010000,
385         0xac14, 0x000003ff, 0x000000f7,
386         0xac10, 0xffffffff, 0x00000000,
387         0xac0c, 0xffffffff, 0x32761054,
388         0x88d4, 0x0000001f, 0x00000010,
389         0x15c0, 0x000c0fc0, 0x000c0400
390 };
391
/*
 * "Golden" RLC register settings for VERDE:
 * { register offset, and-mask, or-value } triples.
 */
392 static const u32 verde_golden_rlc_registers[] =
393 {
394         0xc424, 0xffffffff, 0x033f1005,
395         0xc47c, 0xffffffff, 0x10808020,
396         0xc488, 0xffffffff, 0x00800008,
397         0xc314, 0xffffffff, 0x00001000,
398         0xc30c, 0xffffffff, 0x80010014
399 };
400
/*
 * "Golden" register settings for VERDE:
 * { register offset, and-mask, or-value } triples.
 * NOTE(review): several rows are exact duplicates (0xd030, 0xd830,
 * 0x2ae4 x3, 0x240c x3, 0x8a14 x3, 0x28350 x3, 0x9100 x2, 0x8e88 x3,
 * 0x8e84 x3, 0xac14/0xac10/0xac0c x3, 0x88d4 x3).  Since each repeat
 * has an identical mask and value, re-applying it is presumably a
 * harmless no-op, but the redundancy looks like a copy/paste artifact
 * worth confirming against AMD's reference tables before cleanup.
 */
401 static const u32 verde_golden_registers[] =
402 {
403         0x9a10, 0x00010000, 0x00018208,
404         0x9830, 0xffffffff, 0x00000000,
405         0x9834, 0xf00fffff, 0x00000400,
406         0x9838, 0x0002021c, 0x00020200,
407         0xc78, 0x00000080, 0x00000000,
408         0xd030, 0x000300c0, 0x00800040,
409         0xd030, 0x000300c0, 0x00800040,
410         0xd830, 0x000300c0, 0x00800040,
411         0xd830, 0x000300c0, 0x00800040,
412         0x5bb0, 0x000000f0, 0x00000070,
413         0x5bc0, 0x00200000, 0x50100000,
414         0x7030, 0x31000311, 0x00000011,
415         0x2ae4, 0x00073ffe, 0x000022a2,
416         0x2ae4, 0x00073ffe, 0x000022a2,
417         0x2ae4, 0x00073ffe, 0x000022a2,
418         0x240c, 0x000007ff, 0x00000000,
419         0x240c, 0x000007ff, 0x00000000,
420         0x240c, 0x000007ff, 0x00000000,
421         0x8a14, 0xf000001f, 0x00000007,
422         0x8a14, 0xf000001f, 0x00000007,
423         0x8a14, 0xf000001f, 0x00000007,
424         0x8b24, 0xffffffff, 0x00ffffff,
425         0x8b10, 0x0000ff0f, 0x00000000,
426         0x28a4c, 0x07ffffff, 0x4e000000,
427         0x28350, 0x3f3f3fff, 0x0000124a,
428         0x28350, 0x3f3f3fff, 0x0000124a,
429         0x28350, 0x3f3f3fff, 0x0000124a,
430         0x30, 0x000000ff, 0x0040,
431         0x34, 0x00000040, 0x00004040,
432         0x9100, 0x07ffffff, 0x03000000,
433         0x9100, 0x07ffffff, 0x03000000,
434         0x8e88, 0x01ff1f3f, 0x00000000,
435         0x8e88, 0x01ff1f3f, 0x00000000,
436         0x8e88, 0x01ff1f3f, 0x00000000,
437         0x8e84, 0x01ff1f3f, 0x00000000,
438         0x8e84, 0x01ff1f3f, 0x00000000,
439         0x8e84, 0x01ff1f3f, 0x00000000,
440         0x9060, 0x0000007f, 0x00000020,
441         0x9508, 0x00010000, 0x00010000,
442         0xac14, 0x000003ff, 0x00000003,
443         0xac14, 0x000003ff, 0x00000003,
444         0xac14, 0x000003ff, 0x00000003,
445         0xac10, 0xffffffff, 0x00000000,
446         0xac10, 0xffffffff, 0x00000000,
447         0xac10, 0xffffffff, 0x00000000,
448         0xac0c, 0xffffffff, 0x00001032,
449         0xac0c, 0xffffffff, 0x00001032,
450         0xac0c, 0xffffffff, 0x00001032,
451         0x88d4, 0x0000001f, 0x00000010,
452         0x88d4, 0x0000001f, 0x00000010,
453         0x88d4, 0x0000001f, 0x00000010,
454         0x15c0, 0x000c0fc0, 0x000c0400
455 };
456
/*
 * "Golden" RLC register settings for OLAND:
 * { register offset, and-mask, or-value } triples.
 */
457 static const u32 oland_golden_rlc_registers[] =
458 {
459         0xc424, 0xffffffff, 0x00601005,
460         0xc47c, 0xffffffff, 0x10104040,
461         0xc488, 0xffffffff, 0x0100000a,
462         0xc314, 0xffffffff, 0x00000800,
463         0xc30c, 0xffffffff, 0x800000f4
464 };
465
/*
 * "Golden" register settings for OLAND:
 * { register offset, and-mask, or-value } triples (AMD hardware init
 * magic -- do not alter individual entries without a vendor reference).
 */
466 static const u32 oland_golden_registers[] =
467 {
468         0x9a10, 0x00010000, 0x00018208,
469         0x9830, 0xffffffff, 0x00000000,
470         0x9834, 0xf00fffff, 0x00000400,
471         0x9838, 0x0002021c, 0x00020200,
472         0xc78, 0x00000080, 0x00000000,
473         0xd030, 0x000300c0, 0x00800040,
474         0xd830, 0x000300c0, 0x00800040,
475         0x5bb0, 0x000000f0, 0x00000070,
476         0x5bc0, 0x00200000, 0x50100000,
477         0x7030, 0x31000311, 0x00000011,
478         0x2ae4, 0x00073ffe, 0x000022a2,
479         0x240c, 0x000007ff, 0x00000000,
480         0x8a14, 0xf000001f, 0x00000007,
481         0x8b24, 0xffffffff, 0x00ffffff,
482         0x8b10, 0x0000ff0f, 0x00000000,
483         0x28a4c, 0x07ffffff, 0x4e000000,
484         0x28350, 0x3f3f3fff, 0x00000082,
485         0x30, 0x000000ff, 0x0040,
486         0x34, 0x00000040, 0x00004040,
487         0x9100, 0x07ffffff, 0x03000000,
488         0x9060, 0x0000007f, 0x00000020,
489         0x9508, 0x00010000, 0x00010000,
490         0xac14, 0x000003ff, 0x000000f3,
491         0xac10, 0xffffffff, 0x00000000,
492         0xac0c, 0xffffffff, 0x00003210,
493         0x88d4, 0x0000001f, 0x00000010,
494         0x15c0, 0x000c0fc0, 0x000c0400
495 };
496
/*
 * "Golden" register settings for HAINAN:
 * { register offset, and-mask, or-value } triples (AMD hardware init
 * magic -- do not alter individual entries without a vendor reference).
 */
497 static const u32 hainan_golden_registers[] =
498 {
499         0x9a10, 0x00010000, 0x00018208,
500         0x9830, 0xffffffff, 0x00000000,
501         0x9834, 0xf00fffff, 0x00000400,
502         0x9838, 0x0002021c, 0x00020200,
503         0xd0c0, 0xff000fff, 0x00000100,
504         0xd030, 0x000300c0, 0x00800040,
505         0xd8c0, 0xff000fff, 0x00000100,
506         0xd830, 0x000300c0, 0x00800040,
507         0x2ae4, 0x00073ffe, 0x000022a2,
508         0x240c, 0x000007ff, 0x00000000,
509         0x8a14, 0xf000001f, 0x00000007,
510         0x8b24, 0xffffffff, 0x00ffffff,
511         0x8b10, 0x0000ff0f, 0x00000000,
512         0x28a4c, 0x07ffffff, 0x4e000000,
513         0x28350, 0x3f3f3fff, 0x00000000,
514         0x30, 0x000000ff, 0x0040,
515         0x34, 0x00000040, 0x00004040,
516         0x9100, 0x03e00000, 0x03600000,
517         0x9060, 0x0000007f, 0x00000020,
518         0x9508, 0x00010000, 0x00010000,
519         0xac14, 0x000003ff, 0x000000f1,
520         0xac10, 0xffffffff, 0x00000000,
521         0xac0c, 0xffffffff, 0x00003210,
522         0x88d4, 0x0000001f, 0x00000010,
523         0x15c0, 0x000c0fc0, 0x000c0400
524 };
525
/*
 * Second golden-register table for HAINAN (single
 * { offset, mask, value } row).
 */
526 static const u32 hainan_golden_registers2[] =
527 {
528         0x98f8, 0xffffffff, 0x02010001
529 };
530
/*
 * Clock-gating init sequence for TAHITI:
 * { register offset, and-mask, or-value } triples.  The name suggests
 * medium-grain (MGCG) and coarse-grain (CGCG) clock-gating setup --
 * the actual application point is outside this chunk; confirm against
 * the si clock-gating code before editing.  Note 0x802c is written
 * both at the start and again mid-sequence, so row order matters.
 */
531 static const u32 tahiti_mgcg_cgcg_init[] =
532 {
533         0xc400, 0xffffffff, 0xfffffffc,
534         0x802c, 0xffffffff, 0xe0000000,
535         0x9a60, 0xffffffff, 0x00000100,
536         0x92a4, 0xffffffff, 0x00000100,
537         0xc164, 0xffffffff, 0x00000100,
538         0x9774, 0xffffffff, 0x00000100,
539         0x8984, 0xffffffff, 0x06000100,
540         0x8a18, 0xffffffff, 0x00000100,
541         0x92a0, 0xffffffff, 0x00000100,
542         0xc380, 0xffffffff, 0x00000100,
543         0x8b28, 0xffffffff, 0x00000100,
544         0x9144, 0xffffffff, 0x00000100,
545         0x8d88, 0xffffffff, 0x00000100,
546         0x8d8c, 0xffffffff, 0x00000100,
547         0x9030, 0xffffffff, 0x00000100,
548         0x9034, 0xffffffff, 0x00000100,
549         0x9038, 0xffffffff, 0x00000100,
550         0x903c, 0xffffffff, 0x00000100,
551         0xad80, 0xffffffff, 0x00000100,
552         0xac54, 0xffffffff, 0x00000100,
553         0x897c, 0xffffffff, 0x06000100,
554         0x9868, 0xffffffff, 0x00000100,
555         0x9510, 0xffffffff, 0x00000100,
556         0xaf04, 0xffffffff, 0x00000100,
557         0xae04, 0xffffffff, 0x00000100,
558         0x949c, 0xffffffff, 0x00000100,
559         0x802c, 0xffffffff, 0xe0000000,
560         0x9160, 0xffffffff, 0x00010000,
561         0x9164, 0xffffffff, 0x00030002,
562         0x9168, 0xffffffff, 0x00040007,
563         0x916c, 0xffffffff, 0x00060005,
564         0x9170, 0xffffffff, 0x00090008,
565         0x9174, 0xffffffff, 0x00020001,
566         0x9178, 0xffffffff, 0x00040003,
567         0x917c, 0xffffffff, 0x00000007,
568         0x9180, 0xffffffff, 0x00060005,
569         0x9184, 0xffffffff, 0x00090008,
570         0x9188, 0xffffffff, 0x00030002,
571         0x918c, 0xffffffff, 0x00050004,
572         0x9190, 0xffffffff, 0x00000008,
573         0x9194, 0xffffffff, 0x00070006,
574         0x9198, 0xffffffff, 0x000a0009,
575         0x919c, 0xffffffff, 0x00040003,
576         0x91a0, 0xffffffff, 0x00060005,
577         0x91a4, 0xffffffff, 0x00000009,
578         0x91a8, 0xffffffff, 0x00080007,
579         0x91ac, 0xffffffff, 0x000b000a,
580         0x91b0, 0xffffffff, 0x00050004,
581         0x91b4, 0xffffffff, 0x00070006,
582         0x91b8, 0xffffffff, 0x0008000b,
583         0x91bc, 0xffffffff, 0x000a0009,
584         0x91c0, 0xffffffff, 0x000d000c,
585         0x91c4, 0xffffffff, 0x00060005,
586         0x91c8, 0xffffffff, 0x00080007,
587         0x91cc, 0xffffffff, 0x0000000b,
588         0x91d0, 0xffffffff, 0x000a0009,
589         0x91d4, 0xffffffff, 0x000d000c,
590         0x91d8, 0xffffffff, 0x00070006,
591         0x91dc, 0xffffffff, 0x00090008,
592         0x91e0, 0xffffffff, 0x0000000c,
593         0x91e4, 0xffffffff, 0x000b000a,
594         0x91e8, 0xffffffff, 0x000e000d,
595         0x91ec, 0xffffffff, 0x00080007,
596         0x91f0, 0xffffffff, 0x000a0009,
597         0x91f4, 0xffffffff, 0x0000000d,
598         0x91f8, 0xffffffff, 0x000c000b,
599         0x91fc, 0xffffffff, 0x000f000e,
600         0x9200, 0xffffffff, 0x00090008,
601         0x9204, 0xffffffff, 0x000b000a,
602         0x9208, 0xffffffff, 0x000c000f,
603         0x920c, 0xffffffff, 0x000e000d,
604         0x9210, 0xffffffff, 0x00110010,
605         0x9214, 0xffffffff, 0x000a0009,
606         0x9218, 0xffffffff, 0x000c000b,
607         0x921c, 0xffffffff, 0x0000000f,
608         0x9220, 0xffffffff, 0x000e000d,
609         0x9224, 0xffffffff, 0x00110010,
610         0x9228, 0xffffffff, 0x000b000a,
611         0x922c, 0xffffffff, 0x000d000c,
612         0x9230, 0xffffffff, 0x00000010,
613         0x9234, 0xffffffff, 0x000f000e,
614         0x9238, 0xffffffff, 0x00120011,
615         0x923c, 0xffffffff, 0x000c000b,
616         0x9240, 0xffffffff, 0x000e000d,
617         0x9244, 0xffffffff, 0x00000011,
618         0x9248, 0xffffffff, 0x0010000f,
619         0x924c, 0xffffffff, 0x00130012,
620         0x9250, 0xffffffff, 0x000d000c,
621         0x9254, 0xffffffff, 0x000f000e,
622         0x9258, 0xffffffff, 0x00100013,
623         0x925c, 0xffffffff, 0x00120011,
624         0x9260, 0xffffffff, 0x00150014,
625         0x9264, 0xffffffff, 0x000e000d,
626         0x9268, 0xffffffff, 0x0010000f,
627         0x926c, 0xffffffff, 0x00000013,
628         0x9270, 0xffffffff, 0x00120011,
629         0x9274, 0xffffffff, 0x00150014,
630         0x9278, 0xffffffff, 0x000f000e,
631         0x927c, 0xffffffff, 0x00110010,
632         0x9280, 0xffffffff, 0x00000014,
633         0x9284, 0xffffffff, 0x00130012,
634         0x9288, 0xffffffff, 0x00160015,
635         0x928c, 0xffffffff, 0x0010000f,
636         0x9290, 0xffffffff, 0x00120011,
637         0x9294, 0xffffffff, 0x00000015,
638         0x9298, 0xffffffff, 0x00140013,
639         0x929c, 0xffffffff, 0x00170016,
640         0x9150, 0xffffffff, 0x96940200,
641         0x8708, 0xffffffff, 0x00900100,
642         0xc478, 0xffffffff, 0x00000080,
643         0xc404, 0xffffffff, 0x0020003f,
644         0x30, 0xffffffff, 0x0000001c,
645         0x34, 0x000f0000, 0x000f0000,
646         0x160c, 0xffffffff, 0x00000100,
647         0x1024, 0xffffffff, 0x00000100,
648         0x102c, 0x00000101, 0x00000000,
649         0x20a8, 0xffffffff, 0x00000104,
650         0x264c, 0x000c0000, 0x000c0000,
651         0x2648, 0x000c0000, 0x000c0000,
652         0x55e4, 0xff000fff, 0x00000100,
653         0x55e8, 0x00000001, 0x00000001,
654         0x2f50, 0x00000001, 0x00000001,
655         0x30cc, 0xc0000fff, 0x00000104,
656         0xc1e4, 0x00000001, 0x00000001,
657         0xd0c0, 0xfffffff0, 0x00000100,
658         0xd8c0, 0xfffffff0, 0x00000100
659 };
660
/*
 * Clock-gating init sequence for PITCAIRN:
 * { register offset, and-mask, or-value } triples.  Same layout and
 * mostly the same rows as tahiti_mgcg_cgcg_init, with a shorter 0x91xx
 * range and without the 0x264c/0x2648 rows; presumably reflecting the
 * smaller chip configuration -- confirm against AMD reference tables.
 * Note 0x802c appears twice (start and mid-sequence), so order matters.
 */
661 static const u32 pitcairn_mgcg_cgcg_init[] =
662 {
663         0xc400, 0xffffffff, 0xfffffffc,
664         0x802c, 0xffffffff, 0xe0000000,
665         0x9a60, 0xffffffff, 0x00000100,
666         0x92a4, 0xffffffff, 0x00000100,
667         0xc164, 0xffffffff, 0x00000100,
668         0x9774, 0xffffffff, 0x00000100,
669         0x8984, 0xffffffff, 0x06000100,
670         0x8a18, 0xffffffff, 0x00000100,
671         0x92a0, 0xffffffff, 0x00000100,
672         0xc380, 0xffffffff, 0x00000100,
673         0x8b28, 0xffffffff, 0x00000100,
674         0x9144, 0xffffffff, 0x00000100,
675         0x8d88, 0xffffffff, 0x00000100,
676         0x8d8c, 0xffffffff, 0x00000100,
677         0x9030, 0xffffffff, 0x00000100,
678         0x9034, 0xffffffff, 0x00000100,
679         0x9038, 0xffffffff, 0x00000100,
680         0x903c, 0xffffffff, 0x00000100,
681         0xad80, 0xffffffff, 0x00000100,
682         0xac54, 0xffffffff, 0x00000100,
683         0x897c, 0xffffffff, 0x06000100,
684         0x9868, 0xffffffff, 0x00000100,
685         0x9510, 0xffffffff, 0x00000100,
686         0xaf04, 0xffffffff, 0x00000100,
687         0xae04, 0xffffffff, 0x00000100,
688         0x949c, 0xffffffff, 0x00000100,
689         0x802c, 0xffffffff, 0xe0000000,
690         0x9160, 0xffffffff, 0x00010000,
691         0x9164, 0xffffffff, 0x00030002,
692         0x9168, 0xffffffff, 0x00040007,
693         0x916c, 0xffffffff, 0x00060005,
694         0x9170, 0xffffffff, 0x00090008,
695         0x9174, 0xffffffff, 0x00020001,
696         0x9178, 0xffffffff, 0x00040003,
697         0x917c, 0xffffffff, 0x00000007,
698         0x9180, 0xffffffff, 0x00060005,
699         0x9184, 0xffffffff, 0x00090008,
700         0x9188, 0xffffffff, 0x00030002,
701         0x918c, 0xffffffff, 0x00050004,
702         0x9190, 0xffffffff, 0x00000008,
703         0x9194, 0xffffffff, 0x00070006,
704         0x9198, 0xffffffff, 0x000a0009,
705         0x919c, 0xffffffff, 0x00040003,
706         0x91a0, 0xffffffff, 0x00060005,
707         0x91a4, 0xffffffff, 0x00000009,
708         0x91a8, 0xffffffff, 0x00080007,
709         0x91ac, 0xffffffff, 0x000b000a,
710         0x91b0, 0xffffffff, 0x00050004,
711         0x91b4, 0xffffffff, 0x00070006,
712         0x91b8, 0xffffffff, 0x0008000b,
713         0x91bc, 0xffffffff, 0x000a0009,
714         0x91c0, 0xffffffff, 0x000d000c,
715         0x9200, 0xffffffff, 0x00090008,
716         0x9204, 0xffffffff, 0x000b000a,
717         0x9208, 0xffffffff, 0x000c000f,
718         0x920c, 0xffffffff, 0x000e000d,
719         0x9210, 0xffffffff, 0x00110010,
720         0x9214, 0xffffffff, 0x000a0009,
721         0x9218, 0xffffffff, 0x000c000b,
722         0x921c, 0xffffffff, 0x0000000f,
723         0x9220, 0xffffffff, 0x000e000d,
724         0x9224, 0xffffffff, 0x00110010,
725         0x9228, 0xffffffff, 0x000b000a,
726         0x922c, 0xffffffff, 0x000d000c,
727         0x9230, 0xffffffff, 0x00000010,
728         0x9234, 0xffffffff, 0x000f000e,
729         0x9238, 0xffffffff, 0x00120011,
730         0x923c, 0xffffffff, 0x000c000b,
731         0x9240, 0xffffffff, 0x000e000d,
732         0x9244, 0xffffffff, 0x00000011,
733         0x9248, 0xffffffff, 0x0010000f,
734         0x924c, 0xffffffff, 0x00130012,
735         0x9250, 0xffffffff, 0x000d000c,
736         0x9254, 0xffffffff, 0x000f000e,
737         0x9258, 0xffffffff, 0x00100013,
738         0x925c, 0xffffffff, 0x00120011,
739         0x9260, 0xffffffff, 0x00150014,
740         0x9150, 0xffffffff, 0x96940200,
741         0x8708, 0xffffffff, 0x00900100,
742         0xc478, 0xffffffff, 0x00000080,
743         0xc404, 0xffffffff, 0x0020003f,
744         0x30, 0xffffffff, 0x0000001c,
745         0x34, 0x000f0000, 0x000f0000,
746         0x160c, 0xffffffff, 0x00000100,
747         0x1024, 0xffffffff, 0x00000100,
748         0x102c, 0x00000101, 0x00000000,
749         0x20a8, 0xffffffff, 0x00000104,
750         0x55e4, 0xff000fff, 0x00000100,
751         0x55e8, 0x00000001, 0x00000001,
752         0x2f50, 0x00000001, 0x00000001,
753         0x30cc, 0xc0000fff, 0x00000104,
754         0xc1e4, 0x00000001, 0x00000001,
755         0xd0c0, 0xfffffff0, 0x00000100,
756         0xd8c0, 0xfffffff0, 0x00000100
757 };
758
759 static const u32 verde_mgcg_cgcg_init[] =
760 {
761         0xc400, 0xffffffff, 0xfffffffc,
762         0x802c, 0xffffffff, 0xe0000000,
763         0x9a60, 0xffffffff, 0x00000100,
764         0x92a4, 0xffffffff, 0x00000100,
765         0xc164, 0xffffffff, 0x00000100,
766         0x9774, 0xffffffff, 0x00000100,
767         0x8984, 0xffffffff, 0x06000100,
768         0x8a18, 0xffffffff, 0x00000100,
769         0x92a0, 0xffffffff, 0x00000100,
770         0xc380, 0xffffffff, 0x00000100,
771         0x8b28, 0xffffffff, 0x00000100,
772         0x9144, 0xffffffff, 0x00000100,
773         0x8d88, 0xffffffff, 0x00000100,
774         0x8d8c, 0xffffffff, 0x00000100,
775         0x9030, 0xffffffff, 0x00000100,
776         0x9034, 0xffffffff, 0x00000100,
777         0x9038, 0xffffffff, 0x00000100,
778         0x903c, 0xffffffff, 0x00000100,
779         0xad80, 0xffffffff, 0x00000100,
780         0xac54, 0xffffffff, 0x00000100,
781         0x897c, 0xffffffff, 0x06000100,
782         0x9868, 0xffffffff, 0x00000100,
783         0x9510, 0xffffffff, 0x00000100,
784         0xaf04, 0xffffffff, 0x00000100,
785         0xae04, 0xffffffff, 0x00000100,
786         0x949c, 0xffffffff, 0x00000100,
787         0x802c, 0xffffffff, 0xe0000000,
788         0x9160, 0xffffffff, 0x00010000,
789         0x9164, 0xffffffff, 0x00030002,
790         0x9168, 0xffffffff, 0x00040007,
791         0x916c, 0xffffffff, 0x00060005,
792         0x9170, 0xffffffff, 0x00090008,
793         0x9174, 0xffffffff, 0x00020001,
794         0x9178, 0xffffffff, 0x00040003,
795         0x917c, 0xffffffff, 0x00000007,
796         0x9180, 0xffffffff, 0x00060005,
797         0x9184, 0xffffffff, 0x00090008,
798         0x9188, 0xffffffff, 0x00030002,
799         0x918c, 0xffffffff, 0x00050004,
800         0x9190, 0xffffffff, 0x00000008,
801         0x9194, 0xffffffff, 0x00070006,
802         0x9198, 0xffffffff, 0x000a0009,
803         0x919c, 0xffffffff, 0x00040003,
804         0x91a0, 0xffffffff, 0x00060005,
805         0x91a4, 0xffffffff, 0x00000009,
806         0x91a8, 0xffffffff, 0x00080007,
807         0x91ac, 0xffffffff, 0x000b000a,
808         0x91b0, 0xffffffff, 0x00050004,
809         0x91b4, 0xffffffff, 0x00070006,
810         0x91b8, 0xffffffff, 0x0008000b,
811         0x91bc, 0xffffffff, 0x000a0009,
812         0x91c0, 0xffffffff, 0x000d000c,
813         0x9200, 0xffffffff, 0x00090008,
814         0x9204, 0xffffffff, 0x000b000a,
815         0x9208, 0xffffffff, 0x000c000f,
816         0x920c, 0xffffffff, 0x000e000d,
817         0x9210, 0xffffffff, 0x00110010,
818         0x9214, 0xffffffff, 0x000a0009,
819         0x9218, 0xffffffff, 0x000c000b,
820         0x921c, 0xffffffff, 0x0000000f,
821         0x9220, 0xffffffff, 0x000e000d,
822         0x9224, 0xffffffff, 0x00110010,
823         0x9228, 0xffffffff, 0x000b000a,
824         0x922c, 0xffffffff, 0x000d000c,
825         0x9230, 0xffffffff, 0x00000010,
826         0x9234, 0xffffffff, 0x000f000e,
827         0x9238, 0xffffffff, 0x00120011,
828         0x923c, 0xffffffff, 0x000c000b,
829         0x9240, 0xffffffff, 0x000e000d,
830         0x9244, 0xffffffff, 0x00000011,
831         0x9248, 0xffffffff, 0x0010000f,
832         0x924c, 0xffffffff, 0x00130012,
833         0x9250, 0xffffffff, 0x000d000c,
834         0x9254, 0xffffffff, 0x000f000e,
835         0x9258, 0xffffffff, 0x00100013,
836         0x925c, 0xffffffff, 0x00120011,
837         0x9260, 0xffffffff, 0x00150014,
838         0x9150, 0xffffffff, 0x96940200,
839         0x8708, 0xffffffff, 0x00900100,
840         0xc478, 0xffffffff, 0x00000080,
841         0xc404, 0xffffffff, 0x0020003f,
842         0x30, 0xffffffff, 0x0000001c,
843         0x34, 0x000f0000, 0x000f0000,
844         0x160c, 0xffffffff, 0x00000100,
845         0x1024, 0xffffffff, 0x00000100,
846         0x102c, 0x00000101, 0x00000000,
847         0x20a8, 0xffffffff, 0x00000104,
848         0x264c, 0x000c0000, 0x000c0000,
849         0x2648, 0x000c0000, 0x000c0000,
850         0x55e4, 0xff000fff, 0x00000100,
851         0x55e8, 0x00000001, 0x00000001,
852         0x2f50, 0x00000001, 0x00000001,
853         0x30cc, 0xc0000fff, 0x00000104,
854         0xc1e4, 0x00000001, 0x00000001,
855         0xd0c0, 0xfffffff0, 0x00000100,
856         0xd8c0, 0xfffffff0, 0x00000100
857 };
858
/*
 * Clock-gating init sequence for Oland, consumed three-at-a-time as
 * { register offset, bitmask, value } triples by
 * radeon_program_register_sequence() from si_init_golden_registers().
 * "mgcg/cgcg" presumably stands for medium-/coarse-grain clock gating --
 * TODO confirm.  Hardware-specific golden values; do not hand-edit.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
938
/*
 * Clock-gating init sequence for Hainan, consumed three-at-a-time as
 * { register offset, bitmask, value } triples by
 * radeon_program_register_sequence() from si_init_golden_registers().
 * Nearly identical to the Oland table but omits a few entries (Hainan has
 * no display block -- presumably the reason; TODO confirm).  Hardware
 * golden values; do not hand-edit.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1015
1016 static u32 verde_pg_init[] =
1017 {
1018         0x353c, 0xffffffff, 0x40000,
1019         0x3538, 0xffffffff, 0x200010ff,
1020         0x353c, 0xffffffff, 0x0,
1021         0x353c, 0xffffffff, 0x0,
1022         0x353c, 0xffffffff, 0x0,
1023         0x353c, 0xffffffff, 0x0,
1024         0x353c, 0xffffffff, 0x0,
1025         0x353c, 0xffffffff, 0x7007,
1026         0x3538, 0xffffffff, 0x300010ff,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x400000,
1033         0x3538, 0xffffffff, 0x100010ff,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x120200,
1040         0x3538, 0xffffffff, 0x500010ff,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x1e1e16,
1047         0x3538, 0xffffffff, 0x600010ff,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x171f1e,
1054         0x3538, 0xffffffff, 0x700010ff,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x3538, 0xffffffff, 0x9ff,
1062         0x3500, 0xffffffff, 0x0,
1063         0x3504, 0xffffffff, 0x10000800,
1064         0x3504, 0xffffffff, 0xf,
1065         0x3504, 0xffffffff, 0xf,
1066         0x3500, 0xffffffff, 0x4,
1067         0x3504, 0xffffffff, 0x1000051e,
1068         0x3504, 0xffffffff, 0xffff,
1069         0x3504, 0xffffffff, 0xffff,
1070         0x3500, 0xffffffff, 0x8,
1071         0x3504, 0xffffffff, 0x80500,
1072         0x3500, 0xffffffff, 0x12,
1073         0x3504, 0xffffffff, 0x9050c,
1074         0x3500, 0xffffffff, 0x1d,
1075         0x3504, 0xffffffff, 0xb052c,
1076         0x3500, 0xffffffff, 0x2a,
1077         0x3504, 0xffffffff, 0x1053e,
1078         0x3500, 0xffffffff, 0x2d,
1079         0x3504, 0xffffffff, 0x10546,
1080         0x3500, 0xffffffff, 0x30,
1081         0x3504, 0xffffffff, 0xa054e,
1082         0x3500, 0xffffffff, 0x3c,
1083         0x3504, 0xffffffff, 0x1055f,
1084         0x3500, 0xffffffff, 0x3f,
1085         0x3504, 0xffffffff, 0x10567,
1086         0x3500, 0xffffffff, 0x42,
1087         0x3504, 0xffffffff, 0x1056f,
1088         0x3500, 0xffffffff, 0x45,
1089         0x3504, 0xffffffff, 0x10572,
1090         0x3500, 0xffffffff, 0x48,
1091         0x3504, 0xffffffff, 0x20575,
1092         0x3500, 0xffffffff, 0x4c,
1093         0x3504, 0xffffffff, 0x190801,
1094         0x3500, 0xffffffff, 0x67,
1095         0x3504, 0xffffffff, 0x1082a,
1096         0x3500, 0xffffffff, 0x6a,
1097         0x3504, 0xffffffff, 0x1b082d,
1098         0x3500, 0xffffffff, 0x87,
1099         0x3504, 0xffffffff, 0x310851,
1100         0x3500, 0xffffffff, 0xba,
1101         0x3504, 0xffffffff, 0x891,
1102         0x3500, 0xffffffff, 0xbc,
1103         0x3504, 0xffffffff, 0x893,
1104         0x3500, 0xffffffff, 0xbe,
1105         0x3504, 0xffffffff, 0x20895,
1106         0x3500, 0xffffffff, 0xc2,
1107         0x3504, 0xffffffff, 0x20899,
1108         0x3500, 0xffffffff, 0xc6,
1109         0x3504, 0xffffffff, 0x2089d,
1110         0x3500, 0xffffffff, 0xca,
1111         0x3504, 0xffffffff, 0x8a1,
1112         0x3500, 0xffffffff, 0xcc,
1113         0x3504, 0xffffffff, 0x8a3,
1114         0x3500, 0xffffffff, 0xce,
1115         0x3504, 0xffffffff, 0x308a5,
1116         0x3500, 0xffffffff, 0xd3,
1117         0x3504, 0xffffffff, 0x6d08cd,
1118         0x3500, 0xffffffff, 0x142,
1119         0x3504, 0xffffffff, 0x2000095a,
1120         0x3504, 0xffffffff, 0x1,
1121         0x3500, 0xffffffff, 0x144,
1122         0x3504, 0xffffffff, 0x301f095b,
1123         0x3500, 0xffffffff, 0x165,
1124         0x3504, 0xffffffff, 0xc094d,
1125         0x3500, 0xffffffff, 0x173,
1126         0x3504, 0xffffffff, 0xf096d,
1127         0x3500, 0xffffffff, 0x184,
1128         0x3504, 0xffffffff, 0x15097f,
1129         0x3500, 0xffffffff, 0x19b,
1130         0x3504, 0xffffffff, 0xc0998,
1131         0x3500, 0xffffffff, 0x1a9,
1132         0x3504, 0xffffffff, 0x409a7,
1133         0x3500, 0xffffffff, 0x1af,
1134         0x3504, 0xffffffff, 0xcdc,
1135         0x3500, 0xffffffff, 0x1b1,
1136         0x3504, 0xffffffff, 0x800,
1137         0x3508, 0xffffffff, 0x6c9b2000,
1138         0x3510, 0xfc00, 0x2000,
1139         0x3544, 0xffffffff, 0xfc0,
1140         0x28d4, 0x00000100, 0x100
1141 };
1142
1143 static void si_init_golden_registers(struct radeon_device *rdev)
1144 {
1145         switch (rdev->family) {
1146         case CHIP_TAHITI:
1147                 radeon_program_register_sequence(rdev,
1148                                                  tahiti_golden_registers,
1149                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1150                 radeon_program_register_sequence(rdev,
1151                                                  tahiti_golden_rlc_registers,
1152                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1153                 radeon_program_register_sequence(rdev,
1154                                                  tahiti_mgcg_cgcg_init,
1155                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1156                 radeon_program_register_sequence(rdev,
1157                                                  tahiti_golden_registers2,
1158                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1159                 break;
1160         case CHIP_PITCAIRN:
1161                 radeon_program_register_sequence(rdev,
1162                                                  pitcairn_golden_registers,
1163                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1164                 radeon_program_register_sequence(rdev,
1165                                                  pitcairn_golden_rlc_registers,
1166                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1167                 radeon_program_register_sequence(rdev,
1168                                                  pitcairn_mgcg_cgcg_init,
1169                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1170                 break;
1171         case CHIP_VERDE:
1172                 radeon_program_register_sequence(rdev,
1173                                                  verde_golden_registers,
1174                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1175                 radeon_program_register_sequence(rdev,
1176                                                  verde_golden_rlc_registers,
1177                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1178                 radeon_program_register_sequence(rdev,
1179                                                  verde_mgcg_cgcg_init,
1180                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1181                 radeon_program_register_sequence(rdev,
1182                                                  verde_pg_init,
1183                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1184                 break;
1185         case CHIP_OLAND:
1186                 radeon_program_register_sequence(rdev,
1187                                                  oland_golden_registers,
1188                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1189                 radeon_program_register_sequence(rdev,
1190                                                  oland_golden_rlc_registers,
1191                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1192                 radeon_program_register_sequence(rdev,
1193                                                  oland_mgcg_cgcg_init,
1194                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1195                 break;
1196         case CHIP_HAINAN:
1197                 radeon_program_register_sequence(rdev,
1198                                                  hainan_golden_registers,
1199                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1200                 radeon_program_register_sequence(rdev,
1201                                                  hainan_golden_registers2,
1202                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1203                 radeon_program_register_sequence(rdev,
1204                                                  hainan_mgcg_cgcg_init,
1205                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1206                 break;
1207         default:
1208                 break;
1209         }
1210 }
1211
1212 #define PCIE_BUS_CLK                10000
1213 #define TCLK                        (PCIE_BUS_CLK / 10)
1214
1215 /**
1216  * si_get_xclk - get the xclk
1217  *
1218  * @rdev: radeon_device pointer
1219  *
1220  * Returns the reference clock used by the gfx engine
1221  * (SI).
1222  */
1223 u32 si_get_xclk(struct radeon_device *rdev)
1224 {
1225         u32 reference_clock = rdev->clock.spll.reference_freq;
1226         u32 tmp;
1227
1228         tmp = RREG32(CG_CLKPIN_CNTL_2);
1229         if (tmp & MUX_TCLK_TO_XCLK)
1230                 return TCLK;
1231
1232         tmp = RREG32(CG_CLKPIN_CNTL);
1233         if (tmp & XTALIN_DIVIDE)
1234                 return reference_clock / 4;
1235
1236         return reference_clock;
1237 }
1238
1239 /* get temperature in millidegrees */
1240 int si_get_temp(struct radeon_device *rdev)
1241 {
1242         u32 temp;
1243         int actual_temp = 0;
1244
1245         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1246                 CTF_TEMP_SHIFT;
1247
1248         if (temp & 0x200)
1249                 actual_temp = 255;
1250         else
1251                 actual_temp = temp & 0x1ff;
1252
1253         actual_temp = (actual_temp * 1000);
1254
1255         return actual_temp;
1256 }
1257
1258 #define TAHITI_IO_MC_REGS_SIZE 36
1259
/*
 * MC sequencer IO debug settings for Tahiti: { MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA } pairs written by si_mc_load_microcode() before
 * the MC firmware image is streamed in.  Hardware-specific values; do
 * not hand-edit.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1298
/*
 * MC sequencer IO debug settings for Pitcairn: { MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA } pairs written by si_mc_load_microcode() before
 * the MC firmware image is streamed in.  Identical to the Tahiti table
 * except the final entry.  Hardware-specific values; do not hand-edit.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1337
/*
 * MC sequencer IO debug settings for Verde: { MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA } pairs written by si_mc_load_microcode() before
 * the MC firmware image is streamed in.  Identical to the Tahiti table
 * except the final entry.  Hardware-specific values; do not hand-edit.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1376
/*
 * MC sequencer IO debug settings for Oland: { MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA } pairs written by si_mc_load_microcode() before
 * the MC firmware image is streamed in.  Identical to the Tahiti table
 * except the final entry.  Hardware-specific values; do not hand-edit.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1415
/*
 * MC sequencer IO debug settings for Hainan: { MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA } pairs written by si_mc_load_microcode() before
 * the MC firmware image is streamed in.  Identical to the Tahiti table
 * except the final entry.  Hardware-specific values; do not hand-edit.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1454
1455 /* ucode loading */
1456 static int si_mc_load_microcode(struct radeon_device *rdev)
1457 {
1458         const __be32 *fw_data;
1459         u32 running, blackout = 0;
1460         u32 *io_mc_regs;
1461         int i, ucode_size, regs_size;
1462
1463         if (!rdev->mc_fw)
1464                 return -EINVAL;
1465
1466         switch (rdev->family) {
1467         case CHIP_TAHITI:
1468                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1469                 ucode_size = SI_MC_UCODE_SIZE;
1470                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1471                 break;
1472         case CHIP_PITCAIRN:
1473                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1474                 ucode_size = SI_MC_UCODE_SIZE;
1475                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1476                 break;
1477         case CHIP_VERDE:
1478         default:
1479                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1480                 ucode_size = SI_MC_UCODE_SIZE;
1481                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1482                 break;
1483         case CHIP_OLAND:
1484                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1485                 ucode_size = OLAND_MC_UCODE_SIZE;
1486                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1487                 break;
1488         case CHIP_HAINAN:
1489                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1490                 ucode_size = OLAND_MC_UCODE_SIZE;
1491                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1492                 break;
1493         }
1494
1495         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1496
1497         if (running == 0) {
1498                 if (running) {
1499                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1500                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1501                 }
1502
1503                 /* reset the engine and set to writable */
1504                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1505                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1506
1507                 /* load mc io regs */
1508                 for (i = 0; i < regs_size; i++) {
1509                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1510                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1511                 }
1512                 /* load the MC ucode */
1513                 fw_data = (const __be32 *)rdev->mc_fw->data;
1514                 for (i = 0; i < ucode_size; i++)
1515                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1516
1517                 /* put the engine back into the active state */
1518                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1519                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1520                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1521
1522                 /* wait for training to complete */
1523                 for (i = 0; i < rdev->usec_timeout; i++) {
1524                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1525                                 break;
1526                         udelay(1);
1527                 }
1528                 for (i = 0; i < rdev->usec_timeout; i++) {
1529                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1530                                 break;
1531                         udelay(1);
1532                 }
1533
1534                 if (running)
1535                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1536         }
1537
1538         return 0;
1539 }
1540
1541 static int si_init_microcode(struct radeon_device *rdev)
1542 {
1543         const char *chip_name;
1544         const char *rlc_chip_name;
1545         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1546         size_t smc_req_size;
1547         char fw_name[30];
1548         int err;
1549
1550         DRM_DEBUG("\n");
1551
1552         switch (rdev->family) {
1553         case CHIP_TAHITI:
1554                 chip_name = "TAHITI";
1555                 rlc_chip_name = "TAHITI";
1556                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1557                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1558                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1559                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1560                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1561                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1562                 break;
1563         case CHIP_PITCAIRN:
1564                 chip_name = "PITCAIRN";
1565                 rlc_chip_name = "PITCAIRN";
1566                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1567                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1568                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1569                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1570                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1571                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1572                 break;
1573         case CHIP_VERDE:
1574                 chip_name = "VERDE";
1575                 rlc_chip_name = "VERDE";
1576                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1577                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1578                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1579                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1580                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1581                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1582                 break;
1583         case CHIP_OLAND:
1584                 chip_name = "OLAND";
1585                 rlc_chip_name = "OLAND";
1586                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1587                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1588                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1589                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1590                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1591                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1592                 break;
1593         case CHIP_HAINAN:
1594                 chip_name = "HAINAN";
1595                 rlc_chip_name = "HAINAN";
1596                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1597                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1598                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1599                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1600                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1601                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1602                 break;
1603         default: BUG();
1604         }
1605
1606         DRM_INFO("Loading %s Microcode\n", chip_name);
1607
1608         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1609         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1610         if (err)
1611                 goto out;
1612         if (rdev->pfp_fw->size != pfp_req_size) {
1613                 printk(KERN_ERR
1614                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1615                        rdev->pfp_fw->size, fw_name);
1616                 err = -EINVAL;
1617                 goto out;
1618         }
1619
1620         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1621         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1622         if (err)
1623                 goto out;
1624         if (rdev->me_fw->size != me_req_size) {
1625                 printk(KERN_ERR
1626                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627                        rdev->me_fw->size, fw_name);
1628                 err = -EINVAL;
1629         }
1630
1631         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1632         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1633         if (err)
1634                 goto out;
1635         if (rdev->ce_fw->size != ce_req_size) {
1636                 printk(KERN_ERR
1637                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1638                        rdev->ce_fw->size, fw_name);
1639                 err = -EINVAL;
1640         }
1641
1642         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1643         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1644         if (err)
1645                 goto out;
1646         if (rdev->rlc_fw->size != rlc_req_size) {
1647                 printk(KERN_ERR
1648                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1649                        rdev->rlc_fw->size, fw_name);
1650                 err = -EINVAL;
1651         }
1652
1653         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1654         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1655         if (err)
1656                 goto out;
1657         if (rdev->mc_fw->size != mc_req_size) {
1658                 printk(KERN_ERR
1659                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1660                        rdev->mc_fw->size, fw_name);
1661                 err = -EINVAL;
1662         }
1663
1664         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1665         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1666         if (err)
1667                 goto out;
1668         if (rdev->smc_fw->size != smc_req_size) {
1669                 printk(KERN_ERR
1670                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1671                        rdev->smc_fw->size, fw_name);
1672                 err = -EINVAL;
1673         }
1674
1675 out:
1676         if (err) {
1677                 if (err != -EINVAL)
1678                         printk(KERN_ERR
1679                                "si_cp: Failed to load firmware \"%s\"\n",
1680                                fw_name);
1681                 release_firmware(rdev->pfp_fw);
1682                 rdev->pfp_fw = NULL;
1683                 release_firmware(rdev->me_fw);
1684                 rdev->me_fw = NULL;
1685                 release_firmware(rdev->ce_fw);
1686                 rdev->ce_fw = NULL;
1687                 release_firmware(rdev->rlc_fw);
1688                 rdev->rlc_fw = NULL;
1689                 release_firmware(rdev->mc_fw);
1690                 rdev->mc_fw = NULL;
1691                 release_firmware(rdev->smc_fw);
1692                 rdev->smc_fw = NULL;
1693         }
1694         return err;
1695 }
1696
1697 /* watermark setup */
1698 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1699                                    struct radeon_crtc *radeon_crtc,
1700                                    struct drm_display_mode *mode,
1701                                    struct drm_display_mode *other_mode)
1702 {
1703         u32 tmp;
1704         /*
1705          * Line Buffer Setup
1706          * There are 3 line buffers, each one shared by 2 display controllers.
1707          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1708          * the display controllers.  The paritioning is done via one of four
1709          * preset allocations specified in bits 21:20:
1710          *  0 - half lb
1711          *  2 - whole lb, other crtc must be disabled
1712          */
1713         /* this can get tricky if we have two large displays on a paired group
1714          * of crtcs.  Ideally for multiple large displays we'd assign them to
1715          * non-linked crtcs for maximum line buffer allocation.
1716          */
1717         if (radeon_crtc->base.enabled && mode) {
1718                 if (other_mode)
1719                         tmp = 0; /* 1/2 */
1720                 else
1721                         tmp = 2; /* whole */
1722         } else
1723                 tmp = 0;
1724
1725         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1726                DC_LB_MEMORY_CONFIG(tmp));
1727
1728         if (radeon_crtc->base.enabled && mode) {
1729                 switch (tmp) {
1730                 case 0:
1731                 default:
1732                         return 4096 * 2;
1733                 case 2:
1734                         return 8192 * 2;
1735                 }
1736         }
1737
1738         /* controller not enabled, so no lb used */
1739         return 0;
1740 }
1741
1742 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1743 {
1744         u32 tmp = RREG32(MC_SHARED_CHMAP);
1745
1746         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1747         case 0:
1748         default:
1749                 return 1;
1750         case 1:
1751                 return 2;
1752         case 2:
1753                 return 4;
1754         case 3:
1755                 return 8;
1756         case 4:
1757                 return 3;
1758         case 5:
1759                 return 6;
1760         case 6:
1761                 return 10;
1762         case 7:
1763                 return 12;
1764         case 8:
1765                 return 16;
1766         }
1767 }
1768
/* Input parameters describing a single display head for the DCE6
 * display watermark calculations.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1784
1785 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1786 {
1787         /* Calculate raw DRAM Bandwidth */
1788         fixed20_12 dram_efficiency; /* 0.7 */
1789         fixed20_12 yclk, dram_channels, bandwidth;
1790         fixed20_12 a;
1791
1792         a.full = dfixed_const(1000);
1793         yclk.full = dfixed_const(wm->yclk);
1794         yclk.full = dfixed_div(yclk, a);
1795         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1796         a.full = dfixed_const(10);
1797         dram_efficiency.full = dfixed_const(7);
1798         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1799         bandwidth.full = dfixed_mul(dram_channels, yclk);
1800         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1801
1802         return dfixed_trunc(bandwidth);
1803 }
1804
1805 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1806 {
1807         /* Calculate DRAM Bandwidth and the part allocated to display. */
1808         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1809         fixed20_12 yclk, dram_channels, bandwidth;
1810         fixed20_12 a;
1811
1812         a.full = dfixed_const(1000);
1813         yclk.full = dfixed_const(wm->yclk);
1814         yclk.full = dfixed_div(yclk, a);
1815         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1816         a.full = dfixed_const(10);
1817         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1818         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1819         bandwidth.full = dfixed_mul(dram_channels, yclk);
1820         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1821
1822         return dfixed_trunc(bandwidth);
1823 }
1824
1825 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1826 {
1827         /* Calculate the display Data return Bandwidth */
1828         fixed20_12 return_efficiency; /* 0.8 */
1829         fixed20_12 sclk, bandwidth;
1830         fixed20_12 a;
1831
1832         a.full = dfixed_const(1000);
1833         sclk.full = dfixed_const(wm->sclk);
1834         sclk.full = dfixed_div(sclk, a);
1835         a.full = dfixed_const(10);
1836         return_efficiency.full = dfixed_const(8);
1837         return_efficiency.full = dfixed_div(return_efficiency, a);
1838         a.full = dfixed_const(32);
1839         bandwidth.full = dfixed_mul(a, sclk);
1840         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1841
1842         return dfixed_trunc(bandwidth);
1843 }
1844
1845 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1846 {
1847         return 32;
1848 }
1849
1850 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1851 {
1852         /* Calculate the DMIF Request Bandwidth */
1853         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1854         fixed20_12 disp_clk, sclk, bandwidth;
1855         fixed20_12 a, b1, b2;
1856         u32 min_bandwidth;
1857
1858         a.full = dfixed_const(1000);
1859         disp_clk.full = dfixed_const(wm->disp_clk);
1860         disp_clk.full = dfixed_div(disp_clk, a);
1861         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1862         b1.full = dfixed_mul(a, disp_clk);
1863
1864         a.full = dfixed_const(1000);
1865         sclk.full = dfixed_const(wm->sclk);
1866         sclk.full = dfixed_div(sclk, a);
1867         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1868         b2.full = dfixed_mul(a, sclk);
1869
1870         a.full = dfixed_const(10);
1871         disp_clk_request_efficiency.full = dfixed_const(8);
1872         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1873
1874         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1875
1876         a.full = dfixed_const(min_bandwidth);
1877         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1878
1879         return dfixed_trunc(bandwidth);
1880 }
1881
1882 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1883 {
1884         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1885         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1886         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1887         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1888
1889         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1890 }
1891
1892 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1893 {
1894         /* Calculate the display mode Average Bandwidth
1895          * DisplayMode should contain the source and destination dimensions,
1896          * timing, etc.
1897          */
1898         fixed20_12 bpp;
1899         fixed20_12 line_time;
1900         fixed20_12 src_width;
1901         fixed20_12 bandwidth;
1902         fixed20_12 a;
1903
1904         a.full = dfixed_const(1000);
1905         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1906         line_time.full = dfixed_div(line_time, a);
1907         bpp.full = dfixed_const(wm->bytes_per_pixel);
1908         src_width.full = dfixed_const(wm->src_width);
1909         bandwidth.full = dfixed_mul(src_width, bpp);
1910         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1911         bandwidth.full = dfixed_div(bandwidth, line_time);
1912
1913         return dfixed_trunc(bandwidth);
1914 }
1915
/* Compute the latency watermark (in ns) for one display head: how long
 * the head can tolerate having its data requests stalled before it
 * underflows, given the shared bandwidth and the line buffer fill rate.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time for a worst-case 512B*8 chunk to come back at the available rate */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* time for a 128B*4 cursor line pair to come back */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* data return delay caused by the other active heads sharing the memory path */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to hide latency for */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling / many scaler taps / interlacing needs more
	 * source lines fetched per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's fair share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk): the rate at
	 * which the DMIF buffer could be drained over the latency window
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk(MHz) * bytes_per_pixel: what the display side can consume */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is limited by the slowest link */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the needed source lines at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the lb refills faster than a line scans out, the raw latency
	 * stands; otherwise the shortfall adds to it
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1978
1979 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1980 {
1981         if (dce6_average_bandwidth(wm) <=
1982             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1983                 return true;
1984         else
1985                 return false;
1986 };
1987
1988 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1989 {
1990         if (dce6_average_bandwidth(wm) <=
1991             (dce6_available_bandwidth(wm) / wm->num_heads))
1992                 return true;
1993         else
1994                 return false;
1995 };
1996
1997 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1998 {
1999         u32 lb_partitions = wm->lb_size / wm->src_width;
2000         u32 line_time = wm->active_time + wm->blank_time;
2001         u32 latency_tolerant_lines;
2002         u32 latency_hiding;
2003         fixed20_12 a;
2004
2005         a.full = dfixed_const(1);
2006         if (wm->vsc.full > a.full)
2007                 latency_tolerant_lines = 1;
2008         else {
2009                 if (lb_partitions <= (wm->vtaps + 1))
2010                         latency_tolerant_lines = 1;
2011                 else
2012                         latency_tolerant_lines = 2;
2013         }
2014
2015         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2016
2017         if (dce6_latency_watermark(wm) <= latency_hiding)
2018                 return true;
2019         else
2020                 return false;
2021 }
2022
/* Program the DCE6 latency watermarks and arbitration priority marks
 * for one crtc.  Watermark set A covers the high (current/DPM-high)
 * clocks, set B the low clocks; the computed values are also cached on
 * the crtc for later use by DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* line time in us, capped at the 16-bit register maximum */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a(ns) * pixel clock * hsc,
		 * converted to 16-pixel units
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same conversion with the low-clock watermark */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2182
2183 void dce6_bandwidth_update(struct radeon_device *rdev)
2184 {
2185         struct drm_display_mode *mode0 = NULL;
2186         struct drm_display_mode *mode1 = NULL;
2187         u32 num_heads = 0, lb_size;
2188         int i;
2189
2190         radeon_update_display_priority(rdev);
2191
2192         for (i = 0; i < rdev->num_crtc; i++) {
2193                 if (rdev->mode_info.crtcs[i]->base.enabled)
2194                         num_heads++;
2195         }
2196         for (i = 0; i < rdev->num_crtc; i += 2) {
2197                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2198                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2199                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2200                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2201                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2202                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2203         }
2204 }
2205
2206 /*
2207  * Core functions
2208  */
2209 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2210 {
2211         const u32 num_tile_mode_states = 32;
2212         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2213
2214         switch (rdev->config.si.mem_row_size_in_kb) {
2215         case 1:
2216                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2217                 break;
2218         case 2:
2219         default:
2220                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2221                 break;
2222         case 4:
2223                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2224                 break;
2225         }
2226
2227         if ((rdev->family == CHIP_TAHITI) ||
2228             (rdev->family == CHIP_PITCAIRN)) {
2229                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2230                         switch (reg_offset) {
2231                         case 0:  /* non-AA compressed depth or any compressed stencil */
2232                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2234                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2235                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2236                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2237                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2240                                 break;
2241                         case 1:  /* 2xAA/4xAA compressed depth only */
2242                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2244                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2245                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2246                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2247                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2250                                 break;
2251                         case 2:  /* 8xAA compressed depth only */
2252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2255                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2256                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2257                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2260                                 break;
2261                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2262                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2264                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2265                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2266                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2267                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2270                                 break;
2271                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2275                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2276                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2277                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2279                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2280                                 break;
2281                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2282                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2284                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2285                                                  TILE_SPLIT(split_equal_to_row_size) |
2286                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2287                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2289                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2290                                 break;
2291                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2292                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2293                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2294                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2295                                                  TILE_SPLIT(split_equal_to_row_size) |
2296                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2297                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2300                                 break;
2301                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2302                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2304                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2305                                                  TILE_SPLIT(split_equal_to_row_size) |
2306                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2307                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2309                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2310                                 break;
2311                         case 8:  /* 1D and 1D Array Surfaces */
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2313                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2314                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2315                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2316                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2317                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2318                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2319                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2320                                 break;
2321                         case 9:  /* Displayable maps. */
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2326                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2327                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2329                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2330                                 break;
2331                         case 10:  /* Display 8bpp. */
2332                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2334                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2335                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2336                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2337                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2338                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2339                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2340                                 break;
2341                         case 11:  /* Display 16bpp. */
2342                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2344                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2345                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2347                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2349                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2350                                 break;
2351                         case 12:  /* Display 32bpp. */
2352                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2355                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2356                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2357                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2360                                 break;
2361                         case 13:  /* Thin. */
2362                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2363                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2364                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2365                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2366                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2367                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2370                                 break;
2371                         case 14:  /* Thin 8 bpp. */
2372                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2374                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2375                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2376                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2377                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2378                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2380                                 break;
2381                         case 15:  /* Thin 16 bpp. */
2382                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2384                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2385                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2386                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2387                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2390                                 break;
2391                         case 16:  /* Thin 32 bpp. */
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2395                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2396                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2397                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2400                                 break;
2401                         case 17:  /* Thin 64 bpp. */
2402                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2404                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2405                                                  TILE_SPLIT(split_equal_to_row_size) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2407                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2410                                 break;
2411                         case 21:  /* 8 bpp PRT. */
2412                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2414                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2415                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2416                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2417                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2418                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2420                                 break;
2421                         case 22:  /* 16 bpp PRT */
2422                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2424                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2425                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2426                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2427                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2429                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2430                                 break;
2431                         case 23:  /* 32 bpp PRT */
2432                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2434                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2435                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2436                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2437                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2440                                 break;
2441                         case 24:  /* 64 bpp PRT */
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2445                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2446                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2447                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2449                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2450                                 break;
2451                         case 25:  /* 128 bpp PRT */
2452                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2454                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2455                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2456                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2457                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2460                                 break;
2461                         default:
2462                                 gb_tile_moden = 0;
2463                                 break;
2464                         }
2465                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2466                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2467                 }
2468         } else if ((rdev->family == CHIP_VERDE) ||
2469                    (rdev->family == CHIP_OLAND) ||
2470                    (rdev->family == CHIP_HAINAN)) {
2471                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2472                         switch (reg_offset) {
2473                         case 0:  /* non-AA compressed depth or any compressed stencil */
2474                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2475                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2476                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2477                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2478                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2479                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2482                                 break;
2483                         case 1:  /* 2xAA/4xAA compressed depth only */
2484                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2486                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2487                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2488                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2489                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2491                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2492                                 break;
2493                         case 2:  /* 8xAA compressed depth only */
2494                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2496                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2497                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2498                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2499                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2501                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2502                                 break;
2503                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2504                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2506                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2507                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2508                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2509                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2512                                 break;
2513                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2514                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2517                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2518                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2519                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2521                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2522                                 break;
2523                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2524                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2527                                                  TILE_SPLIT(split_equal_to_row_size) |
2528                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2529                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2531                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532                                 break;
2533                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2534                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2536                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2537                                                  TILE_SPLIT(split_equal_to_row_size) |
2538                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2539                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2542                                 break;
2543                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2544                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2547                                                  TILE_SPLIT(split_equal_to_row_size) |
2548                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2549                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2552                                 break;
2553                         case 8:  /* 1D and 1D Array Surfaces */
2554                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2555                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2557                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2558                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2559                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2562                                 break;
2563                         case 9:  /* Displayable maps. */
2564                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2565                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2568                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2569                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2572                                 break;
2573                         case 10:  /* Display 8bpp. */
2574                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2576                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2577                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2578                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2579                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2582                                 break;
2583                         case 11:  /* Display 16bpp. */
2584                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2587                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2588                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2589                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2592                                 break;
2593                         case 12:  /* Display 32bpp. */
2594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2597                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2598                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2599                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2602                                 break;
2603                         case 13:  /* Thin. */
2604                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2605                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2606                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2609                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2612                                 break;
2613                         case 14:  /* Thin 8 bpp. */
2614                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2616                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2617                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2619                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2622                                 break;
2623                         case 15:  /* Thin 16 bpp. */
2624                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2626                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2628                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2629                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2631                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2632                                 break;
2633                         case 16:  /* Thin 32 bpp. */
2634                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2635                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2637                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2638                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2639                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2642                                 break;
2643                         case 17:  /* Thin 64 bpp. */
2644                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2646                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2647                                                  TILE_SPLIT(split_equal_to_row_size) |
2648                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2649                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2651                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2652                                 break;
2653                         case 21:  /* 8 bpp PRT. */
2654                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2656                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2657                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2658                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2659                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2660                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2662                                 break;
2663                         case 22:  /* 16 bpp PRT */
2664                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2667                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2668                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2669                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2671                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2672                                 break;
2673                         case 23:  /* 32 bpp PRT */
2674                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2676                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2677                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2678                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2679                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2682                                 break;
2683                         case 24:  /* 64 bpp PRT */
2684                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2686                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2687                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2689                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2691                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2692                                 break;
2693                         case 25:  /* 128 bpp PRT */
2694                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2698                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2699                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2701                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2702                                 break;
2703                         default:
2704                                 gb_tile_moden = 0;
2705                                 break;
2706                         }
2707                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2708                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2709                 }
2710         } else
2711                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2712 }
2713
2714 static void si_select_se_sh(struct radeon_device *rdev,
2715                             u32 se_num, u32 sh_num)
2716 {
2717         u32 data = INSTANCE_BROADCAST_WRITES;
2718
2719         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2720                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2721         else if (se_num == 0xffffffff)
2722                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2723         else if (sh_num == 0xffffffff)
2724                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2725         else
2726                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2727         WREG32(GRBM_GFX_INDEX, data);
2728 }
2729
2730 static u32 si_create_bitmask(u32 bit_width)
2731 {
2732         u32 i, mask = 0;
2733
2734         for (i = 0; i < bit_width; i++) {
2735                 mask <<= 1;
2736                 mask |= 1;
2737         }
2738         return mask;
2739 }
2740
2741 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2742 {
2743         u32 data, mask;
2744
2745         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2746         if (data & 1)
2747                 data &= INACTIVE_CUS_MASK;
2748         else
2749                 data = 0;
2750         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2751
2752         data >>= INACTIVE_CUS_SHIFT;
2753
2754         mask = si_create_bitmask(cu_per_sh);
2755
2756         return ~data & mask;
2757 }
2758
2759 static void si_setup_spi(struct radeon_device *rdev,
2760                          u32 se_num, u32 sh_per_se,
2761                          u32 cu_per_sh)
2762 {
2763         int i, j, k;
2764         u32 data, mask, active_cu;
2765
2766         for (i = 0; i < se_num; i++) {
2767                 for (j = 0; j < sh_per_se; j++) {
2768                         si_select_se_sh(rdev, i, j);
2769                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2770                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2771
2772                         mask = 1;
2773                         for (k = 0; k < 16; k++) {
2774                                 mask <<= k;
2775                                 if (active_cu & mask) {
2776                                         data &= ~mask;
2777                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2778                                         break;
2779                                 }
2780                         }
2781                 }
2782         }
2783         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2784 }
2785
2786 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2787                               u32 max_rb_num, u32 se_num,
2788                               u32 sh_per_se)
2789 {
2790         u32 data, mask;
2791
2792         data = RREG32(CC_RB_BACKEND_DISABLE);
2793         if (data & 1)
2794                 data &= BACKEND_DISABLE_MASK;
2795         else
2796                 data = 0;
2797         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2798
2799         data >>= BACKEND_DISABLE_SHIFT;
2800
2801         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2802
2803         return data & mask;
2804 }
2805
/**
 * si_setup_rb - set up the render backends
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of SH blocks per shader engine
 * @max_rb_num: total number of RBs on the asic
 *
 * Walks every SE/SH, collects the per-SH RB-disable bits into one
 * global bitmap, inverts it into an enabled-RB bitmap, and then
 * programs PA_SC_RASTER_CONFIG for each SE according to which RBs in
 * each 2-bit group are usable.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather disabled-RB bits from every SE/SH into a single bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per SH */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a set bit now means the RB is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* program the raster config per SE based on each 2-bit RB group */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			/* NOTE(review): the shift uses the global index
			 * (i * sh_per_se + j) even though 'data' is rebuilt
			 * per SE — verify against newer radeon/amdgpu code. */
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2853
/**
 * si_gpu_init - set up the core GFX block
 * @rdev: radeon_device pointer
 *
 * Programs the per-ASIC gfx configuration: shader engine / CU / RB
 * counts, HDP registers, the tiling/address configuration, RB and SPI
 * setup, and the remaining 3D engine default registers.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC topology limits and the recommended ("golden")
	 * GB_ADDR_CONFIG value for each family */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* Verde values also act as the fallback for unknown SI parts */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* cache MC config: row size and bank count are derived from it below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive memory row size (in kb) from the MC column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* the same address config is mirrored to every block that walks memory */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no bit changes; presumably just re-latches
	 * the register — TODO confirm */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* scan-converter FIFO sizes from the per-ASIC config above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* brief delay before the caller continues with hw init */
	udelay(50);
}
3110
3111 /*
3112  * GPU scratch registers helpers function.
3113  */
3114 static void si_scratch_init(struct radeon_device *rdev)
3115 {
3116         int i;
3117
3118         rdev->scratch.num_reg = 7;
3119         rdev->scratch.reg_base = SCRATCH_REG0;
3120         for (i = 0; i < rdev->scratch.num_reg; i++) {
3121                 rdev->scratch.free[i] = true;
3122                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3123         }
3124 }
3125
/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a cache flush followed by an EVENT_WRITE_EOP packet that
 * writes the fence sequence number to the fence driver's GPU address
 * and raises an interrupt once all prior work has completed.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: sync full range */
	radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3152
3153 /*
3154  * IB stuff
3155  */
/**
 * si_ring_ib_execute - emit an indirect buffer on a gfx/compute ring
 * @rdev: radeon_device pointer
 * @ib: the indirect buffer to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet pointing at @ib, optionally recording the post-IB read
 * pointer, and for normal IBs flushes the read caches for the IB's
 * vmid afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet + 8
			 * below gives the rptr after the whole sequence */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* write next_rptr to the writeback buffer instead */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: full range */
		radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3212
3213 /*
3214  * CP.
3215  */
3216 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3217 {
3218         if (enable)
3219                 WREG32(CP_ME_CNTL, 0);
3220         else {
3221                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3222                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3223                 WREG32(SCRATCH_UMSK, 0);
3224                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3225                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3226                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3227         }
3228         udelay(50);
3229 }
3230
3231 static int si_cp_load_microcode(struct radeon_device *rdev)
3232 {
3233         const __be32 *fw_data;
3234         int i;
3235
3236         if (!rdev->me_fw || !rdev->pfp_fw)
3237                 return -EINVAL;
3238
3239         si_cp_enable(rdev, false);
3240
3241         /* PFP */
3242         fw_data = (const __be32 *)rdev->pfp_fw->data;
3243         WREG32(CP_PFP_UCODE_ADDR, 0);
3244         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3245                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3246         WREG32(CP_PFP_UCODE_ADDR, 0);
3247
3248         /* CE */
3249         fw_data = (const __be32 *)rdev->ce_fw->data;
3250         WREG32(CP_CE_UCODE_ADDR, 0);
3251         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3252                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3253         WREG32(CP_CE_UCODE_ADDR, 0);
3254
3255         /* ME */
3256         fw_data = (const __be32 *)rdev->me_fw->data;
3257         WREG32(CP_ME_RAM_WADDR, 0);
3258         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3259                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3260         WREG32(CP_ME_RAM_WADDR, 0);
3261
3262         WREG32(CP_PFP_UCODE_ADDR, 0);
3263         WREG32(CP_CE_UCODE_ADDR, 0);
3264         WREG32(CP_ME_RAM_WADDR, 0);
3265         WREG32(CP_ME_RAM_RADDR, 0);
3266         return 0;
3267 }
3268
3269 static int si_cp_start(struct radeon_device *rdev)
3270 {
3271         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3272         int r, i;
3273
3274         r = radeon_ring_lock(rdev, ring, 7 + 4);
3275         if (r) {
3276                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3277                 return r;
3278         }
3279         /* init the CP */
3280         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3281         radeon_ring_write(ring, 0x1);
3282         radeon_ring_write(ring, 0x0);
3283         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3284         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3285         radeon_ring_write(ring, 0);
3286         radeon_ring_write(ring, 0);
3287
3288         /* init the CE partitions */
3289         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3290         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3291         radeon_ring_write(ring, 0xc000);
3292         radeon_ring_write(ring, 0xe000);
3293         radeon_ring_unlock_commit(rdev, ring);
3294
3295         si_cp_enable(rdev, true);
3296
3297         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3298         if (r) {
3299                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3300                 return r;
3301         }
3302
3303         /* setup clear context state */
3304         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3305         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3306
3307         for (i = 0; i < si_default_size; i++)
3308                 radeon_ring_write(ring, si_default_state[i]);
3309
3310         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3311         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3312
3313         /* set clear context state */
3314         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3315         radeon_ring_write(ring, 0);
3316
3317         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3318         radeon_ring_write(ring, 0x00000316);
3319         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3320         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3321
3322         radeon_ring_unlock_commit(rdev, ring);
3323
3324         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3325                 ring = &rdev->ring[i];
3326                 r = radeon_ring_lock(rdev, ring, 2);
3327
3328                 /* clear the compute context state */
3329                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3330                 radeon_ring_write(ring, 0);
3331
3332                 radeon_ring_unlock_commit(rdev, ring);
3333         }
3334
3335         return 0;
3336 }
3337
3338 static void si_cp_fini(struct radeon_device *rdev)
3339 {
3340         struct radeon_ring *ring;
3341         si_cp_enable(rdev, false);
3342
3343         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3344         radeon_ring_fini(rdev, ring);
3345         radeon_scratch_free(rdev, ring->rptr_save_reg);
3346
3347         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3348         radeon_ring_fini(rdev, ring);
3349         radeon_scratch_free(rdev, ring->rptr_save_reg);
3350
3351         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3352         radeon_ring_fini(rdev, ring);
3353         radeon_scratch_free(rdev, ring->rptr_save_reg);
3354 }
3355
/*
 * si_cp_resume - reset the CP and (re)start the three CP rings
 *
 * Soft-resets the CP block, programs the ring-buffer registers for
 * ring 0 (gfx + compute) and rings 1/2 (compute only), loads the initial
 * state via si_cp_start() and then ring-tests each ring.
 *
 * Returns 0 on success.  A failing gfx ring test is fatal (all rings are
 * marked not ready and the error is returned); a failing compute ring
 * test only clears that ring's ready flag.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8); /* log2 of size in qwords */
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE; /* no writeback: CP must not update the rptr copy */
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp); /* final CNTL value, RB_RPTR_WR_ENA dropped */

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal; just mark the ring unusable */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3491
/*
 * si_gpu_check_soft_reset - determine which GPU blocks appear busy/hung
 *
 * Reads the GRBM/SRBM/DMA/VM status registers and builds a mask of
 * RADEON_RESET_* flags for every block that reports busy.  An MC-busy
 * indication is deliberately dropped from the returned mask, since a
 * busy memory controller is most likely just busy rather than hung.
 *
 * Returns the accumulated reset mask (0 if everything looks idle).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3572
/*
 * si_gpu_soft_reset - soft reset the GPU blocks named in @reset_mask
 * @rdev: radeon device
 * @reset_mask: bitmask of RADEON_RESET_* flags selecting blocks to reset
 *
 * Dumps the current status registers, halts the CP and DMA engines,
 * stops the memory controller clients, translates @reset_mask into the
 * corresponding GRBM/SRBM soft-reset bits, pulses those bits
 * (assert, delay, deassert), and finally resumes the MC.  A no-op when
 * @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* stop the DMA ring buffers before resetting the engines */
	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: assert, wait, deassert; the read-backs
	 * post the writes before the delay */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3697
3698 int si_asic_reset(struct radeon_device *rdev)
3699 {
3700         u32 reset_mask;
3701
3702         reset_mask = si_gpu_check_soft_reset(rdev);
3703
3704         if (reset_mask)
3705                 r600_set_bios_scratch_engine_hung(rdev, true);
3706
3707         si_gpu_soft_reset(rdev, reset_mask);
3708
3709         reset_mask = si_gpu_check_soft_reset(rdev);
3710
3711         if (!reset_mask)
3712                 r600_set_bios_scratch_engine_hung(rdev, false);
3713
3714         return 0;
3715 }
3716
3717 /**
3718  * si_gfx_is_lockup - Check if the GFX engine is locked up
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ring: radeon_ring structure holding ring information
3722  *
3723  * Check if the GFX engine is locked up.
3724  * Returns true if the engine appears to be locked up, false if not.
3725  */
3726 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3727 {
3728         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3729
3730         if (!(reset_mask & (RADEON_RESET_GFX |
3731                             RADEON_RESET_COMPUTE |
3732                             RADEON_RESET_CP))) {
3733                 radeon_ring_lockup_update(ring);
3734                 return false;
3735         }
3736         /* force CP activities */
3737         radeon_ring_force_activity(rdev, ring);
3738         return radeon_ring_test_lockup(rdev, ring);
3739 }
3740
3741 /**
3742  * si_dma_is_lockup - Check if the DMA engine is locked up
3743  *
3744  * @rdev: radeon_device pointer
3745  * @ring: radeon_ring structure holding ring information
3746  *
3747  * Check if the async DMA engine is locked up.
3748  * Returns true if the engine appears to be locked up, false if not.
3749  */
3750 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3751 {
3752         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3753         u32 mask;
3754
3755         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3756                 mask = RADEON_RESET_DMA;
3757         else
3758                 mask = RADEON_RESET_DMA1;
3759
3760         if (!(reset_mask & mask)) {
3761                 radeon_ring_lockup_update(ring);
3762                 return false;
3763         }
3764         /* force ring activities */
3765         radeon_ring_force_activity(rdev, ring);
3766         return radeon_ring_test_lockup(rdev, ring);
3767 }
3768
3769 /* MC */
/*
 * si_mc_program - program the memory controller aperture registers
 *
 * Clears the HDP tiling/surface registers, stops the MC clients
 * (display), waits for MC idle, then programs the system aperture and
 * FB location from rdev->mc before resuming the clients.  On parts with
 * display hardware the VGA aperture/renderer is also locked out so it
 * cannot scribble over VRAM objects.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end >> 24, low 16 bits = start >> 24 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on SI: program an empty/disabled AGP aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3820
3821 void si_vram_gtt_location(struct radeon_device *rdev,
3822                           struct radeon_mc *mc)
3823 {
3824         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3825                 /* leave room for at least 1024M GTT */
3826                 dev_warn(rdev->dev, "limiting VRAM\n");
3827                 mc->real_vram_size = 0xFFC0000000ULL;
3828                 mc->mc_vram_size = 0xFFC0000000ULL;
3829         }
3830         radeon_vram_location(rdev, &rdev->mc, 0);
3831         rdev->mc.gtt_base_align = 0;
3832         radeon_gtt_location(rdev, mc);
3833 }
3834
3835 static int si_mc_init(struct radeon_device *rdev)
3836 {
3837         u32 tmp;
3838         int chansize, numchan;
3839
3840         /* Get VRAM informations */
3841         rdev->mc.vram_is_ddr = true;
3842         tmp = RREG32(MC_ARB_RAMCFG);
3843         if (tmp & CHANSIZE_OVERRIDE) {
3844                 chansize = 16;
3845         } else if (tmp & CHANSIZE_MASK) {
3846                 chansize = 64;
3847         } else {
3848                 chansize = 32;
3849         }
3850         tmp = RREG32(MC_SHARED_CHMAP);
3851         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3852         case 0:
3853         default:
3854                 numchan = 1;
3855                 break;
3856         case 1:
3857                 numchan = 2;
3858                 break;
3859         case 2:
3860                 numchan = 4;
3861                 break;
3862         case 3:
3863                 numchan = 8;
3864                 break;
3865         case 4:
3866                 numchan = 3;
3867                 break;
3868         case 5:
3869                 numchan = 6;
3870                 break;
3871         case 6:
3872                 numchan = 10;
3873                 break;
3874         case 7:
3875                 numchan = 12;
3876                 break;
3877         case 8:
3878                 numchan = 16;
3879                 break;
3880         }
3881         rdev->mc.vram_width = numchan * chansize;
3882         /* Could aper size report 0 ? */
3883         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3884         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3885         /* size in MB on si */
3886         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3887         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3888         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3889         si_vram_gtt_location(rdev, &rdev->mc);
3890         radeon_update_bandwidth_info(rdev);
3891
3892         return 0;
3893 }
3894
3895 /*
3896  * GART
3897  */
/*
 * si_pcie_gart_tlb_flush - flush the HDP cache and invalidate VM context 0
 *
 * Called after GART page-table updates so the new entries are visible
 * to the GPU.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3906
/*
 * si_pcie_gart_enable - pin the GART table and enable the VM contexts
 *
 * Pins the page table in VRAM, programs the L1 TLB and L2 cache control
 * registers, sets up VM context 0 to cover the GTT aperture, points
 * contexts 1-15 at the same table as placeholders (real per-VM tables
 * are assigned on the fly in radeon_gart.c), enables fault reporting on
 * context 1, and flushes the TLB.
 *
 * Returns 0 on success or a negative error code if the table BO is
 * missing or cannot be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw offsets 0x15D4-0x15DC are cleared here; their
	 * purpose is not documented in this file — confirm against the
	 * register spec before touching */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3991
/*
 * si_pcie_gart_disable - disable the VM contexts and unpin the GART table
 *
 * Turns off all VM contexts, reprograms the TLB/L2 control registers
 * without the enable bits, and unpins the page-table BO.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4010
/*
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * Disables the hardware first, then frees the table BO and the GART
 * bookkeeping; the hardware must be off before the table is freed.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4017
4018 /* vm parser */
/*
 * si_vm_reg_valid - check whether a register may be written from a VM IB
 * @reg: register byte offset
 *
 * Context registers (offset >= 0x28000) are always allowed; config
 * registers must be on the explicit whitelist below.  Returns true if
 * the write is permitted, false otherwise (logging the rejected offset).
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
4059
/*
 * si_vm_packet3_ce_check - validate a PACKET3 on the CE (constant engine)
 * @rdev: radeon device (unused here, kept for parser-callback signature)
 * @ib: indirect buffer dwords (unused; CE packets need no operand checks)
 * @pkt: parsed packet header
 *
 * Only the whitelisted CE opcodes below may appear in a VM submission.
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4081
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 from a VM IB on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword stream
 * @pkt: packet to check; pkt->idx indexes the packet header in @ib
 *
 * Whitelist-style check: harmless packets pass straight through,
 * packets that can address registers are only allowed if every
 * register they touch passes si_vm_reg_valid(), and any unknown
 * opcode is rejected.
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
                                   u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;  /* first dword after the packet header */
        u32 idx_value = ib[idx];
        u32 start_reg, end_reg, reg, i;
        u32 command, info;

        switch (pkt->opcode) {
        /* packets allowed unconditionally on the GFX ring */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_INDEX_BUFFER_SIZE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_DRAW_INDIRECT:
        case PACKET3_DRAW_INDEX_INDIRECT:
        case PACKET3_INDEX_BASE:
        case PACKET3_DRAW_INDEX_2:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_INDEX_TYPE:
        case PACKET3_DRAW_INDIRECT_MULTI:
        case PACKET3_DRAW_INDEX_AUTO:
        case PACKET3_DRAW_INDEX_IMMD:
        case PACKET3_NUM_INSTANCES:
        case PACKET3_DRAW_INDEX_MULTI_AUTO:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_DRAW_INDEX_OFFSET_2:
        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
        case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
        case PACKET3_MPEG_INDEX:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* sel field (bits 8-11) clear: destination is a register */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                /* sel field (bits 8-11) clear: writing to registers */
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* all data dwords go to one register */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* contiguous register range; header + addr
                                 * take 2 dwords of pkt->count */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* bit 8 set: write target is a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* bit 1 set: destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_SET_CONFIG_REG:
                /* range-check the whole config register window, then
                 * validate each register individually */
                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
                end_reg = 4 * pkt->count + start_reg - 4;
                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
                        return -EINVAL;
                }
                for (i = 0; i < pkt->count; i++) {
                        reg = start_reg + (4 * i);
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_CP_DMA:
                command = ib[idx + 4];
                info = ib[idx + 1];
                if (command & PACKET3_CP_DMA_CMD_SAS) {
                        /* src address space is register */
                        if (((info & 0x60000000) >> 29) == 0) {
                                start_reg = idx_value << 2;
                                if (command & PACKET3_CP_DMA_CMD_SAIC) {
                                        /* SAIC: src address does not
                                         * increment; only one register read */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad SRC register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        /* low 21 bits of command = byte count */
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad SRC register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                if (command & PACKET3_CP_DMA_CMD_DAS) {
                        /* dst address space is register */
                        if (((info & 0x00300000) >> 20) == 0) {
                                start_reg = ib[idx + 2];
                                if (command & PACKET3_CP_DMA_CMD_DAIC) {
                                        /* DAIC: dst address does not increment */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad DST register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad DST register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                break;
        default:
                DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
4240
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a VM IB on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword stream
 * @pkt: packet to check; pkt->idx indexes the packet header in @ib
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): a smaller
 * whitelist of always-allowed packets plus register validation via
 * si_vm_reg_valid() for the data-copy/write packets.
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
                                       u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;  /* first dword after the packet header */
        u32 idx_value = ib[idx];
        u32 start_reg, reg, i;

        switch (pkt->opcode) {
        /* packets allowed unconditionally on compute rings */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* sel field (bits 8-11) clear: destination is a register */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                /* sel field (bits 8-11) clear: writing to registers */
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* all data dwords go to one register */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* contiguous register range */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* bit 8 set: write target is a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* bit 1 set: destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
4322
/**
 * si_ib_parse - validate an indirect buffer submitted by a VM client
 *
 * @rdev: radeon_device pointer
 * @ib: the IB to validate
 *
 * Walks the IB packet by packet.  Type 0 packets are rejected
 * outright, type 2 packets are skipped, and type 3 packets are
 * dispatched to the CE, GFX or compute checker depending on
 * ib->is_const_ib and the target ring.
 *
 * Returns 0 if the whole IB is valid, -EINVAL on the first bad packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
        int ret = 0;
        u32 idx = 0;
        struct radeon_cs_packet pkt;

        do {
                pkt.idx = idx;
                pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
                pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
                pkt.one_reg_wr = 0;
                switch (pkt.type) {
                case RADEON_PACKET_TYPE0:
                        /* direct register writes are never allowed from a VM IB */
                        dev_err(rdev->dev, "Packet0 not allowed!\n");
                        ret = -EINVAL;
                        break;
                case RADEON_PACKET_TYPE2:
                        /* type 2 is a one-dword filler packet */
                        idx += 1;
                        break;
                case RADEON_PACKET_TYPE3:
                        pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
                        if (ib->is_const_ib)
                                ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
                        else {
                                switch (ib->ring) {
                                case RADEON_RING_TYPE_GFX_INDEX:
                                        ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
                                        break;
                                case CAYMAN_RING_TYPE_CP1_INDEX:
                                case CAYMAN_RING_TYPE_CP2_INDEX:
                                        ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
                                        break;
                                default:
                                        dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
                                        ret = -EINVAL;
                                        break;
                                }
                        }
                        /* advance past header + (count + 1) payload dwords */
                        idx += pkt.count + 2;
                        break;
                default:
                        dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
                        ret = -EINVAL;
                        break;
                }
                if (ret)
                        break;
        } while (idx < ib->length_dw);

        return ret;
}
4374
4375 /*
4376  * vm
4377  */
4378 int si_vm_init(struct radeon_device *rdev)
4379 {
4380         /* number of VMs */
4381         rdev->vm_manager.nvm = 16;
4382         /* base offset of vram pages */
4383         rdev->vm_manager.vram_base_offset = 0;
4384
4385         return 0;
4386 }
4387
/* si_vm_fini - VM manager teardown; nothing to undo on SI */
void si_vm_fini(struct radeon_device *rdev)
{
}
4391
4392 /**
4393  * si_vm_set_page - update the page tables using the CP
4394  *
4395  * @rdev: radeon_device pointer
4396  * @ib: indirect buffer to fill with commands
4397  * @pe: addr of the page entry
4398  * @addr: dst addr to write into pe
4399  * @count: number of page entries to update
4400  * @incr: increase next addr by incr bytes
4401  * @flags: access flags
4402  *
4403  * Update the page tables using the CP (SI).
4404  */
4405 void si_vm_set_page(struct radeon_device *rdev,
4406                     struct radeon_ib *ib,
4407                     uint64_t pe,
4408                     uint64_t addr, unsigned count,
4409                     uint32_t incr, uint32_t flags)
4410 {
4411         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4412         uint64_t value;
4413         unsigned ndw;
4414
4415         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4416                 while (count) {
4417                         ndw = 2 + count * 2;
4418                         if (ndw > 0x3FFE)
4419                                 ndw = 0x3FFE;
4420
4421                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4422                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4423                                         WRITE_DATA_DST_SEL(1));
4424                         ib->ptr[ib->length_dw++] = pe;
4425                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4426                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4427                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4428                                         value = radeon_vm_map_gart(rdev, addr);
4429                                         value &= 0xFFFFFFFFFFFFF000ULL;
4430                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4431                                         value = addr;
4432                                 } else {
4433                                         value = 0;
4434                                 }
4435                                 addr += incr;
4436                                 value |= r600_flags;
4437                                 ib->ptr[ib->length_dw++] = value;
4438                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4439                         }
4440                 }
4441         } else {
4442                 /* DMA */
4443                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4444                         while (count) {
4445                                 ndw = count * 2;
4446                                 if (ndw > 0xFFFFE)
4447                                         ndw = 0xFFFFE;
4448
4449                                 /* for non-physically contiguous pages (system) */
4450                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4451                                 ib->ptr[ib->length_dw++] = pe;
4452                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4453                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4454                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4455                                                 value = radeon_vm_map_gart(rdev, addr);
4456                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4457                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4458                                                 value = addr;
4459                                         } else {
4460                                                 value = 0;
4461                                         }
4462                                         addr += incr;
4463                                         value |= r600_flags;
4464                                         ib->ptr[ib->length_dw++] = value;
4465                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4466                                 }
4467                         }
4468                 } else {
4469                         while (count) {
4470                                 ndw = count * 2;
4471                                 if (ndw > 0xFFFFE)
4472                                         ndw = 0xFFFFE;
4473
4474                                 if (flags & RADEON_VM_PAGE_VALID)
4475                                         value = addr;
4476                                 else
4477                                         value = 0;
4478                                 /* for physically contiguous pages (vram) */
4479                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4480                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4481                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4482                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4483                                 ib->ptr[ib->length_dw++] = 0;
4484                                 ib->ptr[ib->length_dw++] = value; /* value */
4485                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4486                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4487                                 ib->ptr[ib->length_dw++] = 0;
4488                                 pe += ndw * 4;
4489                                 addr += (ndw / 2) * incr;
4490                                 count -= ndw / 2;
4491                         }
4492                 }
4493                 while (ib->length_dw & 0x7)
4494                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4495         }
4496 }
4497
/**
 * si_vm_flush - flush the TLB for a VM via the CP ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: the VM to flush (no-op if NULL)
 *
 * Emits WRITE_DATA packets that program the VM's page directory base,
 * flush the HDP cache and request a TLB invalidate for the VM context,
 * then syncs PFP to ME.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        /* write new base address */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));

        /* contexts 0-7 and 8-15 use two separate register banks */
        if (vm->id < 8) {
                radeon_ring_write(ring,
                                  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
        } else {
                radeon_ring_write(ring,
                                  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
        }
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0x1);

        /* bits 0-15 are the VM contexts0-15 */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 1 << vm->id);

        /* sync PFP to ME, otherwise we might get invalid PFP reads */
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
}
4540
/**
 * si_dma_vm_flush - flush the TLB for a VM via a DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index to emit on
 * @vm: the VM to flush (no-op if NULL)
 *
 * DMA-ring counterpart of si_vm_flush(): uses SRBM_WRITE packets to
 * program the page directory base, flush the HDP cache and invalidate
 * the VM context's TLB.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
        /* contexts 0-7 and 8-15 use two separate register banks;
         * (0xf << 16) is the SRBM byte-enable mask */
        if (vm->id < 8) {
                radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
        } else {
                radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
        }
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
        radeon_ring_write(ring, 1);

        /* bits 0-7 are the VM contexts0-7 */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 1 << vm->id);
}
4566
4567 /*
4568  *  Power and clock gating
4569  */
/*
 * Poll the two RLC serdes master-busy registers until each reads zero
 * (idle) or rdev->usec_timeout microseconds have elapsed per register.
 */
static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
{
        int i;

        for (i = 0; i < rdev->usec_timeout; i++) {
                if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
                        break;
                udelay(1);
        }

        for (i = 0; i < rdev->usec_timeout; i++) {
                if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
                        break;
                udelay(1);
        }
}
4586
/**
 * si_enable_gui_idle_interrupt - toggle the GUI idle interrupt sources
 * @rdev: radeon_device pointer
 * @enable: true to enable the context busy/empty interrupts
 *
 * Sets or clears CNTX_BUSY_INT_ENABLE/CNTX_EMPTY_INT_ENABLE in
 * CP_INT_CNTL_RING0.  When disabling, performs a GFX register read and
 * then polls RLC_STAT until only GFX_CLOCK_STATUS | GFX_POWER_STATUS
 * remain set within the status mask (or the usec timeout expires).
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                         bool enable)
{
        u32 tmp = RREG32(CP_INT_CNTL_RING0);
        u32 mask;
        int i;

        if (enable)
                tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        else
                tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);

        if (!enable) {
                /* read a gfx register */
                tmp = RREG32(DB_DEPTH_INFO);

                mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
                                break;
                        udelay(1);
                }
        }
}
4612
/**
 * si_set_uvd_dcm - configure UVD dynamic clock mode
 * @rdev: radeon_device pointer
 * @sw_mode: true for software-controlled clocking, false for hardware
 *
 * Programs UVD_CGC_CTRL (always enabling DCM with CG_DT(1)/CLK_OD(4))
 * and UVD_CGC_CTRL2.  In sw mode bits 11-30 of UVD_CGC_CTRL are
 * cleared and the DYN_OR/DYN_RR dividers are enabled in CTRL2; in hw
 * mode those bits are set and CTRL2 is zeroed.
 */
static void si_set_uvd_dcm(struct radeon_device *rdev,
                           bool sw_mode)
{
        u32 tmp, tmp2;

        tmp = RREG32(UVD_CGC_CTRL);
        tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
        tmp |= DCM | CG_DT(1) | CLK_OD(4);

        if (sw_mode) {
                tmp &= ~0x7ffff800;
                tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
        } else {
                tmp |= 0x7ffff800;
                tmp2 = 0;
        }

        WREG32(UVD_CGC_CTRL, tmp);
        WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
}
4633
/*
 * Initialize UVD internal clock gating.  hw_mode is hard-coded to
 * true, so the else branch (clearing DCM directly) is currently dead
 * code kept for reference.
 */
static void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
        bool hw_mode = true;

        if (hw_mode) {
                si_set_uvd_dcm(rdev, false);
        } else {
                u32 tmp = RREG32(UVD_CGC_CTRL);
                tmp &= ~DCM;
                WREG32(UVD_CGC_CTRL, tmp);
        }
}
4646
/**
 * si_halt_rlc - stop the RLC
 * @rdev: radeon_device pointer
 *
 * Clears RLC_ENABLE in RLC_CNTL (if it was set) and waits for the
 * serdes to go idle.
 *
 * Returns the original RLC_CNTL value so the caller can restore it
 * later with si_update_rlc().
 */
static u32 si_halt_rlc(struct radeon_device *rdev)
{
        u32 data, orig;

        orig = data = RREG32(RLC_CNTL);

        if (data & RLC_ENABLE) {
                data &= ~RLC_ENABLE;
                WREG32(RLC_CNTL, data);

                si_wait_for_rlc_serdes(rdev);
        }

        return orig;
}
4662
4663 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4664 {
4665         u32 tmp;
4666
4667         tmp = RREG32(RLC_CNTL);
4668         if (tmp != rlc)
4669                 WREG32(RLC_CNTL, rlc);
4670 }
4671
4672 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4673 {
4674         u32 data, orig;
4675
4676         orig = data = RREG32(DMA_PG);
4677         if (enable)
4678                 data |= PG_CNTL_ENABLE;
4679         else
4680                 data &= ~PG_CNTL_ENABLE;
4681         if (orig != data)
4682                 WREG32(DMA_PG, data);
4683 }
4684
/*
 * Program the DMA power-gating state machine: one config write
 * followed by five zero writes to DMA_PGFSM_WRITE.
 */
static void si_init_dma_pg(struct radeon_device *rdev)
{
        u32 tmp;  /* loop counter only */

        WREG32(DMA_PGFSM_WRITE,  0x00002000);
        WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

        for (tmp = 0; tmp < 5; tmp++)
                WREG32(DMA_PGFSM_WRITE, 0);
}
4695
/**
 * si_enable_gfx_cgpg - enable/disable GFX power gating
 * @rdev: radeon_device pointer
 * @enable: true to enable GFX PG
 *
 * When enabling: programs the RLC_TTOP_D thresholds, sets
 * GFX_PG_ENABLE in RLC_PG_CNTL and turns on automatic power gating.
 * When disabling: clears AUTO_PG_EN and performs a read of
 * DB_RENDER_CONTROL (result discarded).
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
                               bool enable)
{
        u32 tmp;

        if (enable) {
                tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
                WREG32(RLC_TTOP_D, tmp);

                tmp = RREG32(RLC_PG_CNTL);
                tmp |= GFX_PG_ENABLE;
                WREG32(RLC_PG_CNTL, tmp);

                tmp = RREG32(RLC_AUTO_PG_CTRL);
                tmp |= AUTO_PG_EN;
                WREG32(RLC_AUTO_PG_CTRL, tmp);
        } else {
                tmp = RREG32(RLC_AUTO_PG_CTRL);
                tmp &= ~AUTO_PG_EN;
                WREG32(RLC_AUTO_PG_CTRL, tmp);

                /* dummy read; the value is not used */
                tmp = RREG32(DB_RENDER_CONTROL);
        }
}
4720
/*
 * Initialize GFX power gating: program the RLC save/restore and clear
 * state buffer addresses, enable GFX_PG_SRC, and configure the
 * auto-PG idle threshold (GRBM_REG_SGIT) in RLC_AUTO_PG_CTRL.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 tmp;

        /* buffers are 256-byte aligned, hence the >> 8 */
        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

        tmp = RREG32(RLC_PG_CNTL);
        tmp |= GFX_PG_SRC;
        WREG32(RLC_PG_CNTL, tmp);

        WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

        tmp = RREG32(RLC_AUTO_PG_CTRL);

        tmp &= ~GRBM_REG_SGIT_MASK;
        tmp |= GRBM_REG_SGIT(0x700);
        tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
        WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4740
/**
 * si_get_cu_active_bitmap - get the bitmap of active CUs for one SE/SH
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Selects the given SE/SH, reads the fused (CC) and user (GC_USER)
 * shader array config registers, then restores broadcast selection.
 * The upper 16 bits of the CC value are merged with the user value and
 * shifted down; since those bits mark disabled CUs, the result is
 * inverted and masked to max_cu_per_sh bits.
 *
 * Returns a bitmap with one set bit per active CU.
 */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
        u32 mask = 0, tmp, tmp1;
        int i;

        si_select_se_sh(rdev, se, sh);
        tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
        tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
        /* restore broadcast to all SEs/SHs */
        si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        tmp &= 0xffff0000;

        tmp |= tmp1;
        tmp >>= 16;

        /* build a mask of max_cu_per_sh low bits */
        for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
                mask <<= 1;
                mask |= 1;
        }

        return (~tmp) & mask;
}
4763
4764 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4765 {
4766         u32 i, j, k, active_cu_number = 0;
4767         u32 mask, counter, cu_bitmap;
4768         u32 tmp = 0;
4769
4770         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4771                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4772                         mask = 1;
4773                         cu_bitmap = 0;
4774                         counter  = 0;
4775                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4776                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4777                                         if (counter < 2)
4778                                                 cu_bitmap |= mask;
4779                                         counter++;
4780                                 }
4781                                 mask <<= 1;
4782                         }
4783
4784                         active_cu_number += counter;
4785                         tmp |= (cu_bitmap << (i * 16 + j * 8));
4786                 }
4787         }
4788
4789         WREG32(RLC_PG_AO_CU_MASK, tmp);
4790
4791         tmp = RREG32(RLC_MAX_PG_CU);
4792         tmp &= ~MAX_PU_CU_MASK;
4793         tmp |= MAX_PU_CU(active_cu_number);
4794         WREG32(RLC_MAX_PG_CU, tmp);
4795 }
4796
/**
 * si_enable_cgcg - enable/disable coarse-grained clock gating
 * @rdev: radeon_device pointer
 * @enable: true to enable CGCG/CGLS
 *
 * Toggles CGCG_EN/CGLS_EN in RLC_CGCG_CGLS_CTRL.  Enabling halts the
 * RLC, programs the serdes write masks/ctrl, waits for the serdes to
 * go idle and restores the RLC before setting the bits.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
                           bool enable)
{
        u32 data, orig, tmp;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        si_enable_gui_idle_interrupt(rdev, enable);

        if (enable) {
                WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

                tmp = si_halt_rlc(rdev);

                /* broadcast the serdes write to all masters */
                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

                si_wait_for_rlc_serdes(rdev);

                si_update_rlc(rdev, tmp);

                WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

                data |= CGCG_EN | CGLS_EN;
        } else {
                /* repeated dummy reads; values are discarded */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);
}
4834
/**
 * si_enable_mgcg - enable/disable medium-grained clock gating
 * @rdev: radeon_device pointer
 * @enable: true to enable MGCG
 *
 * Programs CGTS_SM_CTRL_REG, CP memory light sleep and the
 * RLC_CGTT_MGCG_OVERRIDE register, then (with the RLC halted)
 * broadcasts the configuration through the serdes write interface
 * before restoring the RLC.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
                           bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable) {
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data = 0x96940200;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                /* enable CP memory light sleep */
                orig = data = RREG32(CP_MEM_SLP_CNTL);
                data |= CP_MEM_LS_EN;
                if (orig != data)
                        WREG32(CP_MEM_SLP_CNTL, data);

                /* clear the low MGCG override bits */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data &= 0xffffffc0;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = si_halt_rlc(rdev);

                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

                si_update_rlc(rdev, tmp);
        } else {
                /* set the low MGCG override bits */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* disable CP memory light sleep if it was on */
                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= LS_OVERRIDE | OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = si_halt_rlc(rdev);

                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

                si_update_rlc(rdev, tmp);
        }
}
4888
/**
 * si_enable_uvd_mgcg - enable/disable UVD medium-grained clock gating
 * @rdev: radeon_device pointer
 * @enable: true to enable UVD MGCG
 *
 * Toggles the low 14 bits of UVD_CGC_MEM_CTRL, the DCM bit in
 * UVD_CGC_CTRL, and the two SMC-side CG_CGTT_LOCAL registers (zeroed
 * when enabling, all-ones when disabling).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
                               bool enable)
{
        u32 orig, data, tmp;

        if (enable) {
                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                tmp |= 0x3fff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);

                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
        } else {
                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                tmp &= ~0x3fff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);

                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
        }
}
4920
/*
 * Memory controller / VM hub clock gating control registers, iterated
 * by the MC light-sleep helper below to set or clear MC_LS_ENABLE.
 */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
4933
4934 static void si_enable_mc_ls(struct radeon_device *rdev,
4935                             bool enable)
4936 {
4937         int i;
4938         u32 orig, data;
4939
4940         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
4941                 orig = data = RREG32(mc_cg_registers[i]);
4942                 if (enable)
4943                         data |= MC_LS_ENABLE;
4944                 else
4945                         data &= ~MC_LS_ENABLE;
4946                 if (data != orig)
4947                         WREG32(mc_cg_registers[i], data);
4948         }
4949 }
4950
4951
/**
 * si_init_cg - enable clock gating at startup
 * @rdev: radeon_device pointer
 *
 * Enables medium grain then coarse grain clock gating, explicitly
 * disables MC light sleep on Tahiti, and brings up UVD MGCG plus the
 * UVD internal CG programming.
 */
static void si_init_cg(struct radeon_device *rdev)
{
        /* NOTE(review): hardcoded true — presumably every SI asic handled
         * here has a UVD block; confirm before relying on it */
        bool has_uvd = true;

        si_enable_mgcg(rdev, true);
        si_enable_cgcg(rdev, true);
        /* disable MC LS on Tahiti */
        if (rdev->family == CHIP_TAHITI)
                si_enable_mc_ls(rdev, false);
        if (has_uvd) {
                si_enable_uvd_mgcg(rdev, true);
                si_init_uvd_internal_cg(rdev);
        }
}
4966
/**
 * si_fini_cg - disable clock gating at teardown
 * @rdev: radeon_device pointer
 *
 * Reverses si_init_cg() in the opposite order: UVD MGCG first, then
 * coarse grain, then medium grain clock gating.
 */
static void si_fini_cg(struct radeon_device *rdev)
{
        /* see NOTE in si_init_cg(): hardcoded on the assumption UVD exists */
        bool has_uvd = true;

        if (has_uvd)
                si_enable_uvd_mgcg(rdev, false);
        si_enable_cgcg(rdev, false);
        si_enable_mgcg(rdev, false);
}
4976
/**
 * si_init_pg - enable power gating at startup
 * @rdev: radeon_device pointer
 *
 * Power gating is only supported on Cape Verde here.  When supported,
 * the always-on CU mask is programmed and the DMA and GFX power gating
 * blocks are initialized and enabled.  Otherwise only the RLC
 * save/restore and clear-state buffer GPU addresses (in 256-byte units,
 * hence the >> 8) are written to the RLC.
 */
static void si_init_pg(struct radeon_device *rdev)
{
        bool has_pg = false;

        /* only cape verde supports PG */
        if (rdev->family == CHIP_VERDE)
                has_pg = true;

        if (has_pg) {
                si_init_ao_cu_mask(rdev);
                si_init_dma_pg(rdev);
                si_enable_dma_pg(rdev, true);
                si_init_gfx_cgpg(rdev);
                si_enable_gfx_cgpg(rdev, true);
        } else {
                WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
                WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
        }
}
4996
4997 static void si_fini_pg(struct radeon_device *rdev)
4998 {
4999         bool has_pg = false;
5000
5001         /* only cape verde supports PG */
5002         if (rdev->family == CHIP_VERDE)
5003                 has_pg = true;
5004
5005         if (has_pg) {
5006                 si_enable_dma_pg(rdev, false);
5007                 si_enable_gfx_cgpg(rdev, false);
5008         }
5009 }
5010
5011 /*
5012  * RLC
5013  */
/**
 * si_rlc_fini - free the RLC buffer objects
 * @rdev: radeon_device pointer
 *
 * Unpins, unreserves and unrefs the RLC save/restore BO and the RLC
 * clear-state BO, if they exist, and NULLs the pointers.  A failed
 * reserve only triggers a warning; teardown proceeds regardless.
 */
void si_rlc_fini(struct radeon_device *rdev)
{
        int r;

        /* save restore block */
        if (rdev->rlc.save_restore_obj) {
                r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
                if (unlikely(r != 0))
                        dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
                radeon_bo_unpin(rdev->rlc.save_restore_obj);
                radeon_bo_unreserve(rdev->rlc.save_restore_obj);

                radeon_bo_unref(&rdev->rlc.save_restore_obj);
                rdev->rlc.save_restore_obj = NULL;
        }

        /* clear state block */
        if (rdev->rlc.clear_state_obj) {
                r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
                if (unlikely(r != 0))
                        dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
                radeon_bo_unpin(rdev->rlc.clear_state_obj);
                radeon_bo_unreserve(rdev->rlc.clear_state_obj);

                radeon_bo_unref(&rdev->rlc.clear_state_obj);
                rdev->rlc.clear_state_obj = NULL;
        }
}
5042
5043 #define RLC_CLEAR_STATE_END_MARKER          0x00000001
5044
5045 int si_rlc_init(struct radeon_device *rdev)
5046 {
5047         volatile u32 *dst_ptr;
5048         u32 dws, data, i, j, k, reg_num;
5049         u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
5050         u64 reg_list_mc_addr;
5051         const struct cs_section_def *cs_data = si_cs_data;
5052         int r;
5053
5054         /* save restore block */
5055         if (rdev->rlc.save_restore_obj == NULL) {
5056                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
5057                                      RADEON_GEM_DOMAIN_VRAM, NULL,
5058                                      &rdev->rlc.save_restore_obj);
5059                 if (r) {
5060                         dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
5061                         return r;
5062                 }
5063         }
5064
5065         r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5066         if (unlikely(r != 0)) {
5067                 si_rlc_fini(rdev);
5068                 return r;
5069         }
5070         r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
5071                           &rdev->rlc.save_restore_gpu_addr);
5072         if (r) {
5073                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5074                 dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
5075                 si_rlc_fini(rdev);
5076                 return r;
5077         }
5078
5079         if (rdev->family == CHIP_VERDE) {
5080                 r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
5081                 if (r) {
5082                         dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
5083                         si_rlc_fini(rdev);
5084                 return r;
5085                 }
5086                 /* write the sr buffer */
5087                 dst_ptr = rdev->rlc.sr_ptr;
5088                 for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
5089                         dst_ptr[i] = verde_rlc_save_restore_register_list[i];
5090                 }
5091                 radeon_bo_kunmap(rdev->rlc.save_restore_obj);
5092         }
5093         radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5094
5095         /* clear state block */
5096         reg_list_num = 0;
5097         dws = 0;
5098         for (i = 0; cs_data[i].section != NULL; i++) {
5099                 for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5100                         reg_list_num++;
5101                         dws += cs_data[i].section[j].reg_count;
5102                 }
5103         }
5104         reg_list_blk_index = (3 * reg_list_num + 2);
5105         dws += reg_list_blk_index;
5106
5107         if (rdev->rlc.clear_state_obj == NULL) {
5108                 r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
5109                                      RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
5110                 if (r) {
5111                         dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
5112                         si_rlc_fini(rdev);
5113                         return r;
5114                 }
5115         }
5116         r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5117         if (unlikely(r != 0)) {
5118                 si_rlc_fini(rdev);
5119                 return r;
5120         }
5121         r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
5122                           &rdev->rlc.clear_state_gpu_addr);
5123         if (r) {
5124
5125                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5126                 dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
5127                 si_rlc_fini(rdev);
5128                 return r;
5129         }
5130         r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
5131         if (r) {
5132                 dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
5133                 si_rlc_fini(rdev);
5134                 return r;
5135         }
5136         /* set up the cs buffer */
5137         dst_ptr = rdev->rlc.cs_ptr;
5138         reg_list_hdr_blk_index = 0;
5139         reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
5140         data = upper_32_bits(reg_list_mc_addr);
5141         dst_ptr[reg_list_hdr_blk_index] = data;
5142         reg_list_hdr_blk_index++;
5143         for (i = 0; cs_data[i].section != NULL; i++) {
5144                 for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5145                         reg_num = cs_data[i].section[j].reg_count;
5146                         data = reg_list_mc_addr & 0xffffffff;
5147                         dst_ptr[reg_list_hdr_blk_index] = data;
5148                         reg_list_hdr_blk_index++;
5149
5150                         data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
5151                         dst_ptr[reg_list_hdr_blk_index] = data;
5152                         reg_list_hdr_blk_index++;
5153
5154                         data = 0x08000000 | (reg_num * 4);
5155                         dst_ptr[reg_list_hdr_blk_index] = data;
5156                         reg_list_hdr_blk_index++;
5157
5158                         for (k = 0; k < reg_num; k++) {
5159                                 data = cs_data[i].section[j].extent[k];
5160                                 dst_ptr[reg_list_blk_index + k] = data;
5161                         }
5162                         reg_list_mc_addr += reg_num * 4;
5163                         reg_list_blk_index += reg_num;
5164                 }
5165         }
5166         dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
5167
5168         radeon_bo_kunmap(rdev->rlc.clear_state_obj);
5169         radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5170
5171         return 0;
5172 }
5173
/**
 * si_rlc_reset - soft reset the RLC
 * @rdev: radeon_device pointer
 *
 * Pulses the SOFT_RESET_RLC bit in GRBM_SOFT_RESET, with a 50us settle
 * delay after asserting and again after deasserting the reset.
 */
static void si_rlc_reset(struct radeon_device *rdev)
{
        u32 tmp = RREG32(GRBM_SOFT_RESET);

        tmp |= SOFT_RESET_RLC;
        WREG32(GRBM_SOFT_RESET, tmp);
        udelay(50);
        tmp &= ~SOFT_RESET_RLC;
        WREG32(GRBM_SOFT_RESET, tmp);
        udelay(50);
}
5185
/**
 * si_rlc_stop - halt the RLC
 * @rdev: radeon_device pointer
 *
 * Disables the RLC, masks the GUI idle interrupt, and waits for the
 * RLC serdes to go idle before returning.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
        WREG32(RLC_CNTL, 0);

        si_enable_gui_idle_interrupt(rdev, false);

        si_wait_for_rlc_serdes(rdev);
}
5194
/**
 * si_rlc_start - start the RLC
 * @rdev: radeon_device pointer
 *
 * Enables the RLC and the GUI idle interrupt, then waits 50us for the
 * RLC to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
        WREG32(RLC_CNTL, RLC_ENABLE);

        si_enable_gui_idle_interrupt(rdev, true);

        udelay(50);
}
5203
5204 static bool si_lbpw_supported(struct radeon_device *rdev)
5205 {
5206         u32 tmp;
5207
5208         /* Enable LBPW only for DDR3 */
5209         tmp = RREG32(MC_SEQ_MISC0);
5210         if ((tmp & 0xF0000000) == 0xB0000000)
5211                 return true;
5212         return false;
5213 }
5214
5215 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5216 {
5217         u32 tmp;
5218
5219         tmp = RREG32(RLC_LB_CNTL);
5220         if (enable)
5221                 tmp |= LOAD_BALANCE_ENABLE;
5222         else
5223                 tmp &= ~LOAD_BALANCE_ENABLE;
5224         WREG32(RLC_LB_CNTL, tmp);
5225
5226         if (!enable) {
5227                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5228                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5229         }
5230 }
5231
/**
 * si_rlc_resume - load the RLC microcode and start the RLC
 * @rdev: radeon_device pointer
 *
 * Halts and soft-resets the RLC, (re)initializes power and clock
 * gating, clears the RLC run-list and load-balancing registers, uploads
 * the big-endian RLC ucode image one dword at a time through
 * RLC_UCODE_ADDR/RLC_UCODE_DATA, enables LBPW where supported, and
 * finally starts the RLC.
 *
 * Returns 0 on success, -EINVAL if the RLC firmware was never loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
        u32 i;
        const __be32 *fw_data;

        if (!rdev->rlc_fw)
                return -EINVAL;

        si_rlc_stop(rdev);

        si_rlc_reset(rdev);

        si_init_pg(rdev);

        si_init_cg(rdev);

        /* clear run list and load-balancing state */
        WREG32(RLC_RL_BASE, 0);
        WREG32(RLC_RL_SIZE, 0);
        WREG32(RLC_LB_CNTL, 0);
        WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
        WREG32(RLC_LB_CNTR_INIT, 0);
        WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

        WREG32(RLC_MC_CNTL, 0);
        WREG32(RLC_UCODE_CNTL, 0);

        /* upload the ucode image, converting from big endian */
        fw_data = (const __be32 *)rdev->rlc_fw->data;
        for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
                WREG32(RLC_UCODE_ADDR, i);
                WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
        }
        WREG32(RLC_UCODE_ADDR, 0);

        si_enable_lbpw(rdev, si_lbpw_supported(rdev));

        si_rlc_start(rdev);

        return 0;
}
5271
5272 static void si_enable_interrupts(struct radeon_device *rdev)
5273 {
5274         u32 ih_cntl = RREG32(IH_CNTL);
5275         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5276
5277         ih_cntl |= ENABLE_INTR;
5278         ih_rb_cntl |= IH_RB_ENABLE;
5279         WREG32(IH_CNTL, ih_cntl);
5280         WREG32(IH_RB_CNTL, ih_rb_cntl);
5281         rdev->ih.enabled = true;
5282 }
5283
5284 static void si_disable_interrupts(struct radeon_device *rdev)
5285 {
5286         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5287         u32 ih_cntl = RREG32(IH_CNTL);
5288
5289         ih_rb_cntl &= ~IH_RB_ENABLE;
5290         ih_cntl &= ~ENABLE_INTR;
5291         WREG32(IH_RB_CNTL, ih_rb_cntl);
5292         WREG32(IH_CNTL, ih_cntl);
5293         /* set rptr, wptr to 0 */
5294         WREG32(IH_RB_RPTR, 0);
5295         WREG32(IH_RB_WPTR, 0);
5296         rdev->ih.enabled = false;
5297         rdev->ih.rptr = 0;
5298 }
5299
/**
 * si_disable_interrupt_state - mask every interrupt source
 * @rdev: radeon_device pointer
 *
 * Forces the interrupt enable state of all sources to disabled: the
 * three CP rings, both DMA engines, GRBM, the per-CRTC vblank and
 * pageflip interrupts (gated on rdev->num_crtc), and — on asics with a
 * display controller — DAC autodetect and the six HPD pins.  The HPD
 * writes preserve only the polarity bit.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        /* CP rings: keep only the context busy/empty bits on ring 0 */
        WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING1, 0);
        WREG32(CP_INT_CNTL_RING2, 0);
        /* mask the DMA engine trap interrupts */
        tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
        WREG32(GRBM_INT_CNTL, 0);
        /* mask per-CRTC vblank/vline interrupts */
        if (rdev->num_crtc >= 2) {
                WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* mask per-CRTC pageflip interrupts */
        if (rdev->num_crtc >= 2) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        if (!ASIC_IS_NODCE(rdev)) {
                WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

                /* HPD: clear everything except the polarity bit */
                tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD1_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD2_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD3_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD4_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD5_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD6_INT_CONTROL, tmp);
        }
}
5355
/**
 * si_irq_init - initialize the interrupt handler
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, disables interrupts, brings up the RLC,
 * programs the IH ring buffer (base, size, writeback address, overflow
 * handling), configures IH_CNTL (RPTR_REARM only with MSI), forces all
 * interrupt sources disabled, enables bus mastering and finally enables
 * the IH.
 *
 * Returns 0 on success, negative error code on ring allocation or RLC
 * resume failure (the IH ring is freed on the RLC failure path).
 */
static int si_irq_init(struct radeon_device *rdev)
{
        int ret = 0;
        int rb_bufsz;
        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

        /* allocate ring */
        ret = r600_ih_ring_alloc(rdev);
        if (ret)
                return ret;

        /* disable irqs */
        si_disable_interrupts(rdev);

        /* init rlc */
        ret = si_rlc_resume(rdev);
        if (ret) {
                r600_ih_ring_fini(rdev);
                return ret;
        }

        /* setup interrupt control */
        /* set dummy read address to ring address */
        WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
        interrupt_cntl = RREG32(INTERRUPT_CNTL);
        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
         */
        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
        WREG32(INTERRUPT_CNTL, interrupt_cntl);

        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
        /* ring size field is log2 of the size in dwords */
        rb_bufsz = drm_order(rdev->ih.ring_size / 4);

        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
                      (rb_bufsz << 1));

        if (rdev->wb.enabled)
                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

        /* set the writeback address whether it's enabled or not */
        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

        WREG32(IH_RB_CNTL, ih_rb_cntl);

        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);

        /* Default settings for IH_CNTL (disabled at first) */
        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
        /* RPTR_REARM only works if msi's are enabled */
        if (rdev->msi_enabled)
                ih_cntl |= RPTR_REARM;
        WREG32(IH_CNTL, ih_cntl);

        /* force the active interrupt state to all disabled */
        si_disable_interrupt_state(rdev);

        pci_set_master(rdev->pdev);

        /* enable irqs */
        si_enable_interrupts(rdev);

        return ret;
}
5426
/**
 * si_irq_set - program the interrupt mask registers
 * @rdev: radeon_device pointer
 *
 * Builds the enable masks for every interrupt source from the driver's
 * current irq state (CP rings, DMA engines, per-CRTC vblank/pageflip,
 * HPD pins, thermal) and then commits them all to the hardware in one
 * pass.  If the IH is disabled, all sources are forced off instead.
 *
 * Returns 0 on success, -EINVAL if no interrupt handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
        u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
        u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
        u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
        u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
        u32 grbm_int_cntl = 0;
        u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
        u32 dma_cntl, dma_cntl1;
        u32 thermal_int = 0;

        if (!rdev->irq.installed) {
                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
                return -EINVAL;
        }
        /* don't enable anything if the ih is disabled */
        if (!rdev->ih.enabled) {
                si_disable_interrupts(rdev);
                /* force the active interrupt state to all disabled */
                si_disable_interrupt_state(rdev);
                return 0;
        }

        /* start from the current hardware values with the enable bits cleared */
        if (!ASIC_IS_NODCE(rdev)) {
                hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
                hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
                hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
                hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
                hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
                hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
        }

        dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

        thermal_int = RREG32(CG_THERMAL_INT) &
                ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

        /* enable CP interrupts on all rings */
        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int gfx\n");
                cp_int_cntl |= TIME_STAMP_INT_ENABLE;
        }
        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int cp1\n");
                cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
        }
        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int cp2\n");
                cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
        }
        if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int dma\n");
                dma_cntl |= TRAP_ENABLE;
        }

        if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
                DRM_DEBUG("si_irq_set: sw int dma1\n");
                dma_cntl1 |= TRAP_ENABLE;
        }
        /* vblank on for a crtc when either the vblank interrupt or a
         * pageflip is pending on it */
        if (rdev->irq.crtc_vblank_int[0] ||
            atomic_read(&rdev->irq.pflip[0])) {
                DRM_DEBUG("si_irq_set: vblank 0\n");
                crtc1 |= VBLANK_INT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[1] ||
            atomic_read(&rdev->irq.pflip[1])) {
                DRM_DEBUG("si_irq_set: vblank 1\n");
                crtc2 |= VBLANK_INT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[2] ||
            atomic_read(&rdev->irq.pflip[2])) {
                DRM_DEBUG("si_irq_set: vblank 2\n");
                crtc3 |= VBLANK_INT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[3] ||
            atomic_read(&rdev->irq.pflip[3])) {
                DRM_DEBUG("si_irq_set: vblank 3\n");
                crtc4 |= VBLANK_INT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[4] ||
            atomic_read(&rdev->irq.pflip[4])) {
                DRM_DEBUG("si_irq_set: vblank 4\n");
                crtc5 |= VBLANK_INT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[5] ||
            atomic_read(&rdev->irq.pflip[5])) {
                DRM_DEBUG("si_irq_set: vblank 5\n");
                crtc6 |= VBLANK_INT_MASK;
        }
        if (rdev->irq.hpd[0]) {
                DRM_DEBUG("si_irq_set: hpd 1\n");
                hpd1 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[1]) {
                DRM_DEBUG("si_irq_set: hpd 2\n");
                hpd2 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[2]) {
                DRM_DEBUG("si_irq_set: hpd 3\n");
                hpd3 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[3]) {
                DRM_DEBUG("si_irq_set: hpd 4\n");
                hpd4 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[4]) {
                DRM_DEBUG("si_irq_set: hpd 5\n");
                hpd5 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[5]) {
                DRM_DEBUG("si_irq_set: hpd 6\n");
                hpd6 |= DC_HPDx_INT_EN;
        }

        /* commit the assembled masks to the hardware */
        WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
        WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
        WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

        WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
        WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

        WREG32(GRBM_INT_CNTL, grbm_int_cntl);

        if (rdev->irq.dpm_thermal) {
                DRM_DEBUG("dpm thermal\n");
                thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
        }

        if (rdev->num_crtc >= 2) {
                WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
                WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
                WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
                WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
        }

        if (rdev->num_crtc >= 2) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
        }

        if (!ASIC_IS_NODCE(rdev)) {
                WREG32(DC_HPD1_INT_CONTROL, hpd1);
                WREG32(DC_HPD2_INT_CONTROL, hpd2);
                WREG32(DC_HPD3_INT_CONTROL, hpd3);
                WREG32(DC_HPD4_INT_CONTROL, hpd4);
                WREG32(DC_HPD5_INT_CONTROL, hpd5);
                WREG32(DC_HPD6_INT_CONTROL, hpd6);
        }

        WREG32(CG_THERMAL_INT, thermal_int);

        return 0;
}
5595
5596 static inline void si_irq_ack(struct radeon_device *rdev)
5597 {
5598         u32 tmp;
5599
5600         if (ASIC_IS_NODCE(rdev))
5601                 return;
5602
5603         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5604         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5605         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5606         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5607         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5608         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5609         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5610         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5611         if (rdev->num_crtc >= 4) {
5612                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5613                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5614         }
5615         if (rdev->num_crtc >= 6) {
5616                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5617                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5618         }
5619
5620         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5621                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5622         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5623                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5624         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5625                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5626         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5627                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5628         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5629                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5630         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5631                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5632
5633         if (rdev->num_crtc >= 4) {
5634                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5635                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5636                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5637                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5638                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5639                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5640                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5641                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5642                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5643                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5644                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5645                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5646         }
5647
5648         if (rdev->num_crtc >= 6) {
5649                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5650                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5651                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5652                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5653                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5654                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5655                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5656                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5657                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5658                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5659                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5660                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5661         }
5662
5663         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5664                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5665                 tmp |= DC_HPDx_INT_ACK;
5666                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5667         }
5668         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5669                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5670                 tmp |= DC_HPDx_INT_ACK;
5671                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5672         }
5673         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5674                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5675                 tmp |= DC_HPDx_INT_ACK;
5676                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5677         }
5678         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5679                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5680                 tmp |= DC_HPDx_INT_ACK;
5681                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5682         }
5683         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5684                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5685                 tmp |= DC_HPDx_INT_ACK;
5686                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5687         }
5688         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5689                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5690                 tmp |= DC_HPDx_INT_ACK;
5691                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5692         }
5693 }
5694
/**
 * si_irq_disable - disable interrupt delivery and clear pending sources
 *
 * @rdev: radeon_device pointer
 *
 * Masks the IH controller, waits briefly so in-flight interrupts can
 * latch, acks anything still pending, then programs all per-source
 * interrupt enables to their disabled state.  The order matters:
 * disable first so no new sources assert while we ack.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5703
/**
 * si_irq_suspend - quiesce interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Fully disables and acks interrupts, then stops the RLC so no
 * further interrupt traffic is generated while suspended.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5709
/**
 * si_irq_fini - tear down the interrupt handling path
 *
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt delivery first, then frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5715
5716 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5717 {
5718         u32 wptr, tmp;
5719
5720         if (rdev->wb.enabled)
5721                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5722         else
5723                 wptr = RREG32(IH_RB_WPTR);
5724
5725         if (wptr & RB_OVERFLOW) {
5726                 /* When a ring buffer overflow happen start parsing interrupt
5727                  * from the last not overwritten vector (wptr + 16). Hopefully
5728                  * this should allow us to catchup.
5729                  */
5730                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5731                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5732                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5733                 tmp = RREG32(IH_RB_CNTL);
5734                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5735                 WREG32(IH_RB_CNTL, tmp);
5736         }
5737         return (wptr & rdev->ih.ptr_mask);
5738 }
5739
5740 /*        SI IV Ring
5741  * Each IV ring entry is 128 bits:
5742  * [7:0]    - interrupt source id
5743  * [31:8]   - reserved
5744  * [59:32]  - interrupt source data
5745  * [63:60]  - reserved
5746  * [71:64]  - RINGID
5747  * [79:72]  - VMID
5748  * [127:80] - reserved
5749  */
/**
 * si_irq_process - drain and dispatch the IH (interrupt handler) ring
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from rptr to wptr, decoding each 16-byte IV entry
 * (see the ring layout comment above) and dispatching on the source id:
 * CRTC vblank/vline, hotplug, VM faults, CP/DMA fence events and thermal
 * events.  Hotplug and thermal handling are deferred to work items.
 * After updating the hardware rptr the wptr is re-read; if more entries
 * arrived in the meantime processing restarts, so no interrupt is lost.
 *
 * Returns IRQ_HANDLED if any processing happened, IRQ_NONE otherwise
 * (IH disabled, device shutting down, or another CPU already draining).
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		/* IV entry fields: [7:0] source id, [59:32] source data,
		 * [71:64] ring id (see layout comment above the function) */
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					/* clear the cached status bit so a spurious
					 * repeat of this IV entry is ignored */
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146:
		case 147:
			/* VM protection fault: dump the faulting address/status,
			 * then reset them so the next fault is captured */
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6069
6070 /**
6071  * si_copy_dma - copy pages using the DMA engine
6072  *
6073  * @rdev: radeon_device pointer
6074  * @src_offset: src GPU address
6075  * @dst_offset: dst GPU address
6076  * @num_gpu_pages: number of GPU pages to xfer
6077  * @fence: radeon fence object
6078  *
6079  * Copy GPU paging using the DMA engine (SI).
6080  * Used by the radeon ttm implementation to move pages if
6081  * registered as the asic copy callback.
6082  */
6083 int si_copy_dma(struct radeon_device *rdev,
6084                 uint64_t src_offset, uint64_t dst_offset,
6085                 unsigned num_gpu_pages,
6086                 struct radeon_fence **fence)
6087 {
6088         struct radeon_semaphore *sem = NULL;
6089         int ring_index = rdev->asic->copy.dma_ring_index;
6090         struct radeon_ring *ring = &rdev->ring[ring_index];
6091         u32 size_in_bytes, cur_size_in_bytes;
6092         int i, num_loops;
6093         int r = 0;
6094
6095         r = radeon_semaphore_create(rdev, &sem);
6096         if (r) {
6097                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6098                 return r;
6099         }
6100
6101         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6102         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6103         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6104         if (r) {
6105                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6106                 radeon_semaphore_free(rdev, &sem, NULL);
6107                 return r;
6108         }
6109
6110         if (radeon_fence_need_sync(*fence, ring->idx)) {
6111                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6112                                             ring->idx);
6113                 radeon_fence_note_sync(*fence, ring->idx);
6114         } else {
6115                 radeon_semaphore_free(rdev, &sem, NULL);
6116         }
6117
6118         for (i = 0; i < num_loops; i++) {
6119                 cur_size_in_bytes = size_in_bytes;
6120                 if (cur_size_in_bytes > 0xFFFFF)
6121                         cur_size_in_bytes = 0xFFFFF;
6122                 size_in_bytes -= cur_size_in_bytes;
6123                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6124                 radeon_ring_write(ring, dst_offset & 0xffffffff);
6125                 radeon_ring_write(ring, src_offset & 0xffffffff);
6126                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6127                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6128                 src_offset += cur_size_in_bytes;
6129                 dst_offset += cur_size_in_bytes;
6130         }
6131
6132         r = radeon_fence_emit(rdev, fence, ring->idx);
6133         if (r) {
6134                 radeon_ring_unlock_undo(rdev, ring);
6135                 return r;
6136         }
6137
6138         radeon_ring_unlock_commit(rdev, ring);
6139         radeon_semaphore_free(rdev, &sem, *fence);
6140
6141         return r;
6142 }
6143
6144 /*
6145  * startup/shutdown callbacks
6146  */
/**
 * si_startup - bring the SI GPU fully up
 *
 * @rdev: radeon_device pointer
 *
 * Ordered hardware bring-up used by both init and resume paths:
 * PCIe link/ASPM setup, microcode load, MC/GART programming, RLC and
 * writeback buffers, fence rings, IRQ setup, CP/DMA/UVD ring init and
 * finally the IB pool and VM manager.  The sequence is order-dependent;
 * each step may require the previous one.  Returns 0 on success or a
 * negative error code on the first failing step.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* load microcode images on first bring-up only; resume reuses them */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence processing for every ring this ASIC exposes */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: disable its ring and continue */
	if (rdev->has_uvd) {
		r = rv770_uvd_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring was sized earlier only if UVD resumed successfully */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size,
					     R600_WB_UVD_RPTR_OFFSET,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     0, 0xfffff, RADEON_CP_PACKET2);
			if (!r)
				r = r600_uvd_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
6327
6328 int si_resume(struct radeon_device *rdev)
6329 {
6330         int r;
6331
6332         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6333          * posting will perform necessary task to bring back GPU into good
6334          * shape.
6335          */
6336         /* post card */
6337         atom_asic_init(rdev->mode_info.atom_context);
6338
6339         /* init golden registers */
6340         si_init_golden_registers(rdev);
6341
6342         rdev->accel_working = true;
6343         r = si_startup(rdev);
6344         if (r) {
6345                 DRM_ERROR("si startup failed on resume\n");
6346                 rdev->accel_working = false;
6347                 return r;
6348         }
6349
6350         return r;
6351
6352 }
6353
/**
 * si_suspend - quiesce the hw in preparation for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the runtime state in roughly the reverse order of
 * si_startup(): VM manager, CP, DMA rings, UVD (if present),
 * interrupts, writeback and finally the GART.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	/* halt the command processor */
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		/* stop the UVD ring buffer controller before saving state */
		r600_uvd_rbc_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6368
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call ASIC-specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
6375 int si_init(struct radeon_device *rdev)
6376 {
6377         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6378         int r;
6379
6380         /* Read BIOS */
6381         if (!radeon_get_bios(rdev)) {
6382                 if (ASIC_IS_AVIVO(rdev))
6383                         return -EINVAL;
6384         }
6385         /* Must be an ATOMBIOS */
6386         if (!rdev->is_atom_bios) {
6387                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6388                 return -EINVAL;
6389         }
6390         r = radeon_atombios_init(rdev);
6391         if (r)
6392                 return r;
6393
6394         /* Post card if necessary */
6395         if (!radeon_card_posted(rdev)) {
6396                 if (!rdev->bios) {
6397                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6398                         return -EINVAL;
6399                 }
6400                 DRM_INFO("GPU not posted. posting now...\n");
6401                 atom_asic_init(rdev->mode_info.atom_context);
6402         }
6403         /* init golden registers */
6404         si_init_golden_registers(rdev);
6405         /* Initialize scratch registers */
6406         si_scratch_init(rdev);
6407         /* Initialize surface registers */
6408         radeon_surface_init(rdev);
6409         /* Initialize clocks */
6410         radeon_get_clock_info(rdev->ddev);
6411
6412         /* Fence driver */
6413         r = radeon_fence_driver_init(rdev);
6414         if (r)
6415                 return r;
6416
6417         /* initialize memory controller */
6418         r = si_mc_init(rdev);
6419         if (r)
6420                 return r;
6421         /* Memory manager */
6422         r = radeon_bo_init(rdev);
6423         if (r)
6424                 return r;
6425
6426         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6427         ring->ring_obj = NULL;
6428         r600_ring_init(rdev, ring, 1024 * 1024);
6429
6430         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6431         ring->ring_obj = NULL;
6432         r600_ring_init(rdev, ring, 1024 * 1024);
6433
6434         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6435         ring->ring_obj = NULL;
6436         r600_ring_init(rdev, ring, 1024 * 1024);
6437
6438         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6439         ring->ring_obj = NULL;
6440         r600_ring_init(rdev, ring, 64 * 1024);
6441
6442         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6443         ring->ring_obj = NULL;
6444         r600_ring_init(rdev, ring, 64 * 1024);
6445
6446         if (rdev->has_uvd) {
6447                 r = radeon_uvd_init(rdev);
6448                 if (!r) {
6449                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6450                         ring->ring_obj = NULL;
6451                         r600_ring_init(rdev, ring, 4096);
6452                 }
6453         }
6454
6455         rdev->ih.ring_obj = NULL;
6456         r600_ih_ring_init(rdev, 64 * 1024);
6457
6458         r = r600_pcie_gart_init(rdev);
6459         if (r)
6460                 return r;
6461
6462         rdev->accel_working = true;
6463         r = si_startup(rdev);
6464         if (r) {
6465                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6466                 si_cp_fini(rdev);
6467                 cayman_dma_fini(rdev);
6468                 si_irq_fini(rdev);
6469                 si_rlc_fini(rdev);
6470                 radeon_wb_fini(rdev);
6471                 radeon_ib_pool_fini(rdev);
6472                 radeon_vm_manager_fini(rdev);
6473                 radeon_irq_kms_fini(rdev);
6474                 si_pcie_gart_fini(rdev);
6475                 rdev->accel_working = false;
6476         }
6477
6478         /* Don't start up if the MC ucode is missing.
6479          * The default clocks and voltages before the MC ucode
6480          * is loaded are not suffient for advanced operations.
6481          */
6482         if (!rdev->mc_fw) {
6483                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6484                 return -EINVAL;
6485         }
6486
6487         return 0;
6488 }
6489
/**
 * si_fini - asic-specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything set up by si_init()/si_startup(): CP, DMA,
 * interrupts, RLC, clock/power gating, writeback, VM manager, IB pool,
 * UVD (if present), GART, VRAM scratch, GEM, fences, BO manager,
 * atombios state and finally the cached BIOS image.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	si_fini_cg(rdev);
	si_fini_pg(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd)
		radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* free the BIOS copy made by radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6513
/**
 * si_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (SI).  Writing the capture
 * register latches the 64 bit counter so the subsequent LSB/MSB
 * reads are consistent; gpu_clock_mutex serializes concurrent
 * captures.
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	/* latch the counter before reading the two 32 bit halves */
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
6533
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 puts the PLL to sleep)
 * @dclk: requested UVD decode clock (0 puts the PLL to sleep)
 *
 * Bypasses VCLK/DCLK with BCLK, reprograms the UPLL dividers for the
 * requested frequencies, waits for the PLL to settle and then switches
 * VCLK/DCLK back onto the PLL outputs.  If either clock is 0 the PLL
 * is simply left in bypass and put to sleep.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* wait for the PLL to acknowledge the control request */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a VCO range based on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6624
/*
 * si_pcie_gen3_enable - bring the PCIe link up to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If both the platform (root port) and the GPU support faster link
 * speeds, retrains the link to gen2 or gen3.  For gen3 this includes
 * the equalization retry dance required by the PCIe spec.  Disabled
 * with radeon.pcie_gen2=0 and skipped on IGP/non-PCIE parts.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* module parameter gates both gen2 and gen3 transitions */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* what speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* offsets of the PCIe capability blocks in config space */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save HAWD on both ends, then force it on during retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its maximum if it trained narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore bit 4 and bits 9-11.
				 * NOTE(review): bit 4 is Enter Compliance; (7 << 9) does not
				 * line up with the LNKCTL2 Transmit Margin field (bits 9:7,
				 * i.e. 7 << 7) - confirm against the PCIe spec whether
				 * (7 << 9) was intended here.
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (LNKCTL2 bits 3:0) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait (bounded) for the hw to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
6781
/*
 * si_program_aspm - program PCIe ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 entry, PLL powerdown in L1 and CLKREQ-based
 * clocking.  The disable_* locals are hard-coded false, so the L0s,
 * L1 and plloff-in-L1 paths below are always taken; clkreq is used
 * whenever the root port advertises PCI_EXP_LNKCAP_CLKPM.
 * No-op on non-PCIE parts.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* configure L0s/L1 inactivity timers */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF/TXS2 states */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on all but Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time: Oland/Hainan need a non-zero value */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ is only usable if the root port supports clock PM */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the misc clocks off the refclk so it can gate */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: commit the timer setup computed above
		 * (dead branch while disable_l1 is hard-coded false)
		 */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS maxed out and the link is reversed both ways,
		 * back out the L0s inactivity timer
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}