]> rtime.felk.cvut.cz Git - linux-imx.git/blob - drivers/gpu/drm/radeon/si.c
Merge remote-tracking branch 'asoc/fix/wm8994' into asoc-linus
[linux-imx.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37
38
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
51 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
52 MODULE_FIRMWARE("radeon/VERDE_me.bin");
53 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
54 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
56 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
57 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
58 MODULE_FIRMWARE("radeon/OLAND_me.bin");
59 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
60 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
68 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
69
70 static void si_pcie_gen3_enable(struct radeon_device *rdev);
71 static void si_program_aspm(struct radeon_device *rdev);
72 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
73 extern void r600_ih_ring_fini(struct radeon_device *rdev);
74 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
75 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
76 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
77 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
78 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
79 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
80
81 static const u32 verde_rlc_save_restore_register_list[] =
82 {
83         (0x8000 << 16) | (0x98f4 >> 2),
84         0x00000000,
85         (0x8040 << 16) | (0x98f4 >> 2),
86         0x00000000,
87         (0x8000 << 16) | (0xe80 >> 2),
88         0x00000000,
89         (0x8040 << 16) | (0xe80 >> 2),
90         0x00000000,
91         (0x8000 << 16) | (0x89bc >> 2),
92         0x00000000,
93         (0x8040 << 16) | (0x89bc >> 2),
94         0x00000000,
95         (0x8000 << 16) | (0x8c1c >> 2),
96         0x00000000,
97         (0x8040 << 16) | (0x8c1c >> 2),
98         0x00000000,
99         (0x9c00 << 16) | (0x98f0 >> 2),
100         0x00000000,
101         (0x9c00 << 16) | (0xe7c >> 2),
102         0x00000000,
103         (0x8000 << 16) | (0x9148 >> 2),
104         0x00000000,
105         (0x8040 << 16) | (0x9148 >> 2),
106         0x00000000,
107         (0x9c00 << 16) | (0x9150 >> 2),
108         0x00000000,
109         (0x9c00 << 16) | (0x897c >> 2),
110         0x00000000,
111         (0x9c00 << 16) | (0x8d8c >> 2),
112         0x00000000,
113         (0x9c00 << 16) | (0xac54 >> 2),
114         0X00000000,
115         0x3,
116         (0x9c00 << 16) | (0x98f8 >> 2),
117         0x00000000,
118         (0x9c00 << 16) | (0x9910 >> 2),
119         0x00000000,
120         (0x9c00 << 16) | (0x9914 >> 2),
121         0x00000000,
122         (0x9c00 << 16) | (0x9918 >> 2),
123         0x00000000,
124         (0x9c00 << 16) | (0x991c >> 2),
125         0x00000000,
126         (0x9c00 << 16) | (0x9920 >> 2),
127         0x00000000,
128         (0x9c00 << 16) | (0x9924 >> 2),
129         0x00000000,
130         (0x9c00 << 16) | (0x9928 >> 2),
131         0x00000000,
132         (0x9c00 << 16) | (0x992c >> 2),
133         0x00000000,
134         (0x9c00 << 16) | (0x9930 >> 2),
135         0x00000000,
136         (0x9c00 << 16) | (0x9934 >> 2),
137         0x00000000,
138         (0x9c00 << 16) | (0x9938 >> 2),
139         0x00000000,
140         (0x9c00 << 16) | (0x993c >> 2),
141         0x00000000,
142         (0x9c00 << 16) | (0x9940 >> 2),
143         0x00000000,
144         (0x9c00 << 16) | (0x9944 >> 2),
145         0x00000000,
146         (0x9c00 << 16) | (0x9948 >> 2),
147         0x00000000,
148         (0x9c00 << 16) | (0x994c >> 2),
149         0x00000000,
150         (0x9c00 << 16) | (0x9950 >> 2),
151         0x00000000,
152         (0x9c00 << 16) | (0x9954 >> 2),
153         0x00000000,
154         (0x9c00 << 16) | (0x9958 >> 2),
155         0x00000000,
156         (0x9c00 << 16) | (0x995c >> 2),
157         0x00000000,
158         (0x9c00 << 16) | (0x9960 >> 2),
159         0x00000000,
160         (0x9c00 << 16) | (0x9964 >> 2),
161         0x00000000,
162         (0x9c00 << 16) | (0x9968 >> 2),
163         0x00000000,
164         (0x9c00 << 16) | (0x996c >> 2),
165         0x00000000,
166         (0x9c00 << 16) | (0x9970 >> 2),
167         0x00000000,
168         (0x9c00 << 16) | (0x9974 >> 2),
169         0x00000000,
170         (0x9c00 << 16) | (0x9978 >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x997c >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x9980 >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9984 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x9988 >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x998c >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x8c00 >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x8c14 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x8c04 >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x8c08 >> 2),
189         0x00000000,
190         (0x8000 << 16) | (0x9b7c >> 2),
191         0x00000000,
192         (0x8040 << 16) | (0x9b7c >> 2),
193         0x00000000,
194         (0x8000 << 16) | (0xe84 >> 2),
195         0x00000000,
196         (0x8040 << 16) | (0xe84 >> 2),
197         0x00000000,
198         (0x8000 << 16) | (0x89c0 >> 2),
199         0x00000000,
200         (0x8040 << 16) | (0x89c0 >> 2),
201         0x00000000,
202         (0x8000 << 16) | (0x914c >> 2),
203         0x00000000,
204         (0x8040 << 16) | (0x914c >> 2),
205         0x00000000,
206         (0x8000 << 16) | (0x8c20 >> 2),
207         0x00000000,
208         (0x8040 << 16) | (0x8c20 >> 2),
209         0x00000000,
210         (0x8000 << 16) | (0x9354 >> 2),
211         0x00000000,
212         (0x8040 << 16) | (0x9354 >> 2),
213         0x00000000,
214         (0x9c00 << 16) | (0x9060 >> 2),
215         0x00000000,
216         (0x9c00 << 16) | (0x9364 >> 2),
217         0x00000000,
218         (0x9c00 << 16) | (0x9100 >> 2),
219         0x00000000,
220         (0x9c00 << 16) | (0x913c >> 2),
221         0x00000000,
222         (0x8000 << 16) | (0x90e0 >> 2),
223         0x00000000,
224         (0x8000 << 16) | (0x90e4 >> 2),
225         0x00000000,
226         (0x8000 << 16) | (0x90e8 >> 2),
227         0x00000000,
228         (0x8040 << 16) | (0x90e0 >> 2),
229         0x00000000,
230         (0x8040 << 16) | (0x90e4 >> 2),
231         0x00000000,
232         (0x8040 << 16) | (0x90e8 >> 2),
233         0x00000000,
234         (0x9c00 << 16) | (0x8bcc >> 2),
235         0x00000000,
236         (0x9c00 << 16) | (0x8b24 >> 2),
237         0x00000000,
238         (0x9c00 << 16) | (0x88c4 >> 2),
239         0x00000000,
240         (0x9c00 << 16) | (0x8e50 >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x8c0c >> 2),
243         0x00000000,
244         (0x9c00 << 16) | (0x8e58 >> 2),
245         0x00000000,
246         (0x9c00 << 16) | (0x8e5c >> 2),
247         0x00000000,
248         (0x9c00 << 16) | (0x9508 >> 2),
249         0x00000000,
250         (0x9c00 << 16) | (0x950c >> 2),
251         0x00000000,
252         (0x9c00 << 16) | (0x9494 >> 2),
253         0x00000000,
254         (0x9c00 << 16) | (0xac0c >> 2),
255         0x00000000,
256         (0x9c00 << 16) | (0xac10 >> 2),
257         0x00000000,
258         (0x9c00 << 16) | (0xac14 >> 2),
259         0x00000000,
260         (0x9c00 << 16) | (0xae00 >> 2),
261         0x00000000,
262         (0x9c00 << 16) | (0xac08 >> 2),
263         0x00000000,
264         (0x9c00 << 16) | (0x88d4 >> 2),
265         0x00000000,
266         (0x9c00 << 16) | (0x88c8 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0x88cc >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0x89b0 >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x8b10 >> 2),
273         0x00000000,
274         (0x9c00 << 16) | (0x8a14 >> 2),
275         0x00000000,
276         (0x9c00 << 16) | (0x9830 >> 2),
277         0x00000000,
278         (0x9c00 << 16) | (0x9834 >> 2),
279         0x00000000,
280         (0x9c00 << 16) | (0x9838 >> 2),
281         0x00000000,
282         (0x9c00 << 16) | (0x9a10 >> 2),
283         0x00000000,
284         (0x8000 << 16) | (0x9870 >> 2),
285         0x00000000,
286         (0x8000 << 16) | (0x9874 >> 2),
287         0x00000000,
288         (0x8001 << 16) | (0x9870 >> 2),
289         0x00000000,
290         (0x8001 << 16) | (0x9874 >> 2),
291         0x00000000,
292         (0x8040 << 16) | (0x9870 >> 2),
293         0x00000000,
294         (0x8040 << 16) | (0x9874 >> 2),
295         0x00000000,
296         (0x8041 << 16) | (0x9870 >> 2),
297         0x00000000,
298         (0x8041 << 16) | (0x9874 >> 2),
299         0x00000000,
300         0x00000000
301 };
302
303 static const u32 tahiti_golden_rlc_registers[] =
304 {
305         0xc424, 0xffffffff, 0x00601005,
306         0xc47c, 0xffffffff, 0x10104040,
307         0xc488, 0xffffffff, 0x0100000a,
308         0xc314, 0xffffffff, 0x00000800,
309         0xc30c, 0xffffffff, 0x800000f4,
310         0xf4a8, 0xffffffff, 0x00000000
311 };
312
313 static const u32 tahiti_golden_registers[] =
314 {
315         0x9a10, 0x00010000, 0x00018208,
316         0x9830, 0xffffffff, 0x00000000,
317         0x9834, 0xf00fffff, 0x00000400,
318         0x9838, 0x0002021c, 0x00020200,
319         0xc78, 0x00000080, 0x00000000,
320         0xd030, 0x000300c0, 0x00800040,
321         0xd830, 0x000300c0, 0x00800040,
322         0x5bb0, 0x000000f0, 0x00000070,
323         0x5bc0, 0x00200000, 0x50100000,
324         0x7030, 0x31000311, 0x00000011,
325         0x277c, 0x00000003, 0x000007ff,
326         0x240c, 0x000007ff, 0x00000000,
327         0x8a14, 0xf000001f, 0x00000007,
328         0x8b24, 0xffffffff, 0x00ffffff,
329         0x8b10, 0x0000ff0f, 0x00000000,
330         0x28a4c, 0x07ffffff, 0x4e000000,
331         0x28350, 0x3f3f3fff, 0x2a00126a,
332         0x30, 0x000000ff, 0x0040,
333         0x34, 0x00000040, 0x00004040,
334         0x9100, 0x07ffffff, 0x03000000,
335         0x8e88, 0x01ff1f3f, 0x00000000,
336         0x8e84, 0x01ff1f3f, 0x00000000,
337         0x9060, 0x0000007f, 0x00000020,
338         0x9508, 0x00010000, 0x00010000,
339         0xac14, 0x00000200, 0x000002fb,
340         0xac10, 0xffffffff, 0x0000543b,
341         0xac0c, 0xffffffff, 0xa9210876,
342         0x88d0, 0xffffffff, 0x000fff40,
343         0x88d4, 0x0000001f, 0x00000010,
344         0x1410, 0x20000000, 0x20fffed8,
345         0x15c0, 0x000c0fc0, 0x000c0400
346 };
347
348 static const u32 tahiti_golden_registers2[] =
349 {
350         0xc64, 0x00000001, 0x00000001
351 };
352
353 static const u32 pitcairn_golden_rlc_registers[] =
354 {
355         0xc424, 0xffffffff, 0x00601004,
356         0xc47c, 0xffffffff, 0x10102020,
357         0xc488, 0xffffffff, 0x01000020,
358         0xc314, 0xffffffff, 0x00000800,
359         0xc30c, 0xffffffff, 0x800000a4
360 };
361
362 static const u32 pitcairn_golden_registers[] =
363 {
364         0x9a10, 0x00010000, 0x00018208,
365         0x9830, 0xffffffff, 0x00000000,
366         0x9834, 0xf00fffff, 0x00000400,
367         0x9838, 0x0002021c, 0x00020200,
368         0xc78, 0x00000080, 0x00000000,
369         0xd030, 0x000300c0, 0x00800040,
370         0xd830, 0x000300c0, 0x00800040,
371         0x5bb0, 0x000000f0, 0x00000070,
372         0x5bc0, 0x00200000, 0x50100000,
373         0x7030, 0x31000311, 0x00000011,
374         0x2ae4, 0x00073ffe, 0x000022a2,
375         0x240c, 0x000007ff, 0x00000000,
376         0x8a14, 0xf000001f, 0x00000007,
377         0x8b24, 0xffffffff, 0x00ffffff,
378         0x8b10, 0x0000ff0f, 0x00000000,
379         0x28a4c, 0x07ffffff, 0x4e000000,
380         0x28350, 0x3f3f3fff, 0x2a00126a,
381         0x30, 0x000000ff, 0x0040,
382         0x34, 0x00000040, 0x00004040,
383         0x9100, 0x07ffffff, 0x03000000,
384         0x9060, 0x0000007f, 0x00000020,
385         0x9508, 0x00010000, 0x00010000,
386         0xac14, 0x000003ff, 0x000000f7,
387         0xac10, 0xffffffff, 0x00000000,
388         0xac0c, 0xffffffff, 0x32761054,
389         0x88d4, 0x0000001f, 0x00000010,
390         0x15c0, 0x000c0fc0, 0x000c0400
391 };
392
393 static const u32 verde_golden_rlc_registers[] =
394 {
395         0xc424, 0xffffffff, 0x033f1005,
396         0xc47c, 0xffffffff, 0x10808020,
397         0xc488, 0xffffffff, 0x00800008,
398         0xc314, 0xffffffff, 0x00001000,
399         0xc30c, 0xffffffff, 0x80010014
400 };
401
402 static const u32 verde_golden_registers[] =
403 {
404         0x9a10, 0x00010000, 0x00018208,
405         0x9830, 0xffffffff, 0x00000000,
406         0x9834, 0xf00fffff, 0x00000400,
407         0x9838, 0x0002021c, 0x00020200,
408         0xc78, 0x00000080, 0x00000000,
409         0xd030, 0x000300c0, 0x00800040,
410         0xd030, 0x000300c0, 0x00800040,
411         0xd830, 0x000300c0, 0x00800040,
412         0xd830, 0x000300c0, 0x00800040,
413         0x5bb0, 0x000000f0, 0x00000070,
414         0x5bc0, 0x00200000, 0x50100000,
415         0x7030, 0x31000311, 0x00000011,
416         0x2ae4, 0x00073ffe, 0x000022a2,
417         0x2ae4, 0x00073ffe, 0x000022a2,
418         0x2ae4, 0x00073ffe, 0x000022a2,
419         0x240c, 0x000007ff, 0x00000000,
420         0x240c, 0x000007ff, 0x00000000,
421         0x240c, 0x000007ff, 0x00000000,
422         0x8a14, 0xf000001f, 0x00000007,
423         0x8a14, 0xf000001f, 0x00000007,
424         0x8a14, 0xf000001f, 0x00000007,
425         0x8b24, 0xffffffff, 0x00ffffff,
426         0x8b10, 0x0000ff0f, 0x00000000,
427         0x28a4c, 0x07ffffff, 0x4e000000,
428         0x28350, 0x3f3f3fff, 0x0000124a,
429         0x28350, 0x3f3f3fff, 0x0000124a,
430         0x28350, 0x3f3f3fff, 0x0000124a,
431         0x30, 0x000000ff, 0x0040,
432         0x34, 0x00000040, 0x00004040,
433         0x9100, 0x07ffffff, 0x03000000,
434         0x9100, 0x07ffffff, 0x03000000,
435         0x8e88, 0x01ff1f3f, 0x00000000,
436         0x8e88, 0x01ff1f3f, 0x00000000,
437         0x8e88, 0x01ff1f3f, 0x00000000,
438         0x8e84, 0x01ff1f3f, 0x00000000,
439         0x8e84, 0x01ff1f3f, 0x00000000,
440         0x8e84, 0x01ff1f3f, 0x00000000,
441         0x9060, 0x0000007f, 0x00000020,
442         0x9508, 0x00010000, 0x00010000,
443         0xac14, 0x000003ff, 0x00000003,
444         0xac14, 0x000003ff, 0x00000003,
445         0xac14, 0x000003ff, 0x00000003,
446         0xac10, 0xffffffff, 0x00000000,
447         0xac10, 0xffffffff, 0x00000000,
448         0xac10, 0xffffffff, 0x00000000,
449         0xac0c, 0xffffffff, 0x00001032,
450         0xac0c, 0xffffffff, 0x00001032,
451         0xac0c, 0xffffffff, 0x00001032,
452         0x88d4, 0x0000001f, 0x00000010,
453         0x88d4, 0x0000001f, 0x00000010,
454         0x88d4, 0x0000001f, 0x00000010,
455         0x15c0, 0x000c0fc0, 0x000c0400
456 };
457
458 static const u32 oland_golden_rlc_registers[] =
459 {
460         0xc424, 0xffffffff, 0x00601005,
461         0xc47c, 0xffffffff, 0x10104040,
462         0xc488, 0xffffffff, 0x0100000a,
463         0xc314, 0xffffffff, 0x00000800,
464         0xc30c, 0xffffffff, 0x800000f4
465 };
466
467 static const u32 oland_golden_registers[] =
468 {
469         0x9a10, 0x00010000, 0x00018208,
470         0x9830, 0xffffffff, 0x00000000,
471         0x9834, 0xf00fffff, 0x00000400,
472         0x9838, 0x0002021c, 0x00020200,
473         0xc78, 0x00000080, 0x00000000,
474         0xd030, 0x000300c0, 0x00800040,
475         0xd830, 0x000300c0, 0x00800040,
476         0x5bb0, 0x000000f0, 0x00000070,
477         0x5bc0, 0x00200000, 0x50100000,
478         0x7030, 0x31000311, 0x00000011,
479         0x2ae4, 0x00073ffe, 0x000022a2,
480         0x240c, 0x000007ff, 0x00000000,
481         0x8a14, 0xf000001f, 0x00000007,
482         0x8b24, 0xffffffff, 0x00ffffff,
483         0x8b10, 0x0000ff0f, 0x00000000,
484         0x28a4c, 0x07ffffff, 0x4e000000,
485         0x28350, 0x3f3f3fff, 0x00000082,
486         0x30, 0x000000ff, 0x0040,
487         0x34, 0x00000040, 0x00004040,
488         0x9100, 0x07ffffff, 0x03000000,
489         0x9060, 0x0000007f, 0x00000020,
490         0x9508, 0x00010000, 0x00010000,
491         0xac14, 0x000003ff, 0x000000f3,
492         0xac10, 0xffffffff, 0x00000000,
493         0xac0c, 0xffffffff, 0x00003210,
494         0x88d4, 0x0000001f, 0x00000010,
495         0x15c0, 0x000c0fc0, 0x000c0400
496 };
497
498 static const u32 hainan_golden_registers[] =
499 {
500         0x9a10, 0x00010000, 0x00018208,
501         0x9830, 0xffffffff, 0x00000000,
502         0x9834, 0xf00fffff, 0x00000400,
503         0x9838, 0x0002021c, 0x00020200,
504         0xd0c0, 0xff000fff, 0x00000100,
505         0xd030, 0x000300c0, 0x00800040,
506         0xd8c0, 0xff000fff, 0x00000100,
507         0xd830, 0x000300c0, 0x00800040,
508         0x2ae4, 0x00073ffe, 0x000022a2,
509         0x240c, 0x000007ff, 0x00000000,
510         0x8a14, 0xf000001f, 0x00000007,
511         0x8b24, 0xffffffff, 0x00ffffff,
512         0x8b10, 0x0000ff0f, 0x00000000,
513         0x28a4c, 0x07ffffff, 0x4e000000,
514         0x28350, 0x3f3f3fff, 0x00000000,
515         0x30, 0x000000ff, 0x0040,
516         0x34, 0x00000040, 0x00004040,
517         0x9100, 0x03e00000, 0x03600000,
518         0x9060, 0x0000007f, 0x00000020,
519         0x9508, 0x00010000, 0x00010000,
520         0xac14, 0x000003ff, 0x000000f1,
521         0xac10, 0xffffffff, 0x00000000,
522         0xac0c, 0xffffffff, 0x00003210,
523         0x88d4, 0x0000001f, 0x00000010,
524         0x15c0, 0x000c0fc0, 0x000c0400
525 };
526
527 static const u32 hainan_golden_registers2[] =
528 {
529         0x98f8, 0xffffffff, 0x02010001
530 };
531
532 static const u32 tahiti_mgcg_cgcg_init[] =
533 {
534         0xc400, 0xffffffff, 0xfffffffc,
535         0x802c, 0xffffffff, 0xe0000000,
536         0x9a60, 0xffffffff, 0x00000100,
537         0x92a4, 0xffffffff, 0x00000100,
538         0xc164, 0xffffffff, 0x00000100,
539         0x9774, 0xffffffff, 0x00000100,
540         0x8984, 0xffffffff, 0x06000100,
541         0x8a18, 0xffffffff, 0x00000100,
542         0x92a0, 0xffffffff, 0x00000100,
543         0xc380, 0xffffffff, 0x00000100,
544         0x8b28, 0xffffffff, 0x00000100,
545         0x9144, 0xffffffff, 0x00000100,
546         0x8d88, 0xffffffff, 0x00000100,
547         0x8d8c, 0xffffffff, 0x00000100,
548         0x9030, 0xffffffff, 0x00000100,
549         0x9034, 0xffffffff, 0x00000100,
550         0x9038, 0xffffffff, 0x00000100,
551         0x903c, 0xffffffff, 0x00000100,
552         0xad80, 0xffffffff, 0x00000100,
553         0xac54, 0xffffffff, 0x00000100,
554         0x897c, 0xffffffff, 0x06000100,
555         0x9868, 0xffffffff, 0x00000100,
556         0x9510, 0xffffffff, 0x00000100,
557         0xaf04, 0xffffffff, 0x00000100,
558         0xae04, 0xffffffff, 0x00000100,
559         0x949c, 0xffffffff, 0x00000100,
560         0x802c, 0xffffffff, 0xe0000000,
561         0x9160, 0xffffffff, 0x00010000,
562         0x9164, 0xffffffff, 0x00030002,
563         0x9168, 0xffffffff, 0x00040007,
564         0x916c, 0xffffffff, 0x00060005,
565         0x9170, 0xffffffff, 0x00090008,
566         0x9174, 0xffffffff, 0x00020001,
567         0x9178, 0xffffffff, 0x00040003,
568         0x917c, 0xffffffff, 0x00000007,
569         0x9180, 0xffffffff, 0x00060005,
570         0x9184, 0xffffffff, 0x00090008,
571         0x9188, 0xffffffff, 0x00030002,
572         0x918c, 0xffffffff, 0x00050004,
573         0x9190, 0xffffffff, 0x00000008,
574         0x9194, 0xffffffff, 0x00070006,
575         0x9198, 0xffffffff, 0x000a0009,
576         0x919c, 0xffffffff, 0x00040003,
577         0x91a0, 0xffffffff, 0x00060005,
578         0x91a4, 0xffffffff, 0x00000009,
579         0x91a8, 0xffffffff, 0x00080007,
580         0x91ac, 0xffffffff, 0x000b000a,
581         0x91b0, 0xffffffff, 0x00050004,
582         0x91b4, 0xffffffff, 0x00070006,
583         0x91b8, 0xffffffff, 0x0008000b,
584         0x91bc, 0xffffffff, 0x000a0009,
585         0x91c0, 0xffffffff, 0x000d000c,
586         0x91c4, 0xffffffff, 0x00060005,
587         0x91c8, 0xffffffff, 0x00080007,
588         0x91cc, 0xffffffff, 0x0000000b,
589         0x91d0, 0xffffffff, 0x000a0009,
590         0x91d4, 0xffffffff, 0x000d000c,
591         0x91d8, 0xffffffff, 0x00070006,
592         0x91dc, 0xffffffff, 0x00090008,
593         0x91e0, 0xffffffff, 0x0000000c,
594         0x91e4, 0xffffffff, 0x000b000a,
595         0x91e8, 0xffffffff, 0x000e000d,
596         0x91ec, 0xffffffff, 0x00080007,
597         0x91f0, 0xffffffff, 0x000a0009,
598         0x91f4, 0xffffffff, 0x0000000d,
599         0x91f8, 0xffffffff, 0x000c000b,
600         0x91fc, 0xffffffff, 0x000f000e,
601         0x9200, 0xffffffff, 0x00090008,
602         0x9204, 0xffffffff, 0x000b000a,
603         0x9208, 0xffffffff, 0x000c000f,
604         0x920c, 0xffffffff, 0x000e000d,
605         0x9210, 0xffffffff, 0x00110010,
606         0x9214, 0xffffffff, 0x000a0009,
607         0x9218, 0xffffffff, 0x000c000b,
608         0x921c, 0xffffffff, 0x0000000f,
609         0x9220, 0xffffffff, 0x000e000d,
610         0x9224, 0xffffffff, 0x00110010,
611         0x9228, 0xffffffff, 0x000b000a,
612         0x922c, 0xffffffff, 0x000d000c,
613         0x9230, 0xffffffff, 0x00000010,
614         0x9234, 0xffffffff, 0x000f000e,
615         0x9238, 0xffffffff, 0x00120011,
616         0x923c, 0xffffffff, 0x000c000b,
617         0x9240, 0xffffffff, 0x000e000d,
618         0x9244, 0xffffffff, 0x00000011,
619         0x9248, 0xffffffff, 0x0010000f,
620         0x924c, 0xffffffff, 0x00130012,
621         0x9250, 0xffffffff, 0x000d000c,
622         0x9254, 0xffffffff, 0x000f000e,
623         0x9258, 0xffffffff, 0x00100013,
624         0x925c, 0xffffffff, 0x00120011,
625         0x9260, 0xffffffff, 0x00150014,
626         0x9264, 0xffffffff, 0x000e000d,
627         0x9268, 0xffffffff, 0x0010000f,
628         0x926c, 0xffffffff, 0x00000013,
629         0x9270, 0xffffffff, 0x00120011,
630         0x9274, 0xffffffff, 0x00150014,
631         0x9278, 0xffffffff, 0x000f000e,
632         0x927c, 0xffffffff, 0x00110010,
633         0x9280, 0xffffffff, 0x00000014,
634         0x9284, 0xffffffff, 0x00130012,
635         0x9288, 0xffffffff, 0x00160015,
636         0x928c, 0xffffffff, 0x0010000f,
637         0x9290, 0xffffffff, 0x00120011,
638         0x9294, 0xffffffff, 0x00000015,
639         0x9298, 0xffffffff, 0x00140013,
640         0x929c, 0xffffffff, 0x00170016,
641         0x9150, 0xffffffff, 0x96940200,
642         0x8708, 0xffffffff, 0x00900100,
643         0xc478, 0xffffffff, 0x00000080,
644         0xc404, 0xffffffff, 0x0020003f,
645         0x30, 0xffffffff, 0x0000001c,
646         0x34, 0x000f0000, 0x000f0000,
647         0x160c, 0xffffffff, 0x00000100,
648         0x1024, 0xffffffff, 0x00000100,
649         0x102c, 0x00000101, 0x00000000,
650         0x20a8, 0xffffffff, 0x00000104,
651         0x264c, 0x000c0000, 0x000c0000,
652         0x2648, 0x000c0000, 0x000c0000,
653         0x55e4, 0xff000fff, 0x00000100,
654         0x55e8, 0x00000001, 0x00000001,
655         0x2f50, 0x00000001, 0x00000001,
656         0x30cc, 0xc0000fff, 0x00000104,
657         0xc1e4, 0x00000001, 0x00000001,
658         0xd0c0, 0xfffffff0, 0x00000100,
659         0xd8c0, 0xfffffff0, 0x00000100
660 };
661
662 static const u32 pitcairn_mgcg_cgcg_init[] =
663 {
664         0xc400, 0xffffffff, 0xfffffffc,
665         0x802c, 0xffffffff, 0xe0000000,
666         0x9a60, 0xffffffff, 0x00000100,
667         0x92a4, 0xffffffff, 0x00000100,
668         0xc164, 0xffffffff, 0x00000100,
669         0x9774, 0xffffffff, 0x00000100,
670         0x8984, 0xffffffff, 0x06000100,
671         0x8a18, 0xffffffff, 0x00000100,
672         0x92a0, 0xffffffff, 0x00000100,
673         0xc380, 0xffffffff, 0x00000100,
674         0x8b28, 0xffffffff, 0x00000100,
675         0x9144, 0xffffffff, 0x00000100,
676         0x8d88, 0xffffffff, 0x00000100,
677         0x8d8c, 0xffffffff, 0x00000100,
678         0x9030, 0xffffffff, 0x00000100,
679         0x9034, 0xffffffff, 0x00000100,
680         0x9038, 0xffffffff, 0x00000100,
681         0x903c, 0xffffffff, 0x00000100,
682         0xad80, 0xffffffff, 0x00000100,
683         0xac54, 0xffffffff, 0x00000100,
684         0x897c, 0xffffffff, 0x06000100,
685         0x9868, 0xffffffff, 0x00000100,
686         0x9510, 0xffffffff, 0x00000100,
687         0xaf04, 0xffffffff, 0x00000100,
688         0xae04, 0xffffffff, 0x00000100,
689         0x949c, 0xffffffff, 0x00000100,
690         0x802c, 0xffffffff, 0xe0000000,
691         0x9160, 0xffffffff, 0x00010000,
692         0x9164, 0xffffffff, 0x00030002,
693         0x9168, 0xffffffff, 0x00040007,
694         0x916c, 0xffffffff, 0x00060005,
695         0x9170, 0xffffffff, 0x00090008,
696         0x9174, 0xffffffff, 0x00020001,
697         0x9178, 0xffffffff, 0x00040003,
698         0x917c, 0xffffffff, 0x00000007,
699         0x9180, 0xffffffff, 0x00060005,
700         0x9184, 0xffffffff, 0x00090008,
701         0x9188, 0xffffffff, 0x00030002,
702         0x918c, 0xffffffff, 0x00050004,
703         0x9190, 0xffffffff, 0x00000008,
704         0x9194, 0xffffffff, 0x00070006,
705         0x9198, 0xffffffff, 0x000a0009,
706         0x919c, 0xffffffff, 0x00040003,
707         0x91a0, 0xffffffff, 0x00060005,
708         0x91a4, 0xffffffff, 0x00000009,
709         0x91a8, 0xffffffff, 0x00080007,
710         0x91ac, 0xffffffff, 0x000b000a,
711         0x91b0, 0xffffffff, 0x00050004,
712         0x91b4, 0xffffffff, 0x00070006,
713         0x91b8, 0xffffffff, 0x0008000b,
714         0x91bc, 0xffffffff, 0x000a0009,
715         0x91c0, 0xffffffff, 0x000d000c,
716         0x9200, 0xffffffff, 0x00090008,
717         0x9204, 0xffffffff, 0x000b000a,
718         0x9208, 0xffffffff, 0x000c000f,
719         0x920c, 0xffffffff, 0x000e000d,
720         0x9210, 0xffffffff, 0x00110010,
721         0x9214, 0xffffffff, 0x000a0009,
722         0x9218, 0xffffffff, 0x000c000b,
723         0x921c, 0xffffffff, 0x0000000f,
724         0x9220, 0xffffffff, 0x000e000d,
725         0x9224, 0xffffffff, 0x00110010,
726         0x9228, 0xffffffff, 0x000b000a,
727         0x922c, 0xffffffff, 0x000d000c,
728         0x9230, 0xffffffff, 0x00000010,
729         0x9234, 0xffffffff, 0x000f000e,
730         0x9238, 0xffffffff, 0x00120011,
731         0x923c, 0xffffffff, 0x000c000b,
732         0x9240, 0xffffffff, 0x000e000d,
733         0x9244, 0xffffffff, 0x00000011,
734         0x9248, 0xffffffff, 0x0010000f,
735         0x924c, 0xffffffff, 0x00130012,
736         0x9250, 0xffffffff, 0x000d000c,
737         0x9254, 0xffffffff, 0x000f000e,
738         0x9258, 0xffffffff, 0x00100013,
739         0x925c, 0xffffffff, 0x00120011,
740         0x9260, 0xffffffff, 0x00150014,
741         0x9150, 0xffffffff, 0x96940200,
742         0x8708, 0xffffffff, 0x00900100,
743         0xc478, 0xffffffff, 0x00000080,
744         0xc404, 0xffffffff, 0x0020003f,
745         0x30, 0xffffffff, 0x0000001c,
746         0x34, 0x000f0000, 0x000f0000,
747         0x160c, 0xffffffff, 0x00000100,
748         0x1024, 0xffffffff, 0x00000100,
749         0x102c, 0x00000101, 0x00000000,
750         0x20a8, 0xffffffff, 0x00000104,
751         0x55e4, 0xff000fff, 0x00000100,
752         0x55e8, 0x00000001, 0x00000001,
753         0x2f50, 0x00000001, 0x00000001,
754         0x30cc, 0xc0000fff, 0x00000104,
755         0xc1e4, 0x00000001, 0x00000001,
756         0xd0c0, 0xfffffff0, 0x00000100,
757         0xd8c0, 0xfffffff0, 0x00000100
758 };
759
760 static const u32 verde_mgcg_cgcg_init[] =
761 {
762         0xc400, 0xffffffff, 0xfffffffc,
763         0x802c, 0xffffffff, 0xe0000000,
764         0x9a60, 0xffffffff, 0x00000100,
765         0x92a4, 0xffffffff, 0x00000100,
766         0xc164, 0xffffffff, 0x00000100,
767         0x9774, 0xffffffff, 0x00000100,
768         0x8984, 0xffffffff, 0x06000100,
769         0x8a18, 0xffffffff, 0x00000100,
770         0x92a0, 0xffffffff, 0x00000100,
771         0xc380, 0xffffffff, 0x00000100,
772         0x8b28, 0xffffffff, 0x00000100,
773         0x9144, 0xffffffff, 0x00000100,
774         0x8d88, 0xffffffff, 0x00000100,
775         0x8d8c, 0xffffffff, 0x00000100,
776         0x9030, 0xffffffff, 0x00000100,
777         0x9034, 0xffffffff, 0x00000100,
778         0x9038, 0xffffffff, 0x00000100,
779         0x903c, 0xffffffff, 0x00000100,
780         0xad80, 0xffffffff, 0x00000100,
781         0xac54, 0xffffffff, 0x00000100,
782         0x897c, 0xffffffff, 0x06000100,
783         0x9868, 0xffffffff, 0x00000100,
784         0x9510, 0xffffffff, 0x00000100,
785         0xaf04, 0xffffffff, 0x00000100,
786         0xae04, 0xffffffff, 0x00000100,
787         0x949c, 0xffffffff, 0x00000100,
788         0x802c, 0xffffffff, 0xe0000000,
789         0x9160, 0xffffffff, 0x00010000,
790         0x9164, 0xffffffff, 0x00030002,
791         0x9168, 0xffffffff, 0x00040007,
792         0x916c, 0xffffffff, 0x00060005,
793         0x9170, 0xffffffff, 0x00090008,
794         0x9174, 0xffffffff, 0x00020001,
795         0x9178, 0xffffffff, 0x00040003,
796         0x917c, 0xffffffff, 0x00000007,
797         0x9180, 0xffffffff, 0x00060005,
798         0x9184, 0xffffffff, 0x00090008,
799         0x9188, 0xffffffff, 0x00030002,
800         0x918c, 0xffffffff, 0x00050004,
801         0x9190, 0xffffffff, 0x00000008,
802         0x9194, 0xffffffff, 0x00070006,
803         0x9198, 0xffffffff, 0x000a0009,
804         0x919c, 0xffffffff, 0x00040003,
805         0x91a0, 0xffffffff, 0x00060005,
806         0x91a4, 0xffffffff, 0x00000009,
807         0x91a8, 0xffffffff, 0x00080007,
808         0x91ac, 0xffffffff, 0x000b000a,
809         0x91b0, 0xffffffff, 0x00050004,
810         0x91b4, 0xffffffff, 0x00070006,
811         0x91b8, 0xffffffff, 0x0008000b,
812         0x91bc, 0xffffffff, 0x000a0009,
813         0x91c0, 0xffffffff, 0x000d000c,
814         0x9200, 0xffffffff, 0x00090008,
815         0x9204, 0xffffffff, 0x000b000a,
816         0x9208, 0xffffffff, 0x000c000f,
817         0x920c, 0xffffffff, 0x000e000d,
818         0x9210, 0xffffffff, 0x00110010,
819         0x9214, 0xffffffff, 0x000a0009,
820         0x9218, 0xffffffff, 0x000c000b,
821         0x921c, 0xffffffff, 0x0000000f,
822         0x9220, 0xffffffff, 0x000e000d,
823         0x9224, 0xffffffff, 0x00110010,
824         0x9228, 0xffffffff, 0x000b000a,
825         0x922c, 0xffffffff, 0x000d000c,
826         0x9230, 0xffffffff, 0x00000010,
827         0x9234, 0xffffffff, 0x000f000e,
828         0x9238, 0xffffffff, 0x00120011,
829         0x923c, 0xffffffff, 0x000c000b,
830         0x9240, 0xffffffff, 0x000e000d,
831         0x9244, 0xffffffff, 0x00000011,
832         0x9248, 0xffffffff, 0x0010000f,
833         0x924c, 0xffffffff, 0x00130012,
834         0x9250, 0xffffffff, 0x000d000c,
835         0x9254, 0xffffffff, 0x000f000e,
836         0x9258, 0xffffffff, 0x00100013,
837         0x925c, 0xffffffff, 0x00120011,
838         0x9260, 0xffffffff, 0x00150014,
839         0x9150, 0xffffffff, 0x96940200,
840         0x8708, 0xffffffff, 0x00900100,
841         0xc478, 0xffffffff, 0x00000080,
842         0xc404, 0xffffffff, 0x0020003f,
843         0x30, 0xffffffff, 0x0000001c,
844         0x34, 0x000f0000, 0x000f0000,
845         0x160c, 0xffffffff, 0x00000100,
846         0x1024, 0xffffffff, 0x00000100,
847         0x102c, 0x00000101, 0x00000000,
848         0x20a8, 0xffffffff, 0x00000104,
849         0x264c, 0x000c0000, 0x000c0000,
850         0x2648, 0x000c0000, 0x000c0000,
851         0x55e4, 0xff000fff, 0x00000100,
852         0x55e8, 0x00000001, 0x00000001,
853         0x2f50, 0x00000001, 0x00000001,
854         0x30cc, 0xc0000fff, 0x00000104,
855         0xc1e4, 0x00000001, 0x00000001,
856         0xd0c0, 0xfffffff0, 0x00000100,
857         0xd8c0, 0xfffffff0, 0x00000100
858 };
859
/* Oland clockgating (MGCG/CGCG, per the name) init sequence, programmed by
 * si_init_golden_registers() via radeon_program_register_sequence().
 * Rows appear to be { register offset, mask, value } triples —
 * NOTE(review): confirm the triple layout against
 * radeon_program_register_sequence().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
939
/* Hainan clockgating (MGCG/CGCG, per the name) init sequence, programmed by
 * si_init_golden_registers() via radeon_program_register_sequence().
 * Rows appear to be { register offset, mask, value } triples —
 * NOTE(review): confirm the triple layout against
 * radeon_program_register_sequence().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1016
1017 static u32 verde_pg_init[] =
1018 {
1019         0x353c, 0xffffffff, 0x40000,
1020         0x3538, 0xffffffff, 0x200010ff,
1021         0x353c, 0xffffffff, 0x0,
1022         0x353c, 0xffffffff, 0x0,
1023         0x353c, 0xffffffff, 0x0,
1024         0x353c, 0xffffffff, 0x0,
1025         0x353c, 0xffffffff, 0x0,
1026         0x353c, 0xffffffff, 0x7007,
1027         0x3538, 0xffffffff, 0x300010ff,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x0,
1033         0x353c, 0xffffffff, 0x400000,
1034         0x3538, 0xffffffff, 0x100010ff,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x0,
1040         0x353c, 0xffffffff, 0x120200,
1041         0x3538, 0xffffffff, 0x500010ff,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x0,
1047         0x353c, 0xffffffff, 0x1e1e16,
1048         0x3538, 0xffffffff, 0x600010ff,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x0,
1054         0x353c, 0xffffffff, 0x171f1e,
1055         0x3538, 0xffffffff, 0x700010ff,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x353c, 0xffffffff, 0x0,
1062         0x3538, 0xffffffff, 0x9ff,
1063         0x3500, 0xffffffff, 0x0,
1064         0x3504, 0xffffffff, 0x10000800,
1065         0x3504, 0xffffffff, 0xf,
1066         0x3504, 0xffffffff, 0xf,
1067         0x3500, 0xffffffff, 0x4,
1068         0x3504, 0xffffffff, 0x1000051e,
1069         0x3504, 0xffffffff, 0xffff,
1070         0x3504, 0xffffffff, 0xffff,
1071         0x3500, 0xffffffff, 0x8,
1072         0x3504, 0xffffffff, 0x80500,
1073         0x3500, 0xffffffff, 0x12,
1074         0x3504, 0xffffffff, 0x9050c,
1075         0x3500, 0xffffffff, 0x1d,
1076         0x3504, 0xffffffff, 0xb052c,
1077         0x3500, 0xffffffff, 0x2a,
1078         0x3504, 0xffffffff, 0x1053e,
1079         0x3500, 0xffffffff, 0x2d,
1080         0x3504, 0xffffffff, 0x10546,
1081         0x3500, 0xffffffff, 0x30,
1082         0x3504, 0xffffffff, 0xa054e,
1083         0x3500, 0xffffffff, 0x3c,
1084         0x3504, 0xffffffff, 0x1055f,
1085         0x3500, 0xffffffff, 0x3f,
1086         0x3504, 0xffffffff, 0x10567,
1087         0x3500, 0xffffffff, 0x42,
1088         0x3504, 0xffffffff, 0x1056f,
1089         0x3500, 0xffffffff, 0x45,
1090         0x3504, 0xffffffff, 0x10572,
1091         0x3500, 0xffffffff, 0x48,
1092         0x3504, 0xffffffff, 0x20575,
1093         0x3500, 0xffffffff, 0x4c,
1094         0x3504, 0xffffffff, 0x190801,
1095         0x3500, 0xffffffff, 0x67,
1096         0x3504, 0xffffffff, 0x1082a,
1097         0x3500, 0xffffffff, 0x6a,
1098         0x3504, 0xffffffff, 0x1b082d,
1099         0x3500, 0xffffffff, 0x87,
1100         0x3504, 0xffffffff, 0x310851,
1101         0x3500, 0xffffffff, 0xba,
1102         0x3504, 0xffffffff, 0x891,
1103         0x3500, 0xffffffff, 0xbc,
1104         0x3504, 0xffffffff, 0x893,
1105         0x3500, 0xffffffff, 0xbe,
1106         0x3504, 0xffffffff, 0x20895,
1107         0x3500, 0xffffffff, 0xc2,
1108         0x3504, 0xffffffff, 0x20899,
1109         0x3500, 0xffffffff, 0xc6,
1110         0x3504, 0xffffffff, 0x2089d,
1111         0x3500, 0xffffffff, 0xca,
1112         0x3504, 0xffffffff, 0x8a1,
1113         0x3500, 0xffffffff, 0xcc,
1114         0x3504, 0xffffffff, 0x8a3,
1115         0x3500, 0xffffffff, 0xce,
1116         0x3504, 0xffffffff, 0x308a5,
1117         0x3500, 0xffffffff, 0xd3,
1118         0x3504, 0xffffffff, 0x6d08cd,
1119         0x3500, 0xffffffff, 0x142,
1120         0x3504, 0xffffffff, 0x2000095a,
1121         0x3504, 0xffffffff, 0x1,
1122         0x3500, 0xffffffff, 0x144,
1123         0x3504, 0xffffffff, 0x301f095b,
1124         0x3500, 0xffffffff, 0x165,
1125         0x3504, 0xffffffff, 0xc094d,
1126         0x3500, 0xffffffff, 0x173,
1127         0x3504, 0xffffffff, 0xf096d,
1128         0x3500, 0xffffffff, 0x184,
1129         0x3504, 0xffffffff, 0x15097f,
1130         0x3500, 0xffffffff, 0x19b,
1131         0x3504, 0xffffffff, 0xc0998,
1132         0x3500, 0xffffffff, 0x1a9,
1133         0x3504, 0xffffffff, 0x409a7,
1134         0x3500, 0xffffffff, 0x1af,
1135         0x3504, 0xffffffff, 0xcdc,
1136         0x3500, 0xffffffff, 0x1b1,
1137         0x3504, 0xffffffff, 0x800,
1138         0x3508, 0xffffffff, 0x6c9b2000,
1139         0x3510, 0xfc00, 0x2000,
1140         0x3544, 0xffffffff, 0xfc0,
1141         0x28d4, 0x00000100, 0x100
1142 };
1143
1144 static void si_init_golden_registers(struct radeon_device *rdev)
1145 {
1146         switch (rdev->family) {
1147         case CHIP_TAHITI:
1148                 radeon_program_register_sequence(rdev,
1149                                                  tahiti_golden_registers,
1150                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1151                 radeon_program_register_sequence(rdev,
1152                                                  tahiti_golden_rlc_registers,
1153                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1154                 radeon_program_register_sequence(rdev,
1155                                                  tahiti_mgcg_cgcg_init,
1156                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1157                 radeon_program_register_sequence(rdev,
1158                                                  tahiti_golden_registers2,
1159                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1160                 break;
1161         case CHIP_PITCAIRN:
1162                 radeon_program_register_sequence(rdev,
1163                                                  pitcairn_golden_registers,
1164                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1165                 radeon_program_register_sequence(rdev,
1166                                                  pitcairn_golden_rlc_registers,
1167                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1168                 radeon_program_register_sequence(rdev,
1169                                                  pitcairn_mgcg_cgcg_init,
1170                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1171                 break;
1172         case CHIP_VERDE:
1173                 radeon_program_register_sequence(rdev,
1174                                                  verde_golden_registers,
1175                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1176                 radeon_program_register_sequence(rdev,
1177                                                  verde_golden_rlc_registers,
1178                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1179                 radeon_program_register_sequence(rdev,
1180                                                  verde_mgcg_cgcg_init,
1181                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1182                 radeon_program_register_sequence(rdev,
1183                                                  verde_pg_init,
1184                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1185                 break;
1186         case CHIP_OLAND:
1187                 radeon_program_register_sequence(rdev,
1188                                                  oland_golden_registers,
1189                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1190                 radeon_program_register_sequence(rdev,
1191                                                  oland_golden_rlc_registers,
1192                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1193                 radeon_program_register_sequence(rdev,
1194                                                  oland_mgcg_cgcg_init,
1195                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1196                 break;
1197         case CHIP_HAINAN:
1198                 radeon_program_register_sequence(rdev,
1199                                                  hainan_golden_registers,
1200                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1201                 radeon_program_register_sequence(rdev,
1202                                                  hainan_golden_registers2,
1203                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1204                 radeon_program_register_sequence(rdev,
1205                                                  hainan_mgcg_cgcg_init,
1206                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1207                 break;
1208         default:
1209                 break;
1210         }
1211 }
1212
1213 #define PCIE_BUS_CLK                10000
1214 #define TCLK                        (PCIE_BUS_CLK / 10)
1215
1216 /**
1217  * si_get_xclk - get the xclk
1218  *
1219  * @rdev: radeon_device pointer
1220  *
1221  * Returns the reference clock used by the gfx engine
1222  * (SI).
1223  */
1224 u32 si_get_xclk(struct radeon_device *rdev)
1225 {
1226         u32 reference_clock = rdev->clock.spll.reference_freq;
1227         u32 tmp;
1228
1229         tmp = RREG32(CG_CLKPIN_CNTL_2);
1230         if (tmp & MUX_TCLK_TO_XCLK)
1231                 return TCLK;
1232
1233         tmp = RREG32(CG_CLKPIN_CNTL);
1234         if (tmp & XTALIN_DIVIDE)
1235                 return reference_clock / 4;
1236
1237         return reference_clock;
1238 }
1239
1240 /* get temperature in millidegrees */
1241 int si_get_temp(struct radeon_device *rdev)
1242 {
1243         u32 temp;
1244         int actual_temp = 0;
1245
1246         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1247                 CTF_TEMP_SHIFT;
1248
1249         if (temp & 0x200)
1250                 actual_temp = 255;
1251         else
1252                 actual_temp = temp & 0x1ff;
1253
1254         actual_temp = (actual_temp * 1000);
1255
1256         return actual_temp;
1257 }
1258
/* Number of rows in each MC io register table below; all SI families
 * share the same table length. */
#define TAHITI_IO_MC_REGS_SIZE 36

/* Tahiti MC io register fixups, written by si_mc_load_microcode()
 * before the MC ucode is uploaded.  Each row is an
 * { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA } pair. */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1299
/* Pitcairn MC io register fixups, written by si_mc_load_microcode().
 * Each row is an { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA } pair;
 * the values differ from the Tahiti table only in the final 0x9f entry. */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1338
/* Verde MC io register fixups, written by si_mc_load_microcode().
 * Each row is an { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA } pair;
 * the values differ from the Tahiti table only in the final 0x9f entry. */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1377
/* Oland MC io register fixups, written by si_mc_load_microcode().
 * Each row is an { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA } pair;
 * the values differ from the Tahiti table only in the final 0x9f entry. */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1416
/* Hainan MC io register fixups, written by si_mc_load_microcode().
 * Each row is an { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA } pair;
 * the values differ from the Tahiti table only in the final 0x9f entry. */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1455
1456 /* ucode loading */
1457 static int si_mc_load_microcode(struct radeon_device *rdev)
1458 {
1459         const __be32 *fw_data;
1460         u32 running, blackout = 0;
1461         u32 *io_mc_regs;
1462         int i, ucode_size, regs_size;
1463
1464         if (!rdev->mc_fw)
1465                 return -EINVAL;
1466
1467         switch (rdev->family) {
1468         case CHIP_TAHITI:
1469                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1470                 ucode_size = SI_MC_UCODE_SIZE;
1471                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1472                 break;
1473         case CHIP_PITCAIRN:
1474                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1475                 ucode_size = SI_MC_UCODE_SIZE;
1476                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1477                 break;
1478         case CHIP_VERDE:
1479         default:
1480                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1481                 ucode_size = SI_MC_UCODE_SIZE;
1482                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1483                 break;
1484         case CHIP_OLAND:
1485                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1486                 ucode_size = OLAND_MC_UCODE_SIZE;
1487                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1488                 break;
1489         case CHIP_HAINAN:
1490                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1491                 ucode_size = OLAND_MC_UCODE_SIZE;
1492                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1493                 break;
1494         }
1495
1496         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1497
1498         if (running == 0) {
1499                 if (running) {
1500                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1501                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1502                 }
1503
1504                 /* reset the engine and set to writable */
1505                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1506                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1507
1508                 /* load mc io regs */
1509                 for (i = 0; i < regs_size; i++) {
1510                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1511                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1512                 }
1513                 /* load the MC ucode */
1514                 fw_data = (const __be32 *)rdev->mc_fw->data;
1515                 for (i = 0; i < ucode_size; i++)
1516                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1517
1518                 /* put the engine back into the active state */
1519                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1520                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1521                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1522
1523                 /* wait for training to complete */
1524                 for (i = 0; i < rdev->usec_timeout; i++) {
1525                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1526                                 break;
1527                         udelay(1);
1528                 }
1529                 for (i = 0; i < rdev->usec_timeout; i++) {
1530                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1531                                 break;
1532                         udelay(1);
1533                 }
1534
1535                 if (running)
1536                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1537         }
1538
1539         return 0;
1540 }
1541
1542 static int si_init_microcode(struct radeon_device *rdev)
1543 {
1544         struct platform_device *pdev;
1545         const char *chip_name;
1546         const char *rlc_chip_name;
1547         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1548         size_t smc_req_size;
1549         char fw_name[30];
1550         int err;
1551
1552         DRM_DEBUG("\n");
1553
1554         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1555         err = IS_ERR(pdev);
1556         if (err) {
1557                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1558                 return -EINVAL;
1559         }
1560
1561         switch (rdev->family) {
1562         case CHIP_TAHITI:
1563                 chip_name = "TAHITI";
1564                 rlc_chip_name = "TAHITI";
1565                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1566                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1567                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1568                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1569                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1570                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1571                 break;
1572         case CHIP_PITCAIRN:
1573                 chip_name = "PITCAIRN";
1574                 rlc_chip_name = "PITCAIRN";
1575                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1576                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1577                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1578                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1579                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1580                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1581                 break;
1582         case CHIP_VERDE:
1583                 chip_name = "VERDE";
1584                 rlc_chip_name = "VERDE";
1585                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1586                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1587                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1588                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1589                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1590                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1591                 break;
1592         case CHIP_OLAND:
1593                 chip_name = "OLAND";
1594                 rlc_chip_name = "OLAND";
1595                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1596                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1597                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1598                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1599                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1600                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1601                 break;
1602         case CHIP_HAINAN:
1603                 chip_name = "HAINAN";
1604                 rlc_chip_name = "HAINAN";
1605                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1606                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1607                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1608                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1609                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1610                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1611                 break;
1612         default: BUG();
1613         }
1614
1615         DRM_INFO("Loading %s Microcode\n", chip_name);
1616
1617         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1618         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1619         if (err)
1620                 goto out;
1621         if (rdev->pfp_fw->size != pfp_req_size) {
1622                 printk(KERN_ERR
1623                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1624                        rdev->pfp_fw->size, fw_name);
1625                 err = -EINVAL;
1626                 goto out;
1627         }
1628
1629         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1630         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1631         if (err)
1632                 goto out;
1633         if (rdev->me_fw->size != me_req_size) {
1634                 printk(KERN_ERR
1635                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1636                        rdev->me_fw->size, fw_name);
1637                 err = -EINVAL;
1638         }
1639
1640         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1641         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1642         if (err)
1643                 goto out;
1644         if (rdev->ce_fw->size != ce_req_size) {
1645                 printk(KERN_ERR
1646                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1647                        rdev->ce_fw->size, fw_name);
1648                 err = -EINVAL;
1649         }
1650
1651         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1652         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1653         if (err)
1654                 goto out;
1655         if (rdev->rlc_fw->size != rlc_req_size) {
1656                 printk(KERN_ERR
1657                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1658                        rdev->rlc_fw->size, fw_name);
1659                 err = -EINVAL;
1660         }
1661
1662         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1663         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1664         if (err)
1665                 goto out;
1666         if (rdev->mc_fw->size != mc_req_size) {
1667                 printk(KERN_ERR
1668                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1669                        rdev->mc_fw->size, fw_name);
1670                 err = -EINVAL;
1671         }
1672
1673         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1674         err = request_firmware(&rdev->smc_fw, fw_name, &pdev->dev);
1675         if (err)
1676                 goto out;
1677         if (rdev->smc_fw->size != smc_req_size) {
1678                 printk(KERN_ERR
1679                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1680                        rdev->smc_fw->size, fw_name);
1681                 err = -EINVAL;
1682         }
1683
1684 out:
1685         platform_device_unregister(pdev);
1686
1687         if (err) {
1688                 if (err != -EINVAL)
1689                         printk(KERN_ERR
1690                                "si_cp: Failed to load firmware \"%s\"\n",
1691                                fw_name);
1692                 release_firmware(rdev->pfp_fw);
1693                 rdev->pfp_fw = NULL;
1694                 release_firmware(rdev->me_fw);
1695                 rdev->me_fw = NULL;
1696                 release_firmware(rdev->ce_fw);
1697                 rdev->ce_fw = NULL;
1698                 release_firmware(rdev->rlc_fw);
1699                 rdev->rlc_fw = NULL;
1700                 release_firmware(rdev->mc_fw);
1701                 rdev->mc_fw = NULL;
1702                 release_firmware(rdev->smc_fw);
1703                 rdev->smc_fw = NULL;
1704         }
1705         return err;
1706 }
1707
1708 /* watermark setup */
1709 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1710                                    struct radeon_crtc *radeon_crtc,
1711                                    struct drm_display_mode *mode,
1712                                    struct drm_display_mode *other_mode)
1713 {
1714         u32 tmp;
1715         /*
1716          * Line Buffer Setup
1717          * There are 3 line buffers, each one shared by 2 display controllers.
1718          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1719          * the display controllers.  The paritioning is done via one of four
1720          * preset allocations specified in bits 21:20:
1721          *  0 - half lb
1722          *  2 - whole lb, other crtc must be disabled
1723          */
1724         /* this can get tricky if we have two large displays on a paired group
1725          * of crtcs.  Ideally for multiple large displays we'd assign them to
1726          * non-linked crtcs for maximum line buffer allocation.
1727          */
1728         if (radeon_crtc->base.enabled && mode) {
1729                 if (other_mode)
1730                         tmp = 0; /* 1/2 */
1731                 else
1732                         tmp = 2; /* whole */
1733         } else
1734                 tmp = 0;
1735
1736         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1737                DC_LB_MEMORY_CONFIG(tmp));
1738
1739         if (radeon_crtc->base.enabled && mode) {
1740                 switch (tmp) {
1741                 case 0:
1742                 default:
1743                         return 4096 * 2;
1744                 case 2:
1745                         return 8192 * 2;
1746                 }
1747         }
1748
1749         /* controller not enabled, so no lb used */
1750         return 0;
1751 }
1752
1753 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1754 {
1755         u32 tmp = RREG32(MC_SHARED_CHMAP);
1756
1757         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1758         case 0:
1759         default:
1760                 return 1;
1761         case 1:
1762                 return 2;
1763         case 2:
1764                 return 4;
1765         case 3:
1766                 return 8;
1767         case 4:
1768                 return 3;
1769         case 5:
1770                 return 6;
1771         case 6:
1772                 return 10;
1773         case 7:
1774                 return 12;
1775         case 8:
1776                 return 16;
1777         }
1778 }
1779
/* Input parameters for the DCE6 display watermark calculations. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1795
1796 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1797 {
1798         /* Calculate raw DRAM Bandwidth */
1799         fixed20_12 dram_efficiency; /* 0.7 */
1800         fixed20_12 yclk, dram_channels, bandwidth;
1801         fixed20_12 a;
1802
1803         a.full = dfixed_const(1000);
1804         yclk.full = dfixed_const(wm->yclk);
1805         yclk.full = dfixed_div(yclk, a);
1806         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1807         a.full = dfixed_const(10);
1808         dram_efficiency.full = dfixed_const(7);
1809         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1810         bandwidth.full = dfixed_mul(dram_channels, yclk);
1811         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1812
1813         return dfixed_trunc(bandwidth);
1814 }
1815
1816 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1817 {
1818         /* Calculate DRAM Bandwidth and the part allocated to display. */
1819         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1820         fixed20_12 yclk, dram_channels, bandwidth;
1821         fixed20_12 a;
1822
1823         a.full = dfixed_const(1000);
1824         yclk.full = dfixed_const(wm->yclk);
1825         yclk.full = dfixed_div(yclk, a);
1826         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1827         a.full = dfixed_const(10);
1828         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1829         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1830         bandwidth.full = dfixed_mul(dram_channels, yclk);
1831         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1832
1833         return dfixed_trunc(bandwidth);
1834 }
1835
1836 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1837 {
1838         /* Calculate the display Data return Bandwidth */
1839         fixed20_12 return_efficiency; /* 0.8 */
1840         fixed20_12 sclk, bandwidth;
1841         fixed20_12 a;
1842
1843         a.full = dfixed_const(1000);
1844         sclk.full = dfixed_const(wm->sclk);
1845         sclk.full = dfixed_div(sclk, a);
1846         a.full = dfixed_const(10);
1847         return_efficiency.full = dfixed_const(8);
1848         return_efficiency.full = dfixed_div(return_efficiency, a);
1849         a.full = dfixed_const(32);
1850         bandwidth.full = dfixed_mul(a, sclk);
1851         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1852
1853         return dfixed_trunc(bandwidth);
1854 }
1855
1856 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1857 {
1858         return 32;
1859 }
1860
1861 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1862 {
1863         /* Calculate the DMIF Request Bandwidth */
1864         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1865         fixed20_12 disp_clk, sclk, bandwidth;
1866         fixed20_12 a, b1, b2;
1867         u32 min_bandwidth;
1868
1869         a.full = dfixed_const(1000);
1870         disp_clk.full = dfixed_const(wm->disp_clk);
1871         disp_clk.full = dfixed_div(disp_clk, a);
1872         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1873         b1.full = dfixed_mul(a, disp_clk);
1874
1875         a.full = dfixed_const(1000);
1876         sclk.full = dfixed_const(wm->sclk);
1877         sclk.full = dfixed_div(sclk, a);
1878         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1879         b2.full = dfixed_mul(a, sclk);
1880
1881         a.full = dfixed_const(10);
1882         disp_clk_request_efficiency.full = dfixed_const(8);
1883         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1884
1885         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1886
1887         a.full = dfixed_const(min_bandwidth);
1888         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1889
1890         return dfixed_trunc(bandwidth);
1891 }
1892
1893 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1894 {
1895         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1896         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1897         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1898         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1899
1900         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1901 }
1902
1903 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1904 {
1905         /* Calculate the display mode Average Bandwidth
1906          * DisplayMode should contain the source and destination dimensions,
1907          * timing, etc.
1908          */
1909         fixed20_12 bpp;
1910         fixed20_12 line_time;
1911         fixed20_12 src_width;
1912         fixed20_12 bandwidth;
1913         fixed20_12 a;
1914
1915         a.full = dfixed_const(1000);
1916         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1917         line_time.full = dfixed_div(line_time, a);
1918         bpp.full = dfixed_const(wm->bytes_per_pixel);
1919         src_width.full = dfixed_const(wm->src_width);
1920         bandwidth.full = dfixed_mul(src_width, bpp);
1921         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1922         bandwidth.full = dfixed_div(bandwidth, line_time);
1923
1924         return dfixed_trunc(bandwidth);
1925 }
1926
/* Compute the latency watermark (in ns) for one head: the worst-case
 * time the memory subsystem may take to return display data, plus any
 * extra line-buffer fill time the mode requires. */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time to return the worst-case 512*8-byte chunk at the available rate */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* delay added by the other heads competing for the same memory */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads -> nothing to wait for */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling / many vertical taps / interlacing need more
	 * source lines per destination line in the line buffer */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* bandwidth the dmif buffer can sustain over the latency window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* rate at which the display consumes the line buffer */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the lines needed for one destination line */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can't be filled within the active time, the excess
	 * adds to the latency the watermark must cover */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1989
1990 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1991 {
1992         if (dce6_average_bandwidth(wm) <=
1993             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1994                 return true;
1995         else
1996                 return false;
1997 };
1998
1999 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2000 {
2001         if (dce6_average_bandwidth(wm) <=
2002             (dce6_available_bandwidth(wm) / wm->num_heads))
2003                 return true;
2004         else
2005                 return false;
2006 };
2007
2008 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2009 {
2010         u32 lb_partitions = wm->lb_size / wm->src_width;
2011         u32 line_time = wm->active_time + wm->blank_time;
2012         u32 latency_tolerant_lines;
2013         u32 latency_hiding;
2014         fixed20_12 a;
2015
2016         a.full = dfixed_const(1);
2017         if (wm->vsc.full > a.full)
2018                 latency_tolerant_lines = 1;
2019         else {
2020                 if (lb_partitions <= (wm->vtaps + 1))
2021                         latency_tolerant_lines = 1;
2022                 else
2023                         latency_tolerant_lines = 2;
2024         }
2025
2026         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2027
2028         if (dce6_latency_watermark(wm) <= latency_hiding)
2029                 return true;
2030         else
2031                 return false;
2032 }
2033
/* Program the latency watermarks and priority marks for one crtc.
 * Watermark A covers the high-clock case, watermark B the low-clock
 * case; both are derived from the current mode and memory/engine
 * clocks, written to the display registers, and cached on the crtc
 * for the DPM code. */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line_time capped to the 16-bit register field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* convert latency (ns) into a priority mark:
		 * latency * pixel clock * hsc, in units of 16 pixels */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2193
2194 void dce6_bandwidth_update(struct radeon_device *rdev)
2195 {
2196         struct drm_display_mode *mode0 = NULL;
2197         struct drm_display_mode *mode1 = NULL;
2198         u32 num_heads = 0, lb_size;
2199         int i;
2200
2201         radeon_update_display_priority(rdev);
2202
2203         for (i = 0; i < rdev->num_crtc; i++) {
2204                 if (rdev->mode_info.crtcs[i]->base.enabled)
2205                         num_heads++;
2206         }
2207         for (i = 0; i < rdev->num_crtc; i += 2) {
2208                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2209                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2210                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2211                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2212                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2213                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2214         }
2215 }
2216
2217 /*
2218  * Core functions
2219  */
2220 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2221 {
2222         const u32 num_tile_mode_states = 32;
2223         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2224
2225         switch (rdev->config.si.mem_row_size_in_kb) {
2226         case 1:
2227                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2228                 break;
2229         case 2:
2230         default:
2231                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2232                 break;
2233         case 4:
2234                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2235                 break;
2236         }
2237
2238         if ((rdev->family == CHIP_TAHITI) ||
2239             (rdev->family == CHIP_PITCAIRN)) {
2240                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2241                         switch (reg_offset) {
2242                         case 0:  /* non-AA compressed depth or any compressed stencil */
2243                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2244                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2245                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2246                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2247                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2248                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2250                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2251                                 break;
2252                         case 1:  /* 2xAA/4xAA compressed depth only */
2253                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2255                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2256                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2257                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2258                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2261                                 break;
2262                         case 2:  /* 8xAA compressed depth only */
2263                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2266                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2267                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2268                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2270                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2271                                 break;
2272                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2273                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2274                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2275                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2276                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2277                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2278                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2281                                 break;
2282                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2283                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2284                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2285                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2286                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2287                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2288                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2289                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2290                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2291                                 break;
2292                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2293                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2295                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2296                                                  TILE_SPLIT(split_equal_to_row_size) |
2297                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2298                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2300                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2301                                 break;
2302                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2303                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2304                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2305                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2306                                                  TILE_SPLIT(split_equal_to_row_size) |
2307                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2308                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2310                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2311                                 break;
2312                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2313                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2315                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2316                                                  TILE_SPLIT(split_equal_to_row_size) |
2317                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2318                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2320                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2321                                 break;
2322                         case 8:  /* 1D and 1D Array Surfaces */
2323                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2324                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2325                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2326                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2327                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2328                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2330                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2331                                 break;
2332                         case 9:  /* Displayable maps. */
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2336                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2337                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2338                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2340                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2341                                 break;
2342                         case 10:  /* Display 8bpp. */
2343                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2345                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2346                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2347                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2348                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2349                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2350                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2351                                 break;
2352                         case 11:  /* Display 16bpp. */
2353                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2356                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2357                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2358                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2360                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2361                                 break;
2362                         case 12:  /* Display 32bpp. */
2363                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2366                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2367                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2368                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2370                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2371                                 break;
2372                         case 13:  /* Thin. */
2373                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2375                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2376                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2377                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2378                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2381                                 break;
2382                         case 14:  /* Thin 8 bpp. */
2383                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2385                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2386                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2387                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2388                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2390                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2391                                 break;
2392                         case 15:  /* Thin 16 bpp. */
2393                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2395                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2396                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2397                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2398                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2400                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2401                                 break;
2402                         case 16:  /* Thin 32 bpp. */
2403                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2405                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2406                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2407                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2408                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2410                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2411                                 break;
2412                         case 17:  /* Thin 64 bpp. */
2413                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2414                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2415                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2416                                                  TILE_SPLIT(split_equal_to_row_size) |
2417                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2418                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2420                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2421                                 break;
2422                         case 21:  /* 8 bpp PRT. */
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2425                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2426                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2427                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2428                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2429                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2430                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2431                                 break;
2432                         case 22:  /* 16 bpp PRT */
2433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2435                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2437                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2438                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2441                                 break;
2442                         case 23:  /* 32 bpp PRT */
2443                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2445                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2448                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2450                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2451                                 break;
2452                         case 24:  /* 64 bpp PRT */
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2457                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2458                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2461                                 break;
2462                         case 25:  /* 128 bpp PRT */
2463                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2465                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2467                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2468                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2471                                 break;
2472                         default:
2473                                 gb_tile_moden = 0;
2474                                 break;
2475                         }
2476                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2477                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2478                 }
2479         } else if ((rdev->family == CHIP_VERDE) ||
2480                    (rdev->family == CHIP_OLAND) ||
2481                    (rdev->family == CHIP_HAINAN)) {
2482                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2483                         switch (reg_offset) {
2484                         case 0:  /* non-AA compressed depth or any compressed stencil */
2485                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2486                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2487                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2488                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2489                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2490                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2492                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2493                                 break;
2494                         case 1:  /* 2xAA/4xAA compressed depth only */
2495                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2497                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2498                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2499                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2500                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2502                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2503                                 break;
2504                         case 2:  /* 8xAA compressed depth only */
2505                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2508                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2509                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2510                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2513                                 break;
2514                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2515                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2518                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2519                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2520                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2523                                 break;
2524                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2525                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2526                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2528                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2529                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2530                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2533                                 break;
2534                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2535                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2537                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2538                                                  TILE_SPLIT(split_equal_to_row_size) |
2539                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2540                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2542                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2543                                 break;
2544                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2545                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2547                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2548                                                  TILE_SPLIT(split_equal_to_row_size) |
2549                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2550                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2553                                 break;
2554                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2555                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2557                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2558                                                  TILE_SPLIT(split_equal_to_row_size) |
2559                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2560                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2562                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2563                                 break;
2564                         case 8:  /* 1D and 1D Array Surfaces */
2565                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2566                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2567                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2568                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2569                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2570                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2572                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573                                 break;
2574                         case 9:  /* Displayable maps. */
2575                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2577                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2578                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2579                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2580                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2582                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2583                                 break;
2584                         case 10:  /* Display 8bpp. */
2585                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2586                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2587                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2588                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2589                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2590                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2592                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2593                                 break;
2594                         case 11:  /* Display 16bpp. */
2595                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2598                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2600                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2602                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2603                                 break;
2604                         case 12:  /* Display 32bpp. */
2605                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2608                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2609                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2610                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2612                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613                                 break;
2614                         case 13:  /* Thin. */
2615                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2616                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2617                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2618                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2619                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2620                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2623                                 break;
2624                         case 14:  /* Thin 8 bpp. */
2625                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2627                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2628                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2629                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2630                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2633                                 break;
2634                         case 15:  /* Thin 16 bpp. */
2635                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2637                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2638                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2639                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2640                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2642                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2643                                 break;
2644                         case 16:  /* Thin 32 bpp. */
2645                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2646                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2647                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2648                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2649                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2650                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2653                                 break;
2654                         case 17:  /* Thin 64 bpp. */
2655                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658                                                  TILE_SPLIT(split_equal_to_row_size) |
2659                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2660                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2663                                 break;
2664                         case 21:  /* 8 bpp PRT. */
2665                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2667                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2668                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2669                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2670                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2671                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2672                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2673                                 break;
2674                         case 22:  /* 16 bpp PRT */
2675                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2677                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2678                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2679                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2680                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2682                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2683                                 break;
2684                         case 23:  /* 32 bpp PRT */
2685                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2687                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2688                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2689                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2690                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2693                                 break;
2694                         case 24:  /* 64 bpp PRT */
2695                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2700                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2702                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703                                 break;
2704                         case 25:  /* 128 bpp PRT */
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2709                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2710                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2713                                 break;
2714                         default:
2715                                 gb_tile_moden = 0;
2716                                 break;
2717                         }
2718                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2719                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2720                 }
2721         } else
2722                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2723 }
2724
2725 static void si_select_se_sh(struct radeon_device *rdev,
2726                             u32 se_num, u32 sh_num)
2727 {
2728         u32 data = INSTANCE_BROADCAST_WRITES;
2729
2730         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2731                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2732         else if (se_num == 0xffffffff)
2733                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2734         else if (sh_num == 0xffffffff)
2735                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2736         else
2737                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2738         WREG32(GRBM_GFX_INDEX, data);
2739 }
2740
2741 static u32 si_create_bitmask(u32 bit_width)
2742 {
2743         u32 i, mask = 0;
2744
2745         for (i = 0; i < bit_width; i++) {
2746                 mask <<= 1;
2747                 mask |= 1;
2748         }
2749         return mask;
2750 }
2751
2752 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2753 {
2754         u32 data, mask;
2755
2756         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2757         if (data & 1)
2758                 data &= INACTIVE_CUS_MASK;
2759         else
2760                 data = 0;
2761         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2762
2763         data >>= INACTIVE_CUS_SHIFT;
2764
2765         mask = si_create_bitmask(cu_per_sh);
2766
2767         return ~data & mask;
2768 }
2769
2770 static void si_setup_spi(struct radeon_device *rdev,
2771                          u32 se_num, u32 sh_per_se,
2772                          u32 cu_per_sh)
2773 {
2774         int i, j, k;
2775         u32 data, mask, active_cu;
2776
2777         for (i = 0; i < se_num; i++) {
2778                 for (j = 0; j < sh_per_se; j++) {
2779                         si_select_se_sh(rdev, i, j);
2780                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2781                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2782
2783                         mask = 1;
2784                         for (k = 0; k < 16; k++) {
2785                                 mask <<= k;
2786                                 if (active_cu & mask) {
2787                                         data &= ~mask;
2788                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2789                                         break;
2790                                 }
2791                         }
2792                 }
2793         }
2794         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2795 }
2796
2797 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2798                               u32 max_rb_num, u32 se_num,
2799                               u32 sh_per_se)
2800 {
2801         u32 data, mask;
2802
2803         data = RREG32(CC_RB_BACKEND_DISABLE);
2804         if (data & 1)
2805                 data &= BACKEND_DISABLE_MASK;
2806         else
2807                 data = 0;
2808         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2809
2810         data >>= BACKEND_DISABLE_SHIFT;
2811
2812         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2813
2814         return data & mask;
2815 }
2816
/*
 * si_setup_rb - program the raster config from the RB enable state
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @max_rb_num: total render backends on the asic
 *
 * Gathers the disabled-RB bits of every SE/SH into one bitmap, inverts
 * it into an enabled-RB bitmap, and then programs each SE's
 * PA_SC_RASTER_CONFIG with a per-SH RB_MAP value chosen from the two
 * enable bits belonging to that SH.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect per-SH disable bits, TAHITI_RB_BITMAP_WIDTH_PER_SH bits
	 * per shader array */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a set bit in enabled_rbs means the RB is usable */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* per SE: consume two enable bits per SH and encode the matching
	 * 2-bit RB_MAP value.  NOTE(review): case 2 maps to RB_MAP_3 and
	 * case 3 to RB_MAP_2 — looks odd but mirrors the hw encoding
	 * table; confirm against the register spec before changing. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2864
/*
 * si_gpu_init - asic-specific GFX block initialization
 * @rdev: radeon_device pointer
 *
 * Fills in rdev->config.si from per-family limits, programs the HDP,
 * tiling/addressing and raster configuration registers, and applies the
 * hw defaults for the 3D engine.  The register write ordering below is
 * deliberate; do not reorder.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader engine / pipe / cache limits plus the "golden"
	 * GB_ADDR_CONFIG value.  Note that CHIP_VERDE doubles as the
	 * default case. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read but not otherwise used here */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB, capped at 4) from the column
	 * count field of MC_ARB_RAMCFG */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the final addressing config into every block that
	 * consumes it (display, HDP, DMA engines, and UVD if present) */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no bit changes: re-posts the current value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* zero all CB perf counter selects */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the writes above settle before continuing */
	udelay(50);
}
3121
/*
 * GPU scratch register helper functions.
 */
3125 static void si_scratch_init(struct radeon_device *rdev)
3126 {
3127         int i;
3128
3129         rdev->scratch.num_reg = 7;
3130         rdev->scratch.reg_base = SCRATCH_REG0;
3131         for (i = 0; i < rdev->scratch.num_reg; i++) {
3132                 rdev->scratch.free[i] = true;
3133                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3134         }
3135 }
3136
/**
 * si_fence_ring_emit - emit a fence on the requested ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the GPU caches over the GART, then emits an EVENT_WRITE_EOP
 * packet that writes the fence sequence number to the fence address and
 * raises an interrupt.  The dword sequence follows the CP packet format
 * exactly; do not reorder.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff); /* fence address, low bits */
	/* high address bits plus data/interrupt select fields
	 * (DATA_SEL(1)/INT_SEL(2) encodings per the CP packet spec) */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3163
3164 /*
3165  * IB stuff
3166  */
/**
 * si_ring_ib_execute - schedule an IB (indirect buffer) on a ring
 * @rdev: radeon_device pointer
 * @ib: IB to schedule
 *
 * Const IBs are prefixed with a SWITCH_BUFFER packet and issued with
 * INDIRECT_BUFFER_CONST.  Normal IBs first record the expected read
 * pointer after this submission (via the ring's rptr save register, or
 * a WRITE_DATA into the write-back slot), issue the IB, and then flush
 * the caches for the IB's vmid.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_CONFIG_REG packet, 4 for the
			 * IB packet below, 8 for the trailing cache flush */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA packet, 4 for the IB
			 * packet below, 8 for the trailing cache flush */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* (1 << 8): WRITE_DATA control word — presumably the
			 * memory-destination select; confirm against the CP
			 * packet spec */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* length plus the owning VM id in bits 31:24 (0 if no VM) */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3223
3224 /*
3225  * CP.
3226  */
3227 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3228 {
3229         if (enable)
3230                 WREG32(CP_ME_CNTL, 0);
3231         else {
3232                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3233                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3234                 WREG32(SCRATCH_UMSK, 0);
3235                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3236                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3237                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3238         }
3239         udelay(50);
3240 }
3241
3242 static int si_cp_load_microcode(struct radeon_device *rdev)
3243 {
3244         const __be32 *fw_data;
3245         int i;
3246
3247         if (!rdev->me_fw || !rdev->pfp_fw)
3248                 return -EINVAL;
3249
3250         si_cp_enable(rdev, false);
3251
3252         /* PFP */
3253         fw_data = (const __be32 *)rdev->pfp_fw->data;
3254         WREG32(CP_PFP_UCODE_ADDR, 0);
3255         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3256                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3257         WREG32(CP_PFP_UCODE_ADDR, 0);
3258
3259         /* CE */
3260         fw_data = (const __be32 *)rdev->ce_fw->data;
3261         WREG32(CP_CE_UCODE_ADDR, 0);
3262         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3263                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3264         WREG32(CP_CE_UCODE_ADDR, 0);
3265
3266         /* ME */
3267         fw_data = (const __be32 *)rdev->me_fw->data;
3268         WREG32(CP_ME_RAM_WADDR, 0);
3269         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3270                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3271         WREG32(CP_ME_RAM_WADDR, 0);
3272
3273         WREG32(CP_PFP_UCODE_ADDR, 0);
3274         WREG32(CP_CE_UCODE_ADDR, 0);
3275         WREG32(CP_ME_RAM_WADDR, 0);
3276         WREG32(CP_ME_RAM_RADDR, 0);
3277         return 0;
3278 }
3279
3280 static int si_cp_start(struct radeon_device *rdev)
3281 {
3282         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3283         int r, i;
3284
3285         r = radeon_ring_lock(rdev, ring, 7 + 4);
3286         if (r) {
3287                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3288                 return r;
3289         }
3290         /* init the CP */
3291         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3292         radeon_ring_write(ring, 0x1);
3293         radeon_ring_write(ring, 0x0);
3294         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3295         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3296         radeon_ring_write(ring, 0);
3297         radeon_ring_write(ring, 0);
3298
3299         /* init the CE partitions */
3300         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3301         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3302         radeon_ring_write(ring, 0xc000);
3303         radeon_ring_write(ring, 0xe000);
3304         radeon_ring_unlock_commit(rdev, ring);
3305
3306         si_cp_enable(rdev, true);
3307
3308         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3309         if (r) {
3310                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3311                 return r;
3312         }
3313
3314         /* setup clear context state */
3315         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3316         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3317
3318         for (i = 0; i < si_default_size; i++)
3319                 radeon_ring_write(ring, si_default_state[i]);
3320
3321         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3322         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3323
3324         /* set clear context state */
3325         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3326         radeon_ring_write(ring, 0);
3327
3328         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3329         radeon_ring_write(ring, 0x00000316);
3330         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3331         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3332
3333         radeon_ring_unlock_commit(rdev, ring);
3334
3335         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3336                 ring = &rdev->ring[i];
3337                 r = radeon_ring_lock(rdev, ring, 2);
3338
3339                 /* clear the compute context state */
3340                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3341                 radeon_ring_write(ring, 0);
3342
3343                 radeon_ring_unlock_commit(rdev, ring);
3344         }
3345
3346         return 0;
3347 }
3348
3349 static void si_cp_fini(struct radeon_device *rdev)
3350 {
3351         struct radeon_ring *ring;
3352         si_cp_enable(rdev, false);
3353
3354         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3355         radeon_ring_fini(rdev, ring);
3356         radeon_scratch_free(rdev, ring->rptr_save_reg);
3357
3358         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3359         radeon_ring_fini(rdev, ring);
3360         radeon_scratch_free(rdev, ring->rptr_save_reg);
3361
3362         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3363         radeon_ring_fini(rdev, ring);
3364         radeon_scratch_free(rdev, ring->rptr_save_reg);
3365 }
3366
/**
 * si_cp_resume - reset and bring up the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP (and the PA/VGT/SPI/SX blocks that must be reset
 * along with it), programs the three CP ring buffers (gfx ring 0 plus
 * compute rings 1 and 2), starts them via si_cp_start() and
 * ring-tests each one.
 *
 * Returns 0 on success.  A gfx ring test failure marks all three
 * rings not ready and is returned to the caller; a compute ring test
 * failure only marks that individual ring not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	/* scratch registers write back into the WB buffer */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: disable rptr updates to memory for ring 0 */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: take all rings down */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal: just mark the ring unusable */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3502
/**
 * si_gpu_check_soft_reset - scan status registers for hung blocks
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM/SRBM/DMA/VM status registers and translates their
 * busy/pending bits into a mask of RADEON_RESET_* flags describing
 * which engines would need a soft reset.  A busy MC is deliberately
 * dropped from the mask (it is most likely just busy, not hung).
 *
 * Returns the RADEON_RESET_* mask (0 means everything looks idle).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3583
/**
 * si_gpu_soft_reset - soft reset the blocks named in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bits, typically from si_gpu_check_soft_reset()
 *
 * Dumps status registers, halts the CP and any DMA ring buffers being
 * reset, stops the MC, then pulses the corresponding GRBM/SRBM
 * soft-reset bits and resumes the MC.  Does nothing if @reset_mask
 * is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* stop the DMA ring buffers of the engines that will be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* quiesce memory traffic before pulsing the reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert the GRBM resets, hold 50us, then deassert; the reads
	 * after each write presumably flush the posted write -- same
	 * pattern as the CP reset in si_cp_resume() */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same assert/hold/deassert sequence for the SRBM resets */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3708
3709 int si_asic_reset(struct radeon_device *rdev)
3710 {
3711         u32 reset_mask;
3712
3713         reset_mask = si_gpu_check_soft_reset(rdev);
3714
3715         if (reset_mask)
3716                 r600_set_bios_scratch_engine_hung(rdev, true);
3717
3718         si_gpu_soft_reset(rdev, reset_mask);
3719
3720         reset_mask = si_gpu_check_soft_reset(rdev);
3721
3722         if (!reset_mask)
3723                 r600_set_bios_scratch_engine_hung(rdev, false);
3724
3725         return 0;
3726 }
3727
3728 /**
3729  * si_gfx_is_lockup - Check if the GFX engine is locked up
3730  *
3731  * @rdev: radeon_device pointer
3732  * @ring: radeon_ring structure holding ring information
3733  *
3734  * Check if the GFX engine is locked up.
3735  * Returns true if the engine appears to be locked up, false if not.
3736  */
3737 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3738 {
3739         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3740
3741         if (!(reset_mask & (RADEON_RESET_GFX |
3742                             RADEON_RESET_COMPUTE |
3743                             RADEON_RESET_CP))) {
3744                 radeon_ring_lockup_update(ring);
3745                 return false;
3746         }
3747         /* force CP activities */
3748         radeon_ring_force_activity(rdev, ring);
3749         return radeon_ring_test_lockup(rdev, ring);
3750 }
3751
3752 /**
3753  * si_dma_is_lockup - Check if the DMA engine is locked up
3754  *
3755  * @rdev: radeon_device pointer
3756  * @ring: radeon_ring structure holding ring information
3757  *
3758  * Check if the async DMA engine is locked up.
3759  * Returns true if the engine appears to be locked up, false if not.
3760  */
3761 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3762 {
3763         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3764         u32 mask;
3765
3766         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3767                 mask = RADEON_RESET_DMA;
3768         else
3769                 mask = RADEON_RESET_DMA1;
3770
3771         if (!(reset_mask & mask)) {
3772                 radeon_ring_lockup_update(ring);
3773                 return false;
3774         }
3775         /* force ring activities */
3776         radeon_ring_force_activity(rdev, ring);
3777         return radeon_ring_test_lockup(rdev, ring);
3778 }
3779
3780 /* MC */
/**
 * si_mc_program - program the memory controller location registers
 *
 * @rdev: radeon_device pointer
 *
 * Clears a bank of HDP registers, stops the MC, programs the system
 * aperture and FB location from rdev->mc, points the default aperture
 * at the VRAM scratch page, disables the AGP aperture, and resumes
 * the MC.  On parts with display hardware (!ASIC_IS_NODCE) VGA memory
 * access is locked out and the VGA renderer disabled so it cannot
 * scribble over our objects.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP: 32 register groups at stride 0x18, zeroed */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	/* unmapped accesses land on the VRAM scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high 16 bits, start in the low 16 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable AGP (bottom above top) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3831
3832 void si_vram_gtt_location(struct radeon_device *rdev,
3833                           struct radeon_mc *mc)
3834 {
3835         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3836                 /* leave room for at least 1024M GTT */
3837                 dev_warn(rdev->dev, "limiting VRAM\n");
3838                 mc->real_vram_size = 0xFFC0000000ULL;
3839                 mc->mc_vram_size = 0xFFC0000000ULL;
3840         }
3841         radeon_vram_location(rdev, &rdev->mc, 0);
3842         rdev->mc.gtt_base_align = 0;
3843         radeon_gtt_location(rdev, mc);
3844 }
3845
3846 static int si_mc_init(struct radeon_device *rdev)
3847 {
3848         u32 tmp;
3849         int chansize, numchan;
3850
3851         /* Get VRAM informations */
3852         rdev->mc.vram_is_ddr = true;
3853         tmp = RREG32(MC_ARB_RAMCFG);
3854         if (tmp & CHANSIZE_OVERRIDE) {
3855                 chansize = 16;
3856         } else if (tmp & CHANSIZE_MASK) {
3857                 chansize = 64;
3858         } else {
3859                 chansize = 32;
3860         }
3861         tmp = RREG32(MC_SHARED_CHMAP);
3862         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3863         case 0:
3864         default:
3865                 numchan = 1;
3866                 break;
3867         case 1:
3868                 numchan = 2;
3869                 break;
3870         case 2:
3871                 numchan = 4;
3872                 break;
3873         case 3:
3874                 numchan = 8;
3875                 break;
3876         case 4:
3877                 numchan = 3;
3878                 break;
3879         case 5:
3880                 numchan = 6;
3881                 break;
3882         case 6:
3883                 numchan = 10;
3884                 break;
3885         case 7:
3886                 numchan = 12;
3887                 break;
3888         case 8:
3889                 numchan = 16;
3890                 break;
3891         }
3892         rdev->mc.vram_width = numchan * chansize;
3893         /* Could aper size report 0 ? */
3894         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3895         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3896         /* size in MB on si */
3897         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3898         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3899         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3900         si_vram_gtt_location(rdev, &rdev->mc);
3901         radeon_update_bandwidth_info(rdev);
3902
3903         return 0;
3904 }
3905
3906 /*
3907  * GART
3908  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and invalidate VM TLBs
 *
 * @rdev: radeon_device pointer
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3917
/**
 * si_pcie_gart_enable - set up the GART and VM hardware
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART table in VRAM, programs the L1 TLB and the VM L2
 * cache, maps VM context 0 onto the GTT aperture, points contexts
 * 1-15 at the GART table for now (the real per-process page tables
 * are assigned on the fly by the VM code in radeon_gart.c), enables
 * protection-fault handling for contexts 1-15, and flushes the TLBs.
 *
 * Returns 0 on success, -EINVAL if there is no GART object, or the
 * error from pinning the GART table.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: covers the GTT range, faults fall back to the
	 * dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers cleared here; purpose
	 * unknown from this file */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4002
/**
 * si_pcie_gart_disable - turn the GART/VM hardware off
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, reprograms the TLB and L2 cache with the
 * enable bits cleared, and unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control (ENABLE_L1_TLB deliberately not set) */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache (ENABLE_L2_CACHE deliberately not set) */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4021
/**
 * si_pcie_gart_fini - tear down the GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the VM hardware, frees the GART table VRAM and releases
 * the GART state.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4028
4029 /* vm parser */
/**
 * si_vm_reg_valid - check whether a register may be written from a
 * VM command stream
 *
 * @reg: register offset (byte address)
 *
 * Context registers (offset >= 0x28000) are always allowed; below
 * that only an explicit whitelist of config registers passes.
 * Anything else is rejected with an error message.
 *
 * Returns true if the register write is allowed.
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
4070
/**
 * si_vm_packet3_ce_check - validate a PACKET3 from a CE IB
 *
 * @rdev: radeon_device pointer (unused here)
 * @ib: indirect buffer contents (unused here)
 * @pkt: parsed packet header
 *
 * Only a small whitelist of constant-engine opcodes is permitted;
 * the packet payload itself is not inspected.
 *
 * Returns 0 if the opcode is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4092
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 from a VM IB on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: dword array containing the indirect buffer
 * @pkt: packet header (idx, opcode, count) already decoded by the caller
 *
 * Userspace IBs executed in a VM context may only touch a whitelisted set
 * of registers.  Packets that cannot address registers are passed through;
 * packets that can are decoded and each target register is checked with
 * si_vm_reg_valid().
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	/* these opcodes cannot write arbitrary registers - always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0 selects a mem-mapped register destination */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;	/* dword offset -> byte offset */
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field == 0 selects a mem-mapped register destination */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-reg-write mode: all data goes to one register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* otherwise each data dword hits a consecutive register */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set -> the write target is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-space bit set -> the copy destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the whole run, then validate each register */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					/* src-addr-increment disabled: single register */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					/* low 21 command bits hold the transfer count */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					/* dst-addr-increment disabled: single register */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4251
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a VM IB on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: dword array containing the indirect buffer
 * @pkt: packet header (idx, opcode, count) already decoded by the caller
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): a smaller opcode
 * whitelist (no draw/index packets), with the same register validation via
 * si_vm_reg_valid() for packets that can address registers.
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes cannot write arbitrary registers - always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0 selects a mem-mapped register destination */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;	/* dword offset -> byte offset */
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field == 0 selects a mem-mapped register destination */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-reg-write mode: all data goes to one register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* otherwise each data dword hits a consecutive register */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set -> the write target is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-space bit set -> the copy destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4333
4334 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4335 {
4336         int ret = 0;
4337         u32 idx = 0;
4338         struct radeon_cs_packet pkt;
4339
4340         do {
4341                 pkt.idx = idx;
4342                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4343                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4344                 pkt.one_reg_wr = 0;
4345                 switch (pkt.type) {
4346                 case RADEON_PACKET_TYPE0:
4347                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4348                         ret = -EINVAL;
4349                         break;
4350                 case RADEON_PACKET_TYPE2:
4351                         idx += 1;
4352                         break;
4353                 case RADEON_PACKET_TYPE3:
4354                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4355                         if (ib->is_const_ib)
4356                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4357                         else {
4358                                 switch (ib->ring) {
4359                                 case RADEON_RING_TYPE_GFX_INDEX:
4360                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4361                                         break;
4362                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4363                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4364                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4365                                         break;
4366                                 default:
4367                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4368                                         ret = -EINVAL;
4369                                         break;
4370                                 }
4371                         }
4372                         idx += pkt.count + 2;
4373                         break;
4374                 default:
4375                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4376                         ret = -EINVAL;
4377                         break;
4378                 }
4379                 if (ret)
4380                         break;
4381         } while (idx < ib->length_dw);
4382
4383         return ret;
4384 }
4385
4386 /*
4387  * vm
4388  */
/**
 * si_vm_init - initialize the VM manager parameters (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Sets the number of hardware VM contexts and the VRAM base offset
 * used by the common VM manager code.  Always succeeds.
 *
 * Returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4398
/**
 * si_vm_fini - tear down the VM manager (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do on SI; present to satisfy the asic interface.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4402
4403 /**
4404  * si_vm_set_page - update the page tables using the CP
4405  *
4406  * @rdev: radeon_device pointer
4407  * @ib: indirect buffer to fill with commands
4408  * @pe: addr of the page entry
4409  * @addr: dst addr to write into pe
4410  * @count: number of page entries to update
4411  * @incr: increase next addr by incr bytes
4412  * @flags: access flags
4413  *
4414  * Update the page tables using the CP (SI).
4415  */
4416 void si_vm_set_page(struct radeon_device *rdev,
4417                     struct radeon_ib *ib,
4418                     uint64_t pe,
4419                     uint64_t addr, unsigned count,
4420                     uint32_t incr, uint32_t flags)
4421 {
4422         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4423         uint64_t value;
4424         unsigned ndw;
4425
4426         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4427                 while (count) {
4428                         ndw = 2 + count * 2;
4429                         if (ndw > 0x3FFE)
4430                                 ndw = 0x3FFE;
4431
4432                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4433                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4434                                         WRITE_DATA_DST_SEL(1));
4435                         ib->ptr[ib->length_dw++] = pe;
4436                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4437                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4438                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4439                                         value = radeon_vm_map_gart(rdev, addr);
4440                                         value &= 0xFFFFFFFFFFFFF000ULL;
4441                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4442                                         value = addr;
4443                                 } else {
4444                                         value = 0;
4445                                 }
4446                                 addr += incr;
4447                                 value |= r600_flags;
4448                                 ib->ptr[ib->length_dw++] = value;
4449                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4450                         }
4451                 }
4452         } else {
4453                 /* DMA */
4454                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4455                         while (count) {
4456                                 ndw = count * 2;
4457                                 if (ndw > 0xFFFFE)
4458                                         ndw = 0xFFFFE;
4459
4460                                 /* for non-physically contiguous pages (system) */
4461                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4462                                 ib->ptr[ib->length_dw++] = pe;
4463                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4464                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4465                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4466                                                 value = radeon_vm_map_gart(rdev, addr);
4467                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4468                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4469                                                 value = addr;
4470                                         } else {
4471                                                 value = 0;
4472                                         }
4473                                         addr += incr;
4474                                         value |= r600_flags;
4475                                         ib->ptr[ib->length_dw++] = value;
4476                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4477                                 }
4478                         }
4479                 } else {
4480                         while (count) {
4481                                 ndw = count * 2;
4482                                 if (ndw > 0xFFFFE)
4483                                         ndw = 0xFFFFE;
4484
4485                                 if (flags & RADEON_VM_PAGE_VALID)
4486                                         value = addr;
4487                                 else
4488                                         value = 0;
4489                                 /* for physically contiguous pages (vram) */
4490                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4491                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4492                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4493                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4494                                 ib->ptr[ib->length_dw++] = 0;
4495                                 ib->ptr[ib->length_dw++] = value; /* value */
4496                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4497                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4498                                 ib->ptr[ib->length_dw++] = 0;
4499                                 pe += ndw * 4;
4500                                 addr += (ndw / 2) * incr;
4501                                 count -= ndw / 2;
4502                         }
4503                 }
4504                 while (ib->length_dw & 0x7)
4505                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4506         }
4507 }
4508
/**
 * si_vm_flush - flush the TLB for a VM via the gfx/compute ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: VM whose page-directory address should be made current
 *
 * Emits WRITE_DATA packets that update the per-VM page table base
 * address, flush the HDP cache, and request a TLB invalidate for this
 * VM id, followed by a PFP/ME sync.  No-op when @vm is NULL.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* VM contexts 0-7 and 8-15 have separate base-address register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4551
/**
 * si_dma_vm_flush - flush the TLB for a VM via a DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index to emit the flush on
 * @vm: VM whose page-directory address should be made current
 *
 * DMA-ring counterpart of si_vm_flush(): uses SRBM_WRITE packets to set
 * the page table base address, flush the HDP cache, and invalidate the
 * VM's TLB entries.  No-op when @vm is NULL.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* (0xf << 16) is the byte-enable mask for the SRBM write */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4577
4578 /*
4579  *  Power and clock gating
4580  */
4581 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4582 {
4583         int i;
4584
4585         for (i = 0; i < rdev->usec_timeout; i++) {
4586                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4587                         break;
4588                 udelay(1);
4589         }
4590
4591         for (i = 0; i < rdev->usec_timeout; i++) {
4592                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4593                         break;
4594                 udelay(1);
4595         }
4596 }
4597
4598 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4599                                          bool enable)
4600 {
4601         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4602         u32 mask;
4603         int i;
4604
4605         if (enable)
4606                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4607         else
4608                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4609         WREG32(CP_INT_CNTL_RING0, tmp);
4610
4611         if (!enable) {
4612                 /* read a gfx register */
4613                 tmp = RREG32(DB_DEPTH_INFO);
4614
4615                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4616                 for (i = 0; i < rdev->usec_timeout; i++) {
4617                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4618                                 break;
4619                         udelay(1);
4620                 }
4621         }
4622 }
4623
4624 static void si_set_uvd_dcm(struct radeon_device *rdev,
4625                            bool sw_mode)
4626 {
4627         u32 tmp, tmp2;
4628
4629         tmp = RREG32(UVD_CGC_CTRL);
4630         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4631         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4632
4633         if (sw_mode) {
4634                 tmp &= ~0x7ffff800;
4635                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4636         } else {
4637                 tmp |= 0x7ffff800;
4638                 tmp2 = 0;
4639         }
4640
4641         WREG32(UVD_CGC_CTRL, tmp);
4642         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4643 }
4644
/* Initialize UVD internal clock gating.
 *
 * hw_mode is hard-coded true, so this currently always selects hardware
 * DCM via si_set_uvd_dcm(rdev, false); the else branch (clearing DCM
 * entirely) is kept as the alternative but is presently dead code.
 */
static void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4657
4658 static u32 si_halt_rlc(struct radeon_device *rdev)
4659 {
4660         u32 data, orig;
4661
4662         orig = data = RREG32(RLC_CNTL);
4663
4664         if (data & RLC_ENABLE) {
4665                 data &= ~RLC_ENABLE;
4666                 WREG32(RLC_CNTL, data);
4667
4668                 si_wait_for_rlc_serdes(rdev);
4669         }
4670
4671         return orig;
4672 }
4673
4674 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4675 {
4676         u32 tmp;
4677
4678         tmp = RREG32(RLC_CNTL);
4679         if (tmp != rlc)
4680                 WREG32(RLC_CNTL, rlc);
4681 }
4682
4683 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4684 {
4685         u32 data, orig;
4686
4687         orig = data = RREG32(DMA_PG);
4688         if (enable)
4689                 data |= PG_CNTL_ENABLE;
4690         else
4691                 data &= ~PG_CNTL_ENABLE;
4692         if (orig != data)
4693                 WREG32(DMA_PG, data);
4694 }
4695
4696 static void si_init_dma_pg(struct radeon_device *rdev)
4697 {
4698         u32 tmp;
4699
4700         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4701         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4702
4703         for (tmp = 0; tmp < 5; tmp++)
4704                 WREG32(DMA_PGFSM_WRITE, 0);
4705 }
4706
4707 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4708                                bool enable)
4709 {
4710         u32 tmp;
4711
4712         if (enable) {
4713                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4714                 WREG32(RLC_TTOP_D, tmp);
4715
4716                 tmp = RREG32(RLC_PG_CNTL);
4717                 tmp |= GFX_PG_ENABLE;
4718                 WREG32(RLC_PG_CNTL, tmp);
4719
4720                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4721                 tmp |= AUTO_PG_EN;
4722                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4723         } else {
4724                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4725                 tmp &= ~AUTO_PG_EN;
4726                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4727
4728                 tmp = RREG32(DB_RENDER_CONTROL);
4729         }
4730 }
4731
/* One-time setup for gfx coarse-grain power gating: point the RLC at
 * the save/restore and clear-state buffers and program the auto-PG
 * idle threshold.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	/* RLC save/restore buffer base (256-byte aligned GPU address) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* source the PG state from the save/restore buffer */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	/* clear-state (golden register) buffer base */
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold before auto power-gating kicks in */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4751
4752 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4753 {
4754         u32 mask = 0, tmp, tmp1;
4755         int i;
4756
4757         si_select_se_sh(rdev, se, sh);
4758         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4759         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4760         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4761
4762         tmp &= 0xffff0000;
4763
4764         tmp |= tmp1;
4765         tmp >>= 16;
4766
4767         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4768                 mask <<= 1;
4769                 mask |= 1;
4770         }
4771
4772         return (~tmp) & mask;
4773 }
4774
4775 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4776 {
4777         u32 i, j, k, active_cu_number = 0;
4778         u32 mask, counter, cu_bitmap;
4779         u32 tmp = 0;
4780
4781         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4782                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4783                         mask = 1;
4784                         cu_bitmap = 0;
4785                         counter  = 0;
4786                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4787                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4788                                         if (counter < 2)
4789                                                 cu_bitmap |= mask;
4790                                         counter++;
4791                                 }
4792                                 mask <<= 1;
4793                         }
4794
4795                         active_cu_number += counter;
4796                         tmp |= (cu_bitmap << (i * 16 + j * 8));
4797                 }
4798         }
4799
4800         WREG32(RLC_PG_AO_CU_MASK, tmp);
4801
4802         tmp = RREG32(RLC_MAX_PG_CU);
4803         tmp &= ~MAX_PU_CU_MASK;
4804         tmp |= MAX_PU_CU(active_cu_number);
4805         WREG32(RLC_MAX_PG_CU, tmp);
4806 }
4807
/* Enable or disable coarse-grain clock gating (CGCG/CGLS).  The serdes
 * programming is done with the RLC halted, then the RLC is restored;
 * the exact ordering of these register writes matters.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	si_enable_gui_idle_interrupt(rdev, enable);

	if (enable) {
		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* program the serdes with the RLC halted */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the saved RLC_CNTL */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		/* repeated reads act as a settling delay before disabling */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write back if something changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
4845
/* Enable or disable medium-grain clock gating (and CP memory light
 * sleep).  Register writes are skipped when the value is unchanged;
 * serdes programming is done with the RLC halted.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;	/* magic enable value, from AMD */
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* enable CP memory light sleep */
		orig = data = RREG32(CP_MEM_SLP_CNTL);
		data |= CP_MEM_LS_EN;
		if (orig != data)
			WREG32(CP_MEM_SLP_CNTL, data);

		/* clear the low MGCG override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		/* restore the saved RLC_CNTL */
		si_update_rlc(rdev, tmp);
	} else {
		/* set override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable CP memory light sleep if it was on */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		/* restore the saved RLC_CNTL */
		si_update_rlc(rdev, tmp);
	}
}
4899
/* Enable/disable medium grain clock gating for the UVD block.
 * Touches the UVD context registers, the UVD CGC control (DCM bit) and
 * the SMC-indexed CGTT_LOCAL registers (0 = gated, all-ones = ungated).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable) {
		/* enable clock gating for all 14 UVD memory instances */
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* clear the local override masks so gating can engage */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* force all local overrides on to defeat gating */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
4931
/* Memory controller / VM hub clock gating control registers that are
 * toggled as a group (each carries an MC_LS_ENABLE bit used by
 * si_enable_mc_ls()).
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
4944
4945 static void si_enable_mc_ls(struct radeon_device *rdev,
4946                             bool enable)
4947 {
4948         int i;
4949         u32 orig, data;
4950
4951         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
4952                 orig = data = RREG32(mc_cg_registers[i]);
4953                 if (enable)
4954                         data |= MC_LS_ENABLE;
4955                 else
4956                         data &= ~MC_LS_ENABLE;
4957                 if (data != orig)
4958                         WREG32(mc_cg_registers[i], data);
4959         }
4960 }
4961
4962
/* Enable the supported clock gating features at startup: GFX MGCG and
 * CGCG everywhere, MC light sleep off on Tahiti, plus UVD gating.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	/* all SI parts handled here have UVD */
	bool has_uvd = true;

	si_enable_mgcg(rdev, true);
	si_enable_cgcg(rdev, true);
	/* disable MC LS on Tahiti */
	if (rdev->family == CHIP_TAHITI)
		si_enable_mc_ls(rdev, false);
	if (has_uvd) {
		si_enable_uvd_mgcg(rdev, true);
		si_init_uvd_internal_cg(rdev);
	}
}
4977
/* Tear down clock gating in the reverse order of si_init_cg(). */
static void si_fini_cg(struct radeon_device *rdev)
{
	bool has_uvd = true;

	if (has_uvd)
		si_enable_uvd_mgcg(rdev, false);
	si_enable_cgcg(rdev, false);
	si_enable_mgcg(rdev, false);
}
4987
4988 static void si_init_pg(struct radeon_device *rdev)
4989 {
4990         bool has_pg = false;
4991
4992         /* only cape verde supports PG */
4993         if (rdev->family == CHIP_VERDE)
4994                 has_pg = true;
4995
4996         if (has_pg) {
4997                 si_init_ao_cu_mask(rdev);
4998                 si_init_dma_pg(rdev);
4999                 si_enable_dma_pg(rdev, true);
5000                 si_init_gfx_cgpg(rdev);
5001                 si_enable_gfx_cgpg(rdev, true);
5002         } else {
5003                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5004                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5005         }
5006 }
5007
5008 static void si_fini_pg(struct radeon_device *rdev)
5009 {
5010         bool has_pg = false;
5011
5012         /* only cape verde supports PG */
5013         if (rdev->family == CHIP_VERDE)
5014                 has_pg = true;
5015
5016         if (has_pg) {
5017                 si_enable_dma_pg(rdev, false);
5018                 si_enable_gfx_cgpg(rdev, false);
5019         }
5020 }
5021
5022 /*
5023  * RLC
5024  */
/* Free the RLC save/restore and clear-state buffer objects allocated by
 * si_rlc_init().  Safe to call on a partially initialized RLC (each BO
 * is checked for NULL) and hence usable as the error-unwind path.
 */
void si_rlc_fini(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj) {
		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
		/* unpin anyway; reserve failure is only warned about */
		radeon_bo_unpin(rdev->rlc.save_restore_obj);
		radeon_bo_unreserve(rdev->rlc.save_restore_obj);

		radeon_bo_unref(&rdev->rlc.save_restore_obj);
		rdev->rlc.save_restore_obj = NULL;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj) {
		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
		radeon_bo_unpin(rdev->rlc.clear_state_obj);
		radeon_bo_unreserve(rdev->rlc.clear_state_obj);

		radeon_bo_unref(&rdev->rlc.clear_state_obj);
		rdev->rlc.clear_state_obj = NULL;
	}
}
5053
5054 #define RLC_CLEAR_STATE_END_MARKER          0x00000001
5055
5056 int si_rlc_init(struct radeon_device *rdev)
5057 {
5058         volatile u32 *dst_ptr;
5059         u32 dws, data, i, j, k, reg_num;
5060         u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
5061         u64 reg_list_mc_addr;
5062         const struct cs_section_def *cs_data = si_cs_data;
5063         int r;
5064
5065         /* save restore block */
5066         if (rdev->rlc.save_restore_obj == NULL) {
5067                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
5068                                      RADEON_GEM_DOMAIN_VRAM, NULL,
5069                                      &rdev->rlc.save_restore_obj);
5070                 if (r) {
5071                         dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
5072                         return r;
5073                 }
5074         }
5075
5076         r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5077         if (unlikely(r != 0)) {
5078                 si_rlc_fini(rdev);
5079                 return r;
5080         }
5081         r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
5082                           &rdev->rlc.save_restore_gpu_addr);
5083         if (r) {
5084                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5085                 dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
5086                 si_rlc_fini(rdev);
5087                 return r;
5088         }
5089
5090         if (rdev->family == CHIP_VERDE) {
5091                 r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
5092                 if (r) {
5093                         dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
5094                         si_rlc_fini(rdev);
5095                 return r;
5096                 }
5097                 /* write the sr buffer */
5098                 dst_ptr = rdev->rlc.sr_ptr;
5099                 for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
5100                         dst_ptr[i] = verde_rlc_save_restore_register_list[i];
5101                 }
5102                 radeon_bo_kunmap(rdev->rlc.save_restore_obj);
5103         }
5104         radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5105
5106         /* clear state block */
5107         reg_list_num = 0;
5108         dws = 0;
5109         for (i = 0; cs_data[i].section != NULL; i++) {
5110                 for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5111                         reg_list_num++;
5112                         dws += cs_data[i].section[j].reg_count;
5113                 }
5114         }
5115         reg_list_blk_index = (3 * reg_list_num + 2);
5116         dws += reg_list_blk_index;
5117
5118         if (rdev->rlc.clear_state_obj == NULL) {
5119                 r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
5120                                      RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
5121                 if (r) {
5122                         dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
5123                         si_rlc_fini(rdev);
5124                         return r;
5125                 }
5126         }
5127         r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5128         if (unlikely(r != 0)) {
5129                 si_rlc_fini(rdev);
5130                 return r;
5131         }
5132         r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
5133                           &rdev->rlc.clear_state_gpu_addr);
5134         if (r) {
5135
5136                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5137                 dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
5138                 si_rlc_fini(rdev);
5139                 return r;
5140         }
5141         r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
5142         if (r) {
5143                 dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
5144                 si_rlc_fini(rdev);
5145                 return r;
5146         }
5147         /* set up the cs buffer */
5148         dst_ptr = rdev->rlc.cs_ptr;
5149         reg_list_hdr_blk_index = 0;
5150         reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
5151         data = upper_32_bits(reg_list_mc_addr);
5152         dst_ptr[reg_list_hdr_blk_index] = data;
5153         reg_list_hdr_blk_index++;
5154         for (i = 0; cs_data[i].section != NULL; i++) {
5155                 for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5156                         reg_num = cs_data[i].section[j].reg_count;
5157                         data = reg_list_mc_addr & 0xffffffff;
5158                         dst_ptr[reg_list_hdr_blk_index] = data;
5159                         reg_list_hdr_blk_index++;
5160
5161                         data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
5162                         dst_ptr[reg_list_hdr_blk_index] = data;
5163                         reg_list_hdr_blk_index++;
5164
5165                         data = 0x08000000 | (reg_num * 4);
5166                         dst_ptr[reg_list_hdr_blk_index] = data;
5167                         reg_list_hdr_blk_index++;
5168
5169                         for (k = 0; k < reg_num; k++) {
5170                                 data = cs_data[i].section[j].extent[k];
5171                                 dst_ptr[reg_list_blk_index + k] = data;
5172                         }
5173                         reg_list_mc_addr += reg_num * 4;
5174                         reg_list_blk_index += reg_num;
5175                 }
5176         }
5177         dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
5178
5179         radeon_bo_kunmap(rdev->rlc.clear_state_obj);
5180         radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5181
5182         return 0;
5183 }
5184
5185 static void si_rlc_reset(struct radeon_device *rdev)
5186 {
5187         u32 tmp = RREG32(GRBM_SOFT_RESET);
5188
5189         tmp |= SOFT_RESET_RLC;
5190         WREG32(GRBM_SOFT_RESET, tmp);
5191         udelay(50);
5192         tmp &= ~SOFT_RESET_RLC;
5193         WREG32(GRBM_SOFT_RESET, tmp);
5194         udelay(50);
5195 }
5196
/* Halt the RLC, mask the GUI idle interrupt and wait for the RLC
 * serdes units to drain before the caller touches RLC state.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5205
/* Re-enable the RLC and the GUI idle interrupt, then give the engine a
 * short settling delay before it is used.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5214
5215 static bool si_lbpw_supported(struct radeon_device *rdev)
5216 {
5217         u32 tmp;
5218
5219         /* Enable LBPW only for DDR3 */
5220         tmp = RREG32(MC_SEQ_MISC0);
5221         if ((tmp & 0xF0000000) == 0xB0000000)
5222                 return true;
5223         return false;
5224 }
5225
5226 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5227 {
5228         u32 tmp;
5229
5230         tmp = RREG32(RLC_LB_CNTL);
5231         if (enable)
5232                 tmp |= LOAD_BALANCE_ENABLE;
5233         else
5234                 tmp &= ~LOAD_BALANCE_ENABLE;
5235         WREG32(RLC_LB_CNTL, tmp);
5236
5237         if (!enable) {
5238                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5239                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5240         }
5241 }
5242
/* Bring up the RLC: stop and reset it, (re)initialize power and clock
 * gating, clear the RLC state registers, load the RLC microcode and
 * start the engine.  Returns -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* reset RLC list/load-balance state before loading ucode */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* the firmware image is big-endian; write it one dword at a time
	 * through the UCODE_ADDR/UCODE_DATA window
	 */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5282
5283 static void si_enable_interrupts(struct radeon_device *rdev)
5284 {
5285         u32 ih_cntl = RREG32(IH_CNTL);
5286         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5287
5288         ih_cntl |= ENABLE_INTR;
5289         ih_rb_cntl |= IH_RB_ENABLE;
5290         WREG32(IH_CNTL, ih_cntl);
5291         WREG32(IH_RB_CNTL, ih_rb_cntl);
5292         rdev->ih.enabled = true;
5293 }
5294
5295 static void si_disable_interrupts(struct radeon_device *rdev)
5296 {
5297         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5298         u32 ih_cntl = RREG32(IH_CNTL);
5299
5300         ih_rb_cntl &= ~IH_RB_ENABLE;
5301         ih_cntl &= ~ENABLE_INTR;
5302         WREG32(IH_RB_CNTL, ih_rb_cntl);
5303         WREG32(IH_CNTL, ih_cntl);
5304         /* set rptr, wptr to 0 */
5305         WREG32(IH_RB_RPTR, 0);
5306         WREG32(IH_RB_WPTR, 0);
5307         rdev->ih.enabled = false;
5308         rdev->ih.rptr = 0;
5309 }
5310
/* Force every interrupt source (CP rings, DMA traps, GRBM, CRTC
 * vblank/pageflip, HPD) into the disabled state.  HPD polarity bits are
 * preserved while the enable bits are cleared.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* ring 0 keeps the context busy/empty enables; rings 1/2 fully off */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline masks, gated on how many CRTCs this asic has */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* display-less parts (e.g. server variants) have no DAC/HPD blocks */
	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep only the polarity bit, clearing enable/ack bits */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5366
/* Bring up the interrupt handler: allocate the IH ring, resume the RLC
 * (which routes interrupts), program the IH ring buffer and controller,
 * then enable interrupt delivery.  Returns 0 on success or the error
 * from ring allocation / RLC resume.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	/* bus mastering must be on for MSI/interrupt delivery */
	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5437
/* Program the hardware interrupt-enable state for every source (CP
 * rings, DMA engines, CRTC vblanks/pageflips, hotplug, thermal) from
 * the software state in rdev->irq.  Returns -EINVAL if no IRQ handler
 * is installed, otherwise 0.
 * NOTE(review): presumably called under the radeon core's irq lock —
 * confirm against radeon_irq_set().
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* read back current HPD state with the enable bits cleared */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank is wanted either for the drm vblank counter or a pending flip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* flush the accumulated state to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
5606
5607 static inline void si_irq_ack(struct radeon_device *rdev)
5608 {
5609         u32 tmp;
5610
5611         if (ASIC_IS_NODCE(rdev))
5612                 return;
5613
5614         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5615         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5616         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5617         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5618         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5619         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5620         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5621         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5622         if (rdev->num_crtc >= 4) {
5623                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5624                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5625         }
5626         if (rdev->num_crtc >= 6) {
5627                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5628                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5629         }
5630
5631         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5632                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5633         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5634                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5635         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5636                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5637         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5638                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5639         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5640                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5641         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5642                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5643
5644         if (rdev->num_crtc >= 4) {
5645                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5646                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5647                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5648                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5649                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5650                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5651                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5652                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5653                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5654                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5655                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5656                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5657         }
5658
5659         if (rdev->num_crtc >= 6) {
5660                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5661                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5662                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5663                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5664                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5665                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5666                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5667                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5668                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5669                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5670                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5671                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5672         }
5673
5674         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5675                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5676                 tmp |= DC_HPDx_INT_ACK;
5677                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5678         }
5679         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5680                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5681                 tmp |= DC_HPDx_INT_ACK;
5682                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5683         }
5684         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5685                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5686                 tmp |= DC_HPDx_INT_ACK;
5687                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5688         }
5689         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5690                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5691                 tmp |= DC_HPDx_INT_ACK;
5692                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5693         }
5694         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5695                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5696                 tmp |= DC_HPDx_INT_ACK;
5697                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5698         }
5699         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5700                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5701                 tmp |= DC_HPDx_INT_ACK;
5702                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5703         }
5704 }
5705
/* Disable interrupt generation and quiesce pending sources.
 * Order matters: stop new interrupts first, let in-flight ones land
 * (mdelay), ack whatever arrived, then reset per-source enable state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5714
/* Suspend-time interrupt teardown: disable/ack all interrupts, then
 * stop the RLC so no more interrupt traffic is generated while asleep.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5720
/* Final interrupt teardown: quiesce the hardware as for suspend, then
 * free the IH ring buffer itself.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5726
5727 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5728 {
5729         u32 wptr, tmp;
5730
5731         if (rdev->wb.enabled)
5732                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5733         else
5734                 wptr = RREG32(IH_RB_WPTR);
5735
5736         if (wptr & RB_OVERFLOW) {
5737                 /* When a ring buffer overflow happen start parsing interrupt
5738                  * from the last not overwritten vector (wptr + 16). Hopefully
5739                  * this should allow us to catchup.
5740                  */
5741                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5742                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5743                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5744                 tmp = RREG32(IH_RB_CNTL);
5745                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5746                 WREG32(IH_RB_CNTL, tmp);
5747         }
5748         return (wptr & rdev->ih.ptr_mask);
5749 }
5750
5751 /*        SI IV Ring
5752  * Each IV ring entry is 128 bits:
5753  * [7:0]    - interrupt source id
5754  * [31:8]   - reserved
5755  * [59:32]  - interrupt source data
5756  * [63:60]  - reserved
5757  * [71:64]  - RINGID
5758  * [79:72]  - VMID
5759  * [127:80] - reserved
5760  */
/**
 * si_irq_process - main interrupt handler (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Drains the IH (interrupt handler) ring: walks the 128-bit vectors
 * between rptr and wptr, decoding source id, source data and ring id,
 * and dispatches each one - CRTC vblank/vline and page flips, HPD
 * hotplug, VM protection faults, CP/DMA fence completions, thermal
 * transitions.  Hotplug and thermal handling are deferred to work
 * queues.  After releasing the IH lock, re-reads wptr and restarts if
 * new vectors arrived while processing.
 *
 * Returns IRQ_HANDLED if processing ran, IRQ_NONE if the IH is
 * disabled, the device is shutting down, or another thread already
 * holds the IH lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146:
		case 147:
			/* VM protection fault - dump the faulting address/status
			 * registers, then clear them so the next fault is latched.
			 */
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6080
6081 /**
6082  * si_copy_dma - copy pages using the DMA engine
6083  *
6084  * @rdev: radeon_device pointer
6085  * @src_offset: src GPU address
6086  * @dst_offset: dst GPU address
6087  * @num_gpu_pages: number of GPU pages to xfer
6088  * @fence: radeon fence object
6089  *
6090  * Copy GPU paging using the DMA engine (SI).
6091  * Used by the radeon ttm implementation to move pages if
6092  * registered as the asic copy callback.
6093  */
6094 int si_copy_dma(struct radeon_device *rdev,
6095                 uint64_t src_offset, uint64_t dst_offset,
6096                 unsigned num_gpu_pages,
6097                 struct radeon_fence **fence)
6098 {
6099         struct radeon_semaphore *sem = NULL;
6100         int ring_index = rdev->asic->copy.dma_ring_index;
6101         struct radeon_ring *ring = &rdev->ring[ring_index];
6102         u32 size_in_bytes, cur_size_in_bytes;
6103         int i, num_loops;
6104         int r = 0;
6105
6106         r = radeon_semaphore_create(rdev, &sem);
6107         if (r) {
6108                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6109                 return r;
6110         }
6111
6112         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6113         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6114         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6115         if (r) {
6116                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6117                 radeon_semaphore_free(rdev, &sem, NULL);
6118                 return r;
6119         }
6120
6121         if (radeon_fence_need_sync(*fence, ring->idx)) {
6122                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6123                                             ring->idx);
6124                 radeon_fence_note_sync(*fence, ring->idx);
6125         } else {
6126                 radeon_semaphore_free(rdev, &sem, NULL);
6127         }
6128
6129         for (i = 0; i < num_loops; i++) {
6130                 cur_size_in_bytes = size_in_bytes;
6131                 if (cur_size_in_bytes > 0xFFFFF)
6132                         cur_size_in_bytes = 0xFFFFF;
6133                 size_in_bytes -= cur_size_in_bytes;
6134                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6135                 radeon_ring_write(ring, dst_offset & 0xffffffff);
6136                 radeon_ring_write(ring, src_offset & 0xffffffff);
6137                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6138                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6139                 src_offset += cur_size_in_bytes;
6140                 dst_offset += cur_size_in_bytes;
6141         }
6142
6143         r = radeon_fence_emit(rdev, fence, ring->idx);
6144         if (r) {
6145                 radeon_ring_unlock_undo(rdev, ring);
6146                 return r;
6147         }
6148
6149         radeon_ring_unlock_commit(rdev, ring);
6150         radeon_semaphore_free(rdev, &sem, *fence);
6151
6152         return r;
6153 }
6154
6155 /*
6156  * startup/shutdown callbacks
6157  */
/* si_startup - bring the SI GPU into an operational state.
 *
 * Strictly ordered hardware init: PCIe link/ASPM, microcode load, MC
 * programming, GART enable, core GPU init, RLC and writeback buffers,
 * fence rings, UVD, interrupts, all CP/DMA/UVD ring buffers, then the
 * IB pool and VM manager.  Each step depends on the previous ones;
 * any failure aborts startup and returns the error code.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* load firmware if not already resident */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence processing on each ring: three CP rings, two DMA rings */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: disable the ring and continue */
	if (rdev->has_uvd) {
		r = rv770_uvd_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the ring buffers: GFX, CP1, CP2, DMA0, DMA1 */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring was sized earlier; ring_size == 0 means UVD is disabled */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size,
					     R600_WB_UVD_RPTR_OFFSET,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     0, 0xfffff, RADEON_CP_PACKET2);
			if (!r)
				r = r600_uvd_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
6338
6339 int si_resume(struct radeon_device *rdev)
6340 {
6341         int r;
6342
6343         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6344          * posting will perform necessary task to bring back GPU into good
6345          * shape.
6346          */
6347         /* post card */
6348         atom_asic_init(rdev->mode_info.atom_context);
6349
6350         /* init golden registers */
6351         si_init_golden_registers(rdev);
6352
6353         rdev->accel_working = true;
6354         r = si_startup(rdev);
6355         if (r) {
6356                 DRM_ERROR("si startup failed on resume\n");
6357                 rdev->accel_working = false;
6358                 return r;
6359         }
6360
6361         return r;
6362
6363 }
6364
/**
 * si_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the hw in roughly the reverse order of si_startup(): VM
 * manager, CP, DMA engines, UVD (if present), interrupts, writeback and
 * finally the GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);	/* halt the gfx/compute command processors */
	cayman_dma_stop(rdev);		/* halt both async DMA engines */
	if (rdev->has_uvd) {
		r600_uvd_rbc_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	/* writeback off only after interrupts are quiesced */
	radeon_wb_disable(rdev);
	/* GART last: the engines stopped above may still reference GPUVM */
	si_pcie_gart_disable(rdev);
	return 0;
}
6379
6380 /* Plan is to move initialization in that function and use
6381  * helper function so that radeon_device_init pretty much
6382  * do nothing more than calling asic specific function. This
6383  * should also allow to remove a bunch of callback function
6384  * like vram_info.
6385  */
6386 int si_init(struct radeon_device *rdev)
6387 {
6388         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6389         int r;
6390
6391         /* Read BIOS */
6392         if (!radeon_get_bios(rdev)) {
6393                 if (ASIC_IS_AVIVO(rdev))
6394                         return -EINVAL;
6395         }
6396         /* Must be an ATOMBIOS */
6397         if (!rdev->is_atom_bios) {
6398                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6399                 return -EINVAL;
6400         }
6401         r = radeon_atombios_init(rdev);
6402         if (r)
6403                 return r;
6404
6405         /* Post card if necessary */
6406         if (!radeon_card_posted(rdev)) {
6407                 if (!rdev->bios) {
6408                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6409                         return -EINVAL;
6410                 }
6411                 DRM_INFO("GPU not posted. posting now...\n");
6412                 atom_asic_init(rdev->mode_info.atom_context);
6413         }
6414         /* init golden registers */
6415         si_init_golden_registers(rdev);
6416         /* Initialize scratch registers */
6417         si_scratch_init(rdev);
6418         /* Initialize surface registers */
6419         radeon_surface_init(rdev);
6420         /* Initialize clocks */
6421         radeon_get_clock_info(rdev->ddev);
6422
6423         /* Fence driver */
6424         r = radeon_fence_driver_init(rdev);
6425         if (r)
6426                 return r;
6427
6428         /* initialize memory controller */
6429         r = si_mc_init(rdev);
6430         if (r)
6431                 return r;
6432         /* Memory manager */
6433         r = radeon_bo_init(rdev);
6434         if (r)
6435                 return r;
6436
6437         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6438         ring->ring_obj = NULL;
6439         r600_ring_init(rdev, ring, 1024 * 1024);
6440
6441         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6442         ring->ring_obj = NULL;
6443         r600_ring_init(rdev, ring, 1024 * 1024);
6444
6445         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6446         ring->ring_obj = NULL;
6447         r600_ring_init(rdev, ring, 1024 * 1024);
6448
6449         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6450         ring->ring_obj = NULL;
6451         r600_ring_init(rdev, ring, 64 * 1024);
6452
6453         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6454         ring->ring_obj = NULL;
6455         r600_ring_init(rdev, ring, 64 * 1024);
6456
6457         if (rdev->has_uvd) {
6458                 r = radeon_uvd_init(rdev);
6459                 if (!r) {
6460                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6461                         ring->ring_obj = NULL;
6462                         r600_ring_init(rdev, ring, 4096);
6463                 }
6464         }
6465
6466         rdev->ih.ring_obj = NULL;
6467         r600_ih_ring_init(rdev, 64 * 1024);
6468
6469         r = r600_pcie_gart_init(rdev);
6470         if (r)
6471                 return r;
6472
6473         rdev->accel_working = true;
6474         r = si_startup(rdev);
6475         if (r) {
6476                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6477                 si_cp_fini(rdev);
6478                 cayman_dma_fini(rdev);
6479                 si_irq_fini(rdev);
6480                 si_rlc_fini(rdev);
6481                 radeon_wb_fini(rdev);
6482                 radeon_ib_pool_fini(rdev);
6483                 radeon_vm_manager_fini(rdev);
6484                 radeon_irq_kms_fini(rdev);
6485                 si_pcie_gart_fini(rdev);
6486                 rdev->accel_working = false;
6487         }
6488
6489         /* Don't start up if the MC ucode is missing.
6490          * The default clocks and voltages before the MC ucode
6491          * is loaded are not suffient for advanced operations.
6492          */
6493         if (!rdev->mc_fw) {
6494                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6495                 return -EINVAL;
6496         }
6497
6498         return 0;
6499 }
6500
/**
 * si_fini - tear down the asic driver state
 *
 * @rdev: radeon_device pointer
 *
 * Undoes everything set up by si_init()/si_startup(): engines, clock/power
 * gating, writeback, VM manager, IB pool, interrupts, UVD, GART, scratch
 * VRAM, GEM, fences, buffer manager, atombios state and the cached BIOS
 * image.  The order mirrors init in reverse; do not reorder casually.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	si_fini_cg(rdev);	/* clockgating */
	si_fini_pg(rdev);	/* powergating */
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd)
		radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;	/* guard against use-after-free of the BIOS image */
}
6524
6525 /**
6526  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6527  *
6528  * @rdev: radeon_device pointer
6529  *
6530  * Fetches a GPU clock counter snapshot (SI).
6531  * Returns the 64 bit clock counter snapshot.
6532  */
6533 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6534 {
6535         uint64_t clock;
6536
6537         mutex_lock(&rdev->gpu_clock_mutex);
6538         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6539         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6540                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6541         mutex_unlock(&rdev->gpu_clock_mutex);
6542         return clock;
6543 }
6544
/**
 * si_set_uvd_clocks - reprogram the UVD PLL (UPLL) for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (same units as the 125000/250000 bounds
 *        passed to the divider calculator — presumably 10 kHz; TODO confirm)
 * @dclk: requested UVD decode clock
 *
 * Switches VCLK/DCLK onto the bypass (bus) clock, reprograms the UPLL
 * dividers, waits for lock and switches the clocks back.  Passing 0 for
 * either clock leaves the PLL bypassed and puts it to sleep.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested frequencies */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* handshake with the SMC before touching the dividers */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* ISPARE9 selects a VCO range based on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	/* second handshake once the PLL is out of bypass */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6635
/**
 * si_pcie_gen3_enable - attempt to raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * No-op for IGPs, non-PCIE parts, when the radeon.pcie_gen2 module
 * parameter is 0, or when the platform caps the link below gen2.  For a
 * gen3-capable link that is not already at gen3, equalization is redone
 * by toggling quiesce/redo-EQ while preserving the bridge and GPU LNKCTL
 * HAWD bits, then the speed change is kicked off via PCIE_LC_SPEED_CNTL.
 *
 * NOTE(review): the raw pci_read/write_config_word read-modify-writes of
 * LNKCTL/LNKCTL2 here are not serialized against the PCI core's own RMWs
 * of those registers — looks racy; confirm against current upstream
 * (pcie_capability_clear_and_set_word is the race-free accessor).
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* user disabled the feature via module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* what speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current link rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* remember the HAWD (hw autonomous width disable) state
			 * of both ends so it can be restored after each retry
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected width if we are below it */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request a fresh equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore bits 4 (enter compliance) and 9-11
				 * (compliance de-emphasis) — magic masks; presumably
				 * should be named LNKCTL2 constants, confirm vs spec
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into the GPU's LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the initiate bit (speed change done) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
6792
/**
 * si_program_aspm - program PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 inactivity timers, PLL power-down while in L1, lane
 * power states, LS2 exit times and — when the upstream bridge advertises
 * clock PM — the CLKREQ#-based clocking scheme.  The disable_* locals are
 * compile-time policy knobs; all false here, so L0s, L1, PLL-off-in-L1
 * and CLKREQ handling are all enabled.  No-op for non-PCIE parts.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* program L0s/L1 inactivity thresholds */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF/TXS2 states */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on everything but Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* Oland/Hainan need a non-zero LS2 exit time */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ# usable only if the bridge advertises clock PM */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* move misc clocks off the reference clock so it can stop */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: write back only the L0s/PMI settings from above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS is saturated, L0s only works on a symmetric link;
		 * otherwise drop the L0s inactivity timer again
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}