/* drivers/gpu/drm/radeon/si.c -- Southern Islands (SI) ASIC support */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
/*
 * Microcode images required by the SI family ASICs, one set per chip:
 * pfp/me/ce (graphics CP), mc (memory controller), rlc (run list
 * controller) and smc (power management controller).
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68
/* Forward declarations for helpers defined later in this file. */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
/*
 * Helpers shared with the r600/evergreen code paths.
 * NOTE(review): these extern declarations would normally belong in a
 * shared header rather than being repeated in each .c file.
 */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
79
80 static const u32 verde_rlc_save_restore_register_list[] =
81 {
82         (0x8000 << 16) | (0x98f4 >> 2),
83         0x00000000,
84         (0x8040 << 16) | (0x98f4 >> 2),
85         0x00000000,
86         (0x8000 << 16) | (0xe80 >> 2),
87         0x00000000,
88         (0x8040 << 16) | (0xe80 >> 2),
89         0x00000000,
90         (0x8000 << 16) | (0x89bc >> 2),
91         0x00000000,
92         (0x8040 << 16) | (0x89bc >> 2),
93         0x00000000,
94         (0x8000 << 16) | (0x8c1c >> 2),
95         0x00000000,
96         (0x8040 << 16) | (0x8c1c >> 2),
97         0x00000000,
98         (0x9c00 << 16) | (0x98f0 >> 2),
99         0x00000000,
100         (0x9c00 << 16) | (0xe7c >> 2),
101         0x00000000,
102         (0x8000 << 16) | (0x9148 >> 2),
103         0x00000000,
104         (0x8040 << 16) | (0x9148 >> 2),
105         0x00000000,
106         (0x9c00 << 16) | (0x9150 >> 2),
107         0x00000000,
108         (0x9c00 << 16) | (0x897c >> 2),
109         0x00000000,
110         (0x9c00 << 16) | (0x8d8c >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0xac54 >> 2),
113         0X00000000,
114         0x3,
115         (0x9c00 << 16) | (0x98f8 >> 2),
116         0x00000000,
117         (0x9c00 << 16) | (0x9910 >> 2),
118         0x00000000,
119         (0x9c00 << 16) | (0x9914 >> 2),
120         0x00000000,
121         (0x9c00 << 16) | (0x9918 >> 2),
122         0x00000000,
123         (0x9c00 << 16) | (0x991c >> 2),
124         0x00000000,
125         (0x9c00 << 16) | (0x9920 >> 2),
126         0x00000000,
127         (0x9c00 << 16) | (0x9924 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x9928 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x992c >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x9930 >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x9934 >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x9938 >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x993c >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x9940 >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x9944 >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x9948 >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x994c >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x9950 >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x9954 >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x9958 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x995c >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x9960 >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x9964 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9968 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x996c >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x9970 >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x9974 >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x9978 >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x997c >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x9980 >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x9984 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9988 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x998c >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x8c00 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x8c14 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x8c04 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x8c08 >> 2),
188         0x00000000,
189         (0x8000 << 16) | (0x9b7c >> 2),
190         0x00000000,
191         (0x8040 << 16) | (0x9b7c >> 2),
192         0x00000000,
193         (0x8000 << 16) | (0xe84 >> 2),
194         0x00000000,
195         (0x8040 << 16) | (0xe84 >> 2),
196         0x00000000,
197         (0x8000 << 16) | (0x89c0 >> 2),
198         0x00000000,
199         (0x8040 << 16) | (0x89c0 >> 2),
200         0x00000000,
201         (0x8000 << 16) | (0x914c >> 2),
202         0x00000000,
203         (0x8040 << 16) | (0x914c >> 2),
204         0x00000000,
205         (0x8000 << 16) | (0x8c20 >> 2),
206         0x00000000,
207         (0x8040 << 16) | (0x8c20 >> 2),
208         0x00000000,
209         (0x8000 << 16) | (0x9354 >> 2),
210         0x00000000,
211         (0x8040 << 16) | (0x9354 >> 2),
212         0x00000000,
213         (0x9c00 << 16) | (0x9060 >> 2),
214         0x00000000,
215         (0x9c00 << 16) | (0x9364 >> 2),
216         0x00000000,
217         (0x9c00 << 16) | (0x9100 >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x913c >> 2),
220         0x00000000,
221         (0x8000 << 16) | (0x90e0 >> 2),
222         0x00000000,
223         (0x8000 << 16) | (0x90e4 >> 2),
224         0x00000000,
225         (0x8000 << 16) | (0x90e8 >> 2),
226         0x00000000,
227         (0x8040 << 16) | (0x90e0 >> 2),
228         0x00000000,
229         (0x8040 << 16) | (0x90e4 >> 2),
230         0x00000000,
231         (0x8040 << 16) | (0x90e8 >> 2),
232         0x00000000,
233         (0x9c00 << 16) | (0x8bcc >> 2),
234         0x00000000,
235         (0x9c00 << 16) | (0x8b24 >> 2),
236         0x00000000,
237         (0x9c00 << 16) | (0x88c4 >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x8e50 >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x8c0c >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x8e58 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8e5c >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x9508 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x950c >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x9494 >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0xac0c >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0xac10 >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0xac14 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0xae00 >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0xac08 >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0x88d4 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0x88c8 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0x88cc >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0x89b0 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0x8b10 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x8a14 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x9830 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x9834 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x9838 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x9a10 >> 2),
282         0x00000000,
283         (0x8000 << 16) | (0x9870 >> 2),
284         0x00000000,
285         (0x8000 << 16) | (0x9874 >> 2),
286         0x00000000,
287         (0x8001 << 16) | (0x9870 >> 2),
288         0x00000000,
289         (0x8001 << 16) | (0x9874 >> 2),
290         0x00000000,
291         (0x8040 << 16) | (0x9870 >> 2),
292         0x00000000,
293         (0x8040 << 16) | (0x9874 >> 2),
294         0x00000000,
295         (0x8041 << 16) | (0x9870 >> 2),
296         0x00000000,
297         (0x8041 << 16) | (0x9874 >> 2),
298         0x00000000,
299         0x00000000
300 };
301
/*
 * Tahiti RLC golden register settings -- presumably { reg, mask, value }
 * triples applied read-modify-write at init; verify against the caller
 * that walks this table before editing.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
311
/* Tahiti golden register settings -- { reg, mask, value } triples. */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
346
/* Additional Tahiti golden register settings -- { reg, mask, value }. */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
351
/* Pitcairn RLC golden register settings -- { reg, mask, value } triples. */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
360
/* Pitcairn golden register settings -- { reg, mask, value } triples. */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
391
/* Verde RLC golden register settings -- { reg, mask, value } triples. */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
400
/*
 * Verde golden register settings -- { reg, mask, value } triples.
 * NOTE(review): several triples are repeated verbatim (0xd030, 0xd830,
 * 0x2ae4, 0x240c, ...).  Re-applying the same masked write is harmless
 * but looks like copy/paste residue; kept as-is since this is hardware
 * init data -- confirm against vendor tables before deduplicating.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
456
/* Oland RLC golden register settings -- { reg, mask, value } triples. */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
465
/* Oland golden register settings -- { reg, mask, value } triples. */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
496
/* Hainan golden register settings -- { reg, mask, value } triples. */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
525
/* Additional Hainan golden register settings -- { reg, mask, value }. */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
530
/*
 * Tahiti clock-gating init sequence (MGCG/CGCG per the table name) --
 * { reg, mask, value } triples.  Pure hardware init data; do not edit
 * values without vendor reference tables.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
660
/*
 * Pitcairn clock-gating init sequence (MGCG/CGCG per the table name) --
 * { reg, mask, value } triples.  Pure hardware init data; do not edit
 * values without vendor reference tables.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
758
759 static const u32 verde_mgcg_cgcg_init[] =
760 {
761         0xc400, 0xffffffff, 0xfffffffc,
762         0x802c, 0xffffffff, 0xe0000000,
763         0x9a60, 0xffffffff, 0x00000100,
764         0x92a4, 0xffffffff, 0x00000100,
765         0xc164, 0xffffffff, 0x00000100,
766         0x9774, 0xffffffff, 0x00000100,
767         0x8984, 0xffffffff, 0x06000100,
768         0x8a18, 0xffffffff, 0x00000100,
769         0x92a0, 0xffffffff, 0x00000100,
770         0xc380, 0xffffffff, 0x00000100,
771         0x8b28, 0xffffffff, 0x00000100,
772         0x9144, 0xffffffff, 0x00000100,
773         0x8d88, 0xffffffff, 0x00000100,
774         0x8d8c, 0xffffffff, 0x00000100,
775         0x9030, 0xffffffff, 0x00000100,
776         0x9034, 0xffffffff, 0x00000100,
777         0x9038, 0xffffffff, 0x00000100,
778         0x903c, 0xffffffff, 0x00000100,
779         0xad80, 0xffffffff, 0x00000100,
780         0xac54, 0xffffffff, 0x00000100,
781         0x897c, 0xffffffff, 0x06000100,
782         0x9868, 0xffffffff, 0x00000100,
783         0x9510, 0xffffffff, 0x00000100,
784         0xaf04, 0xffffffff, 0x00000100,
785         0xae04, 0xffffffff, 0x00000100,
786         0x949c, 0xffffffff, 0x00000100,
787         0x802c, 0xffffffff, 0xe0000000,
788         0x9160, 0xffffffff, 0x00010000,
789         0x9164, 0xffffffff, 0x00030002,
790         0x9168, 0xffffffff, 0x00040007,
791         0x916c, 0xffffffff, 0x00060005,
792         0x9170, 0xffffffff, 0x00090008,
793         0x9174, 0xffffffff, 0x00020001,
794         0x9178, 0xffffffff, 0x00040003,
795         0x917c, 0xffffffff, 0x00000007,
796         0x9180, 0xffffffff, 0x00060005,
797         0x9184, 0xffffffff, 0x00090008,
798         0x9188, 0xffffffff, 0x00030002,
799         0x918c, 0xffffffff, 0x00050004,
800         0x9190, 0xffffffff, 0x00000008,
801         0x9194, 0xffffffff, 0x00070006,
802         0x9198, 0xffffffff, 0x000a0009,
803         0x919c, 0xffffffff, 0x00040003,
804         0x91a0, 0xffffffff, 0x00060005,
805         0x91a4, 0xffffffff, 0x00000009,
806         0x91a8, 0xffffffff, 0x00080007,
807         0x91ac, 0xffffffff, 0x000b000a,
808         0x91b0, 0xffffffff, 0x00050004,
809         0x91b4, 0xffffffff, 0x00070006,
810         0x91b8, 0xffffffff, 0x0008000b,
811         0x91bc, 0xffffffff, 0x000a0009,
812         0x91c0, 0xffffffff, 0x000d000c,
813         0x9200, 0xffffffff, 0x00090008,
814         0x9204, 0xffffffff, 0x000b000a,
815         0x9208, 0xffffffff, 0x000c000f,
816         0x920c, 0xffffffff, 0x000e000d,
817         0x9210, 0xffffffff, 0x00110010,
818         0x9214, 0xffffffff, 0x000a0009,
819         0x9218, 0xffffffff, 0x000c000b,
820         0x921c, 0xffffffff, 0x0000000f,
821         0x9220, 0xffffffff, 0x000e000d,
822         0x9224, 0xffffffff, 0x00110010,
823         0x9228, 0xffffffff, 0x000b000a,
824         0x922c, 0xffffffff, 0x000d000c,
825         0x9230, 0xffffffff, 0x00000010,
826         0x9234, 0xffffffff, 0x000f000e,
827         0x9238, 0xffffffff, 0x00120011,
828         0x923c, 0xffffffff, 0x000c000b,
829         0x9240, 0xffffffff, 0x000e000d,
830         0x9244, 0xffffffff, 0x00000011,
831         0x9248, 0xffffffff, 0x0010000f,
832         0x924c, 0xffffffff, 0x00130012,
833         0x9250, 0xffffffff, 0x000d000c,
834         0x9254, 0xffffffff, 0x000f000e,
835         0x9258, 0xffffffff, 0x00100013,
836         0x925c, 0xffffffff, 0x00120011,
837         0x9260, 0xffffffff, 0x00150014,
838         0x9150, 0xffffffff, 0x96940200,
839         0x8708, 0xffffffff, 0x00900100,
840         0xc478, 0xffffffff, 0x00000080,
841         0xc404, 0xffffffff, 0x0020003f,
842         0x30, 0xffffffff, 0x0000001c,
843         0x34, 0x000f0000, 0x000f0000,
844         0x160c, 0xffffffff, 0x00000100,
845         0x1024, 0xffffffff, 0x00000100,
846         0x102c, 0x00000101, 0x00000000,
847         0x20a8, 0xffffffff, 0x00000104,
848         0x264c, 0x000c0000, 0x000c0000,
849         0x2648, 0x000c0000, 0x000c0000,
850         0x55e4, 0xff000fff, 0x00000100,
851         0x55e8, 0x00000001, 0x00000001,
852         0x2f50, 0x00000001, 0x00000001,
853         0x30cc, 0xc0000fff, 0x00000104,
854         0xc1e4, 0x00000001, 0x00000001,
855         0xd0c0, 0xfffffff0, 0x00000100,
856         0xd8c0, 0xfffffff0, 0x00000100
857 };
858
/* Oland medium/coarse-grain clock gating (MGCG/CGCG) init sequence.
 * Flat list of triples — presumably {register offset, and-mask, or-value};
 * consumed by radeon_program_register_sequence() from
 * si_init_golden_registers() for CHIP_OLAND.  Values come from AMD's
 * register-programming recommendations and are not otherwise documented.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
938
/* Hainan medium/coarse-grain clock gating (MGCG/CGCG) init sequence.
 * Flat list of triples — presumably {register offset, and-mask, or-value};
 * consumed by radeon_program_register_sequence() from
 * si_init_golden_registers() for CHIP_HAINAN.  Largely mirrors the Oland
 * table but omits a few entries (e.g. 0x102c, 0x55e4, 0x55e8) — Hainan has
 * no display block, which presumably accounts for the differences
 * (TODO confirm).
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1015
1016 static u32 verde_pg_init[] =
1017 {
1018         0x353c, 0xffffffff, 0x40000,
1019         0x3538, 0xffffffff, 0x200010ff,
1020         0x353c, 0xffffffff, 0x0,
1021         0x353c, 0xffffffff, 0x0,
1022         0x353c, 0xffffffff, 0x0,
1023         0x353c, 0xffffffff, 0x0,
1024         0x353c, 0xffffffff, 0x0,
1025         0x353c, 0xffffffff, 0x7007,
1026         0x3538, 0xffffffff, 0x300010ff,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x400000,
1033         0x3538, 0xffffffff, 0x100010ff,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x120200,
1040         0x3538, 0xffffffff, 0x500010ff,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x1e1e16,
1047         0x3538, 0xffffffff, 0x600010ff,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x171f1e,
1054         0x3538, 0xffffffff, 0x700010ff,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x3538, 0xffffffff, 0x9ff,
1062         0x3500, 0xffffffff, 0x0,
1063         0x3504, 0xffffffff, 0x10000800,
1064         0x3504, 0xffffffff, 0xf,
1065         0x3504, 0xffffffff, 0xf,
1066         0x3500, 0xffffffff, 0x4,
1067         0x3504, 0xffffffff, 0x1000051e,
1068         0x3504, 0xffffffff, 0xffff,
1069         0x3504, 0xffffffff, 0xffff,
1070         0x3500, 0xffffffff, 0x8,
1071         0x3504, 0xffffffff, 0x80500,
1072         0x3500, 0xffffffff, 0x12,
1073         0x3504, 0xffffffff, 0x9050c,
1074         0x3500, 0xffffffff, 0x1d,
1075         0x3504, 0xffffffff, 0xb052c,
1076         0x3500, 0xffffffff, 0x2a,
1077         0x3504, 0xffffffff, 0x1053e,
1078         0x3500, 0xffffffff, 0x2d,
1079         0x3504, 0xffffffff, 0x10546,
1080         0x3500, 0xffffffff, 0x30,
1081         0x3504, 0xffffffff, 0xa054e,
1082         0x3500, 0xffffffff, 0x3c,
1083         0x3504, 0xffffffff, 0x1055f,
1084         0x3500, 0xffffffff, 0x3f,
1085         0x3504, 0xffffffff, 0x10567,
1086         0x3500, 0xffffffff, 0x42,
1087         0x3504, 0xffffffff, 0x1056f,
1088         0x3500, 0xffffffff, 0x45,
1089         0x3504, 0xffffffff, 0x10572,
1090         0x3500, 0xffffffff, 0x48,
1091         0x3504, 0xffffffff, 0x20575,
1092         0x3500, 0xffffffff, 0x4c,
1093         0x3504, 0xffffffff, 0x190801,
1094         0x3500, 0xffffffff, 0x67,
1095         0x3504, 0xffffffff, 0x1082a,
1096         0x3500, 0xffffffff, 0x6a,
1097         0x3504, 0xffffffff, 0x1b082d,
1098         0x3500, 0xffffffff, 0x87,
1099         0x3504, 0xffffffff, 0x310851,
1100         0x3500, 0xffffffff, 0xba,
1101         0x3504, 0xffffffff, 0x891,
1102         0x3500, 0xffffffff, 0xbc,
1103         0x3504, 0xffffffff, 0x893,
1104         0x3500, 0xffffffff, 0xbe,
1105         0x3504, 0xffffffff, 0x20895,
1106         0x3500, 0xffffffff, 0xc2,
1107         0x3504, 0xffffffff, 0x20899,
1108         0x3500, 0xffffffff, 0xc6,
1109         0x3504, 0xffffffff, 0x2089d,
1110         0x3500, 0xffffffff, 0xca,
1111         0x3504, 0xffffffff, 0x8a1,
1112         0x3500, 0xffffffff, 0xcc,
1113         0x3504, 0xffffffff, 0x8a3,
1114         0x3500, 0xffffffff, 0xce,
1115         0x3504, 0xffffffff, 0x308a5,
1116         0x3500, 0xffffffff, 0xd3,
1117         0x3504, 0xffffffff, 0x6d08cd,
1118         0x3500, 0xffffffff, 0x142,
1119         0x3504, 0xffffffff, 0x2000095a,
1120         0x3504, 0xffffffff, 0x1,
1121         0x3500, 0xffffffff, 0x144,
1122         0x3504, 0xffffffff, 0x301f095b,
1123         0x3500, 0xffffffff, 0x165,
1124         0x3504, 0xffffffff, 0xc094d,
1125         0x3500, 0xffffffff, 0x173,
1126         0x3504, 0xffffffff, 0xf096d,
1127         0x3500, 0xffffffff, 0x184,
1128         0x3504, 0xffffffff, 0x15097f,
1129         0x3500, 0xffffffff, 0x19b,
1130         0x3504, 0xffffffff, 0xc0998,
1131         0x3500, 0xffffffff, 0x1a9,
1132         0x3504, 0xffffffff, 0x409a7,
1133         0x3500, 0xffffffff, 0x1af,
1134         0x3504, 0xffffffff, 0xcdc,
1135         0x3500, 0xffffffff, 0x1b1,
1136         0x3504, 0xffffffff, 0x800,
1137         0x3508, 0xffffffff, 0x6c9b2000,
1138         0x3510, 0xfc00, 0x2000,
1139         0x3544, 0xffffffff, 0xfc0,
1140         0x28d4, 0x00000100, 0x100
1141 };
1142
1143 static void si_init_golden_registers(struct radeon_device *rdev)
1144 {
1145         switch (rdev->family) {
1146         case CHIP_TAHITI:
1147                 radeon_program_register_sequence(rdev,
1148                                                  tahiti_golden_registers,
1149                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1150                 radeon_program_register_sequence(rdev,
1151                                                  tahiti_golden_rlc_registers,
1152                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1153                 radeon_program_register_sequence(rdev,
1154                                                  tahiti_mgcg_cgcg_init,
1155                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1156                 radeon_program_register_sequence(rdev,
1157                                                  tahiti_golden_registers2,
1158                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1159                 break;
1160         case CHIP_PITCAIRN:
1161                 radeon_program_register_sequence(rdev,
1162                                                  pitcairn_golden_registers,
1163                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1164                 radeon_program_register_sequence(rdev,
1165                                                  pitcairn_golden_rlc_registers,
1166                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1167                 radeon_program_register_sequence(rdev,
1168                                                  pitcairn_mgcg_cgcg_init,
1169                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1170                 break;
1171         case CHIP_VERDE:
1172                 radeon_program_register_sequence(rdev,
1173                                                  verde_golden_registers,
1174                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1175                 radeon_program_register_sequence(rdev,
1176                                                  verde_golden_rlc_registers,
1177                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1178                 radeon_program_register_sequence(rdev,
1179                                                  verde_mgcg_cgcg_init,
1180                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1181                 radeon_program_register_sequence(rdev,
1182                                                  verde_pg_init,
1183                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1184                 break;
1185         case CHIP_OLAND:
1186                 radeon_program_register_sequence(rdev,
1187                                                  oland_golden_registers,
1188                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1189                 radeon_program_register_sequence(rdev,
1190                                                  oland_golden_rlc_registers,
1191                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1192                 radeon_program_register_sequence(rdev,
1193                                                  oland_mgcg_cgcg_init,
1194                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1195                 break;
1196         case CHIP_HAINAN:
1197                 radeon_program_register_sequence(rdev,
1198                                                  hainan_golden_registers,
1199                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1200                 radeon_program_register_sequence(rdev,
1201                                                  hainan_golden_registers2,
1202                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1203                 radeon_program_register_sequence(rdev,
1204                                                  hainan_mgcg_cgcg_init,
1205                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1206                 break;
1207         default:
1208                 break;
1209         }
1210 }
1211
/* Fallback clock reported by si_get_xclk() when the xclk pin is muxed to
 * TCLK.  Units presumably match rdev->clock.spll.reference_freq (10 kHz
 * units elsewhere in radeon — TODO confirm), making TCLK == 1000.
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1214
1215 /**
1216  * si_get_xclk - get the xclk
1217  *
1218  * @rdev: radeon_device pointer
1219  *
1220  * Returns the reference clock used by the gfx engine
1221  * (SI).
1222  */
1223 u32 si_get_xclk(struct radeon_device *rdev)
1224 {
1225         u32 reference_clock = rdev->clock.spll.reference_freq;
1226         u32 tmp;
1227
1228         tmp = RREG32(CG_CLKPIN_CNTL_2);
1229         if (tmp & MUX_TCLK_TO_XCLK)
1230                 return TCLK;
1231
1232         tmp = RREG32(CG_CLKPIN_CNTL);
1233         if (tmp & XTALIN_DIVIDE)
1234                 return reference_clock / 4;
1235
1236         return reference_clock;
1237 }
1238
1239 /* get temperature in millidegrees */
1240 int si_get_temp(struct radeon_device *rdev)
1241 {
1242         u32 temp;
1243         int actual_temp = 0;
1244
1245         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1246                 CTF_TEMP_SHIFT;
1247
1248         if (temp & 0x200)
1249                 actual_temp = 255;
1250         else
1251                 actual_temp = temp & 0x1ff;
1252
1253         actual_temp = (actual_temp * 1000);
1254
1255         return actual_temp;
1256 }
1257
/* Number of {index, data} pairs in each *_io_mc_regs table below. */
#define TAHITI_IO_MC_REGS_SIZE 36

/* Tahiti MC sequencer IO debug overrides: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs written by si_mc_load_microcode() before the
 * MC ucode is uploaded.  The per-ASIC tables differ only in the final
 * 0x9f entry.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1298
/* Pitcairn MC sequencer IO debug overrides — see tahiti_io_mc_regs;
 * identical except for the final 0x9f entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1337
/* Verde MC sequencer IO debug overrides — see tahiti_io_mc_regs;
 * identical except for the final 0x9f entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1376
/* Oland MC sequencer IO debug overrides — see tahiti_io_mc_regs;
 * identical except for the final 0x9f entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1415
/* Hainan MC sequencer IO debug overrides — see tahiti_io_mc_regs;
 * identical except for the final 0x9f entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1454
1455 /* ucode loading */
1456 static int si_mc_load_microcode(struct radeon_device *rdev)
1457 {
1458         const __be32 *fw_data;
1459         u32 running, blackout = 0;
1460         u32 *io_mc_regs;
1461         int i, ucode_size, regs_size;
1462
1463         if (!rdev->mc_fw)
1464                 return -EINVAL;
1465
1466         switch (rdev->family) {
1467         case CHIP_TAHITI:
1468                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1469                 ucode_size = SI_MC_UCODE_SIZE;
1470                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1471                 break;
1472         case CHIP_PITCAIRN:
1473                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1474                 ucode_size = SI_MC_UCODE_SIZE;
1475                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1476                 break;
1477         case CHIP_VERDE:
1478         default:
1479                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1480                 ucode_size = SI_MC_UCODE_SIZE;
1481                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1482                 break;
1483         case CHIP_OLAND:
1484                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1485                 ucode_size = OLAND_MC_UCODE_SIZE;
1486                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1487                 break;
1488         case CHIP_HAINAN:
1489                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1490                 ucode_size = OLAND_MC_UCODE_SIZE;
1491                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1492                 break;
1493         }
1494
1495         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1496
1497         if (running == 0) {
1498                 if (running) {
1499                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1500                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1501                 }
1502
1503                 /* reset the engine and set to writable */
1504                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1505                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1506
1507                 /* load mc io regs */
1508                 for (i = 0; i < regs_size; i++) {
1509                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1510                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1511                 }
1512                 /* load the MC ucode */
1513                 fw_data = (const __be32 *)rdev->mc_fw->data;
1514                 for (i = 0; i < ucode_size; i++)
1515                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1516
1517                 /* put the engine back into the active state */
1518                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1519                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1520                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1521
1522                 /* wait for training to complete */
1523                 for (i = 0; i < rdev->usec_timeout; i++) {
1524                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1525                                 break;
1526                         udelay(1);
1527                 }
1528                 for (i = 0; i < rdev->usec_timeout; i++) {
1529                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1530                                 break;
1531                         udelay(1);
1532                 }
1533
1534                 if (running)
1535                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1536         }
1537
1538         return 0;
1539 }
1540
1541 static int si_init_microcode(struct radeon_device *rdev)
1542 {
1543         const char *chip_name;
1544         const char *rlc_chip_name;
1545         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1546         size_t smc_req_size;
1547         char fw_name[30];
1548         int err;
1549
1550         DRM_DEBUG("\n");
1551
1552         switch (rdev->family) {
1553         case CHIP_TAHITI:
1554                 chip_name = "TAHITI";
1555                 rlc_chip_name = "TAHITI";
1556                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1557                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1558                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1559                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1560                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1561                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1562                 break;
1563         case CHIP_PITCAIRN:
1564                 chip_name = "PITCAIRN";
1565                 rlc_chip_name = "PITCAIRN";
1566                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1567                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1568                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1569                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1570                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1571                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1572                 break;
1573         case CHIP_VERDE:
1574                 chip_name = "VERDE";
1575                 rlc_chip_name = "VERDE";
1576                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1577                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1578                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1579                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1580                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1581                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1582                 break;
1583         case CHIP_OLAND:
1584                 chip_name = "OLAND";
1585                 rlc_chip_name = "OLAND";
1586                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1587                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1588                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1589                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1590                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1591                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1592                 break;
1593         case CHIP_HAINAN:
1594                 chip_name = "HAINAN";
1595                 rlc_chip_name = "HAINAN";
1596                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1597                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1598                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1599                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1600                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1601                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1602                 break;
1603         default: BUG();
1604         }
1605
1606         DRM_INFO("Loading %s Microcode\n", chip_name);
1607
1608         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1609         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1610         if (err)
1611                 goto out;
1612         if (rdev->pfp_fw->size != pfp_req_size) {
1613                 printk(KERN_ERR
1614                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1615                        rdev->pfp_fw->size, fw_name);
1616                 err = -EINVAL;
1617                 goto out;
1618         }
1619
1620         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1621         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1622         if (err)
1623                 goto out;
1624         if (rdev->me_fw->size != me_req_size) {
1625                 printk(KERN_ERR
1626                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627                        rdev->me_fw->size, fw_name);
1628                 err = -EINVAL;
1629         }
1630
1631         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1632         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1633         if (err)
1634                 goto out;
1635         if (rdev->ce_fw->size != ce_req_size) {
1636                 printk(KERN_ERR
1637                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1638                        rdev->ce_fw->size, fw_name);
1639                 err = -EINVAL;
1640         }
1641
1642         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1643         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1644         if (err)
1645                 goto out;
1646         if (rdev->rlc_fw->size != rlc_req_size) {
1647                 printk(KERN_ERR
1648                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1649                        rdev->rlc_fw->size, fw_name);
1650                 err = -EINVAL;
1651         }
1652
1653         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1654         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1655         if (err)
1656                 goto out;
1657         if (rdev->mc_fw->size != mc_req_size) {
1658                 printk(KERN_ERR
1659                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1660                        rdev->mc_fw->size, fw_name);
1661                 err = -EINVAL;
1662         }
1663
1664         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1665         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1666         if (err)
1667                 goto out;
1668         if (rdev->smc_fw->size != smc_req_size) {
1669                 printk(KERN_ERR
1670                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1671                        rdev->smc_fw->size, fw_name);
1672                 err = -EINVAL;
1673         }
1674
1675 out:
1676         if (err) {
1677                 if (err != -EINVAL)
1678                         printk(KERN_ERR
1679                                "si_cp: Failed to load firmware \"%s\"\n",
1680                                fw_name);
1681                 release_firmware(rdev->pfp_fw);
1682                 rdev->pfp_fw = NULL;
1683                 release_firmware(rdev->me_fw);
1684                 rdev->me_fw = NULL;
1685                 release_firmware(rdev->ce_fw);
1686                 rdev->ce_fw = NULL;
1687                 release_firmware(rdev->rlc_fw);
1688                 rdev->rlc_fw = NULL;
1689                 release_firmware(rdev->mc_fw);
1690                 rdev->mc_fw = NULL;
1691                 release_firmware(rdev->smc_fw);
1692                 rdev->smc_fw = NULL;
1693         }
1694         return err;
1695 }
1696
1697 /* watermark setup */
1698 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1699                                    struct radeon_crtc *radeon_crtc,
1700                                    struct drm_display_mode *mode,
1701                                    struct drm_display_mode *other_mode)
1702 {
1703         u32 tmp;
1704         /*
1705          * Line Buffer Setup
1706          * There are 3 line buffers, each one shared by 2 display controllers.
1707          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1708          * the display controllers.  The paritioning is done via one of four
1709          * preset allocations specified in bits 21:20:
1710          *  0 - half lb
1711          *  2 - whole lb, other crtc must be disabled
1712          */
1713         /* this can get tricky if we have two large displays on a paired group
1714          * of crtcs.  Ideally for multiple large displays we'd assign them to
1715          * non-linked crtcs for maximum line buffer allocation.
1716          */
1717         if (radeon_crtc->base.enabled && mode) {
1718                 if (other_mode)
1719                         tmp = 0; /* 1/2 */
1720                 else
1721                         tmp = 2; /* whole */
1722         } else
1723                 tmp = 0;
1724
1725         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1726                DC_LB_MEMORY_CONFIG(tmp));
1727
1728         if (radeon_crtc->base.enabled && mode) {
1729                 switch (tmp) {
1730                 case 0:
1731                 default:
1732                         return 4096 * 2;
1733                 case 2:
1734                         return 8192 * 2;
1735                 }
1736         }
1737
1738         /* controller not enabled, so no lb used */
1739         return 0;
1740 }
1741
1742 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1743 {
1744         u32 tmp = RREG32(MC_SHARED_CHMAP);
1745
1746         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1747         case 0:
1748         default:
1749                 return 1;
1750         case 1:
1751                 return 2;
1752         case 2:
1753                 return 4;
1754         case 3:
1755                 return 8;
1756         case 4:
1757                 return 3;
1758         case 5:
1759                 return 6;
1760         case 6:
1761                 return 10;
1762         case 7:
1763                 return 12;
1764         case 8:
1765                 return 16;
1766         }
1767 }
1768
/* dce6_wm_params - inputs for the DCE6 display watermark calculations;
 * filled in per-crtc by dce6_program_watermarks() for both the high
 * and low clock cases.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1784
1785 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1786 {
1787         /* Calculate raw DRAM Bandwidth */
1788         fixed20_12 dram_efficiency; /* 0.7 */
1789         fixed20_12 yclk, dram_channels, bandwidth;
1790         fixed20_12 a;
1791
1792         a.full = dfixed_const(1000);
1793         yclk.full = dfixed_const(wm->yclk);
1794         yclk.full = dfixed_div(yclk, a);
1795         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1796         a.full = dfixed_const(10);
1797         dram_efficiency.full = dfixed_const(7);
1798         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1799         bandwidth.full = dfixed_mul(dram_channels, yclk);
1800         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1801
1802         return dfixed_trunc(bandwidth);
1803 }
1804
1805 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1806 {
1807         /* Calculate DRAM Bandwidth and the part allocated to display. */
1808         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1809         fixed20_12 yclk, dram_channels, bandwidth;
1810         fixed20_12 a;
1811
1812         a.full = dfixed_const(1000);
1813         yclk.full = dfixed_const(wm->yclk);
1814         yclk.full = dfixed_div(yclk, a);
1815         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1816         a.full = dfixed_const(10);
1817         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1818         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1819         bandwidth.full = dfixed_mul(dram_channels, yclk);
1820         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1821
1822         return dfixed_trunc(bandwidth);
1823 }
1824
1825 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1826 {
1827         /* Calculate the display Data return Bandwidth */
1828         fixed20_12 return_efficiency; /* 0.8 */
1829         fixed20_12 sclk, bandwidth;
1830         fixed20_12 a;
1831
1832         a.full = dfixed_const(1000);
1833         sclk.full = dfixed_const(wm->sclk);
1834         sclk.full = dfixed_div(sclk, a);
1835         a.full = dfixed_const(10);
1836         return_efficiency.full = dfixed_const(8);
1837         return_efficiency.full = dfixed_div(return_efficiency, a);
1838         a.full = dfixed_const(32);
1839         bandwidth.full = dfixed_mul(a, sclk);
1840         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1841
1842         return dfixed_trunc(bandwidth);
1843 }
1844
/* Bytes transferred per DMIF request; a fixed 32 on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1849
1850 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1851 {
1852         /* Calculate the DMIF Request Bandwidth */
1853         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1854         fixed20_12 disp_clk, sclk, bandwidth;
1855         fixed20_12 a, b1, b2;
1856         u32 min_bandwidth;
1857
1858         a.full = dfixed_const(1000);
1859         disp_clk.full = dfixed_const(wm->disp_clk);
1860         disp_clk.full = dfixed_div(disp_clk, a);
1861         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1862         b1.full = dfixed_mul(a, disp_clk);
1863
1864         a.full = dfixed_const(1000);
1865         sclk.full = dfixed_const(wm->sclk);
1866         sclk.full = dfixed_div(sclk, a);
1867         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1868         b2.full = dfixed_mul(a, sclk);
1869
1870         a.full = dfixed_const(10);
1871         disp_clk_request_efficiency.full = dfixed_const(8);
1872         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1873
1874         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1875
1876         a.full = dfixed_const(min_bandwidth);
1877         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1878
1879         return dfixed_trunc(bandwidth);
1880 }
1881
1882 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1883 {
1884         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1885         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1886         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1887         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1888
1889         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1890 }
1891
1892 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1893 {
1894         /* Calculate the display mode Average Bandwidth
1895          * DisplayMode should contain the source and destination dimensions,
1896          * timing, etc.
1897          */
1898         fixed20_12 bpp;
1899         fixed20_12 line_time;
1900         fixed20_12 src_width;
1901         fixed20_12 bandwidth;
1902         fixed20_12 a;
1903
1904         a.full = dfixed_const(1000);
1905         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1906         line_time.full = dfixed_div(line_time, a);
1907         bpp.full = dfixed_const(wm->bytes_per_pixel);
1908         src_width.full = dfixed_const(wm->src_width);
1909         bandwidth.full = dfixed_mul(src_width, bpp);
1910         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1911         bandwidth.full = dfixed_div(bandwidth, line_time);
1912
1913         return dfixed_trunc(bandwidth);
1914 }
1915
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads - nothing to compute a watermark for */
	if (wm->num_heads == 0)
		return 0;

	/* heavy vertical scaling / filtering needs more source lines
	 * per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk/1000 * bytes_per_pixel (disp_clk limited rate) */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the worst-case number of source lines at
	 * lb_fill_bw
	 */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency by any line-fill overrun past the active
	 * display period
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1978
1979 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1980 {
1981         if (dce6_average_bandwidth(wm) <=
1982             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1983                 return true;
1984         else
1985                 return false;
1986 };
1987
1988 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1989 {
1990         if (dce6_average_bandwidth(wm) <=
1991             (dce6_available_bandwidth(wm) / wm->num_heads))
1992                 return true;
1993         else
1994                 return false;
1995 };
1996
1997 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1998 {
1999         u32 lb_partitions = wm->lb_size / wm->src_width;
2000         u32 line_time = wm->active_time + wm->blank_time;
2001         u32 latency_tolerant_lines;
2002         u32 latency_hiding;
2003         fixed20_12 a;
2004
2005         a.full = dfixed_const(1);
2006         if (wm->vsc.full > a.full)
2007                 latency_tolerant_lines = 1;
2008         else {
2009                 if (lb_partitions <= (wm->vtaps + 1))
2010                         latency_tolerant_lines = 1;
2011                 else
2012                         latency_tolerant_lines = 2;
2013         }
2014
2015         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2016
2017         if (dce6_latency_watermark(wm) <= latency_hiding)
2018                 return true;
2019         else
2020                 return false;
2021 }
2022
/**
 * dce6_program_watermarks - program display watermarks for one crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer allocation for this crtc
 * @num_heads: number of active display heads
 *
 * Computes latency watermarks for the high-clock (wm A) and low-clock
 * (wm B) cases, writes them to the DPG latency/arbitration registers,
 * and forces display priority high when either case cannot hide
 * memory latency.  The results are cached on the crtc for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A = watermark_a * (mode->clock/1000) * hsc
		 * / 1000 / 16, truncated
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B, same formula with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2182
2183 void dce6_bandwidth_update(struct radeon_device *rdev)
2184 {
2185         struct drm_display_mode *mode0 = NULL;
2186         struct drm_display_mode *mode1 = NULL;
2187         u32 num_heads = 0, lb_size;
2188         int i;
2189
2190         radeon_update_display_priority(rdev);
2191
2192         for (i = 0; i < rdev->num_crtc; i++) {
2193                 if (rdev->mode_info.crtcs[i]->base.enabled)
2194                         num_heads++;
2195         }
2196         for (i = 0; i < rdev->num_crtc; i += 2) {
2197                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2198                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2199                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2200                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2201                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2202                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2203         }
2204 }
2205
2206 /*
2207  * Core functions
2208  */
2209 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2210 {
2211         const u32 num_tile_mode_states = 32;
2212         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2213
2214         switch (rdev->config.si.mem_row_size_in_kb) {
2215         case 1:
2216                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2217                 break;
2218         case 2:
2219         default:
2220                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2221                 break;
2222         case 4:
2223                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2224                 break;
2225         }
2226
2227         if ((rdev->family == CHIP_TAHITI) ||
2228             (rdev->family == CHIP_PITCAIRN)) {
2229                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2230                         switch (reg_offset) {
2231                         case 0:  /* non-AA compressed depth or any compressed stencil */
2232                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2234                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2235                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2236                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2237                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2240                                 break;
2241                         case 1:  /* 2xAA/4xAA compressed depth only */
2242                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2244                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2245                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2246                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2247                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2250                                 break;
2251                         case 2:  /* 8xAA compressed depth only */
2252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2255                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2256                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2257                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2260                                 break;
2261                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2262                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2264                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2265                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2266                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2267                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2270                                 break;
2271                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2275                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2276                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2277                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2279                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2280                                 break;
2281                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2282                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2284                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2285                                                  TILE_SPLIT(split_equal_to_row_size) |
2286                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2287                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2289                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2290                                 break;
2291                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2292                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2293                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2294                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2295                                                  TILE_SPLIT(split_equal_to_row_size) |
2296                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2297                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2300                                 break;
2301                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2302                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2304                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2305                                                  TILE_SPLIT(split_equal_to_row_size) |
2306                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2307                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2309                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2310                                 break;
2311                         case 8:  /* 1D and 1D Array Surfaces */
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2313                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2314                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2315                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2316                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2317                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2318                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2319                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2320                                 break;
2321                         case 9:  /* Displayable maps. */
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2326                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2327                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2329                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2330                                 break;
2331                         case 10:  /* Display 8bpp. */
2332                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2334                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2335                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2336                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2337                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2338                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2339                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2340                                 break;
2341                         case 11:  /* Display 16bpp. */
2342                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2344                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2345                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2347                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2349                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2350                                 break;
2351                         case 12:  /* Display 32bpp. */
2352                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2355                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2356                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2357                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2360                                 break;
2361                         case 13:  /* Thin. */
2362                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2363                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2364                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2365                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2366                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2367                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2370                                 break;
2371                         case 14:  /* Thin 8 bpp. */
2372                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2374                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2375                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2376                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2377                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2378                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2380                                 break;
2381                         case 15:  /* Thin 16 bpp. */
2382                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2384                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2385                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2386                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2387                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2390                                 break;
2391                         case 16:  /* Thin 32 bpp. */
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2395                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2396                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2397                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2400                                 break;
2401                         case 17:  /* Thin 64 bpp. */
2402                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2404                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2405                                                  TILE_SPLIT(split_equal_to_row_size) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2407                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2410                                 break;
2411                         case 21:  /* 8 bpp PRT. */
2412                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2414                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2415                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2416                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2417                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2418                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2420                                 break;
2421                         case 22:  /* 16 bpp PRT */
2422                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2424                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2425                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2426                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2427                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2429                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2430                                 break;
2431                         case 23:  /* 32 bpp PRT */
2432                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2434                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2435                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2436                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2437                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2440                                 break;
2441                         case 24:  /* 64 bpp PRT */
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2445                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2446                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2447                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2449                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2450                                 break;
2451                         case 25:  /* 128 bpp PRT */
2452                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2454                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2455                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2456                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2457                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2460                                 break;
2461                         default:
2462                                 gb_tile_moden = 0;
2463                                 break;
2464                         }
2465                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2466                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2467                 }
2468         } else if ((rdev->family == CHIP_VERDE) ||
2469                    (rdev->family == CHIP_OLAND) ||
2470                    (rdev->family == CHIP_HAINAN)) {
2471                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2472                         switch (reg_offset) {
2473                         case 0:  /* non-AA compressed depth or any compressed stencil */
2474                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2475                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2476                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2477                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2478                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2479                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2482                                 break;
2483                         case 1:  /* 2xAA/4xAA compressed depth only */
2484                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2486                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2487                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2488                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2489                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2491                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2492                                 break;
2493                         case 2:  /* 8xAA compressed depth only */
2494                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2496                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2497                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2498                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2499                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2501                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2502                                 break;
2503                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2504                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2506                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2507                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2508                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2509                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2512                                 break;
2513                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2514                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2517                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2518                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2519                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2521                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2522                                 break;
2523                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2524                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2527                                                  TILE_SPLIT(split_equal_to_row_size) |
2528                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2529                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2531                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532                                 break;
2533                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2534                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2536                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2537                                                  TILE_SPLIT(split_equal_to_row_size) |
2538                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2539                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2542                                 break;
2543                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2544                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2547                                                  TILE_SPLIT(split_equal_to_row_size) |
2548                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2549                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2552                                 break;
2553                         case 8:  /* 1D and 1D Array Surfaces */
2554                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2555                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2557                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2558                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2559                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2562                                 break;
2563                         case 9:  /* Displayable maps. */
2564                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2565                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2568                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2569                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2572                                 break;
2573                         case 10:  /* Display 8bpp. */
2574                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2576                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2577                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2578                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2579                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2582                                 break;
2583                         case 11:  /* Display 16bpp. */
2584                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2587                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2588                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2589                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2592                                 break;
2593                         case 12:  /* Display 32bpp. */
2594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2597                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2598                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2599                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2602                                 break;
2603                         case 13:  /* Thin. */
2604                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2605                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2606                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2609                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2612                                 break;
2613                         case 14:  /* Thin 8 bpp. */
2614                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2616                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2617                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2619                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2622                                 break;
2623                         case 15:  /* Thin 16 bpp. */
2624                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2626                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2628                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2629                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2631                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2632                                 break;
2633                         case 16:  /* Thin 32 bpp. */
2634                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2635                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2637                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2638                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2639                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2642                                 break;
2643                         case 17:  /* Thin 64 bpp. */
2644                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2646                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2647                                                  TILE_SPLIT(split_equal_to_row_size) |
2648                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2649                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2651                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2652                                 break;
2653                         case 21:  /* 8 bpp PRT. */
2654                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2656                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2657                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2658                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2659                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2660                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2662                                 break;
2663                         case 22:  /* 16 bpp PRT */
2664                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2666                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2667                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2668                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2669                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2671                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2672                                 break;
2673                         case 23:  /* 32 bpp PRT */
2674                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2676                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2677                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2678                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2679                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2682                                 break;
2683                         case 24:  /* 64 bpp PRT */
2684                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2686                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2687                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2689                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2691                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2692                                 break;
2693                         case 25:  /* 128 bpp PRT */
2694                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2698                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2699                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2701                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2702                                 break;
2703                         default:
2704                                 gb_tile_moden = 0;
2705                                 break;
2706                         }
2707                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2708                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2709                 }
2710         } else
2711                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2712 }
2713
2714 static void si_select_se_sh(struct radeon_device *rdev,
2715                             u32 se_num, u32 sh_num)
2716 {
2717         u32 data = INSTANCE_BROADCAST_WRITES;
2718
2719         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2720                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2721         else if (se_num == 0xffffffff)
2722                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2723         else if (sh_num == 0xffffffff)
2724                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2725         else
2726                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2727         WREG32(GRBM_GFX_INDEX, data);
2728 }
2729
2730 static u32 si_create_bitmask(u32 bit_width)
2731 {
2732         u32 i, mask = 0;
2733
2734         for (i = 0; i < bit_width; i++) {
2735                 mask <<= 1;
2736                 mask |= 1;
2737         }
2738         return mask;
2739 }
2740
2741 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2742 {
2743         u32 data, mask;
2744
2745         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2746         if (data & 1)
2747                 data &= INACTIVE_CUS_MASK;
2748         else
2749                 data = 0;
2750         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2751
2752         data >>= INACTIVE_CUS_SHIFT;
2753
2754         mask = si_create_bitmask(cu_per_sh);
2755
2756         return ~data & mask;
2757 }
2758
2759 static void si_setup_spi(struct radeon_device *rdev,
2760                          u32 se_num, u32 sh_per_se,
2761                          u32 cu_per_sh)
2762 {
2763         int i, j, k;
2764         u32 data, mask, active_cu;
2765
2766         for (i = 0; i < se_num; i++) {
2767                 for (j = 0; j < sh_per_se; j++) {
2768                         si_select_se_sh(rdev, i, j);
2769                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2770                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2771
2772                         mask = 1;
2773                         for (k = 0; k < 16; k++) {
2774                                 mask <<= k;
2775                                 if (active_cu & mask) {
2776                                         data &= ~mask;
2777                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2778                                         break;
2779                                 }
2780                         }
2781                 }
2782         }
2783         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2784 }
2785
2786 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2787                               u32 max_rb_num, u32 se_num,
2788                               u32 sh_per_se)
2789 {
2790         u32 data, mask;
2791
2792         data = RREG32(CC_RB_BACKEND_DISABLE);
2793         if (data & 1)
2794                 data &= BACKEND_DISABLE_MASK;
2795         else
2796                 data = 0;
2797         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2798
2799         data >>= BACKEND_DISABLE_SHIFT;
2800
2801         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2802
2803         return data & mask;
2804 }
2805
/* Collect the disabled-RB state from every SE/SH, derive the set of
 * enabled RBs, and program PA_SC_RASTER_CONFIG per SE so rasterization
 * maps only onto working backends.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather per-SH disabled bits into one global bitmap; each SH
	 * contributes TAHITI_RB_BITMAP_WIDTH_PER_SH bits.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: enabled_rbs has a set bit for every working RB. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Program the raster config for each SE, consuming two
	 * enabled-RB bits per SH.  The RB_MAP_* choice per 2-bit pattern
	 * follows the hardware mapping tables (note case 2 -> MAP_3 and
	 * case 3 -> MAP_2 is intentional, not a typo).
	 */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2853
/* One-time GFX block initialization: record the per-ASIC shader/backend
 * configuration, program the address/tiling configuration registers,
 * set up RBs and SPI, and apply 3D engine HW defaults.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-ASIC limits (SE/pipe/CU/backend counts, FIFO sizes) and the
	 * "golden" GB_ADDR_CONFIG value.  Note the default falls through
	 * to the VERDE settings for unknown SI family members.
	 */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* Derive the DRAM row size (in KB) from the MC column count,
	 * clamped to 4 KB.
	 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Broadcast the address config to every block that does its own
	 * address swizzling (display DMIF, HDP, the two async DMA engines,
	 * and UVD when present).
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write keeps reserved SX_DEBUG_1 bits intact */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable the CB perf counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the hardware settle before further setup */
	udelay(50);
}
3110
3111 /*
3112  * GPU scratch registers helpers function.
3113  */
3114 static void si_scratch_init(struct radeon_device *rdev)
3115 {
3116         int i;
3117
3118         rdev->scratch.num_reg = 7;
3119         rdev->scratch.reg_base = SCRATCH_REG0;
3120         for (i = 0; i < rdev->scratch.num_reg; i++) {
3121                 rdev->scratch.free[i] = true;
3122                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3123         }
3124 }
3125
/* Emit the PM4 packets that signal a fence on the given ring: flush
 * the GPU read caches, then an EVENT_WRITE_EOP that writes fence->seq
 * to the fence address and raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* full surface size */
	radeon_ring_write(ring, 0);          /* base address */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1)/INT_SEL(2) select the 32-bit seq write + interrupt
	 * behavior (per the PM4 EOP packet spec); high address limited to
	 * 8 bits here.
	 */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3152
3153 /*
3154  * IB stuff
3155  */
/* Schedule an indirect buffer (IB) on a CP ring.  Const IBs get a
 * SWITCH_BUFFER preamble and use INDIRECT_BUFFER_CONST; normal IBs
 * record the post-IB read pointer (to a scratch reg or writeback slot)
 * and flush the read caches for the IB's VM id afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this packet + 4 for the IB packet
			 * + 8 for the cache flush below = rptr after this IB.
			 */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for WRITE_DATA + 4 + 8 as above */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) | /* request byte swap of IB data */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* vm id (bits 31:24) of 0 means the kernel's own address space */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* full surface size */
		radeon_ring_write(ring, 0);          /* base address */
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3212
3213 /*
3214  * CP.
3215  */
3216 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3217 {
3218         if (enable)
3219                 WREG32(CP_ME_CNTL, 0);
3220         else {
3221                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3222                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3223                 WREG32(SCRATCH_UMSK, 0);
3224                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3225                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3226                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3227         }
3228         udelay(50);
3229 }
3230
3231 static int si_cp_load_microcode(struct radeon_device *rdev)
3232 {
3233         const __be32 *fw_data;
3234         int i;
3235
3236         if (!rdev->me_fw || !rdev->pfp_fw)
3237                 return -EINVAL;
3238
3239         si_cp_enable(rdev, false);
3240
3241         /* PFP */
3242         fw_data = (const __be32 *)rdev->pfp_fw->data;
3243         WREG32(CP_PFP_UCODE_ADDR, 0);
3244         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3245                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3246         WREG32(CP_PFP_UCODE_ADDR, 0);
3247
3248         /* CE */
3249         fw_data = (const __be32 *)rdev->ce_fw->data;
3250         WREG32(CP_CE_UCODE_ADDR, 0);
3251         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3252                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3253         WREG32(CP_CE_UCODE_ADDR, 0);
3254
3255         /* ME */
3256         fw_data = (const __be32 *)rdev->me_fw->data;
3257         WREG32(CP_ME_RAM_WADDR, 0);
3258         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3259                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3260         WREG32(CP_ME_RAM_WADDR, 0);
3261
3262         WREG32(CP_PFP_UCODE_ADDR, 0);
3263         WREG32(CP_CE_UCODE_ADDR, 0);
3264         WREG32(CP_ME_RAM_WADDR, 0);
3265         WREG32(CP_ME_RAM_RADDR, 0);
3266         return 0;
3267 }
3268
3269 static int si_cp_start(struct radeon_device *rdev)
3270 {
3271         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3272         int r, i;
3273
3274         r = radeon_ring_lock(rdev, ring, 7 + 4);
3275         if (r) {
3276                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3277                 return r;
3278         }
3279         /* init the CP */
3280         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3281         radeon_ring_write(ring, 0x1);
3282         radeon_ring_write(ring, 0x0);
3283         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3284         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3285         radeon_ring_write(ring, 0);
3286         radeon_ring_write(ring, 0);
3287
3288         /* init the CE partitions */
3289         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3290         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3291         radeon_ring_write(ring, 0xc000);
3292         radeon_ring_write(ring, 0xe000);
3293         radeon_ring_unlock_commit(rdev, ring);
3294
3295         si_cp_enable(rdev, true);
3296
3297         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3298         if (r) {
3299                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3300                 return r;
3301         }
3302
3303         /* setup clear context state */
3304         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3305         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3306
3307         for (i = 0; i < si_default_size; i++)
3308                 radeon_ring_write(ring, si_default_state[i]);
3309
3310         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3311         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3312
3313         /* set clear context state */
3314         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3315         radeon_ring_write(ring, 0);
3316
3317         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3318         radeon_ring_write(ring, 0x00000316);
3319         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3320         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3321
3322         radeon_ring_unlock_commit(rdev, ring);
3323
3324         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3325                 ring = &rdev->ring[i];
3326                 r = radeon_ring_lock(rdev, ring, 2);
3327
3328                 /* clear the compute context state */
3329                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3330                 radeon_ring_write(ring, 0);
3331
3332                 radeon_ring_unlock_commit(rdev, ring);
3333         }
3334
3335         return 0;
3336 }
3337
3338 static void si_cp_fini(struct radeon_device *rdev)
3339 {
3340         struct radeon_ring *ring;
3341         si_cp_enable(rdev, false);
3342
3343         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3344         radeon_ring_fini(rdev, ring);
3345         radeon_scratch_free(rdev, ring->rptr_save_reg);
3346
3347         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3348         radeon_ring_fini(rdev, ring);
3349         radeon_scratch_free(rdev, ring->rptr_save_reg);
3350
3351         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3352         radeon_ring_fini(rdev, ring);
3353         radeon_scratch_free(rdev, ring->rptr_save_reg);
3354 }
3355
3356 static int si_cp_resume(struct radeon_device *rdev)
3357 {
3358         struct radeon_ring *ring;
3359         u32 tmp;
3360         u32 rb_bufsz;
3361         int r;
3362
3363         /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
3364         WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
3365                                  SOFT_RESET_PA |
3366                                  SOFT_RESET_VGT |
3367                                  SOFT_RESET_SPI |
3368                                  SOFT_RESET_SX));
3369         RREG32(GRBM_SOFT_RESET);
3370         mdelay(15);
3371         WREG32(GRBM_SOFT_RESET, 0);
3372         RREG32(GRBM_SOFT_RESET);
3373
3374         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3375         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3376
3377         /* Set the write pointer delay */
3378         WREG32(CP_RB_WPTR_DELAY, 0);
3379
3380         WREG32(CP_DEBUG, 0);
3381         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3382
3383         /* ring 0 - compute and gfx */
3384         /* Set ring buffer size */
3385         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3386         rb_bufsz = drm_order(ring->ring_size / 8);
3387         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3388 #ifdef __BIG_ENDIAN
3389         tmp |= BUF_SWAP_32BIT;
3390 #endif
3391         WREG32(CP_RB0_CNTL, tmp);
3392
3393         /* Initialize the ring buffer's read and write pointers */
3394         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3395         ring->wptr = 0;
3396         WREG32(CP_RB0_WPTR, ring->wptr);
3397
3398         /* set the wb address whether it's enabled or not */
3399         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3400         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3401
3402         if (rdev->wb.enabled)
3403                 WREG32(SCRATCH_UMSK, 0xff);
3404         else {
3405                 tmp |= RB_NO_UPDATE;
3406                 WREG32(SCRATCH_UMSK, 0);
3407         }
3408
3409         mdelay(1);
3410         WREG32(CP_RB0_CNTL, tmp);
3411
3412         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3413
3414         ring->rptr = RREG32(CP_RB0_RPTR);
3415
3416         /* ring1  - compute only */
3417         /* Set ring buffer size */
3418         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3419         rb_bufsz = drm_order(ring->ring_size / 8);
3420         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3421 #ifdef __BIG_ENDIAN
3422         tmp |= BUF_SWAP_32BIT;
3423 #endif
3424         WREG32(CP_RB1_CNTL, tmp);
3425
3426         /* Initialize the ring buffer's read and write pointers */
3427         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3428         ring->wptr = 0;
3429         WREG32(CP_RB1_WPTR, ring->wptr);
3430
3431         /* set the wb address whether it's enabled or not */
3432         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3433         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3434
3435         mdelay(1);
3436         WREG32(CP_RB1_CNTL, tmp);
3437
3438         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3439
3440         ring->rptr = RREG32(CP_RB1_RPTR);
3441
3442         /* ring2 - compute only */
3443         /* Set ring buffer size */
3444         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3445         rb_bufsz = drm_order(ring->ring_size / 8);
3446         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3447 #ifdef __BIG_ENDIAN
3448         tmp |= BUF_SWAP_32BIT;
3449 #endif
3450         WREG32(CP_RB2_CNTL, tmp);
3451
3452         /* Initialize the ring buffer's read and write pointers */
3453         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3454         ring->wptr = 0;
3455         WREG32(CP_RB2_WPTR, ring->wptr);
3456
3457         /* set the wb address whether it's enabled or not */
3458         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3459         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3460
3461         mdelay(1);
3462         WREG32(CP_RB2_CNTL, tmp);
3463
3464         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3465
3466         ring->rptr = RREG32(CP_RB2_RPTR);
3467
3468         /* start the rings */
3469         si_cp_start(rdev);
3470         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3471         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3472         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3473         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3474         if (r) {
3475                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3476                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3477                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3478                 return r;
3479         }
3480         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3481         if (r) {
3482                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3483         }
3484         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3485         if (r) {
3486                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3487         }
3488
3489         return 0;
3490 }
3491
/* Poll the GPU status registers and build a RADEON_RESET_* bitmask of the
 * engines that currently look busy/hung.  A return of 0 means the GPU
 * appears idle and no soft reset is needed.  Read-only: no state changed.
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	/* any busy shader/raster pipeline block implies a gfx reset */
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3572
/* Soft-reset the blocks selected by @reset_mask (a RADEON_RESET_* bitmask
 * as produced by si_gpu_check_soft_reset).  Sequence: halt CP and DMA,
 * stop the MC clients, assert the GRBM/SRBM reset bits, wait, deassert,
 * then resume the MC.  The ordering and delays here are load-bearing.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status/fault registers to aid debugging of the hang */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* stop the DMA ring buffers before resetting the engines */
	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* quiesce the memory controller before touching reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert, wait, deassert; the extra RREG32s flush the writes */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3697
3698 int si_asic_reset(struct radeon_device *rdev)
3699 {
3700         u32 reset_mask;
3701
3702         reset_mask = si_gpu_check_soft_reset(rdev);
3703
3704         if (reset_mask)
3705                 r600_set_bios_scratch_engine_hung(rdev, true);
3706
3707         si_gpu_soft_reset(rdev, reset_mask);
3708
3709         reset_mask = si_gpu_check_soft_reset(rdev);
3710
3711         if (!reset_mask)
3712                 r600_set_bios_scratch_engine_hung(rdev, false);
3713
3714         return 0;
3715 }
3716
3717 /**
3718  * si_gfx_is_lockup - Check if the GFX engine is locked up
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ring: radeon_ring structure holding ring information
3722  *
3723  * Check if the GFX engine is locked up.
3724  * Returns true if the engine appears to be locked up, false if not.
3725  */
3726 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3727 {
3728         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3729
3730         if (!(reset_mask & (RADEON_RESET_GFX |
3731                             RADEON_RESET_COMPUTE |
3732                             RADEON_RESET_CP))) {
3733                 radeon_ring_lockup_update(ring);
3734                 return false;
3735         }
3736         /* force CP activities */
3737         radeon_ring_force_activity(rdev, ring);
3738         return radeon_ring_test_lockup(rdev, ring);
3739 }
3740
3741 /**
3742  * si_dma_is_lockup - Check if the DMA engine is locked up
3743  *
3744  * @rdev: radeon_device pointer
3745  * @ring: radeon_ring structure holding ring information
3746  *
3747  * Check if the async DMA engine is locked up.
3748  * Returns true if the engine appears to be locked up, false if not.
3749  */
3750 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3751 {
3752         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3753         u32 mask;
3754
3755         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3756                 mask = RADEON_RESET_DMA;
3757         else
3758                 mask = RADEON_RESET_DMA1;
3759
3760         if (!(reset_mask & mask)) {
3761                 radeon_ring_lockup_update(ring);
3762                 return false;
3763         }
3764         /* force ring activities */
3765         radeon_ring_force_activity(rdev, ring);
3766         return radeon_ring_test_lockup(rdev, ring);
3767 }
3768
3769 /* MC */
/* Program the memory controller: set up the system/VRAM apertures and the
 * FB location while the MC clients are stopped.  Order matters: HDP init,
 * stop MC, update apertures, wait idle, resume MC. */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* quiesce MC clients before changing the apertures */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (bits 31:16) and base (bits 15:0), both
	 * in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on SI: program an empty aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3820
/* Place VRAM and GTT in the MC address space.  VRAM is clamped to
 * 0xFFC0000000 so at least 1GB of the 40-bit space remains for GTT. */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	/* NOTE(review): this mixes &rdev->mc with the mc argument; it is
	 * only equivalent because the sole visible caller passes &rdev->mc
	 * — confirm before calling with any other mc */
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
3834
3835 static int si_mc_init(struct radeon_device *rdev)
3836 {
3837         u32 tmp;
3838         int chansize, numchan;
3839
3840         /* Get VRAM informations */
3841         rdev->mc.vram_is_ddr = true;
3842         tmp = RREG32(MC_ARB_RAMCFG);
3843         if (tmp & CHANSIZE_OVERRIDE) {
3844                 chansize = 16;
3845         } else if (tmp & CHANSIZE_MASK) {
3846                 chansize = 64;
3847         } else {
3848                 chansize = 32;
3849         }
3850         tmp = RREG32(MC_SHARED_CHMAP);
3851         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3852         case 0:
3853         default:
3854                 numchan = 1;
3855                 break;
3856         case 1:
3857                 numchan = 2;
3858                 break;
3859         case 2:
3860                 numchan = 4;
3861                 break;
3862         case 3:
3863                 numchan = 8;
3864                 break;
3865         case 4:
3866                 numchan = 3;
3867                 break;
3868         case 5:
3869                 numchan = 6;
3870                 break;
3871         case 6:
3872                 numchan = 10;
3873                 break;
3874         case 7:
3875                 numchan = 12;
3876                 break;
3877         case 8:
3878                 numchan = 16;
3879                 break;
3880         }
3881         rdev->mc.vram_width = numchan * chansize;
3882         /* Could aper size report 0 ? */
3883         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3884         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3885         /* size in MB on si */
3886         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3887         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3888         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3889         si_vram_gtt_location(rdev, &rdev->mc);
3890         radeon_update_bandwidth_info(rdev);
3891
3892         return 0;
3893 }
3894
3895 /*
3896  * GART
3897  */
/* Make GART page table updates visible to the GPU: flush the HDP cache,
 * then request a VM TLB invalidate. */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3906
/* Enable the PCIE GART: pin the page table, program the L1 TLB and L2
 * cache, set up VM context 0 for the GART aperture and contexts 1-15
 * for per-process VMs, then flush the TLB.  Returns 0 on success or a
 * negative error code if the page table cannot be pinned. */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	/* context 0 maps the GTT aperture; faults fall back to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, written as magic offsets;
	 * purpose not derivable from this file — confirm against register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	/* enable two-level page tables and interrupt+default-page handling
	 * for every class of protection fault */
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3991
/* Disable the PCIE GART: turn off all VM contexts, disable the L1 TLB
 * and L2 cache, and unpin the page table. */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	/* same config as enable but without ENABLE_L1_TLB */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	/* same config as enable but without ENABLE_L2_CACHE */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4010
/* Full GART teardown: disable the hardware first, then free the page
 * table VRAM and the gart bookkeeping.  Order matters. */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4017
4018 /* vm parser */
4019 static bool si_vm_reg_valid(u32 reg)
4020 {
4021         /* context regs are fine */
4022         if (reg >= 0x28000)
4023                 return true;
4024
4025         /* check config regs */
4026         switch (reg) {
4027         case GRBM_GFX_INDEX:
4028         case CP_STRMOUT_CNTL:
4029         case VGT_VTX_VECT_EJECT_REG:
4030         case VGT_CACHE_INVALIDATION:
4031         case VGT_ESGS_RING_SIZE:
4032         case VGT_GSVS_RING_SIZE:
4033         case VGT_GS_VERTEX_REUSE:
4034         case VGT_PRIMITIVE_TYPE:
4035         case VGT_INDEX_TYPE:
4036         case VGT_NUM_INDICES:
4037         case VGT_NUM_INSTANCES:
4038         case VGT_TF_RING_SIZE:
4039         case VGT_HS_OFFCHIP_PARAM:
4040         case VGT_TF_MEMORY_BASE:
4041         case PA_CL_ENHANCE:
4042         case PA_SU_LINE_STIPPLE_VALUE:
4043         case PA_SC_LINE_STIPPLE_STATE:
4044         case PA_SC_ENHANCE:
4045         case SQC_CACHES:
4046         case SPI_STATIC_THREAD_MGMT_1:
4047         case SPI_STATIC_THREAD_MGMT_2:
4048         case SPI_STATIC_THREAD_MGMT_3:
4049         case SPI_PS_MAX_WAVE_ID:
4050         case SPI_CONFIG_CNTL:
4051         case SPI_CONFIG_CNTL_1:
4052         case TA_CNTL_AUX:
4053                 return true;
4054         default:
4055                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4056                 return false;
4057         }
4058 }
4059
4060 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4061                                   u32 *ib, struct radeon_cs_packet *pkt)
4062 {
4063         switch (pkt->opcode) {
4064         case PACKET3_NOP:
4065         case PACKET3_SET_BASE:
4066         case PACKET3_SET_CE_DE_COUNTERS:
4067         case PACKET3_LOAD_CONST_RAM:
4068         case PACKET3_WRITE_CONST_RAM:
4069         case PACKET3_WRITE_CONST_RAM_OFFSET:
4070         case PACKET3_DUMP_CONST_RAM:
4071         case PACKET3_INCREMENT_CE_COUNTER:
4072         case PACKET3_WAIT_ON_DE_COUNTER:
4073         case PACKET3_CE_WRITE:
4074                 break;
4075         default:
4076                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4077                 return -EINVAL;
4078         }
4079         return 0;
4080 }
4081
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 from a VM IB on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB command stream dwords
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * Whitelist check for type-3 packets submitted from a VM-protected IB.
 * Opcodes that cannot touch registers pass through unchanged; opcodes
 * that can write registers have every target register validated with
 * si_vm_reg_valid().  Returns 0 if the packet is allowed, -EINVAL if
 * the opcode or one of its register targets is rejected.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
                                   u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;         /* first dword after the packet header */
        u32 idx_value = ib[idx];
        u32 start_reg, end_reg, reg, i;
        u32 command, info;

        switch (pkt->opcode) {
        /* these opcodes cannot write arbitrary registers - allowed as-is */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_INDEX_BUFFER_SIZE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_DRAW_INDIRECT:
        case PACKET3_DRAW_INDEX_INDIRECT:
        case PACKET3_INDEX_BASE:
        case PACKET3_DRAW_INDEX_2:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_INDEX_TYPE:
        case PACKET3_DRAW_INDIRECT_MULTI:
        case PACKET3_DRAW_INDEX_AUTO:
        case PACKET3_DRAW_INDEX_IMMD:
        case PACKET3_NUM_INSTANCES:
        case PACKET3_DRAW_INDEX_MULTI_AUTO:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_DRAW_INDEX_OFFSET_2:
        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
        case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
        case PACKET3_MPEG_INDEX:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* dst-sel field == 0 appears to select a mem-mapped register;
                 * validate the register offset in that case */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                /* dst-sel field == 0 -> register destination */
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* one-reg-write mode: all payload goes to a
                                 * single register */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* payload spans consecutive registers */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* write-space bit set -> register write */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* dst-sel bit set -> register destination */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_SET_CONFIG_REG:
                /* bounds-check the whole register range, then each register */
                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
                end_reg = 4 * pkt->count + start_reg - 4;
                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
                        return -EINVAL;
                }
                for (i = 0; i < pkt->count; i++) {
                        reg = start_reg + (4 * i);
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_CP_DMA:
                command = ib[idx + 4];
                info = ib[idx + 1];
                if (command & PACKET3_CP_DMA_CMD_SAS) {
                        /* src address space is register */
                        if (((info & 0x60000000) >> 29) == 0) {
                                start_reg = idx_value << 2;
                                if (command & PACKET3_CP_DMA_CMD_SAIC) {
                                        /* no auto-increment: single register */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad SRC register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        /* low 21 bits of command hold the count */
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad SRC register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                if (command & PACKET3_CP_DMA_CMD_DAS) {
                        /* dst address space is register */
                        if (((info & 0x00300000) >> 20) == 0) {
                                start_reg = ib[idx + 2];
                                if (command & PACKET3_CP_DMA_CMD_DAIC) {
                                        /* no auto-increment: single register */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad DST register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad DST register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                break;
        default:
                DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
4240
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a VM IB on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB command stream dwords
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * Whitelist check for type-3 packets submitted from a VM-protected IB on
 * the compute (CP1/CP2) rings.  Smaller opcode set than the GFX check:
 * draw packets and CP_DMA are not accepted here.  Register-writing
 * opcodes have each target validated with si_vm_reg_valid().
 * Returns 0 if allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
                                       u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;         /* first dword after the packet header */
        u32 idx_value = ib[idx];
        u32 start_reg, reg, i;

        switch (pkt->opcode) {
        /* these opcodes cannot write arbitrary registers - allowed as-is */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* dst-sel field == 0 appears to select a mem-mapped register */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                /* dst-sel field == 0 -> register destination */
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* one-reg-write mode: single register */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* payload spans consecutive registers */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* write-space bit set -> register write */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* dst-sel bit set -> register destination */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
4322
4323 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4324 {
4325         int ret = 0;
4326         u32 idx = 0;
4327         struct radeon_cs_packet pkt;
4328
4329         do {
4330                 pkt.idx = idx;
4331                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4332                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4333                 pkt.one_reg_wr = 0;
4334                 switch (pkt.type) {
4335                 case RADEON_PACKET_TYPE0:
4336                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4337                         ret = -EINVAL;
4338                         break;
4339                 case RADEON_PACKET_TYPE2:
4340                         idx += 1;
4341                         break;
4342                 case RADEON_PACKET_TYPE3:
4343                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4344                         if (ib->is_const_ib)
4345                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4346                         else {
4347                                 switch (ib->ring) {
4348                                 case RADEON_RING_TYPE_GFX_INDEX:
4349                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4350                                         break;
4351                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4352                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4353                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4354                                         break;
4355                                 default:
4356                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4357                                         ret = -EINVAL;
4358                                         break;
4359                                 }
4360                         }
4361                         idx += pkt.count + 2;
4362                         break;
4363                 default:
4364                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4365                         ret = -EINVAL;
4366                         break;
4367                 }
4368                 if (ret)
4369                         break;
4370         } while (idx < ib->length_dw);
4371
4372         return ret;
4373 }
4374
4375 /*
4376  * vm
4377  */
4378 int si_vm_init(struct radeon_device *rdev)
4379 {
4380         /* number of VMs */
4381         rdev->vm_manager.nvm = 16;
4382         /* base offset of vram pages */
4383         rdev->vm_manager.vram_base_offset = 0;
4384
4385         return 0;
4386 }
4387
/**
 * si_vm_fini - tear down the VM manager
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do on SI; kept as an asic-callback stub.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4391
4392 /**
4393  * si_vm_decode_fault - print human readable fault info
4394  *
4395  * @rdev: radeon_device pointer
4396  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4397  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4398  *
4399  * Print human readable fault information (SI).
4400  */
4401 static void si_vm_decode_fault(struct radeon_device *rdev,
4402                                u32 status, u32 addr)
4403 {
4404         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4405         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4406         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4407         char *block;
4408
4409         if (rdev->family == CHIP_TAHITI) {
4410                 switch (mc_id) {
4411                 case 160:
4412                 case 144:
4413                 case 96:
4414                 case 80:
4415                 case 224:
4416                 case 208:
4417                 case 32:
4418                 case 16:
4419                         block = "CB";
4420                         break;
4421                 case 161:
4422                 case 145:
4423                 case 97:
4424                 case 81:
4425                 case 225:
4426                 case 209:
4427                 case 33:
4428                 case 17:
4429                         block = "CB_FMASK";
4430                         break;
4431                 case 162:
4432                 case 146:
4433                 case 98:
4434                 case 82:
4435                 case 226:
4436                 case 210:
4437                 case 34:
4438                 case 18:
4439                         block = "CB_CMASK";
4440                         break;
4441                 case 163:
4442                 case 147:
4443                 case 99:
4444                 case 83:
4445                 case 227:
4446                 case 211:
4447                 case 35:
4448                 case 19:
4449                         block = "CB_IMMED";
4450                         break;
4451                 case 164:
4452                 case 148:
4453                 case 100:
4454                 case 84:
4455                 case 228:
4456                 case 212:
4457                 case 36:
4458                 case 20:
4459                         block = "DB";
4460                         break;
4461                 case 165:
4462                 case 149:
4463                 case 101:
4464                 case 85:
4465                 case 229:
4466                 case 213:
4467                 case 37:
4468                 case 21:
4469                         block = "DB_HTILE";
4470                         break;
4471                 case 167:
4472                 case 151:
4473                 case 103:
4474                 case 87:
4475                 case 231:
4476                 case 215:
4477                 case 39:
4478                 case 23:
4479                         block = "DB_STEN";
4480                         break;
4481                 case 72:
4482                 case 68:
4483                 case 64:
4484                 case 8:
4485                 case 4:
4486                 case 0:
4487                 case 136:
4488                 case 132:
4489                 case 128:
4490                 case 200:
4491                 case 196:
4492                 case 192:
4493                         block = "TC";
4494                         break;
4495                 case 112:
4496                 case 48:
4497                         block = "CP";
4498                         break;
4499                 case 49:
4500                 case 177:
4501                 case 50:
4502                 case 178:
4503                         block = "SH";
4504                         break;
4505                 case 53:
4506                 case 190:
4507                         block = "VGT";
4508                         break;
4509                 case 117:
4510                         block = "IH";
4511                         break;
4512                 case 51:
4513                 case 115:
4514                         block = "RLC";
4515                         break;
4516                 case 119:
4517                 case 183:
4518                         block = "DMA0";
4519                         break;
4520                 case 61:
4521                         block = "DMA1";
4522                         break;
4523                 case 248:
4524                 case 120:
4525                         block = "HDP";
4526                         break;
4527                 default:
4528                         block = "unknown";
4529                         break;
4530                 }
4531         } else {
4532                 switch (mc_id) {
4533                 case 32:
4534                 case 16:
4535                 case 96:
4536                 case 80:
4537                 case 160:
4538                 case 144:
4539                 case 224:
4540                 case 208:
4541                         block = "CB";
4542                         break;
4543                 case 33:
4544                 case 17:
4545                 case 97:
4546                 case 81:
4547                 case 161:
4548                 case 145:
4549                 case 225:
4550                 case 209:
4551                         block = "CB_FMASK";
4552                         break;
4553                 case 34:
4554                 case 18:
4555                 case 98:
4556                 case 82:
4557                 case 162:
4558                 case 146:
4559                 case 226:
4560                 case 210:
4561                         block = "CB_CMASK";
4562                         break;
4563                 case 35:
4564                 case 19:
4565                 case 99:
4566                 case 83:
4567                 case 163:
4568                 case 147:
4569                 case 227:
4570                 case 211:
4571                         block = "CB_IMMED";
4572                         break;
4573                 case 36:
4574                 case 20:
4575                 case 100:
4576                 case 84:
4577                 case 164:
4578                 case 148:
4579                 case 228:
4580                 case 212:
4581                         block = "DB";
4582                         break;
4583                 case 37:
4584                 case 21:
4585                 case 101:
4586                 case 85:
4587                 case 165:
4588                 case 149:
4589                 case 229:
4590                 case 213:
4591                         block = "DB_HTILE";
4592                         break;
4593                 case 39:
4594                 case 23:
4595                 case 103:
4596                 case 87:
4597                 case 167:
4598                 case 151:
4599                 case 231:
4600                 case 215:
4601                         block = "DB_STEN";
4602                         break;
4603                 case 72:
4604                 case 68:
4605                 case 8:
4606                 case 4:
4607                 case 136:
4608                 case 132:
4609                 case 200:
4610                 case 196:
4611                         block = "TC";
4612                         break;
4613                 case 112:
4614                 case 48:
4615                         block = "CP";
4616                         break;
4617                 case 49:
4618                 case 177:
4619                 case 50:
4620                 case 178:
4621                         block = "SH";
4622                         break;
4623                 case 53:
4624                         block = "VGT";
4625                         break;
4626                 case 117:
4627                         block = "IH";
4628                         break;
4629                 case 51:
4630                 case 115:
4631                         block = "RLC";
4632                         break;
4633                 case 119:
4634                 case 183:
4635                         block = "DMA0";
4636                         break;
4637                 case 61:
4638                         block = "DMA1";
4639                         break;
4640                 case 248:
4641                 case 120:
4642                         block = "HDP";
4643                         break;
4644                 default:
4645                         block = "unknown";
4646                         break;
4647                 }
4648         }
4649
4650         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4651                protections, vmid, addr,
4652                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4653                block, mc_id);
4654 }
4655
/**
 * si_vm_set_page - update the page tables using the CP
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the CP (SI).  Depending on which ring
 * owns page-table updates, the PTEs are written either with CP
 * WRITE_DATA packets or with DMA write / PTE-PDE packets.
 */
void si_vm_set_page(struct radeon_device *rdev,
                    struct radeon_ib *ib,
                    uint64_t pe,
                    uint64_t addr, unsigned count,
                    uint32_t incr, uint32_t flags)
{
        uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
        uint64_t value;
        unsigned ndw;

        if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
                /* CP path: WRITE_DATA packets, one 64-bit PTE per two
                 * payload dwords; ndw includes the 2-dword destination
                 * address and is capped per packet */
                while (count) {
                        ndw = 2 + count * 2;
                        if (ndw > 0x3FFE)
                                ndw = 0x3FFE;

                        ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
                        ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
                                        WRITE_DATA_DST_SEL(1));
                        ib->ptr[ib->length_dw++] = pe;
                        ib->ptr[ib->length_dw++] = upper_32_bits(pe);
                        for (; ndw > 2; ndw -= 2, --count, pe += 8) {
                                if (flags & RADEON_VM_PAGE_SYSTEM) {
                                        /* system page: translate through the GART */
                                        value = radeon_vm_map_gart(rdev, addr);
                                        value &= 0xFFFFFFFFFFFFF000ULL;
                                } else if (flags & RADEON_VM_PAGE_VALID) {
                                        value = addr;
                                } else {
                                        value = 0;
                                }
                                addr += incr;
                                value |= r600_flags;
                                ib->ptr[ib->length_dw++] = value;
                                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                        }
                }
        } else {
                /* DMA */
                if (flags & RADEON_VM_PAGE_SYSTEM) {
                        while (count) {
                                /* two dwords per PTE, capped per packet */
                                ndw = count * 2;
                                if (ndw > 0xFFFFE)
                                        ndw = 0xFFFFE;

                                /* for non-physically contiguous pages (system) */
                                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
                                ib->ptr[ib->length_dw++] = pe;
                                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                                for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                                        if (flags & RADEON_VM_PAGE_SYSTEM) {
                                                value = radeon_vm_map_gart(rdev, addr);
                                                value &= 0xFFFFFFFFFFFFF000ULL;
                                        } else if (flags & RADEON_VM_PAGE_VALID) {
                                                value = addr;
                                        } else {
                                                value = 0;
                                        }
                                        addr += incr;
                                        value |= r600_flags;
                                        ib->ptr[ib->length_dw++] = value;
                                        ib->ptr[ib->length_dw++] = upper_32_bits(value);
                                }
                        }
                } else {
                        while (count) {
                                ndw = count * 2;
                                if (ndw > 0xFFFFE)
                                        ndw = 0xFFFFE;

                                if (flags & RADEON_VM_PAGE_VALID)
                                        value = addr;
                                else
                                        value = 0;
                                /* for physically contiguous pages (vram) */
                                ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
                                ib->ptr[ib->length_dw++] = pe; /* dst addr */
                                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                                ib->ptr[ib->length_dw++] = r600_flags; /* mask */
                                ib->ptr[ib->length_dw++] = 0;
                                ib->ptr[ib->length_dw++] = value; /* value */
                                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                                ib->ptr[ib->length_dw++] = incr; /* increment size */
                                ib->ptr[ib->length_dw++] = 0;
                                pe += ndw * 4;
                                addr += (ndw / 2) * incr;
                                count -= ndw / 2;
                        }
                }
                /* pad the IB to a multiple of 8 dwords with NOPs */
                while (ib->length_dw & 0x7)
                        ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
        }
}
4761
/**
 * si_vm_flush - flush the TLB for a VM via the GFX/compute ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: vm to flush, may be NULL (no-op)
 *
 * Emits the packet sequence that points the VM context at its new page
 * directory, flushes the HDP cache and invalidates the TLB for this
 * VM id.  The packet order below is intentional; do not reorder.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        /* write new base address */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));

        /* contexts 0-7 and 8-15 live in two separate register banks */
        if (vm->id < 8) {
                radeon_ring_write(ring,
                                  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
        } else {
                radeon_ring_write(ring,
                                  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
        }
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0x1);

        /* bits 0-15 are the VM contexts0-15 */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 1 << vm->id);

        /* sync PFP to ME, otherwise we might get invalid PFP reads */
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
}
4804
4805 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4806 {
4807         struct radeon_ring *ring = &rdev->ring[ridx];
4808
4809         if (vm == NULL)
4810                 return;
4811
4812         radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4813         if (vm->id < 8) {
4814                 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
4815         } else {
4816                 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
4817         }
4818         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4819
4820         /* flush hdp cache */
4821         radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4822         radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
4823         radeon_ring_write(ring, 1);
4824
4825         /* bits 0-7 are the VM contexts0-7 */
4826         radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4827         radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
4828         radeon_ring_write(ring, 1 << vm->id);
4829 }
4830
4831 /*
4832  *  Power and clock gating
4833  */
4834 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4835 {
4836         int i;
4837
4838         for (i = 0; i < rdev->usec_timeout; i++) {
4839                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4840                         break;
4841                 udelay(1);
4842         }
4843
4844         for (i = 0; i < rdev->usec_timeout; i++) {
4845                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4846                         break;
4847                 udelay(1);
4848         }
4849 }
4850
4851 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4852                                          bool enable)
4853 {
4854         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4855         u32 mask;
4856         int i;
4857
4858         if (enable)
4859                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4860         else
4861                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4862         WREG32(CP_INT_CNTL_RING0, tmp);
4863
4864         if (!enable) {
4865                 /* read a gfx register */
4866                 tmp = RREG32(DB_DEPTH_INFO);
4867
4868                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4869                 for (i = 0; i < rdev->usec_timeout; i++) {
4870                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4871                                 break;
4872                         udelay(1);
4873                 }
4874         }
4875 }
4876
/**
 * si_set_uvd_dcm - configure UVD dynamic clock mode
 * @rdev: radeon_device pointer
 * @sw_mode: true for software-controlled DCM, false for hardware mode
 *
 * Programs UVD_CGC_CTRL (DCM enabled, CG_DT=1, CLK_OD=4) and the
 * indirect UVD_CGC_CTRL2 register.  The 0x7ffff800 field is cleared in
 * sw mode and set in hw mode; its per-bit meaning is not visible here —
 * presumably per-clock gating overrides, confirm against the register
 * spec.
 */
static void si_set_uvd_dcm(struct radeon_device *rdev,
                           bool sw_mode)
{
        u32 tmp, tmp2;

        tmp = RREG32(UVD_CGC_CTRL);
        tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
        tmp |= DCM | CG_DT(1) | CLK_OD(4);

        if (sw_mode) {
                tmp &= ~0x7ffff800;
                /* dynamic OR/RR gating with G_DIV_ID=7 in sw mode */
                tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
        } else {
                tmp |= 0x7ffff800;
                tmp2 = 0;
        }

        WREG32(UVD_CGC_CTRL, tmp);
        /* UVD_CGC_CTRL2 lives behind the UVD indirect context interface */
        WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
}
4897
/**
 * si_init_uvd_internal_cg - initialize UVD internal clock gating
 * @rdev: radeon_device pointer
 *
 * hw_mode is hard-coded to true, so this always selects hardware DCM
 * via si_set_uvd_dcm(rdev, false); the else branch (clearing DCM) is
 * currently dead code kept as a switchable alternative.
 */
static void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
        bool hw_mode = true;

        if (hw_mode) {
                si_set_uvd_dcm(rdev, false);
        } else {
                u32 tmp = RREG32(UVD_CGC_CTRL);
                tmp &= ~DCM;
                WREG32(UVD_CGC_CTRL, tmp);
        }
}
4910
4911 static u32 si_halt_rlc(struct radeon_device *rdev)
4912 {
4913         u32 data, orig;
4914
4915         orig = data = RREG32(RLC_CNTL);
4916
4917         if (data & RLC_ENABLE) {
4918                 data &= ~RLC_ENABLE;
4919                 WREG32(RLC_CNTL, data);
4920
4921                 si_wait_for_rlc_serdes(rdev);
4922         }
4923
4924         return orig;
4925 }
4926
4927 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4928 {
4929         u32 tmp;
4930
4931         tmp = RREG32(RLC_CNTL);
4932         if (tmp != rlc)
4933                 WREG32(RLC_CNTL, rlc);
4934 }
4935
4936 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4937 {
4938         u32 data, orig;
4939
4940         orig = data = RREG32(DMA_PG);
4941         if (enable)
4942                 data |= PG_CNTL_ENABLE;
4943         else
4944                 data &= ~PG_CNTL_ENABLE;
4945         if (orig != data)
4946                 WREG32(DMA_PG, data);
4947 }
4948
4949 static void si_init_dma_pg(struct radeon_device *rdev)
4950 {
4951         u32 tmp;
4952
4953         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4954         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4955
4956         for (tmp = 0; tmp < 5; tmp++)
4957                 WREG32(DMA_PGFSM_WRITE, 0);
4958 }
4959
/**
 * si_enable_gfx_cgpg - enable/disable GFX clock & power gating
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to disable
 *
 * On enable: programs the RLC_TTOP_D thresholds, sets GFX_PG_ENABLE in
 * RLC_PG_CNTL, then turns on automatic powergating (AUTO_PG_EN).
 * On disable: clears AUTO_PG_EN and issues a GFX register read —
 * presumably to force the GFX block awake after auto-PG is turned off;
 * TODO confirm.  Note GFX_PG_ENABLE is intentionally left set on the
 * disable path (only the auto trigger is cleared).
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
                               bool enable)
{
        u32 tmp;

        if (enable) {
                /* PU/PD/TTP/MS delay thresholds, all 0x10 */
                tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
                WREG32(RLC_TTOP_D, tmp);

                tmp = RREG32(RLC_PG_CNTL);
                tmp |= GFX_PG_ENABLE;
                WREG32(RLC_PG_CNTL, tmp);

                tmp = RREG32(RLC_AUTO_PG_CTRL);
                tmp |= AUTO_PG_EN;
                WREG32(RLC_AUTO_PG_CTRL, tmp);
        } else {
                tmp = RREG32(RLC_AUTO_PG_CTRL);
                tmp &= ~AUTO_PG_EN;
                WREG32(RLC_AUTO_PG_CTRL, tmp);

                /* result deliberately discarded; the read itself is the point */
                tmp = RREG32(DB_RENDER_CONTROL);
        }
}
4984
/**
 * si_init_gfx_cgpg - initialize GFX powergating state
 * @rdev: radeon_device pointer
 *
 * Points the RLC at the save/restore and clear-state buffers (addresses
 * are programmed in units of 256 bytes, hence the >> 8), marks the
 * save/restore source (GFX_PG_SRC), and sets the GRBM register
 * save-gating idle threshold to 0x700 with PG_AFTER_GRBM_REG_ST
 * cleared.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 tmp;

        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

        tmp = RREG32(RLC_PG_CNTL);
        tmp |= GFX_PG_SRC;
        WREG32(RLC_PG_CNTL, tmp);

        WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

        tmp = RREG32(RLC_AUTO_PG_CTRL);

        tmp &= ~GRBM_REG_SGIT_MASK;
        tmp |= GRBM_REG_SGIT(0x700);
        tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
        WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5004
/**
 * si_get_cu_active_bitmap - bitmap of active CUs for a given SE/SH
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Selects the SE/SH, reads the hardware harvest config
 * (CC_GC_SHADER_ARRAY_CONFIG) and the user-disable config
 * (GC_USER_SHADER_ARRAY_CONFIG), then restores broadcast selection.
 * The inactive-CU mask appears to live in the upper 16 bits of these
 * registers (the low half of the CC value is discarded before the
 * merge) — confirm against the register spec.  Returns a bitmap with
 * bit N set when CU N is active, limited to max_cu_per_sh bits.
 */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
        u32 mask = 0, tmp, tmp1;
        int i;

        si_select_se_sh(rdev, se, sh);
        tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
        tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
        /* back to broadcast so later register accesses hit all SEs/SHs */
        si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        tmp &= 0xffff0000;

        tmp |= tmp1;
        tmp >>= 16;

        /* build a mask of max_cu_per_sh low bits */
        for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
                mask <<= 1;
                mask |= 1;
        }

        /* registers report *inactive* CUs; invert to get active ones */
        return (~tmp) & mask;
}
5027
5028 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5029 {
5030         u32 i, j, k, active_cu_number = 0;
5031         u32 mask, counter, cu_bitmap;
5032         u32 tmp = 0;
5033
5034         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5035                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5036                         mask = 1;
5037                         cu_bitmap = 0;
5038                         counter  = 0;
5039                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5040                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5041                                         if (counter < 2)
5042                                                 cu_bitmap |= mask;
5043                                         counter++;
5044                                 }
5045                                 mask <<= 1;
5046                         }
5047
5048                         active_cu_number += counter;
5049                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5050                 }
5051         }
5052
5053         WREG32(RLC_PG_AO_CU_MASK, tmp);
5054
5055         tmp = RREG32(RLC_MAX_PG_CU);
5056         tmp &= ~MAX_PU_CU_MASK;
5057         tmp |= MAX_PU_CU(active_cu_number);
5058         WREG32(RLC_MAX_PG_CU, tmp);
5059 }
5060
/**
 * si_enable_cgcg - enable/disable coarse-grain clock gating
 * @rdev: radeon_device pointer
 * @enable: true to enable CGCG/CGLS, false to disable
 *
 * On enable: halts the RLC, broadcasts a serdes write (0x00b000ff) to
 * all masters, waits for the serdes to idle, restarts the RLC, issues a
 * second serdes write (0x007000ff), then sets CGCG_EN | CGLS_EN.
 * On disable: performs four reads of CB_CGTT_SCLK_CTRL — presumably to
 * flush/settle the CB clock gating before clearing the enable bits;
 * TODO confirm.  RLC_CGCG_CGLS_CTRL is only rewritten if it changed.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
                           bool enable)
{
        u32 data, orig, tmp;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        si_enable_gui_idle_interrupt(rdev, enable);

        if (enable) {
                WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

                /* stop the RLC while reprogramming the serdes */
                tmp = si_halt_rlc(rdev);

                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

                si_wait_for_rlc_serdes(rdev);

                /* restore the saved RLC_CNTL value */
                si_update_rlc(rdev, tmp);

                WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

                data |= CGCG_EN | CGLS_EN;
        } else {
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5098
/**
 * si_enable_mgcg - enable/disable medium-grain clock gating
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to disable
 *
 * Toggles CGTS shader-memory gating, CP memory light sleep, and the
 * RLC MGCG override, then halts the RLC and broadcasts a serdes write
 * (0x00d000ff on enable, 0x00e000ff on disable) before restoring
 * RLC_CNTL.  Registers are only rewritten when their value changed.
 * The literal values (0x96940200, the low-6-bit override mask, the
 * serdes control words) are hardware-specific magic from the register
 * spec.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
                           bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable) {
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data = 0x96940200;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                /* enable CP memory light sleep */
                orig = data = RREG32(CP_MEM_SLP_CNTL);
                data |= CP_MEM_LS_EN;
                if (orig != data)
                        WREG32(CP_MEM_SLP_CNTL, data);

                /* clear the low override bits to let MGCG take effect */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data &= 0xffffffc0;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = si_halt_rlc(rdev);

                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

                si_update_rlc(rdev, tmp);
        } else {
                /* force the override bits on to defeat MGCG */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= LS_OVERRIDE | OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = si_halt_rlc(rdev);

                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

                si_update_rlc(rdev, tmp);
        }
}
5152
/**
 * si_enable_uvd_mgcg - enable/disable UVD medium-grain clock gating
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to disable
 *
 * Toggles the low 14 memory-gating bits in the indirect
 * UVD_CGC_MEM_CTRL register, the DCM bit in UVD_CGC_CTRL, and the two
 * SMC-indirect CG_CGTT_LOCAL registers (0 = gated, 0xffffffff =
 * ungated override).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
                               bool enable)
{
        u32 orig, data, tmp;

        if (enable) {
                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                tmp |= 0x3fff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);

                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
        } else {
                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                tmp &= ~0x3fff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);

                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
        }
}
5184
/* Memory-controller-related registers that carry an MC_LS_ENABLE
 * light-sleep clock-gating bit; toggled as a group by si_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
5197
5198 static void si_enable_mc_ls(struct radeon_device *rdev,
5199                             bool enable)
5200 {
5201         int i;
5202         u32 orig, data;
5203
5204         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5205                 orig = data = RREG32(mc_cg_registers[i]);
5206                 if (enable)
5207                         data |= MC_LS_ENABLE;
5208                 else
5209                         data &= ~MC_LS_ENABLE;
5210                 if (data != orig)
5211                         WREG32(mc_cg_registers[i], data);
5212         }
5213 }
5214
5215
/**
 * si_init_cg - enable clock gating at driver init
 * @rdev: radeon_device pointer
 *
 * Turns on medium- and coarse-grain clock gating, disables MC light
 * sleep on Tahiti, and sets up UVD clock gating (has_uvd is hard-coded
 * true for all SI parts here).
 */
static void si_init_cg(struct radeon_device *rdev)
{
        bool has_uvd = true;

        si_enable_mgcg(rdev, true);
        si_enable_cgcg(rdev, true);
        /* disable MC LS on Tahiti */
        if (rdev->family == CHIP_TAHITI)
                si_enable_mc_ls(rdev, false);
        if (has_uvd) {
                si_enable_uvd_mgcg(rdev, true);
                si_init_uvd_internal_cg(rdev);
        }
}
5230
/**
 * si_fini_cg - disable clock gating at teardown
 * @rdev: radeon_device pointer
 *
 * Reverse of si_init_cg(): UVD gating first, then coarse-grain, then
 * medium-grain.
 */
static void si_fini_cg(struct radeon_device *rdev)
{
        bool has_uvd = true;

        if (has_uvd)
                si_enable_uvd_mgcg(rdev, false);
        si_enable_cgcg(rdev, false);
        si_enable_mgcg(rdev, false);
}
5240
5241 static void si_init_pg(struct radeon_device *rdev)
5242 {
5243         bool has_pg = false;
5244
5245         /* only cape verde supports PG */
5246         if (rdev->family == CHIP_VERDE)
5247                 has_pg = true;
5248
5249         if (has_pg) {
5250                 si_init_ao_cu_mask(rdev);
5251                 si_init_dma_pg(rdev);
5252                 si_enable_dma_pg(rdev, true);
5253                 si_init_gfx_cgpg(rdev);
5254                 si_enable_gfx_cgpg(rdev, true);
5255         } else {
5256                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5257                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5258         }
5259 }
5260
5261 static void si_fini_pg(struct radeon_device *rdev)
5262 {
5263         bool has_pg = false;
5264
5265         /* only cape verde supports PG */
5266         if (rdev->family == CHIP_VERDE)
5267                 has_pg = true;
5268
5269         if (has_pg) {
5270                 si_enable_dma_pg(rdev, false);
5271                 si_enable_gfx_cgpg(rdev, false);
5272         }
5273 }
5274
5275 /*
5276  * RLC
5277  */
/**
 * si_rlc_fini - free the RLC buffer objects
 * @rdev: radeon_device pointer
 *
 * Unpins and releases the RLC save/restore BO and the clear-state BO if
 * they exist.  Reserve failures are only warned about; the unpin/unref
 * proceeds regardless (mirrors the other radeon BO teardown paths).
 */
void si_rlc_fini(struct radeon_device *rdev)
{
        int r;

        /* save restore block */
        if (rdev->rlc.save_restore_obj) {
                r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
                if (unlikely(r != 0))
                        dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
                radeon_bo_unpin(rdev->rlc.save_restore_obj);
                radeon_bo_unreserve(rdev->rlc.save_restore_obj);

                radeon_bo_unref(&rdev->rlc.save_restore_obj);
                rdev->rlc.save_restore_obj = NULL;
        }

        /* clear state block */
        if (rdev->rlc.clear_state_obj) {
                r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
                if (unlikely(r != 0))
                        dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
                radeon_bo_unpin(rdev->rlc.clear_state_obj);
                radeon_bo_unreserve(rdev->rlc.clear_state_obj);

                radeon_bo_unref(&rdev->rlc.clear_state_obj);
                rdev->rlc.clear_state_obj = NULL;
        }
}
5306
5307 #define RLC_CLEAR_STATE_END_MARKER          0x00000001
5308
/**
 * si_rlc_init - allocate and fill the RLC buffer objects
 * @rdev: radeon_device pointer
 *
 * Allocates and pins two VRAM buffer objects:
 *  - the save/restore buffer (one GPU page); on Cape Verde it is filled
 *    with verde_rlc_save_restore_register_list,
 *  - the clear-state buffer, sized from si_cs_data and filled with a
 *    header block (3 dwords per register list: low address, register
 *    offset, 0x08000000|byte-length) followed by the register payloads,
 *    terminated by RLC_CLEAR_STATE_END_MARKER.
 *
 * On any failure everything already allocated is torn down via
 * si_rlc_fini().  Returns 0 on success or a negative error code.
 */
int si_rlc_init(struct radeon_device *rdev)
{
        volatile u32 *dst_ptr;
        u32 dws, data, i, j, k, reg_num;
        u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
        u64 reg_list_mc_addr;
        const struct cs_section_def *cs_data = si_cs_data;
        int r;

        /* save restore block */
        if (rdev->rlc.save_restore_obj == NULL) {
                r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_VRAM, NULL,
                                     &rdev->rlc.save_restore_obj);
                if (r) {
                        dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
                        return r;
                }
        }

        r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
        if (unlikely(r != 0)) {
                si_rlc_fini(rdev);
                return r;
        }
        r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
                          &rdev->rlc.save_restore_gpu_addr);
        if (r) {
                radeon_bo_unreserve(rdev->rlc.save_restore_obj);
                dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
                si_rlc_fini(rdev);
                return r;
        }

        /* only Cape Verde uses the save/restore contents (see si_init_pg) */
        if (rdev->family == CHIP_VERDE) {
                r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
                if (r) {
                        dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
                        si_rlc_fini(rdev);
                        return r;
                }
                /* write the sr buffer */
                dst_ptr = rdev->rlc.sr_ptr;
                for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
                        dst_ptr[i] = verde_rlc_save_restore_register_list[i];
                }
                radeon_bo_kunmap(rdev->rlc.save_restore_obj);
        }
        radeon_bo_unreserve(rdev->rlc.save_restore_obj);

        /* clear state block: size = header (3 dwords/list + 2 for the
         * 64-bit payload base address) + all register payloads */
        reg_list_num = 0;
        dws = 0;
        for (i = 0; cs_data[i].section != NULL; i++) {
                for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
                        reg_list_num++;
                        dws += cs_data[i].section[j].reg_count;
                }
        }
        reg_list_blk_index = (3 * reg_list_num + 2);
        dws += reg_list_blk_index;

        if (rdev->rlc.clear_state_obj == NULL) {
                r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
                if (r) {
                        dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
                        si_rlc_fini(rdev);
                        return r;
                }
        }
        r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
        if (unlikely(r != 0)) {
                si_rlc_fini(rdev);
                return r;
        }
        r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
                          &rdev->rlc.clear_state_gpu_addr);
        if (r) {

                radeon_bo_unreserve(rdev->rlc.clear_state_obj);
                dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
                si_rlc_fini(rdev);
                return r;
        }
        r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
        if (r) {
                dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
                si_rlc_fini(rdev);
                return r;
        }
        /* set up the cs buffer: first dword is the upper 32 bits of the
         * payload base address, then one 3-dword header per list */
        dst_ptr = rdev->rlc.cs_ptr;
        reg_list_hdr_blk_index = 0;
        reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
        data = upper_32_bits(reg_list_mc_addr);
        dst_ptr[reg_list_hdr_blk_index] = data;
        reg_list_hdr_blk_index++;
        for (i = 0; cs_data[i].section != NULL; i++) {
                for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
                        reg_num = cs_data[i].section[j].reg_count;
                        /* low 32 bits of this list's payload address */
                        data = reg_list_mc_addr & 0xffffffff;
                        dst_ptr[reg_list_hdr_blk_index] = data;
                        reg_list_hdr_blk_index++;

                        /* register byte offset */
                        data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
                        dst_ptr[reg_list_hdr_blk_index] = data;
                        reg_list_hdr_blk_index++;

                        /* flag (0x08000000) | payload length in bytes */
                        data = 0x08000000 | (reg_num * 4);
                        dst_ptr[reg_list_hdr_blk_index] = data;
                        reg_list_hdr_blk_index++;

                        /* the register values themselves */
                        for (k = 0; k < reg_num; k++) {
                                data = cs_data[i].section[j].extent[k];
                                dst_ptr[reg_list_blk_index + k] = data;
                        }
                        reg_list_mc_addr += reg_num * 4;
                        reg_list_blk_index += reg_num;
                }
        }
        dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;

        radeon_bo_kunmap(rdev->rlc.clear_state_obj);
        radeon_bo_unreserve(rdev->rlc.clear_state_obj);

        return 0;
}
5437
5438 static void si_rlc_reset(struct radeon_device *rdev)
5439 {
5440         u32 tmp = RREG32(GRBM_SOFT_RESET);
5441
5442         tmp |= SOFT_RESET_RLC;
5443         WREG32(GRBM_SOFT_RESET, tmp);
5444         udelay(50);
5445         tmp &= ~SOFT_RESET_RLC;
5446         WREG32(GRBM_SOFT_RESET, tmp);
5447         udelay(50);
5448 }
5449
/**
 * si_rlc_stop - halt the RLC
 * @rdev: radeon_device pointer
 *
 * Clears RLC_CNTL, masks the GUI idle interrupts, and waits for the
 * serdes to go idle.  Order matters: the RLC must be stopped before
 * polling the serdes busy registers.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
        WREG32(RLC_CNTL, 0);

        si_enable_gui_idle_interrupt(rdev, false);

        si_wait_for_rlc_serdes(rdev);
}
5458
/**
 * si_rlc_start - start the RLC
 * @rdev: radeon_device pointer
 *
 * Sets RLC_ENABLE, unmasks the GUI idle interrupts, and waits ~50us for
 * the RLC to spin up before the caller touches it.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
        WREG32(RLC_CNTL, RLC_ENABLE);

        si_enable_gui_idle_interrupt(rdev, true);

        udelay(50);
}
5467
5468 static bool si_lbpw_supported(struct radeon_device *rdev)
5469 {
5470         u32 tmp;
5471
5472         /* Enable LBPW only for DDR3 */
5473         tmp = RREG32(MC_SEQ_MISC0);
5474         if ((tmp & 0xF0000000) == 0xB0000000)
5475                 return true;
5476         return false;
5477 }
5478
5479 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5480 {
5481         u32 tmp;
5482
5483         tmp = RREG32(RLC_LB_CNTL);
5484         if (enable)
5485                 tmp |= LOAD_BALANCE_ENABLE;
5486         else
5487                 tmp &= ~LOAD_BALANCE_ENABLE;
5488         WREG32(RLC_LB_CNTL, tmp);
5489
5490         if (!enable) {
5491                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5492                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5493         }
5494 }
5495
/**
 * si_rlc_resume - reset, program, and start the RLC
 * @rdev: radeon_device pointer
 *
 * Stops and soft-resets the RLC, initializes power and clock gating,
 * clears the RLC list/load-balance registers, uploads the RLC microcode
 * (SI_RLC_UCODE_SIZE big-endian dwords from rdev->rlc_fw) through the
 * UCODE_ADDR/UCODE_DATA port, enables LBPW where supported, and starts
 * the RLC.
 *
 * Returns 0 on success, -EINVAL if the RLC firmware was never loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
        u32 i;
        const __be32 *fw_data;

        if (!rdev->rlc_fw)
                return -EINVAL;

        si_rlc_stop(rdev);

        si_rlc_reset(rdev);

        si_init_pg(rdev);

        si_init_cg(rdev);

        WREG32(RLC_RL_BASE, 0);
        WREG32(RLC_RL_SIZE, 0);
        WREG32(RLC_LB_CNTL, 0);
        WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
        WREG32(RLC_LB_CNTR_INIT, 0);
        WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

        WREG32(RLC_MC_CNTL, 0);
        WREG32(RLC_UCODE_CNTL, 0);

        /* firmware images are stored big-endian; swap on upload */
        fw_data = (const __be32 *)rdev->rlc_fw->data;
        for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
                WREG32(RLC_UCODE_ADDR, i);
                WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
        }
        WREG32(RLC_UCODE_ADDR, 0);

        si_enable_lbpw(rdev, si_lbpw_supported(rdev));

        si_rlc_start(rdev);

        return 0;
}
5535
5536 static void si_enable_interrupts(struct radeon_device *rdev)
5537 {
5538         u32 ih_cntl = RREG32(IH_CNTL);
5539         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5540
5541         ih_cntl |= ENABLE_INTR;
5542         ih_rb_cntl |= IH_RB_ENABLE;
5543         WREG32(IH_CNTL, ih_cntl);
5544         WREG32(IH_RB_CNTL, ih_rb_cntl);
5545         rdev->ih.enabled = true;
5546 }
5547
5548 static void si_disable_interrupts(struct radeon_device *rdev)
5549 {
5550         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5551         u32 ih_cntl = RREG32(IH_CNTL);
5552
5553         ih_rb_cntl &= ~IH_RB_ENABLE;
5554         ih_cntl &= ~ENABLE_INTR;
5555         WREG32(IH_RB_CNTL, ih_rb_cntl);
5556         WREG32(IH_CNTL, ih_cntl);
5557         /* set rptr, wptr to 0 */
5558         WREG32(IH_RB_RPTR, 0);
5559         WREG32(IH_RB_WPTR, 0);
5560         rdev->ih.enabled = false;
5561         rdev->ih.rptr = 0;
5562 }
5563
/**
 * si_disable_interrupt_state - mask every interrupt source
 * @rdev: radeon_device pointer
 *
 * Forces all interrupt enables to their disabled state: CP rings, both
 * DMA engines, GRBM, per-CRTC vblank and pageflip sources (gated on how
 * many CRTCs this ASIC has), and — on parts with display hardware — DAC
 * autodetect and the six HPD pins (preserving only each pin's polarity
 * bit).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING1, 0);
        WREG32(CP_INT_CNTL_RING2, 0);
        tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
        WREG32(GRBM_INT_CNTL, 0);
        /* vblank/vline interrupt masks, per populated CRTC pair */
        if (rdev->num_crtc >= 2) {
                WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* pageflip (graphics surface update) interrupts */
        if (rdev->num_crtc >= 2) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        if (!ASIC_IS_NODCE(rdev)) {
                WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

                /* keep only each HPD pin's polarity setting */
                tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD1_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD2_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD3_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD4_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD5_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD6_INT_CONTROL, tmp);
        }
}
5619
/**
 * si_irq_init - initialize the interrupt handler (IH) ring and RLC
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, brings up the RLC, programs the IH ring buffer
 * (base, size, writeback address, overflow handling), configures
 * IH_CNTL (with RPTR_REARM when MSIs are enabled), forces all interrupt
 * sources off, enables bus mastering, and finally turns the IH on.
 *
 * Returns 0 on success or a negative error code; on RLC failure the IH
 * ring is freed again.
 */
static int si_irq_init(struct radeon_device *rdev)
{
        int ret = 0;
        int rb_bufsz;
        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

        /* allocate ring */
        ret = r600_ih_ring_alloc(rdev);
        if (ret)
                return ret;

        /* disable irqs */
        si_disable_interrupts(rdev);

        /* init rlc */
        ret = si_rlc_resume(rdev);
        if (ret) {
                r600_ih_ring_fini(rdev);
                return ret;
        }

        /* setup interrupt control */
        /* set dummy read address to ring address */
        WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
        interrupt_cntl = RREG32(INTERRUPT_CNTL);
        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
         */
        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
        WREG32(INTERRUPT_CNTL, interrupt_cntl);

        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
        /* ring size field is log2 of the dword count */
        rb_bufsz = drm_order(rdev->ih.ring_size / 4);

        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
                      (rb_bufsz << 1));

        if (rdev->wb.enabled)
                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

        /* set the writeback address whether it's enabled or not */
        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

        WREG32(IH_RB_CNTL, ih_rb_cntl);

        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);

        /* Default settings for IH_CNTL (disabled at first) */
        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
        /* RPTR_REARM only works if msi's are enabled */
        if (rdev->msi_enabled)
                ih_cntl |= RPTR_REARM;
        WREG32(IH_CNTL, ih_cntl);

        /* force the active interrupt state to all disabled */
        si_disable_interrupt_state(rdev);

        pci_set_master(rdev->pdev);

        /* enable irqs */
        si_enable_interrupts(rdev);

        return ret;
}
5690
5691 int si_irq_set(struct radeon_device *rdev)
5692 {
5693         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
5694         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5695         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5696         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5697         u32 grbm_int_cntl = 0;
5698         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5699         u32 dma_cntl, dma_cntl1;
5700         u32 thermal_int = 0;
5701
5702         if (!rdev->irq.installed) {
5703                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5704                 return -EINVAL;
5705         }
5706         /* don't enable anything if the ih is disabled */
5707         if (!rdev->ih.enabled) {
5708                 si_disable_interrupts(rdev);
5709                 /* force the active interrupt state to all disabled */
5710                 si_disable_interrupt_state(rdev);
5711                 return 0;
5712         }
5713
5714         if (!ASIC_IS_NODCE(rdev)) {
5715                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5716                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5717                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5718                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5719                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5720                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5721         }
5722
5723         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5724         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5725
5726         thermal_int = RREG32(CG_THERMAL_INT) &
5727                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5728
5729         /* enable CP interrupts on all rings */
5730         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5731                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5732                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5733         }
5734         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5735                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5736                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5737         }
5738         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5739                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5740                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5741         }
5742         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5743                 DRM_DEBUG("si_irq_set: sw int dma\n");
5744                 dma_cntl |= TRAP_ENABLE;
5745         }
5746
5747         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5748                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5749                 dma_cntl1 |= TRAP_ENABLE;
5750         }
5751         if (rdev->irq.crtc_vblank_int[0] ||
5752             atomic_read(&rdev->irq.pflip[0])) {
5753                 DRM_DEBUG("si_irq_set: vblank 0\n");
5754                 crtc1 |= VBLANK_INT_MASK;
5755         }
5756         if (rdev->irq.crtc_vblank_int[1] ||
5757             atomic_read(&rdev->irq.pflip[1])) {
5758                 DRM_DEBUG("si_irq_set: vblank 1\n");
5759                 crtc2 |= VBLANK_INT_MASK;
5760         }
5761         if (rdev->irq.crtc_vblank_int[2] ||
5762             atomic_read(&rdev->irq.pflip[2])) {
5763                 DRM_DEBUG("si_irq_set: vblank 2\n");
5764                 crtc3 |= VBLANK_INT_MASK;
5765         }
5766         if (rdev->irq.crtc_vblank_int[3] ||
5767             atomic_read(&rdev->irq.pflip[3])) {
5768                 DRM_DEBUG("si_irq_set: vblank 3\n");
5769                 crtc4 |= VBLANK_INT_MASK;
5770         }
5771         if (rdev->irq.crtc_vblank_int[4] ||
5772             atomic_read(&rdev->irq.pflip[4])) {
5773                 DRM_DEBUG("si_irq_set: vblank 4\n");
5774                 crtc5 |= VBLANK_INT_MASK;
5775         }
5776         if (rdev->irq.crtc_vblank_int[5] ||
5777             atomic_read(&rdev->irq.pflip[5])) {
5778                 DRM_DEBUG("si_irq_set: vblank 5\n");
5779                 crtc6 |= VBLANK_INT_MASK;
5780         }
5781         if (rdev->irq.hpd[0]) {
5782                 DRM_DEBUG("si_irq_set: hpd 1\n");
5783                 hpd1 |= DC_HPDx_INT_EN;
5784         }
5785         if (rdev->irq.hpd[1]) {
5786                 DRM_DEBUG("si_irq_set: hpd 2\n");
5787                 hpd2 |= DC_HPDx_INT_EN;
5788         }
5789         if (rdev->irq.hpd[2]) {
5790                 DRM_DEBUG("si_irq_set: hpd 3\n");
5791                 hpd3 |= DC_HPDx_INT_EN;
5792         }
5793         if (rdev->irq.hpd[3]) {
5794                 DRM_DEBUG("si_irq_set: hpd 4\n");
5795                 hpd4 |= DC_HPDx_INT_EN;
5796         }
5797         if (rdev->irq.hpd[4]) {
5798                 DRM_DEBUG("si_irq_set: hpd 5\n");
5799                 hpd5 |= DC_HPDx_INT_EN;
5800         }
5801         if (rdev->irq.hpd[5]) {
5802                 DRM_DEBUG("si_irq_set: hpd 6\n");
5803                 hpd6 |= DC_HPDx_INT_EN;
5804         }
5805
5806         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5807         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5808         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5809
5810         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5811         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5812
5813         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5814
5815         if (rdev->irq.dpm_thermal) {
5816                 DRM_DEBUG("dpm thermal\n");
5817                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5818         }
5819
5820         if (rdev->num_crtc >= 2) {
5821                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5822                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5823         }
5824         if (rdev->num_crtc >= 4) {
5825                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5826                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5827         }
5828         if (rdev->num_crtc >= 6) {
5829                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5830                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5831         }
5832
5833         if (rdev->num_crtc >= 2) {
5834                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5835                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5836         }
5837         if (rdev->num_crtc >= 4) {
5838                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5839                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5840         }
5841         if (rdev->num_crtc >= 6) {
5842                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5843                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5844         }
5845
5846         if (!ASIC_IS_NODCE(rdev)) {
5847                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5848                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5849                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5850                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5851                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5852                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5853         }
5854
5855         WREG32(CG_THERMAL_INT, thermal_int);
5856
5857         return 0;
5858 }
5859
5860 static inline void si_irq_ack(struct radeon_device *rdev)
5861 {
5862         u32 tmp;
5863
5864         if (ASIC_IS_NODCE(rdev))
5865                 return;
5866
5867         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5868         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5869         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5870         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5871         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5872         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5873         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5874         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5875         if (rdev->num_crtc >= 4) {
5876                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5877                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5878         }
5879         if (rdev->num_crtc >= 6) {
5880                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5881                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5882         }
5883
5884         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5885                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5886         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5887                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5888         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5889                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5890         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5891                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5892         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5893                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5894         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5895                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5896
5897         if (rdev->num_crtc >= 4) {
5898                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5899                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5900                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5901                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5902                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5903                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5904                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5905                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5906                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5907                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5908                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5909                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5910         }
5911
5912         if (rdev->num_crtc >= 6) {
5913                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5914                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5915                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5916                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5917                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5918                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5919                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5920                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5921                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5922                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5923                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5924                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5925         }
5926
5927         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5928                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5929                 tmp |= DC_HPDx_INT_ACK;
5930                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5931         }
5932         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5933                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5934                 tmp |= DC_HPDx_INT_ACK;
5935                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5936         }
5937         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5938                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5939                 tmp |= DC_HPDx_INT_ACK;
5940                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5941         }
5942         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5943                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5944                 tmp |= DC_HPDx_INT_ACK;
5945                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5946         }
5947         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5948                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5949                 tmp |= DC_HPDx_INT_ACK;
5950                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5951         }
5952         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5953                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5954                 tmp |= DC_HPDx_INT_ACK;
5955                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5956         }
5957 }
5958
/* Fully disable interrupt delivery: shut off the IH controller, give any
 * in-flight interrupts time to land, ack them, then force all interrupt
 * source enables to their disabled state.  The ordering is deliberate.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5967
/* Quiesce interrupts for suspend: disable/ack all sources, then stop the
 * RLC so no further interrupt traffic is generated while suspended.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5973
/* Tear down interrupt handling: quiesce the hardware as for suspend, then
 * free the IH ring buffer.  Suspend must precede the ring free so the
 * hardware no longer references the ring memory.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5979
5980 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5981 {
5982         u32 wptr, tmp;
5983
5984         if (rdev->wb.enabled)
5985                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5986         else
5987                 wptr = RREG32(IH_RB_WPTR);
5988
5989         if (wptr & RB_OVERFLOW) {
5990                 /* When a ring buffer overflow happen start parsing interrupt
5991                  * from the last not overwritten vector (wptr + 16). Hopefully
5992                  * this should allow us to catchup.
5993                  */
5994                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5995                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5996                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5997                 tmp = RREG32(IH_RB_CNTL);
5998                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5999                 WREG32(IH_RB_CNTL, tmp);
6000         }
6001         return (wptr & rdev->ih.ptr_mask);
6002 }
6003
6004 /*        SI IV Ring
6005  * Each IV ring entry is 128 bits:
6006  * [7:0]    - interrupt source id
6007  * [31:8]   - reserved
6008  * [59:32]  - interrupt source data
6009  * [63:60]  - reserved
6010  * [71:64]  - RINGID
6011  * [79:72]  - VMID
6012  * [127:80] - reserved
6013  */
/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walk the IH ring from the cached rptr to the current wptr, decode each
 * 128-bit IV entry (source id / source data / ring id, see the format
 * comment above) and dispatch: vblank/vline per crtc, hotplug, VM faults,
 * CP and DMA fence completion, and thermal events.  Hotplug and thermal
 * work is deferred to workqueues.  An atomic lock serializes processing;
 * after releasing it the wptr is re-read and processing restarts if more
 * entries arrived in the meantime.
 * Returns IRQ_HANDLED if work was done, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					/* clear the snapshotted status bit so this event
					 * is not handled twice on a restart */
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM protection fault */
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	/* defer hotplug / thermal handling to process context */
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6337
6338 /**
6339  * si_copy_dma - copy pages using the DMA engine
6340  *
6341  * @rdev: radeon_device pointer
6342  * @src_offset: src GPU address
6343  * @dst_offset: dst GPU address
6344  * @num_gpu_pages: number of GPU pages to xfer
6345  * @fence: radeon fence object
6346  *
6347  * Copy GPU paging using the DMA engine (SI).
6348  * Used by the radeon ttm implementation to move pages if
6349  * registered as the asic copy callback.
6350  */
6351 int si_copy_dma(struct radeon_device *rdev,
6352                 uint64_t src_offset, uint64_t dst_offset,
6353                 unsigned num_gpu_pages,
6354                 struct radeon_fence **fence)
6355 {
6356         struct radeon_semaphore *sem = NULL;
6357         int ring_index = rdev->asic->copy.dma_ring_index;
6358         struct radeon_ring *ring = &rdev->ring[ring_index];
6359         u32 size_in_bytes, cur_size_in_bytes;
6360         int i, num_loops;
6361         int r = 0;
6362
6363         r = radeon_semaphore_create(rdev, &sem);
6364         if (r) {
6365                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6366                 return r;
6367         }
6368
6369         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6370         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6371         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6372         if (r) {
6373                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6374                 radeon_semaphore_free(rdev, &sem, NULL);
6375                 return r;
6376         }
6377
6378         if (radeon_fence_need_sync(*fence, ring->idx)) {
6379                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6380                                             ring->idx);
6381                 radeon_fence_note_sync(*fence, ring->idx);
6382         } else {
6383                 radeon_semaphore_free(rdev, &sem, NULL);
6384         }
6385
6386         for (i = 0; i < num_loops; i++) {
6387                 cur_size_in_bytes = size_in_bytes;
6388                 if (cur_size_in_bytes > 0xFFFFF)
6389                         cur_size_in_bytes = 0xFFFFF;
6390                 size_in_bytes -= cur_size_in_bytes;
6391                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6392                 radeon_ring_write(ring, dst_offset & 0xffffffff);
6393                 radeon_ring_write(ring, src_offset & 0xffffffff);
6394                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6395                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6396                 src_offset += cur_size_in_bytes;
6397                 dst_offset += cur_size_in_bytes;
6398         }
6399
6400         r = radeon_fence_emit(rdev, fence, ring->idx);
6401         if (r) {
6402                 radeon_ring_unlock_undo(rdev, ring);
6403                 return r;
6404         }
6405
6406         radeon_ring_unlock_commit(rdev, ring);
6407         radeon_semaphore_free(rdev, &sem, *fence);
6408
6409         return r;
6410 }
6411
6412 /*
6413  * startup/shutdown callbacks
6414  */
/**
 * si_startup - bring the SI GPU to a running state
 *
 * @rdev: radeon_device pointer
 *
 * Order-dependent hardware bring-up shared by si_init() and si_resume():
 * load microcode, program the MC and GART, allocate RLC and write-back
 * buffers, start fence processing and IRQs, then initialize and resume
 * the CP, DMA and (optionally) UVD rings before enabling the IB pool
 * and the VM manager.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* (re)load CP/RLC/MC microcode if any image is missing */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* memory controller and GART must be up before the engines */
	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence processing on every ring before the rings run */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: a failure only disables the UVD ring */
	if (rdev->has_uvd) {
		r = rv770_uvd_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* allocate/start the rings; sizes were preset in si_init() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD bring-up failed earlier; skip it */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size,
					     R600_WB_UVD_RPTR_OFFSET,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     0, 0xfffff, RADEON_CP_PACKET2);
			if (!r)
				r = r600_uvd_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
6595
6596 int si_resume(struct radeon_device *rdev)
6597 {
6598         int r;
6599
6600         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6601          * posting will perform necessary task to bring back GPU into good
6602          * shape.
6603          */
6604         /* post card */
6605         atom_asic_init(rdev->mode_info.atom_context);
6606
6607         /* init golden registers */
6608         si_init_golden_registers(rdev);
6609
6610         rdev->accel_working = true;
6611         r = si_startup(rdev);
6612         if (r) {
6613                 DRM_ERROR("si startup failed on resume\n");
6614                 rdev->accel_working = false;
6615                 return r;
6616         }
6617
6618         return r;
6619
6620 }
6621
/**
 * si_suspend - asic suspend callback
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the hardware roughly in reverse bring-up order: stop the VM
 * manager, the CP and DMA engines (and UVD when present), then suspend
 * IRQs, disable write-back and finally the PCIE GART.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		/* stop the UVD ring buffer controller before suspending UVD */
		r600_uvd_rbc_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6636
6637 /* Plan is to move initialization in that function and use
6638  * helper function so that radeon_device_init pretty much
6639  * do nothing more than calling asic specific function. This
6640  * should also allow to remove a bunch of callback function
6641  * like vram_info.
6642  */
6643 int si_init(struct radeon_device *rdev)
6644 {
6645         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6646         int r;
6647
6648         /* Read BIOS */
6649         if (!radeon_get_bios(rdev)) {
6650                 if (ASIC_IS_AVIVO(rdev))
6651                         return -EINVAL;
6652         }
6653         /* Must be an ATOMBIOS */
6654         if (!rdev->is_atom_bios) {
6655                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6656                 return -EINVAL;
6657         }
6658         r = radeon_atombios_init(rdev);
6659         if (r)
6660                 return r;
6661
6662         /* Post card if necessary */
6663         if (!radeon_card_posted(rdev)) {
6664                 if (!rdev->bios) {
6665                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6666                         return -EINVAL;
6667                 }
6668                 DRM_INFO("GPU not posted. posting now...\n");
6669                 atom_asic_init(rdev->mode_info.atom_context);
6670         }
6671         /* init golden registers */
6672         si_init_golden_registers(rdev);
6673         /* Initialize scratch registers */
6674         si_scratch_init(rdev);
6675         /* Initialize surface registers */
6676         radeon_surface_init(rdev);
6677         /* Initialize clocks */
6678         radeon_get_clock_info(rdev->ddev);
6679
6680         /* Fence driver */
6681         r = radeon_fence_driver_init(rdev);
6682         if (r)
6683                 return r;
6684
6685         /* initialize memory controller */
6686         r = si_mc_init(rdev);
6687         if (r)
6688                 return r;
6689         /* Memory manager */
6690         r = radeon_bo_init(rdev);
6691         if (r)
6692                 return r;
6693
6694         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6695         ring->ring_obj = NULL;
6696         r600_ring_init(rdev, ring, 1024 * 1024);
6697
6698         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6699         ring->ring_obj = NULL;
6700         r600_ring_init(rdev, ring, 1024 * 1024);
6701
6702         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6703         ring->ring_obj = NULL;
6704         r600_ring_init(rdev, ring, 1024 * 1024);
6705
6706         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6707         ring->ring_obj = NULL;
6708         r600_ring_init(rdev, ring, 64 * 1024);
6709
6710         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6711         ring->ring_obj = NULL;
6712         r600_ring_init(rdev, ring, 64 * 1024);
6713
6714         if (rdev->has_uvd) {
6715                 r = radeon_uvd_init(rdev);
6716                 if (!r) {
6717                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6718                         ring->ring_obj = NULL;
6719                         r600_ring_init(rdev, ring, 4096);
6720                 }
6721         }
6722
6723         rdev->ih.ring_obj = NULL;
6724         r600_ih_ring_init(rdev, 64 * 1024);
6725
6726         r = r600_pcie_gart_init(rdev);
6727         if (r)
6728                 return r;
6729
6730         rdev->accel_working = true;
6731         r = si_startup(rdev);
6732         if (r) {
6733                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6734                 si_cp_fini(rdev);
6735                 cayman_dma_fini(rdev);
6736                 si_irq_fini(rdev);
6737                 si_rlc_fini(rdev);
6738                 radeon_wb_fini(rdev);
6739                 radeon_ib_pool_fini(rdev);
6740                 radeon_vm_manager_fini(rdev);
6741                 radeon_irq_kms_fini(rdev);
6742                 si_pcie_gart_fini(rdev);
6743                 rdev->accel_working = false;
6744         }
6745
6746         /* Don't start up if the MC ucode is missing.
6747          * The default clocks and voltages before the MC ucode
6748          * is loaded are not suffient for advanced operations.
6749          */
6750         if (!rdev->mc_fw) {
6751                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6752                 return -EINVAL;
6753         }
6754
6755         return 0;
6756 }
6757
/**
 * si_fini - asic driver teardown
 *
 * @rdev: radeon_device pointer
 *
 * Releases everything si_init()/si_startup() set up: CP and DMA
 * engines, IRQs, RLC, clock/power gating, write-back, VM manager,
 * IB pool, UVD (when present), GART, scratch VRAM, GEM, fences, the
 * BO manager, atombios data and the kernel copy of the BIOS.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	si_fini_cg(rdev);
	si_fini_pg(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd)
		radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* don't leave a dangling pointer to the freed BIOS image */
	rdev->bios = NULL;
}
6781
6782 /**
6783  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6784  *
6785  * @rdev: radeon_device pointer
6786  *
6787  * Fetches a GPU clock counter snapshot (SI).
6788  * Returns the 64 bit clock counter snapshot.
6789  */
6790 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6791 {
6792         uint64_t clock;
6793
6794         mutex_lock(&rdev->gpu_clock_mutex);
6795         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6796         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6797                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6798         mutex_unlock(&rdev->gpu_clock_mutex);
6799         return clock;
6800 }
6801
/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 puts the PLL to sleep in bypass)
 * @dclk: requested UVD decode clock (0 puts the PLL to sleep in bypass)
 *
 * Switches VCLK/DCLK onto the bypass clock, reprograms the UPLL with
 * dividers computed by radeon_uvd_calc_upll_dividers(), then switches
 * the clocks back to the PLL outputs.  The register write sequence is
 * order dependent.
 * Returns 0 on success, negative error code if divider calculation or
 * a UPLL control request fails.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute the feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): the 307200 ISPARE9 threshold comes from the
	 * vendor init sequence — confirm against the register spec.
	 */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6892
/**
 * si_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Silently returns for IGPs, non-PCIE parts, when the radeon.pcie_gen2
 * module parameter is 0, or when neither 5.0 nor 8.0 GT/s is supported.
 * For a gen3-capable link the bridge and GPU are first retrained and
 * re-equalized via their LNKCTL/LNKCTL2 config registers; the target
 * speed is then written to the GPU's LNKCTL2 and the change is kicked
 * off through PCIE_LC_SPEED_CNTL, polling until the hardware clears
 * the initiate bit.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* respect the radeon.pcie_gen2 module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* nothing to do if the link already runs at the best speed */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* PCIe capability offsets of the upstream bridge and the GPU */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD state of both link partners */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the maximum detected width if narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore bits 4 and 9..11 of LNKCTL2 from the saved
				 * values — NOTE(review): presumably the compliance /
				 * de-emphasis fields, confirm against the PCIe spec.
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* write the target link speed field of the GPU's LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7049
7050 static void si_program_aspm(struct radeon_device *rdev)
7051 {
7052         u32 data, orig;
7053         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7054         bool disable_clkreq = false;
7055
7056         if (radeon_aspm == 0)
7057                 return;
7058
7059         if (!(rdev->flags & RADEON_IS_PCIE))
7060                 return;
7061
7062         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7063         data &= ~LC_XMIT_N_FTS_MASK;
7064         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7065         if (orig != data)
/* NOTE(review): tail of the ASPM programming routine (si_program_aspm).
 * The function entry, local declarations (orig/data, rdev, the disable_l0s /
 * disable_l1 / disable_plloff_in_l1 / disable_clkreq flags) and the earlier
 * register setup are above this excerpt.
 */
7066                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7067
             /* Request link recovery so the updated link-control settings
              * are re-negotiated (LC_GO_TO_RECOVERY). */
7068         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7069         data |= LC_GO_TO_RECOVERY;
7070         if (orig != data)
7071                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7072
             /* Ignore EDB (end-bad) errors on the port. */
7073         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7074         data |= P_IGNORE_EDB_ERR;
7075         if (orig != data)
7076                 WREG32_PCIE(PCIE_P_CNTL, data);
7077
             /* Program the L0s/L1 inactivity timers: clear both fields,
              * disable PMI-to-L1 by default, then enable the L0s timer
              * unless the caller's flag disabled L0s. */
7078         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7079         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7080         data |= LC_PMI_TO_L1_DIS;
7081         if (!disable_l0s)
7082                 data |= LC_L0S_INACTIVITY(7);
7083
7084         if (!disable_l1) {
                     /* L1 enabled: set its inactivity timer and re-allow
                      * PMI-triggered L1 entry. */
7085                 data |= LC_L1_INACTIVITY(7);
7086                 data &= ~LC_PMI_TO_L1_DIS;
7087                 if (orig != data)
7088                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7089
                     /* Optionally allow the PCIE PLLs to power down while
                      * the link is in L1. */
7090                 if (!disable_plloff_in_l1) {
7091                         bool clk_req_support;
7092
                             /* Force PLL power state 7 in the OFF and TXS2
                              * states for both lane bundles on PHY0 and
                              * PHY1 (deepest power-down — per the register
                              * field names; TODO confirm vs SI spec). */
7093                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7094                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7095                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7096                         if (orig != data)
7097                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7098
7099                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7100                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7101                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7102                         if (orig != data)
7103                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7104
7105                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7106                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7107                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7108                         if (orig != data)
7109                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7110
7111                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7112                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7113                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7114                         if (orig != data)
7115                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7116
                             /* Zero the PLL ramp-up times on all pads;
                              * Oland/Hainan skip this and keep their
                              * reset defaults. */
7117                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7118                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7119                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7120                                 if (orig != data)
7121                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7122
7123                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7124                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7125                                 if (orig != data)
7126                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7127
7128                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7129                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7130                                 if (orig != data)
7131                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7132
7133                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7134                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7135                                 if (orig != data)
7136                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7137
7138                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7139                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7140                                 if (orig != data)
7141                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7142
7143                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7144                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7145                                 if (orig != data)
7146                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7147
7148                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7149                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7150                                 if (orig != data)
7151                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7152
7153                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7154                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7155                                 if (orig != data)
7156                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7157                         }
                             /* Allow dynamic per-lane power state changes
                              * (state 3). */
7158                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7159                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7160                         data |= LC_DYN_LANES_PWR_STATE(3);
7161                         if (orig != data)
7162                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7163
                             /* LS2 exit time: Oland/Hainan need 5; all
                              * other families leave the field cleared. */
7164                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7165                         data &= ~LS2_EXIT_TIME_MASK;
7166                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7167                                 data |= LS2_EXIT_TIME(5);
7168                         if (orig != data)
7169                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7170
7171                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7172                         data &= ~LS2_EXIT_TIME_MASK;
7173                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7174                                 data |= LS2_EXIT_TIME(5);
7175                         if (orig != data)
7176                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7177
                             /* PLL power-down in L1 needs CLKREQ#: probe the
                              * upstream bridge's Link Capabilities for the
                              * Clock Power Management bit.
                              * NOTE(review): the return value of
                              * pcie_capability_read_dword() is not checked;
                              * lnkcap would be read uninitialized on
                              * failure — confirm the helper zeroes it. */
7178                         if (!disable_clkreq) {
7179                                 struct pci_dev *root = rdev->pdev->bus->self;
7180                                 u32 lnkcap;
7181
7182                                 clk_req_support = false;
7183                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7184                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7185                                         clk_req_support = true;
7186                         } else {
7187                                 clk_req_support = false;
7188                         }
7189
                             /* CLKREQ# usable: permit PLL power-down in
                              * L1/L2-L3 and move the thermal monitor, deep
                              * sleep, zclk, MPLL bypass and SPLL reference
                              * clocks off the PCIE refclk so they keep
                              * running with the PLL off (inferred from the
                              * register/field names — TODO confirm). */
7190                         if (clk_req_support) {
7191                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7192                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7193                                 if (orig != data)
7194                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7195
7196                                 orig = data = RREG32(THM_CLK_CNTL);
7197                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7198                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7199                                 if (orig != data)
7200                                         WREG32(THM_CLK_CNTL, data);
7201
7202                                 orig = data = RREG32(MISC_CLK_CNTL);
7203                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7204                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7205                                 if (orig != data)
7206                                         WREG32(MISC_CLK_CNTL, data);
7207
7208                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7209                                 data &= ~BCLK_AS_XCLK;
7210                                 if (orig != data)
7211                                         WREG32(CG_CLKPIN_CNTL, data);
7212
7213                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7214                                 data &= ~FORCE_BIF_REFCLK_EN;
7215                                 if (orig != data)
7216                                         WREG32(CG_CLKPIN_CNTL_2, data);
7217
7218                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7219                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7220                                 data |= MPLL_CLKOUT_SEL(4);
7221                                 if (orig != data)
7222                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7223
7224                                 orig = data = RREG32(SPLL_CNTL_MODE);
7225                                 data &= ~SPLL_REFCLK_SEL_MASK;
7226                                 if (orig != data)
7227                                         WREG32(SPLL_CNTL_MODE, data);
7228                         }
7229                 }
7230         } else {
                     /* L1 disabled: still commit the LC_CNTL value
                      * (timers cleared, PMI-to-L1 disabled, optional L0s)
                      * computed above. */
7231                 if (orig != data)
7232                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7233         }
7234
             /* Enable memory light sleep in the PCIE block (slave, master
              * and replay memories). */
7235         orig = data = RREG32_PCIE(PCIE_CNTL2);
7236         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7237         if (orig != data)
7238                 WREG32_PCIE(PCIE_CNTL2, data);
7239
             /* L0s fixup: when the N_FTS field is saturated and both link
              * directions report lane reversal, turn the L0s inactivity
              * timer back off (presumably a lane-reversal erratum
              * workaround — inferred from the bit names; TODO confirm). */
7240         if (!disable_l0s) {
7241                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7242                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7243                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7244                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7245                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7246                                 data &= ~LC_L0S_INACTIVITY_MASK;
7247                                 if (orig != data)
7248                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7249                         }
7250                 }
7251         }
7252 }