/* drivers/gpu/drm/radeon/ni.c */
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "nid.h"
33 #include "atom.h"
34 #include "ni_reg.h"
35 #include "cayman_blit_shaders.h"
36
/* Helpers shared with the evergreen/SI code paths (defined in other files). */
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
extern void evergreen_mc_program(struct radeon_device *rdev);
extern void evergreen_irq_suspend(struct radeon_device *rdev);
extern int evergreen_mc_init(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);

/*
 * Expected firmware image sizes, in 32-bit dwords (ni_init_microcode()
 * multiplies these by 4 to get the byte size it validates against).
 */
#define EVERGREEN_PFP_UCODE_SIZE 1120
#define EVERGREEN_PM4_UCODE_SIZE 1376
#define EVERGREEN_RLC_UCODE_SIZE 768
#define BTC_MC_UCODE_SIZE 6024

#define CAYMAN_PFP_UCODE_SIZE 2176
#define CAYMAN_PM4_UCODE_SIZE 2176
#define CAYMAN_RLC_UCODE_SIZE 1024
#define CAYMAN_MC_UCODE_SIZE 6037

#define ARUBA_RLC_UCODE_SIZE 1536
61
/* Firmware Names (advertised to the module loader / initramfs tooling) */
/* BTC family: Barts / Turks / Caicos share the "BTC" RLC image */
MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
MODULE_FIRMWARE("radeon/BARTS_me.bin");
MODULE_FIRMWARE("radeon/BARTS_mc.bin");
MODULE_FIRMWARE("radeon/BTC_rlc.bin");
MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
MODULE_FIRMWARE("radeon/TURKS_me.bin");
MODULE_FIRMWARE("radeon/TURKS_mc.bin");
MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
MODULE_FIRMWARE("radeon/CAICOS_me.bin");
MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
/* Cayman */
MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
/* Aruba (no MC image: IGP, see ni_init_microcode) */
MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
MODULE_FIRMWARE("radeon/ARUBA_me.bin");
MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
80
81
/*
 * "Golden" register tables: flat sequences of per-register triples,
 * presumably { MMIO offset, AND mask, value } — consumed by
 * radeon_program_register_sequence() in ni_init_golden_registers()
 * (triple layout assumed from the radeon convention; confirm against
 * radeon_program_register_sequence()).
 */
static const u32 cayman_golden_registers2[] =
{
	0x3e5c, 0xffffffff, 0x00000000,
	0x3e48, 0xffffffff, 0x00000000,
	0x3e4c, 0xffffffff, 0x00000000,
	0x3e64, 0xffffffff, 0x00000000,
	0x3e50, 0xffffffff, 0x00000000,
	0x3e60, 0xffffffff, 0x00000000
};
91
/* Golden register settings for CHIP_CAYMAN ({ offset, mask, value } triples). */
static const u32 cayman_golden_registers[] =
{
	0x5eb4, 0xffffffff, 0x00000002,
	0x5e78, 0x8f311ff1, 0x001000f0,
	0x3f90, 0xffff0000, 0xff000000,
	0x9148, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0xc78, 0x00000080, 0x00000080,
	0xbd4, 0x70073777, 0x00011003,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x02011003,
	0x5bc0, 0x00200000, 0x50100000,
	0x98f8, 0x33773777, 0x02011003,
	0x98fc, 0xffffffff, 0x76541032,
	0x7030, 0x31000311, 0x00000011,
	0x2f48, 0x33773777, 0x42010001,
	0x6b28, 0x00000010, 0x00000012,
	0x7728, 0x00000010, 0x00000012,
	0x10328, 0x00000010, 0x00000012,
	0x10f28, 0x00000010, 0x00000012,
	0x11b28, 0x00000010, 0x00000012,
	0x12728, 0x00000010, 0x00000012,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x10c, 0x00000001, 0x00010003,
	0xa02c, 0xffffffff, 0x0000009b,
	0x913c, 0x0000010f, 0x01000100,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0x3700001f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d0, 0xffffffff, 0x0f40df40,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
132
/*
 * Secondary golden table programmed for both ARUBA variants; alternates
 * writes to 0x8f8/0x8fc (looks like an index/data register pair — TODO
 * confirm against ni_reg.h).
 */
static const u32 dvst_golden_registers2[] =
{
	0x8f8, 0xffffffff, 0,
	0x8fc, 0x00380000, 0,
	0x8f8, 0xffffffff, 1,
	0x8fc, 0x0e000000, 0
};
140
/*
 * Golden register settings for the "dvst" subset of ARUBA device ids
 * (see the id list in ni_init_golden_registers()).
 */
static const u32 dvst_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
198
/*
 * Golden register settings for the remaining ("scrapper") ARUBA device ids.
 * Largely the same offsets/values as dvst_golden_registers, but most
 * entries appear twice — presumably intentional repeated programming
 * (reason not visible here; NOTE(review): confirm before de-duplicating).
 */
static const u32 scrapper_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x913c, 0xffff03ff, 0x01000100,
	0x90e8, 0x001fffff, 0x010400c0,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c30, 0x0000000f, 0x00040005,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x900c, 0x00ffffff, 0x0017071f,
	0x28350, 0x00000f01, 0x00000000,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x9508, 0xf700071f, 0x00000002,
	0x9688, 0x00300000, 0x0017000f,
	0x960c, 0xffffffff, 0x54763210,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000,
	0x8974, 0xffffffff, 0x00000000
};
305
306 static void ni_init_golden_registers(struct radeon_device *rdev)
307 {
308         switch (rdev->family) {
309         case CHIP_CAYMAN:
310                 radeon_program_register_sequence(rdev,
311                                                  cayman_golden_registers,
312                                                  (const u32)ARRAY_SIZE(cayman_golden_registers));
313                 radeon_program_register_sequence(rdev,
314                                                  cayman_golden_registers2,
315                                                  (const u32)ARRAY_SIZE(cayman_golden_registers2));
316                 break;
317         case CHIP_ARUBA:
318                 if ((rdev->pdev->device == 0x9900) ||
319                     (rdev->pdev->device == 0x9901) ||
320                     (rdev->pdev->device == 0x9903) ||
321                     (rdev->pdev->device == 0x9904) ||
322                     (rdev->pdev->device == 0x9905) ||
323                     (rdev->pdev->device == 0x9906) ||
324                     (rdev->pdev->device == 0x9907) ||
325                     (rdev->pdev->device == 0x9908) ||
326                     (rdev->pdev->device == 0x9909) ||
327                     (rdev->pdev->device == 0x990A) ||
328                     (rdev->pdev->device == 0x990B) ||
329                     (rdev->pdev->device == 0x990C) ||
330                     (rdev->pdev->device == 0x990D) ||
331                     (rdev->pdev->device == 0x990E) ||
332                     (rdev->pdev->device == 0x990F) ||
333                     (rdev->pdev->device == 0x9910) ||
334                     (rdev->pdev->device == 0x9913) ||
335                     (rdev->pdev->device == 0x9917) ||
336                     (rdev->pdev->device == 0x9918)) {
337                         radeon_program_register_sequence(rdev,
338                                                          dvst_golden_registers,
339                                                          (const u32)ARRAY_SIZE(dvst_golden_registers));
340                         radeon_program_register_sequence(rdev,
341                                                          dvst_golden_registers2,
342                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
343                 } else {
344                         radeon_program_register_sequence(rdev,
345                                                          scrapper_golden_registers,
346                                                          (const u32)ARRAY_SIZE(scrapper_golden_registers));
347                         radeon_program_register_sequence(rdev,
348                                                          dvst_golden_registers2,
349                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
350                 }
351                 break;
352         default:
353                 break;
354         }
355 }
356
/* Number of { index, data } pairs in each *_io_mc_regs table below. */
#define BTC_IO_MC_REGS_SIZE 29

/*
 * MC io register init tables: { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA }
 * pairs written by ni_mc_load_microcode() before the MC ucode is streamed in.
 * The four family tables differ only in the final (0x9f) entry.
 */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};
390
/* Turks MC io init table (same layout as barts_io_mc_regs). */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};
422
/* Caicos MC io init table (same layout as barts_io_mc_regs). */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};
454
/* Cayman MC io init table (same layout as barts_io_mc_regs). */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
486
487 int ni_mc_load_microcode(struct radeon_device *rdev)
488 {
489         const __be32 *fw_data;
490         u32 mem_type, running, blackout = 0;
491         u32 *io_mc_regs;
492         int i, ucode_size, regs_size;
493
494         if (!rdev->mc_fw)
495                 return -EINVAL;
496
497         switch (rdev->family) {
498         case CHIP_BARTS:
499                 io_mc_regs = (u32 *)&barts_io_mc_regs;
500                 ucode_size = BTC_MC_UCODE_SIZE;
501                 regs_size = BTC_IO_MC_REGS_SIZE;
502                 break;
503         case CHIP_TURKS:
504                 io_mc_regs = (u32 *)&turks_io_mc_regs;
505                 ucode_size = BTC_MC_UCODE_SIZE;
506                 regs_size = BTC_IO_MC_REGS_SIZE;
507                 break;
508         case CHIP_CAICOS:
509         default:
510                 io_mc_regs = (u32 *)&caicos_io_mc_regs;
511                 ucode_size = BTC_MC_UCODE_SIZE;
512                 regs_size = BTC_IO_MC_REGS_SIZE;
513                 break;
514         case CHIP_CAYMAN:
515                 io_mc_regs = (u32 *)&cayman_io_mc_regs;
516                 ucode_size = CAYMAN_MC_UCODE_SIZE;
517                 regs_size = BTC_IO_MC_REGS_SIZE;
518                 break;
519         }
520
521         mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
522         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
523
524         if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
525                 if (running) {
526                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
527                         WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
528                 }
529
530                 /* reset the engine and set to writable */
531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
532                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
533
534                 /* load mc io regs */
535                 for (i = 0; i < regs_size; i++) {
536                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
537                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
538                 }
539                 /* load the MC ucode */
540                 fw_data = (const __be32 *)rdev->mc_fw->data;
541                 for (i = 0; i < ucode_size; i++)
542                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
543
544                 /* put the engine back into the active state */
545                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
546                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
547                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
548
549                 /* wait for training to complete */
550                 for (i = 0; i < rdev->usec_timeout; i++) {
551                         if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
552                                 break;
553                         udelay(1);
554                 }
555
556                 if (running)
557                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
558         }
559
560         return 0;
561 }
562
563 int ni_init_microcode(struct radeon_device *rdev)
564 {
565         struct platform_device *pdev;
566         const char *chip_name;
567         const char *rlc_chip_name;
568         size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
569         char fw_name[30];
570         int err;
571
572         DRM_DEBUG("\n");
573
574         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
575         err = IS_ERR(pdev);
576         if (err) {
577                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
578                 return -EINVAL;
579         }
580
581         switch (rdev->family) {
582         case CHIP_BARTS:
583                 chip_name = "BARTS";
584                 rlc_chip_name = "BTC";
585                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
586                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
587                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
588                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
589                 break;
590         case CHIP_TURKS:
591                 chip_name = "TURKS";
592                 rlc_chip_name = "BTC";
593                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
594                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
595                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
596                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
597                 break;
598         case CHIP_CAICOS:
599                 chip_name = "CAICOS";
600                 rlc_chip_name = "BTC";
601                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
602                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
603                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
604                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
605                 break;
606         case CHIP_CAYMAN:
607                 chip_name = "CAYMAN";
608                 rlc_chip_name = "CAYMAN";
609                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
610                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
611                 rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
612                 mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
613                 break;
614         case CHIP_ARUBA:
615                 chip_name = "ARUBA";
616                 rlc_chip_name = "ARUBA";
617                 /* pfp/me same size as CAYMAN */
618                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
619                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
620                 rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
621                 mc_req_size = 0;
622                 break;
623         default: BUG();
624         }
625
626         DRM_INFO("Loading %s Microcode\n", chip_name);
627
628         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
629         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
630         if (err)
631                 goto out;
632         if (rdev->pfp_fw->size != pfp_req_size) {
633                 printk(KERN_ERR
634                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
635                        rdev->pfp_fw->size, fw_name);
636                 err = -EINVAL;
637                 goto out;
638         }
639
640         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
641         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
642         if (err)
643                 goto out;
644         if (rdev->me_fw->size != me_req_size) {
645                 printk(KERN_ERR
646                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
647                        rdev->me_fw->size, fw_name);
648                 err = -EINVAL;
649         }
650
651         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
652         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
653         if (err)
654                 goto out;
655         if (rdev->rlc_fw->size != rlc_req_size) {
656                 printk(KERN_ERR
657                        "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
658                        rdev->rlc_fw->size, fw_name);
659                 err = -EINVAL;
660         }
661
662         /* no MC ucode on TN */
663         if (!(rdev->flags & RADEON_IS_IGP)) {
664                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
665                 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
666                 if (err)
667                         goto out;
668                 if (rdev->mc_fw->size != mc_req_size) {
669                         printk(KERN_ERR
670                                "ni_mc: Bogus length %zu in firmware \"%s\"\n",
671                                rdev->mc_fw->size, fw_name);
672                         err = -EINVAL;
673                 }
674         }
675 out:
676         platform_device_unregister(pdev);
677
678         if (err) {
679                 if (err != -EINVAL)
680                         printk(KERN_ERR
681                                "ni_cp: Failed to load firmware \"%s\"\n",
682                                fw_name);
683                 release_firmware(rdev->pfp_fw);
684                 rdev->pfp_fw = NULL;
685                 release_firmware(rdev->me_fw);
686                 rdev->me_fw = NULL;
687                 release_firmware(rdev->rlc_fw);
688                 rdev->rlc_fw = NULL;
689                 release_firmware(rdev->mc_fw);
690                 rdev->mc_fw = NULL;
691         }
692         return err;
693 }
694
695 /*
696  * Core functions
697  */
698 static void cayman_gpu_init(struct radeon_device *rdev)
699 {
700         u32 gb_addr_config = 0;
701         u32 mc_shared_chmap, mc_arb_ramcfg;
702         u32 cgts_tcc_disable;
703         u32 sx_debug_1;
704         u32 smx_dc_ctl0;
705         u32 cgts_sm_ctrl_reg;
706         u32 hdp_host_path_cntl;
707         u32 tmp;
708         u32 disabled_rb_mask;
709         int i, j;
710
711         switch (rdev->family) {
712         case CHIP_CAYMAN:
713                 rdev->config.cayman.max_shader_engines = 2;
714                 rdev->config.cayman.max_pipes_per_simd = 4;
715                 rdev->config.cayman.max_tile_pipes = 8;
716                 rdev->config.cayman.max_simds_per_se = 12;
717                 rdev->config.cayman.max_backends_per_se = 4;
718                 rdev->config.cayman.max_texture_channel_caches = 8;
719                 rdev->config.cayman.max_gprs = 256;
720                 rdev->config.cayman.max_threads = 256;
721                 rdev->config.cayman.max_gs_threads = 32;
722                 rdev->config.cayman.max_stack_entries = 512;
723                 rdev->config.cayman.sx_num_of_sets = 8;
724                 rdev->config.cayman.sx_max_export_size = 256;
725                 rdev->config.cayman.sx_max_export_pos_size = 64;
726                 rdev->config.cayman.sx_max_export_smx_size = 192;
727                 rdev->config.cayman.max_hw_contexts = 8;
728                 rdev->config.cayman.sq_num_cf_insts = 2;
729
730                 rdev->config.cayman.sc_prim_fifo_size = 0x100;
731                 rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
732                 rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
733                 gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
734                 break;
735         case CHIP_ARUBA:
736         default:
737                 rdev->config.cayman.max_shader_engines = 1;
738                 rdev->config.cayman.max_pipes_per_simd = 4;
739                 rdev->config.cayman.max_tile_pipes = 2;
740                 if ((rdev->pdev->device == 0x9900) ||
741                     (rdev->pdev->device == 0x9901) ||
742                     (rdev->pdev->device == 0x9905) ||
743                     (rdev->pdev->device == 0x9906) ||
744                     (rdev->pdev->device == 0x9907) ||
745                     (rdev->pdev->device == 0x9908) ||
746                     (rdev->pdev->device == 0x9909) ||
747                     (rdev->pdev->device == 0x990B) ||
748                     (rdev->pdev->device == 0x990C) ||
749                     (rdev->pdev->device == 0x990F) ||
750                     (rdev->pdev->device == 0x9910) ||
751                     (rdev->pdev->device == 0x9917) ||
752                     (rdev->pdev->device == 0x9999) ||
753                     (rdev->pdev->device == 0x999C)) {
754                         rdev->config.cayman.max_simds_per_se = 6;
755                         rdev->config.cayman.max_backends_per_se = 2;
756                 } else if ((rdev->pdev->device == 0x9903) ||
757                            (rdev->pdev->device == 0x9904) ||
758                            (rdev->pdev->device == 0x990A) ||
759                            (rdev->pdev->device == 0x990D) ||
760                            (rdev->pdev->device == 0x990E) ||
761                            (rdev->pdev->device == 0x9913) ||
762                            (rdev->pdev->device == 0x9918) ||
763                            (rdev->pdev->device == 0x999D)) {
764                         rdev->config.cayman.max_simds_per_se = 4;
765                         rdev->config.cayman.max_backends_per_se = 2;
766                 } else if ((rdev->pdev->device == 0x9919) ||
767                            (rdev->pdev->device == 0x9990) ||
768                            (rdev->pdev->device == 0x9991) ||
769                            (rdev->pdev->device == 0x9994) ||
770                            (rdev->pdev->device == 0x9995) ||
771                            (rdev->pdev->device == 0x9996) ||
772                            (rdev->pdev->device == 0x999A) ||
773                            (rdev->pdev->device == 0x99A0)) {
774                         rdev->config.cayman.max_simds_per_se = 3;
775                         rdev->config.cayman.max_backends_per_se = 1;
776                 } else {
777                         rdev->config.cayman.max_simds_per_se = 2;
778                         rdev->config.cayman.max_backends_per_se = 1;
779                 }
780                 rdev->config.cayman.max_texture_channel_caches = 2;
781                 rdev->config.cayman.max_gprs = 256;
782                 rdev->config.cayman.max_threads = 256;
783                 rdev->config.cayman.max_gs_threads = 32;
784                 rdev->config.cayman.max_stack_entries = 512;
785                 rdev->config.cayman.sx_num_of_sets = 8;
786                 rdev->config.cayman.sx_max_export_size = 256;
787                 rdev->config.cayman.sx_max_export_pos_size = 64;
788                 rdev->config.cayman.sx_max_export_smx_size = 192;
789                 rdev->config.cayman.max_hw_contexts = 8;
790                 rdev->config.cayman.sq_num_cf_insts = 2;
791
792                 rdev->config.cayman.sc_prim_fifo_size = 0x40;
793                 rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
794                 rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
795                 gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
796                 break;
797         }
798
799         /* Initialize HDP */
800         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
801                 WREG32((0x2c14 + j), 0x00000000);
802                 WREG32((0x2c18 + j), 0x00000000);
803                 WREG32((0x2c1c + j), 0x00000000);
804                 WREG32((0x2c20 + j), 0x00000000);
805                 WREG32((0x2c24 + j), 0x00000000);
806         }
807
808         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
809
810         evergreen_fix_pci_max_read_req_size(rdev);
811
812         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
813         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
814
815         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
816         rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
817         if (rdev->config.cayman.mem_row_size_in_kb > 4)
818                 rdev->config.cayman.mem_row_size_in_kb = 4;
819         /* XXX use MC settings? */
820         rdev->config.cayman.shader_engine_tile_size = 32;
821         rdev->config.cayman.num_gpus = 1;
822         rdev->config.cayman.multi_gpu_tile_size = 64;
823
824         tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
825         rdev->config.cayman.num_tile_pipes = (1 << tmp);
826         tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
827         rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
828         tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
829         rdev->config.cayman.num_shader_engines = tmp + 1;
830         tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
831         rdev->config.cayman.num_gpus = tmp + 1;
832         tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
833         rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
834         tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
835         rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;
836
837
838         /* setup tiling info dword.  gb_addr_config is not adequate since it does
839          * not have bank info, so create a custom tiling dword.
840          * bits 3:0   num_pipes
841          * bits 7:4   num_banks
842          * bits 11:8  group_size
843          * bits 15:12 row_size
844          */
845         rdev->config.cayman.tile_config = 0;
846         switch (rdev->config.cayman.num_tile_pipes) {
847         case 1:
848         default:
849                 rdev->config.cayman.tile_config |= (0 << 0);
850                 break;
851         case 2:
852                 rdev->config.cayman.tile_config |= (1 << 0);
853                 break;
854         case 4:
855                 rdev->config.cayman.tile_config |= (2 << 0);
856                 break;
857         case 8:
858                 rdev->config.cayman.tile_config |= (3 << 0);
859                 break;
860         }
861
862         /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
863         if (rdev->flags & RADEON_IS_IGP)
864                 rdev->config.cayman.tile_config |= 1 << 4;
865         else {
866                 switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
867                 case 0: /* four banks */
868                         rdev->config.cayman.tile_config |= 0 << 4;
869                         break;
870                 case 1: /* eight banks */
871                         rdev->config.cayman.tile_config |= 1 << 4;
872                         break;
873                 case 2: /* sixteen banks */
874                 default:
875                         rdev->config.cayman.tile_config |= 2 << 4;
876                         break;
877                 }
878         }
879         rdev->config.cayman.tile_config |=
880                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
881         rdev->config.cayman.tile_config |=
882                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
883
884         tmp = 0;
885         for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
886                 u32 rb_disable_bitmap;
887
888                 WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
889                 WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
890                 rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
891                 tmp <<= 4;
892                 tmp |= rb_disable_bitmap;
893         }
894         /* enabled rb are just the one not disabled :) */
895         disabled_rb_mask = tmp;
896         tmp = 0;
897         for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
898                 tmp |= (1 << i);
899         /* if all the backends are disabled, fix it up here */
900         if ((disabled_rb_mask & tmp) == tmp) {
901                 for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
902                         disabled_rb_mask &= ~(1 << i);
903         }
904
905         WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
906         WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
907
908         WREG32(GB_ADDR_CONFIG, gb_addr_config);
909         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
910         if (ASIC_IS_DCE6(rdev))
911                 WREG32(DMIF_ADDR_CALC, gb_addr_config);
912         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
913         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
914         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
915         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
916         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
917         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
918
919         if ((rdev->config.cayman.max_backends_per_se == 1) &&
920             (rdev->flags & RADEON_IS_IGP)) {
921                 if ((disabled_rb_mask & 3) == 1) {
922                         /* RB0 disabled, RB1 enabled */
923                         tmp = 0x11111111;
924                 } else {
925                         /* RB1 disabled, RB0 enabled */
926                         tmp = 0x00000000;
927                 }
928         } else {
929                 tmp = gb_addr_config & NUM_PIPES_MASK;
930                 tmp = r6xx_remap_render_backend(rdev, tmp,
931                                                 rdev->config.cayman.max_backends_per_se *
932                                                 rdev->config.cayman.max_shader_engines,
933                                                 CAYMAN_MAX_BACKENDS, disabled_rb_mask);
934         }
935         WREG32(GB_BACKEND_MAP, tmp);
936
937         cgts_tcc_disable = 0xffff0000;
938         for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
939                 cgts_tcc_disable &= ~(1 << (16 + i));
940         WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
941         WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
942         WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
943         WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
944
945         /* reprogram the shader complex */
946         cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
947         for (i = 0; i < 16; i++)
948                 WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
949         WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);
950
951         /* set HW defaults for 3D engine */
952         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
953
954         sx_debug_1 = RREG32(SX_DEBUG_1);
955         sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
956         WREG32(SX_DEBUG_1, sx_debug_1);
957
958         smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
959         smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
960         smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
961         WREG32(SMX_DC_CTL0, smx_dc_ctl0);
962
963         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);
964
965         /* need to be explicitly zero-ed */
966         WREG32(VGT_OFFCHIP_LDS_BASE, 0);
967         WREG32(SQ_LSTMP_RING_BASE, 0);
968         WREG32(SQ_HSTMP_RING_BASE, 0);
969         WREG32(SQ_ESTMP_RING_BASE, 0);
970         WREG32(SQ_GSTMP_RING_BASE, 0);
971         WREG32(SQ_VSTMP_RING_BASE, 0);
972         WREG32(SQ_PSTMP_RING_BASE, 0);
973
974         WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);
975
976         WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
977                                         POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
978                                         SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));
979
980         WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
981                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
982                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));
983
984
985         WREG32(VGT_NUM_INSTANCES, 1);
986
987         WREG32(CP_PERFMON_CNTL, 0);
988
989         WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
990                                   FETCH_FIFO_HIWATER(0x4) |
991                                   DONE_FIFO_HIWATER(0xe0) |
992                                   ALU_UPDATE_FIFO_HIWATER(0x8)));
993
994         WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
995         WREG32(SQ_CONFIG, (VC_ENABLE |
996                            EXPORT_SRC_C |
997                            GFX_PRIO(0) |
998                            CS1_PRIO(0) |
999                            CS2_PRIO(1)));
1000         WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);
1001
1002         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1003                                           FORCE_EOV_MAX_REZ_CNT(255)));
1004
1005         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1006                AUTO_INVLD_EN(ES_AND_GS_AUTO));
1007
1008         WREG32(VGT_GS_VERTEX_REUSE, 16);
1009         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1010
1011         WREG32(CB_PERF_CTR0_SEL_0, 0);
1012         WREG32(CB_PERF_CTR0_SEL_1, 0);
1013         WREG32(CB_PERF_CTR1_SEL_0, 0);
1014         WREG32(CB_PERF_CTR1_SEL_1, 0);
1015         WREG32(CB_PERF_CTR2_SEL_0, 0);
1016         WREG32(CB_PERF_CTR2_SEL_1, 0);
1017         WREG32(CB_PERF_CTR3_SEL_0, 0);
1018         WREG32(CB_PERF_CTR3_SEL_1, 0);
1019
1020         tmp = RREG32(HDP_MISC_CNTL);
1021         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1022         WREG32(HDP_MISC_CNTL, tmp);
1023
1024         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1025         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1026
1027         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1028
1029         udelay(50);
1030 }
1031
1032 /*
1033  * GART
1034  */
/**
 * cayman_pcie_gart_tlb_flush - flush the PCIE GART TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP write cache to make CPU-written page table entries
 * visible, then request a TLB invalidate.  Note only bit 0 is set in
 * VM_INVALIDATE_REQUEST, i.e. VM context 0 (the GART context) is
 * invalidated; contexts 1-7 are managed separately by the VM code.
 */
1035 void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
1036 {
1037         /* flush hdp cache */
1038         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
1039
1040         /* bits 0-7 are the VM contexts0-7 */
1041         WREG32(VM_INVALIDATE_REQUEST, 1);
1042 }
1043
/**
 * cayman_pcie_gart_enable - bring up the PCIE GART (VM context 0)
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * control registers, points VM context 0 at the GART aperture, gives
 * contexts 1-7 a valid (placeholder) page table base, enables fault
 * reporting on contexts 1-7, and finally flushes the TLBs.
 * The register write order below matters; do not reorder.
 *
 * Returns 0 on success, negative error code on failure
 * (-EINVAL if no GART table object, or the pin error).
 */
1044 static int cayman_pcie_gart_enable(struct radeon_device *rdev)
1045 {
1046         int i, r;
1047
1048         if (rdev->gart.robj == NULL) {
1049                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
1050                 return -EINVAL;
1051         }
1052         r = radeon_gart_table_vram_pin(rdev);
1053         if (r)
1054                 return r;
1055         radeon_gart_restore(rdev);
1056         /* Setup TLB control */
1057         WREG32(MC_VM_MX_L1_TLB_CNTL,
                /* (0xA << 7) is an undocumented field value carried over
                 * from the reference programming sequence — do not change */
1058                (0xA << 7) |
1059                ENABLE_L1_TLB |
1060                ENABLE_L1_FRAGMENT_PROCESSING |
1061                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
1062                ENABLE_ADVANCED_DRIVER_MODEL |
1063                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1064         /* Setup L2 cache */
1065         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
1066                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1067                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1068                EFFECTIVE_L2_QUEUE_SIZE(7) |
1069                CONTEXT1_IDENTITY_ACCESS_MODE(1));
1070         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
1071         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1072                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
1073         /* setup context0 */
1074         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
1075         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
1076         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
                /* faults on context 0 are redirected to the dummy page
                 * rather than raising an interrupt */
1077         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
1078                         (u32)(rdev->dummy_page.addr >> 12));
1079         WREG32(VM_CONTEXT0_CNTL2, 0);
1080         WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
1081                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
1082
                /* NOTE(review): raw offsets with no nid.h names — presumably
                 * VM-related setup from the reference sequence; confirm
                 * against the register spec before touching */
1083         WREG32(0x15D4, 0);
1084         WREG32(0x15D8, 0);
1085         WREG32(0x15DC, 0);
1086
1087         /* empty context1-7 */
1088         /* Assign the pt base to something valid for now; the pts used for
1089          * the VMs are determined by the application and setup and assigned
1090          * on the fly in the vm part of radeon_gart.c
1091          */
1092         for (i = 1; i < 8; i++) {
1093                 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
1094                 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
1095                 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
1096                         rdev->gart.table_addr >> 12);
1097         }
1098
1099         /* enable context1-7 */
1100         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
1101                (u32)(rdev->dummy_page.addr >> 12));
1102         WREG32(VM_CONTEXT1_CNTL2, 4);
                /* contexts 1-7: one-level page tables, all fault classes
                 * both redirected to the dummy page and reported via irq */
1103         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
1104                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
1105                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
1106                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
1107                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
1108                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
1109                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
1110                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
1111                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
1112                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
1113                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
1114                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
1115                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
1116
1117         cayman_pcie_gart_tlb_flush(rdev);
1118         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
1119                  (unsigned)(rdev->mc.gtt_size >> 20),
1120                  (unsigned long long)rdev->gart.table_addr);
1121         rdev->gart.ready = true;
1122         return 0;
1123 }
1124
/**
 * cayman_pcie_gart_disable - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, reprograms the L1 TLB with the L1 TLB
 * enable bit cleared (pass-through for unmapped accesses), restores
 * the L2 cache control registers to their disabled configuration,
 * and unpins the GART table from VRAM.  Mirrors the enable sequence
 * in cayman_pcie_gart_enable().
 */
1125 static void cayman_pcie_gart_disable(struct radeon_device *rdev)
1126 {
1127         /* Disable all tables */
1128         WREG32(VM_CONTEXT0_CNTL, 0);
1129         WREG32(VM_CONTEXT1_CNTL, 0);
1130         /* Setup TLB control */
1131         WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
1132                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
1133                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1134         /* Setup L2 cache */
1135         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1136                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1137                EFFECTIVE_L2_QUEUE_SIZE(7) |
1138                CONTEXT1_IDENTITY_ACCESS_MODE(1));
1139         WREG32(VM_L2_CNTL2, 0);
1140         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1141                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
1142         radeon_gart_table_vram_unpin(rdev);
1143 }
1144
/**
 * cayman_pcie_gart_fini - final GART teardown
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the GART page table BO and
 * releases the GART bookkeeping structures (driver unload path).
 */
1145 static void cayman_pcie_gart_fini(struct radeon_device *rdev)
1146 {
1147         cayman_pcie_gart_disable(rdev);
1148         radeon_gart_table_vram_free(rdev);
1149         radeon_gart_fini(rdev);
1150 }
1151
/**
 * cayman_cp_int_cntl_setup - program CP_INT_CNTL for one CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: CP ring index (0-2); only the low two bits are used
 * @cp_int_cntl: value to write to that ring's CP_INT_CNTL
 *
 * CP_INT_CNTL is banked per ring: the low two bits of SRBM_GFX_CNTL
 * select which ring's instance a subsequent access hits.  Select the
 * requested ring (preserving the other SRBM_GFX_CNTL bits), then write
 * the interrupt control value.
 */
1152 void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
1153                               int ring, u32 cp_int_cntl)
1154 {
1155         u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;
1156
1157         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
1158         WREG32(CP_INT_CNTL, cp_int_cntl);
1159 }
1160
1161 /*
1162  * CP.
1163  */
/**
 * cayman_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Emits a GART-coherency flush followed by an EVENT_WRITE_EOP packet
 * that flushes/invalidates caches, writes the fence sequence number to
 * the fence address and raises an interrupt once the preceding work has
 * drained.  The packet layout is fixed; do not reorder the writes.
 */
1164 void cayman_fence_ring_emit(struct radeon_device *rdev,
1165                             struct radeon_fence *fence)
1166 {
1167         struct radeon_ring *ring = &rdev->ring[fence->ring];
1168         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1169
1170         /* flush read cache over gart for this vmid */
1171         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1172         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1173         radeon_ring_write(ring, 0);
1174         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1175         radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
1176         radeon_ring_write(ring, 0xFFFFFFFF);
1177         radeon_ring_write(ring, 0);
1178         radeon_ring_write(ring, 10); /* poll interval */
1179         /* EVENT_WRITE_EOP - flush caches, send int */
1180         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1181         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
1182         radeon_ring_write(ring, addr & 0xffffffff);
                /* DATA_SEL(1): write the 32-bit seq value; INT_SEL(2):
                 * interrupt after the write completes */
1183         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
1184         radeon_ring_write(ring, fence->seq);
1185         radeon_ring_write(ring, 0);
1186 }
1187
/**
 * cayman_ring_ib_execute - schedule an IB on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB to schedule
 *
 * Forces DX10/11 mode, optionally records the post-IB read pointer into
 * the ring's scratch save register, emits the INDIRECT_BUFFER packet
 * tagged with the IB's VM id, and finishes with a GART-coherency flush
 * for that VM id.  Dword counts are load-bearing (see next_rptr math).
 */
1188 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1189 {
1190         struct radeon_ring *ring = &rdev->ring[ib->ring];
1191
1192         /* set to DX10/11 mode */
1193         radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
1194         radeon_ring_write(ring, 1);
1195
1196         if (ring->rptr_save_reg) {
                        /* 3 = this save packet, 4 = the IB packet below,
                         * 8 = the trailing coherency flush */
1197                 uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
1198                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1199                 radeon_ring_write(ring, ((ring->rptr_save_reg - 
1200                                           PACKET3_SET_CONFIG_REG_START) >> 2));
1201                 radeon_ring_write(ring, next_rptr);
1202         }
1203
1204         radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
1205         radeon_ring_write(ring,
1206 #ifdef __BIG_ENDIAN
1207                           (2 << 0) |
1208 #endif
1209                           (ib->gpu_addr & 0xFFFFFFFC));
1210         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
                /* VM id rides in bits 31:24 of the length dword; 0 = GART */
1211         radeon_ring_write(ring, ib->length_dw | 
1212                           (ib->vm ? (ib->vm->id << 24) : 0));
1213
1214         /* flush read cache over gart for this vmid */
1215         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1216         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1217         radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
1218         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1219         radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
1220         radeon_ring_write(ring, 0xFFFFFFFF);
1221         radeon_ring_write(ring, 0);
1222         radeon_ring_write(ring, 10); /* poll interval */
1223 }
1224
/**
 * cayman_uvd_semaphore_emit - emit a semaphore command on the UVD ring
 *
 * @rdev: radeon_device pointer
 * @ring: UVD ring to emit on
 * @semaphore: semaphore object
 * @emit_wait: true to emit a wait command, false for a signal
 *
 * The semaphore GPU address is split into two 20-bit register fields:
 * bits 22:3 go to UVD_SEMA_ADDR_LOW and bits 42:23 to UVD_SEMA_ADDR_HIGH
 * (the address is assumed 8-byte aligned — low 3 bits dropped).  The
 * command register gets bit 7 set plus bit 0 selecting wait vs. signal.
 */
1225 void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
1226                                struct radeon_ring *ring,
1227                                struct radeon_semaphore *semaphore,
1228                                bool emit_wait)
1229 {
1230         uint64_t addr = semaphore->gpu_addr;
1231
1232         radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
1233         radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
1234
1235         radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
1236         radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
1237
1238         radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
1239         radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
1240 }
1241
/**
 * cayman_cp_enable - enable or halt the command processor
 *
 * @rdev: radeon_device pointer
 * @enable: true to release the CP, false to halt it
 *
 * Enabling clears the halt bits in CP_ME_CNTL.  Disabling shrinks the
 * active VRAM window back to the CPU-visible size (the CP blit path is
 * going away), halts both the micro engine and prefetch parser, masks
 * scratch register writeback, and marks the gfx ring not ready.
 */
1242 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1243 {
1244         if (enable)
1245                 WREG32(CP_ME_CNTL, 0);
1246         else {
1247                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1248                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1249                 WREG32(SCRATCH_UMSK, 0);
1250                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1251         }
1252 }
1253
/**
 * cayman_cp_load_microcode - load the CP PFP and ME microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, then streams the prefetch-parser (PFP) and micro-engine
 * (ME) firmware images word by word into the CP ucode RAMs.  The
 * firmware blobs are stored big-endian, hence be32_to_cpup().  The
 * address registers auto-increment on each data write and are reset to
 * 0 before and after each upload.
 *
 * Returns 0 on success, -EINVAL if either firmware blob is missing
 * (i.e. ni_init_microcode() has not run or failed).
 */
1254 static int cayman_cp_load_microcode(struct radeon_device *rdev)
1255 {
1256         const __be32 *fw_data;
1257         int i;
1258
1259         if (!rdev->me_fw || !rdev->pfp_fw)
1260                 return -EINVAL;
1261
1262         cayman_cp_enable(rdev, false);
1263
1264         fw_data = (const __be32 *)rdev->pfp_fw->data;
1265         WREG32(CP_PFP_UCODE_ADDR, 0);
1266         for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
1267                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1268         WREG32(CP_PFP_UCODE_ADDR, 0);
1269
1270         fw_data = (const __be32 *)rdev->me_fw->data;
1271         WREG32(CP_ME_RAM_WADDR, 0);
1272         for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
1273                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1274
1275         WREG32(CP_PFP_UCODE_ADDR, 0);
1276         WREG32(CP_ME_RAM_WADDR, 0);
1277         WREG32(CP_ME_RAM_RADDR, 0);
1278         return 0;
1279 }
1280
/**
 * cayman_cp_start - initialize the CP micro engine and clear state
 *
 * @rdev: radeon_device pointer
 *
 * Emits the ME_INITIALIZE packet on the gfx ring, releases the CP, then
 * uploads the golden clear-state buffer (cayman_default_state) wrapped
 * in PREAMBLE_CNTL begin/end markers and activates it with CLEAR_STATE.
 * Finishes with a handful of raw PM4 writes setting vertex base and
 * constant defaults.  Only CP ring 0 is initialized here.
 *
 * Returns 0 on success, negative error if a ring lock fails.
 */
1281 static int cayman_cp_start(struct radeon_device *rdev)
1282 {
1283         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1284         int r, i;
1285
1286         r = radeon_ring_lock(rdev, ring, 7);
1287         if (r) {
1288                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1289                 return r;
1290         }
1291         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
1292         radeon_ring_write(ring, 0x1);
1293         radeon_ring_write(ring, 0x0);
1294         radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
1295         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
1296         radeon_ring_write(ring, 0);
1297         radeon_ring_write(ring, 0);
1298         radeon_ring_unlock_commit(rdev, ring);
1299
1300         cayman_cp_enable(rdev, true);
1301
                /* +19 covers the fixed packets surrounding the state dump */
1302         r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
1303         if (r) {
1304                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1305                 return r;
1306         }
1307
1308         /* setup clear context state */
1309         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1310         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1311
1312         for (i = 0; i < cayman_default_size; i++)
1313                 radeon_ring_write(ring, cayman_default_state[i]);
1314
1315         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1316         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1317
1318         /* set clear context state */
1319         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1320         radeon_ring_write(ring, 0);
1321
1322         /* SQ_VTX_BASE_VTX_LOC */
1323         radeon_ring_write(ring, 0xc0026f00);
1324         radeon_ring_write(ring, 0x00000000);
1325         radeon_ring_write(ring, 0x00000000);
1326         radeon_ring_write(ring, 0x00000000);
1327
1328         /* Clear consts */
1329         radeon_ring_write(ring, 0xc0036f00);
1330         radeon_ring_write(ring, 0x00000bc4);
1331         radeon_ring_write(ring, 0xffffffff);
1332         radeon_ring_write(ring, 0xffffffff);
1333         radeon_ring_write(ring, 0xffffffff);
1334
1335         radeon_ring_write(ring, 0xc0026900);
1336         radeon_ring_write(ring, 0x00000316);
1337         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1338         radeon_ring_write(ring, 0x00000010); /*  */
1339
1340         radeon_ring_unlock_commit(rdev, ring);
1341
1342         /* XXX init other rings */
1343
1344         return 0;
1345 }
1346
/**
 * cayman_cp_fini - tear down the command processor
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, frees the gfx ring buffer and releases the scratch
 * register used for rptr writeback.
 */
1347 static void cayman_cp_fini(struct radeon_device *rdev)
1348 {
1349         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1350         cayman_cp_enable(rdev, false);
1351         radeon_ring_fini(rdev, ring);
1352         radeon_scratch_free(rdev, ring->rptr_save_reg);
1353 }
1354
/**
 * cayman_cp_resume - bring up all three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP block (plus the PA/SH/VGT/SPI/SX units that must
 * be reset alongside it), then for each of the three CP rings programs
 * ring size, rptr writeback address, and base address, zeroes the
 * read/write pointers, and finally starts and ring-tests CP ring 0.
 * Rings 1 and 2 are left not-ready here.  The per-stage loops below are
 * intentionally separate — the base address write resets all rings, so
 * it must not be interleaved with pointer initialization.
 *
 * Returns 0 on success, negative error if the gfx ring test fails.
 */
1355 static int cayman_cp_resume(struct radeon_device *rdev)
1356 {
                /* per-ring register banks, indexed in lockstep with ridx[] */
1357         static const int ridx[] = {
1358                 RADEON_RING_TYPE_GFX_INDEX,
1359                 CAYMAN_RING_TYPE_CP1_INDEX,
1360                 CAYMAN_RING_TYPE_CP2_INDEX
1361         };
1362         static const unsigned cp_rb_cntl[] = {
1363                 CP_RB0_CNTL,
1364                 CP_RB1_CNTL,
1365                 CP_RB2_CNTL,
1366         };
1367         static const unsigned cp_rb_rptr_addr[] = {
1368                 CP_RB0_RPTR_ADDR,
1369                 CP_RB1_RPTR_ADDR,
1370                 CP_RB2_RPTR_ADDR
1371         };
1372         static const unsigned cp_rb_rptr_addr_hi[] = {
1373                 CP_RB0_RPTR_ADDR_HI,
1374                 CP_RB1_RPTR_ADDR_HI,
1375                 CP_RB2_RPTR_ADDR_HI
1376         };
1377         static const unsigned cp_rb_base[] = {
1378                 CP_RB0_BASE,
1379                 CP_RB1_BASE,
1380                 CP_RB2_BASE
1381         };
1382         struct radeon_ring *ring;
1383         int i, r;
1384
1385         /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
1386         WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
1387                                  SOFT_RESET_PA |
1388                                  SOFT_RESET_SH |
1389                                  SOFT_RESET_VGT |
1390                                  SOFT_RESET_SPI |
1391                                  SOFT_RESET_SX));
1392         RREG32(GRBM_SOFT_RESET);
1393         mdelay(15);
1394         WREG32(GRBM_SOFT_RESET, 0);
1395         RREG32(GRBM_SOFT_RESET);
1396
1397         WREG32(CP_SEM_WAIT_TIMER, 0x0);
1398         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1399
1400         /* Set the write pointer delay */
1401         WREG32(CP_RB_WPTR_DELAY, 0);
1402
1403         WREG32(CP_DEBUG, (1 << 27));
1404
1405         /* set the wb address whether it's enabled or not */
1406         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1407         WREG32(SCRATCH_UMSK, 0xff);
1408
1409         for (i = 0; i < 3; ++i) {
1410                 uint32_t rb_cntl;
1411                 uint64_t addr;
1412
1413                 /* Set ring buffer size */
1414                 ring = &rdev->ring[ridx[i]];
1415                 rb_cntl = drm_order(ring->ring_size / 8);
1416                 rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
1417 #ifdef __BIG_ENDIAN
1418                 rb_cntl |= BUF_SWAP_32BIT;
1419 #endif
1420                 WREG32(cp_rb_cntl[i], rb_cntl);
1421
1422                 /* set the wb address whether it's enabled or not */
1423                 addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
1424                 WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
1425                 WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
1426         }
1427
1428         /* set the rb base addr, this causes an internal reset of ALL rings */
1429         for (i = 0; i < 3; ++i) {
1430                 ring = &rdev->ring[ridx[i]];
1431                 WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
1432         }
1433
1434         for (i = 0; i < 3; ++i) {
1435                 /* Initialize the ring buffer's read and write pointers */
1436                 ring = &rdev->ring[ridx[i]];
                        /* RB_RPTR_WR_ENA lets us force-write the rptr below */
1437                 WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);
1438
1439                 ring->rptr = ring->wptr = 0;
1440                 WREG32(ring->rptr_reg, ring->rptr);
1441                 WREG32(ring->wptr_reg, ring->wptr);
1442
1443                 mdelay(1);
1444                 WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
1445         }
1446
1447         /* start the rings */
1448         cayman_cp_start(rdev);
1449         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
1450         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
1451         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
1452         /* this only test cp0 */
1453         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1454         if (r) {
1455                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1456                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
1457                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
1458                 return r;
1459         }
1460
1461         return 0;
1462 }
1463
1464 /*
1465  * DMA
1466  * Starting with R600, the GPU has an asynchronous
1467  * DMA engine.  The programming model is very similar
1468  * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
1471  * It supports copying data, writing embedded data,
1472  * solid fills, and a number of other things.  It also
1473  * has support for tiling/detiling of buffers.
1474  * Cayman and newer support two asynchronous DMA engines.
1475  */
1476 /**
1477  * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1478  *
1479  * @rdev: radeon_device pointer
1480  * @ib: IB object to schedule
1481  *
1482  * Schedule an IB in the DMA ring (cayman-SI).
1483  */
1484 void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
1485                                 struct radeon_ib *ib)
1486 {
1487         struct radeon_ring *ring = &rdev->ring[ib->ring];
1488
1489         if (rdev->wb.enabled) {
1490                 u32 next_rptr = ring->wptr + 4;
1491                 while ((next_rptr & 7) != 5)
1492                         next_rptr++;
1493                 next_rptr += 3;
1494                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
1495                 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1496                 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
1497                 radeon_ring_write(ring, next_rptr);
1498         }
1499
1500         /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
1501          * Pad as necessary with NOPs.
1502          */
1503         while ((ring->wptr & 7) != 5)
1504                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1505         radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
1506         radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
1507         radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
1508
1509 }
1510
1511 /**
1512  * cayman_dma_stop - stop the async dma engines
1513  *
1514  * @rdev: radeon_device pointer
1515  *
1516  * Stop the async dma engines (cayman-SI).
1517  */
1518 void cayman_dma_stop(struct radeon_device *rdev)
1519 {
1520         u32 rb_cntl;
1521
1522         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1523
1524         /* dma0 */
1525         rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1526         rb_cntl &= ~DMA_RB_ENABLE;
1527         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1528
1529         /* dma1 */
1530         rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1531         rb_cntl &= ~DMA_RB_ENABLE;
1532         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1533
1534         rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1535         rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1536 }
1537
/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them. (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	/* Reset dma: pulse both soft-reset bits, then release them */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
	RREG32(SRBM_SOFT_RESET);	/* read back to post the write */
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);

	/* i == 0 programs DMA engine 0, i == 1 programs DMA engine 1 */
	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		/* zero the semaphore timeout controls */
		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords (log2 value in the size field) */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		/* mask the context-empty interrupt */
		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;

		/* everything is programmed; turn the ring on last */
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
1628
/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
	/* halt both engines before releasing their ring buffers */
	cayman_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}
1642
/**
 * cayman_gpu_check_soft_reset - inspect hw status and build a reset mask
 *
 * @rdev: radeon_device pointer
 *
 * Read the GRBM, DMA, SRBM and VM L2 status registers and translate any
 * busy/pending bits into a mask of RADEON_RESET_* flags describing which
 * blocks need a soft reset.  Returns 0 when everything looks idle.
 */
static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1720
/**
 * cayman_gpu_soft_reset - soft reset the blocks flagged in reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags, typically from
 * cayman_gpu_check_soft_reset()
 *
 * Halt the CP and any flagged DMA engine, stop the MC, translate the
 * flags into GRBM/SRBM soft reset bits and pulse them, then restore
 * the MC.  No-op when @reset_mask is 0.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status and the VM protection fault registers for debugging */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* flags into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* IGPs have no MC to reset */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: set, read back, wait, clear */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
1852
1853 int cayman_asic_reset(struct radeon_device *rdev)
1854 {
1855         u32 reset_mask;
1856
1857         reset_mask = cayman_gpu_check_soft_reset(rdev);
1858
1859         if (reset_mask)
1860                 r600_set_bios_scratch_engine_hung(rdev, true);
1861
1862         cayman_gpu_soft_reset(rdev, reset_mask);
1863
1864         reset_mask = cayman_gpu_check_soft_reset(rdev);
1865
1866         if (!reset_mask)
1867                 r600_set_bios_scratch_engine_hung(rdev, false);
1868
1869         return 0;
1870 }
1871
1872 /**
1873  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
1874  *
1875  * @rdev: radeon_device pointer
1876  * @ring: radeon_ring structure holding ring information
1877  *
1878  * Check if the GFX engine is locked up.
1879  * Returns true if the engine appears to be locked up, false if not.
1880  */
1881 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1882 {
1883         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1884
1885         if (!(reset_mask & (RADEON_RESET_GFX |
1886                             RADEON_RESET_COMPUTE |
1887                             RADEON_RESET_CP))) {
1888                 radeon_ring_lockup_update(ring);
1889                 return false;
1890         }
1891         /* force CP activities */
1892         radeon_ring_force_activity(rdev, ring);
1893         return radeon_ring_test_lockup(rdev, ring);
1894 }
1895
1896 /**
1897  * cayman_dma_is_lockup - Check if the DMA engine is locked up
1898  *
1899  * @rdev: radeon_device pointer
1900  * @ring: radeon_ring structure holding ring information
1901  *
1902  * Check if the async DMA engine is locked up.
1903  * Returns true if the engine appears to be locked up, false if not.
1904  */
1905 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1906 {
1907         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1908         u32 mask;
1909
1910         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
1911                 mask = RADEON_RESET_DMA;
1912         else
1913                 mask = RADEON_RESET_DMA1;
1914
1915         if (!(reset_mask & mask)) {
1916                 radeon_ring_lockup_update(ring);
1917                 return false;
1918         }
1919         /* force ring activities */
1920         radeon_ring_force_activity(rdev, ring);
1921         return radeon_ring_test_lockup(rdev, ring);
1922 }
1923
/**
 * cayman_startup - bring up the asic (cayman/TN)
 *
 * @rdev: radeon_device pointer
 *
 * Load microcode, program the MC and GART, start the CP, DMA and UVD
 * rings, and initialize IRQs, writeback, fences, the IB pool, the VM
 * manager and audio.  Called from cayman_init() and cayman_resume().
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);

	/* IGPs (TN) have no MC firmware; discrete parts need mc_fw too */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ni_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	evergreen_mc_program(rdev);
	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	/* blit failure is not fatal; fall back to CPU copies */
	r = evergreen_blit_init(rdev);
	if (r) {
		r600_blit_fini(rdev);
		rdev->asic->copy.copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		r = si_rlc_init(rdev);
		if (r) {
			DRM_ERROR("Failed to init rlc BOs!\n");
			return r;
		}
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* UVD failure is not fatal; zero ring_size disables it below */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	/* bring up the GFX, DMA0, DMA1 rings in turn */
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD was disabled above */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = r600_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
2105
2106 int cayman_resume(struct radeon_device *rdev)
2107 {
2108         int r;
2109
2110         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2111          * posting will perform necessary task to bring back GPU into good
2112          * shape.
2113          */
2114         /* post card */
2115         atom_asic_init(rdev->mode_info.atom_context);
2116
2117         /* init golden registers */
2118         ni_init_golden_registers(rdev);
2119
2120         rdev->accel_working = true;
2121         r = cayman_startup(rdev);
2122         if (r) {
2123                 DRM_ERROR("cayman startup failed on resume\n");
2124                 rdev->accel_working = false;
2125                 return r;
2126         }
2127         return r;
2128 }
2129
/**
 * cayman_suspend - asic suspend callback
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the chip: stop audio, the VM manager, the CP, the async DMA
 * engines and UVD, then disable interrupts, writeback and the GART.
 * Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
	r600_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cayman_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	r600_uvd_rbc_stop(rdev);
	radeon_uvd_suspend(rdev);
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);
	return 0;
}
2143
2144 /* Plan is to move initialization in that function and use
2145  * helper function so that radeon_device_init pretty much
2146  * do nothing more than calling asic specific function. This
2147  * should also allow to remove a bunch of callback function
2148  * like vram_info.
2149  */
/**
 * cayman_init - asic init callback (cayman/TN)
 *
 * @rdev: radeon_device pointer
 *
 * Read and validate the BIOS, post the card if needed, set up clocks,
 * fences, the memory controller, the rings and GART, then start the
 * asic via cayman_startup().  A startup failure disables acceleration
 * but is not fatal here.  Returns 0 on success, negative error code on
 * unrecoverable failure.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* set up the ring sizes; buffers are allocated in cayman_startup() */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		/* startup failure: unwind and continue without acceleration */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			si_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2255
/**
 * cayman_fini - asic teardown callback (cayman/TN)
 *
 * @rdev: radeon_device pointer
 *
 * Tear down everything set up by cayman_init(): blitter, rings, IRQs,
 * RLC buffers (IGP only), writeback, VM manager, IB pool, UVD, GART,
 * VRAM scratch, GEM, fences, BO manager and the ATOM BIOS context,
 * then free the cached BIOS image.
 */
void cayman_fini(struct radeon_device *rdev)
{
	r600_blit_fini(rdev);
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	if (rdev->flags & RADEON_IS_IGP)
		si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
2278
2279 /*
2280  * vm
2281  */
2282 int cayman_vm_init(struct radeon_device *rdev)
2283 {
2284         /* number of VMs */
2285         rdev->vm_manager.nvm = 8;
2286         /* base offset of vram pages */
2287         if (rdev->flags & RADEON_IS_IGP) {
2288                 u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2289                 tmp <<= 22;
2290                 rdev->vm_manager.vram_base_offset = tmp;
2291         } else
2292                 rdev->vm_manager.vram_base_offset = 0;
2293         return 0;
2294 }
2295
/**
 * cayman_vm_fini - per-asic VM teardown callback
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: cayman keeps no per-asic VM state to release.
 */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2299
2300 #define R600_ENTRY_VALID   (1 << 0)
2301 #define R600_PTE_SYSTEM    (1 << 1)
2302 #define R600_PTE_SNOOPED   (1 << 2)
2303 #define R600_PTE_READABLE  (1 << 5)
2304 #define R600_PTE_WRITEABLE (1 << 6)
2305
2306 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2307 {
2308         uint32_t r600_flags = 0;
2309         r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2310         r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2311         r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2312         if (flags & RADEON_VM_PAGE_SYSTEM) {
2313                 r600_flags |= R600_PTE_SYSTEM;
2314                 r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2315         }
2316         return r600_flags;
2317 }
2318
/**
 * cayman_vm_set_page - update the page tables using the CP or DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the CP (cayman/TN).  Which engine is
 * used depends on rdev->asic->vm.pt_ring_index: the GFX ring takes
 * ME_WRITE packets, otherwise DMA write / PTE-PDE packets are built.
 * Each 64-bit PTE is emitted as two dwords (low, then high).
 */
void cayman_vm_set_page(struct radeon_device *rdev,
                        struct radeon_ib *ib,
                        uint64_t pe,
                        uint64_t addr, unsigned count,
                        uint32_t incr, uint32_t flags)
{
        uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
        uint64_t value;
        unsigned ndw;

        if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
                /* CP path: one ME_WRITE header + 2 dwords per entry */
                while (count) {
                        ndw = 1 + count * 2;
                        /* cap a single packet's dword count at 14 bits */
                        if (ndw > 0x3FFF)
                                ndw = 0x3FFF;

                        ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
                        ib->ptr[ib->length_dw++] = pe;
                        /* only 8 upper bits of the PE address are used */
                        ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                        for (; ndw > 1; ndw -= 2, --count, pe += 8) {
                                if (flags & RADEON_VM_PAGE_SYSTEM) {
                                        /* system page: look up the backing
                                         * address through the GART, keep the
                                         * 4K-aligned part only */
                                        value = radeon_vm_map_gart(rdev, addr);
                                        value &= 0xFFFFFFFFFFFFF000ULL;
                                } else if (flags & RADEON_VM_PAGE_VALID) {
                                        value = addr;
                                } else {
                                        value = 0;
                                }
                                addr += incr;
                                value |= r600_flags;
                                ib->ptr[ib->length_dw++] = value;
                                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                        }
                }
        } else {
                /* DMA path */
                if ((flags & RADEON_VM_PAGE_SYSTEM) ||
                    (count == 1)) {
                        while (count) {
                                ndw = count * 2;
                                /* cap a single packet's dword payload */
                                if (ndw > 0xFFFFE)
                                        ndw = 0xFFFFE;

                                /* for non-physically contiguous pages (system) */
                                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
                                ib->ptr[ib->length_dw++] = pe;
                                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                                for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                                        if (flags & RADEON_VM_PAGE_SYSTEM) {
                                                value = radeon_vm_map_gart(rdev, addr);
                                                value &= 0xFFFFFFFFFFFFF000ULL;
                                        } else if (flags & RADEON_VM_PAGE_VALID) {
                                                value = addr;
                                        } else {
                                                value = 0;
                                        }
                                        addr += incr;
                                        value |= r600_flags;
                                        ib->ptr[ib->length_dw++] = value;
                                        ib->ptr[ib->length_dw++] = upper_32_bits(value);
                                }
                        }
                        /* pad the IB to an 8-dword boundary with NOPs */
                        while (ib->length_dw & 0x7)
                                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
                } else {
                        while (count) {
                                ndw = count * 2;
                                if (ndw > 0xFFFFE)
                                        ndw = 0xFFFFE;

                                if (flags & RADEON_VM_PAGE_VALID)
                                        value = addr;
                                else
                                        value = 0;
                                /* for physically contiguous pages (vram) */
                                ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
                                ib->ptr[ib->length_dw++] = pe; /* dst addr */
                                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                                ib->ptr[ib->length_dw++] = r600_flags; /* mask */
                                ib->ptr[ib->length_dw++] = 0;
                                ib->ptr[ib->length_dw++] = value; /* value */
                                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                                ib->ptr[ib->length_dw++] = incr; /* increment size */
                                ib->ptr[ib->length_dw++] = 0;
                                /* hardware generates ndw/2 entries itself;
                                 * advance our cursors past them */
                                pe += ndw * 4;
                                addr += (ndw / 2) * incr;
                                count -= ndw / 2;
                        }
                }
                /* pad the IB to an 8-dword boundary with NOPs (harmless
                 * if the branch above already padded) */
                while (ib->length_dw & 0x7)
                        ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
        }
}
2424
/**
 * cayman_vm_flush - vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: VM whose page table base should be programmed; NULL is a no-op
 *
 * Update the page table base and flush the VM TLB
 * using the CP (cayman-si).
 */
void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        /* point this VM context at the new page directory */
        radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
        radeon_ring_write(ring, 0x1);

        /* bits 0-7 are the VM contexts0-7 */
        radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
        radeon_ring_write(ring, 1 << vm->id);

        /* sync PFP to ME, otherwise we might get invalid PFP reads */
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
}
2455
/**
 * cayman_dma_vm_flush - vm flush using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to emit the flush on
 * @vm: VM whose page table base should be programmed; NULL is a no-op
 *
 * Same register sequence as cayman_vm_flush(), but emitted as
 * SRBM register-write packets on a DMA ring.
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        /* point this VM context at the new page directory */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
        radeon_ring_write(ring, 1);

        /* bits 0-7 are the VM contexts0-7 */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 1 << vm->id);
}
2477