/*
 * NOTE(extraction): the three lines below were gitweb page residue, kept as a
 * comment for provenance — mirror rtime.felk.cvut.cz, repo linux-imx.git,
 * file drivers/gpu/drm/radeon/cik.c, at commit
 * "drm/radeon/cik: log and handle VM page fault interrupts".
 */
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include "drmP.h"
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34
35 /* GFX */
36 #define CIK_PFP_UCODE_SIZE 2144
37 #define CIK_ME_UCODE_SIZE 2144
38 #define CIK_CE_UCODE_SIZE 2144
39 /* compute */
40 #define CIK_MEC_UCODE_SIZE 4192
41 /* interrupts */
42 #define BONAIRE_RLC_UCODE_SIZE 2048
43 #define KB_RLC_UCODE_SIZE 2560
44 #define KV_RLC_UCODE_SIZE 2560
45 /* gddr controller */
46 #define CIK_MC_UCODE_SIZE 7866
47
48 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
59 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
60 MODULE_FIRMWARE("radeon/KABINI_me.bin");
61 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
62 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
63 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
64
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
70
71 #define BONAIRE_IO_MC_REGS_SIZE 36
72
/* MC I/O {register index, value} pairs for Bonaire.  Consumed by
 * ci_mc_load_microcode(), which writes each pair to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA before uploading the MC
 * ucode.  The values are hardware-specific magic numbers; presumably
 * supplied by the hardware vendor — do not hand-edit.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
        {0x00000070, 0x04400000},
        {0x00000071, 0x80c01803},
        {0x00000072, 0x00004004},
        {0x00000073, 0x00000100},
        {0x00000074, 0x00ff0000},
        {0x00000075, 0x34000000},
        {0x00000076, 0x08000014},
        {0x00000077, 0x00cc08ec},
        {0x00000078, 0x00000400},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x04090000},
        {0x0000007c, 0x00000000},
        {0x0000007e, 0x4408a8e8},
        {0x0000007f, 0x00000304},
        {0x00000080, 0x00000000},
        {0x00000082, 0x00000001},
        {0x00000083, 0x00000002},
        {0x00000084, 0xf3e4f400},
        {0x00000085, 0x052024e3},
        {0x00000087, 0x00000000},
        {0x00000088, 0x01000000},
        {0x0000008a, 0x1c0a0000},
        {0x0000008b, 0xff010000},
        {0x0000008d, 0xffffefff},
        {0x0000008e, 0xfff3efff},
        {0x0000008f, 0xfff3efbf},
        {0x00000092, 0xf7ffffff},
        {0x00000093, 0xffffff7f},
        {0x00000095, 0x00101101},
        {0x00000096, 0x00000fff},
        {0x00000097, 0x00116fff},
        {0x00000098, 0x60010000},
        {0x00000099, 0x10010000},
        {0x0000009a, 0x00006000},
        {0x0000009b, 0x00001000},
        {0x0000009f, 0x00b48000}
};
112
113 /* ucode loading */
114 /**
115  * ci_mc_load_microcode - load MC ucode into the hw
116  *
117  * @rdev: radeon_device pointer
118  *
119  * Load the GDDR MC ucode into the hw (CIK).
120  * Returns 0 on success, error on failure.
121  */
122 static int ci_mc_load_microcode(struct radeon_device *rdev)
123 {
124         const __be32 *fw_data;
125         u32 running, blackout = 0;
126         u32 *io_mc_regs;
127         int i, ucode_size, regs_size;
128
129         if (!rdev->mc_fw)
130                 return -EINVAL;
131
132         switch (rdev->family) {
133         case CHIP_BONAIRE:
134         default:
135                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
136                 ucode_size = CIK_MC_UCODE_SIZE;
137                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
138                 break;
139         }
140
141         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
142
143         if (running == 0) {
144                 if (running) {
145                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
146                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
147                 }
148
149                 /* reset the engine and set to writable */
150                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
151                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
152
153                 /* load mc io regs */
154                 for (i = 0; i < regs_size; i++) {
155                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
156                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
157                 }
158                 /* load the MC ucode */
159                 fw_data = (const __be32 *)rdev->mc_fw->data;
160                 for (i = 0; i < ucode_size; i++)
161                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
162
163                 /* put the engine back into the active state */
164                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
165                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
166                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
167
168                 /* wait for training to complete */
169                 for (i = 0; i < rdev->usec_timeout; i++) {
170                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
171                                 break;
172                         udelay(1);
173                 }
174                 for (i = 0; i < rdev->usec_timeout; i++) {
175                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
176                                 break;
177                         udelay(1);
178                 }
179
180                 if (running)
181                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
182         }
183
184         return 0;
185 }
186
187 /**
188  * cik_init_microcode - load ucode images from disk
189  *
190  * @rdev: radeon_device pointer
191  *
192  * Use the firmware interface to load the ucode images into
193  * the driver (not loaded into hw).
194  * Returns 0 on success, error on failure.
195  */
196 static int cik_init_microcode(struct radeon_device *rdev)
197 {
198         struct platform_device *pdev;
199         const char *chip_name;
200         size_t pfp_req_size, me_req_size, ce_req_size,
201                 mec_req_size, rlc_req_size, mc_req_size;
202         char fw_name[30];
203         int err;
204
205         DRM_DEBUG("\n");
206
207         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
208         err = IS_ERR(pdev);
209         if (err) {
210                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
211                 return -EINVAL;
212         }
213
214         switch (rdev->family) {
215         case CHIP_BONAIRE:
216                 chip_name = "BONAIRE";
217                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
218                 me_req_size = CIK_ME_UCODE_SIZE * 4;
219                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
220                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
221                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
222                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
223                 break;
224         case CHIP_KAVERI:
225                 chip_name = "KAVERI";
226                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
227                 me_req_size = CIK_ME_UCODE_SIZE * 4;
228                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
229                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
230                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
231                 break;
232         case CHIP_KABINI:
233                 chip_name = "KABINI";
234                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
235                 me_req_size = CIK_ME_UCODE_SIZE * 4;
236                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
237                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
238                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
239                 break;
240         default: BUG();
241         }
242
243         DRM_INFO("Loading %s Microcode\n", chip_name);
244
245         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
246         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
247         if (err)
248                 goto out;
249         if (rdev->pfp_fw->size != pfp_req_size) {
250                 printk(KERN_ERR
251                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
252                        rdev->pfp_fw->size, fw_name);
253                 err = -EINVAL;
254                 goto out;
255         }
256
257         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
258         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
259         if (err)
260                 goto out;
261         if (rdev->me_fw->size != me_req_size) {
262                 printk(KERN_ERR
263                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
264                        rdev->me_fw->size, fw_name);
265                 err = -EINVAL;
266         }
267
268         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
269         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
270         if (err)
271                 goto out;
272         if (rdev->ce_fw->size != ce_req_size) {
273                 printk(KERN_ERR
274                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
275                        rdev->ce_fw->size, fw_name);
276                 err = -EINVAL;
277         }
278
279         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
280         err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
281         if (err)
282                 goto out;
283         if (rdev->mec_fw->size != mec_req_size) {
284                 printk(KERN_ERR
285                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
286                        rdev->mec_fw->size, fw_name);
287                 err = -EINVAL;
288         }
289
290         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
291         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
292         if (err)
293                 goto out;
294         if (rdev->rlc_fw->size != rlc_req_size) {
295                 printk(KERN_ERR
296                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
297                        rdev->rlc_fw->size, fw_name);
298                 err = -EINVAL;
299         }
300
301         /* No MC ucode on APUs */
302         if (!(rdev->flags & RADEON_IS_IGP)) {
303                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
304                 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
305                 if (err)
306                         goto out;
307                 if (rdev->mc_fw->size != mc_req_size) {
308                         printk(KERN_ERR
309                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
310                                rdev->mc_fw->size, fw_name);
311                         err = -EINVAL;
312                 }
313         }
314
315 out:
316         platform_device_unregister(pdev);
317
318         if (err) {
319                 if (err != -EINVAL)
320                         printk(KERN_ERR
321                                "cik_cp: Failed to load firmware \"%s\"\n",
322                                fw_name);
323                 release_firmware(rdev->pfp_fw);
324                 rdev->pfp_fw = NULL;
325                 release_firmware(rdev->me_fw);
326                 rdev->me_fw = NULL;
327                 release_firmware(rdev->ce_fw);
328                 rdev->ce_fw = NULL;
329                 release_firmware(rdev->rlc_fw);
330                 rdev->rlc_fw = NULL;
331                 release_firmware(rdev->mc_fw);
332                 rdev->mc_fw = NULL;
333         }
334         return err;
335 }
336
337 /*
338  * Core functions
339  */
340 /**
341  * cik_tiling_mode_table_init - init the hw tiling table
342  *
343  * @rdev: radeon_device pointer
344  *
345  * Starting with SI, the tiling setup is done globally in a
346  * set of 32 tiling modes.  Rather than selecting each set of
347  * parameters per surface as on older asics, we just select
348  * which index in the tiling table we want to use, and the
349  * surface uses those parameters (CIK).
350  */
351 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
352 {
353         const u32 num_tile_mode_states = 32;
354         const u32 num_secondary_tile_mode_states = 16;
355         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
356         u32 num_pipe_configs;
357         u32 num_rbs = rdev->config.cik.max_backends_per_se *
358                 rdev->config.cik.max_shader_engines;
359
360         switch (rdev->config.cik.mem_row_size_in_kb) {
361         case 1:
362                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
363                 break;
364         case 2:
365         default:
366                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
367                 break;
368         case 4:
369                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
370                 break;
371         }
372
373         num_pipe_configs = rdev->config.cik.max_tile_pipes;
374         if (num_pipe_configs > 8)
375                 num_pipe_configs = 8; /* ??? */
376
377         if (num_pipe_configs == 8) {
378                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
379                         switch (reg_offset) {
380                         case 0:
381                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
382                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
383                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
384                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
385                                 break;
386                         case 1:
387                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
388                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
389                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
390                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
391                                 break;
392                         case 2:
393                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
394                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
395                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
396                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
397                                 break;
398                         case 3:
399                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
400                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
401                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
402                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
403                                 break;
404                         case 4:
405                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
406                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
407                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
408                                                  TILE_SPLIT(split_equal_to_row_size));
409                                 break;
410                         case 5:
411                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
412                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
413                                 break;
414                         case 6:
415                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
416                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
417                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
418                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
419                                 break;
420                         case 7:
421                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
422                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
423                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
424                                                  TILE_SPLIT(split_equal_to_row_size));
425                                 break;
426                         case 8:
427                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
428                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
429                                 break;
430                         case 9:
431                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
432                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
433                                 break;
434                         case 10:
435                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
436                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
437                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
438                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
439                                 break;
440                         case 11:
441                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
442                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
443                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
444                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
445                                 break;
446                         case 12:
447                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
448                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
449                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
450                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
451                                 break;
452                         case 13:
453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
454                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
455                                 break;
456                         case 14:
457                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
458                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
459                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
460                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
461                                 break;
462                         case 16:
463                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
464                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
465                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
466                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
467                                 break;
468                         case 17:
469                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
470                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
471                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
472                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
473                                 break;
474                         case 27:
475                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
476                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
477                                 break;
478                         case 28:
479                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
480                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
481                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
482                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
483                                 break;
484                         case 29:
485                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
486                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
487                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
488                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
489                                 break;
490                         case 30:
491                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
492                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
493                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
494                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
495                                 break;
496                         default:
497                                 gb_tile_moden = 0;
498                                 break;
499                         }
500                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
501                 }
502                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
503                         switch (reg_offset) {
504                         case 0:
505                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
506                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
507                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
508                                                  NUM_BANKS(ADDR_SURF_16_BANK));
509                                 break;
510                         case 1:
511                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
512                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
513                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
514                                                  NUM_BANKS(ADDR_SURF_16_BANK));
515                                 break;
516                         case 2:
517                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
518                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
519                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
520                                                  NUM_BANKS(ADDR_SURF_16_BANK));
521                                 break;
522                         case 3:
523                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
524                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
525                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
526                                                  NUM_BANKS(ADDR_SURF_16_BANK));
527                                 break;
528                         case 4:
529                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
530                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
531                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
532                                                  NUM_BANKS(ADDR_SURF_8_BANK));
533                                 break;
534                         case 5:
535                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
536                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
537                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
538                                                  NUM_BANKS(ADDR_SURF_4_BANK));
539                                 break;
540                         case 6:
541                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
542                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
543                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
544                                                  NUM_BANKS(ADDR_SURF_2_BANK));
545                                 break;
546                         case 8:
547                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
548                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
549                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
550                                                  NUM_BANKS(ADDR_SURF_16_BANK));
551                                 break;
552                         case 9:
553                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
554                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
555                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
556                                                  NUM_BANKS(ADDR_SURF_16_BANK));
557                                 break;
558                         case 10:
559                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
560                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
561                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
562                                                  NUM_BANKS(ADDR_SURF_16_BANK));
563                                 break;
564                         case 11:
565                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
566                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
567                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
568                                                  NUM_BANKS(ADDR_SURF_16_BANK));
569                                 break;
570                         case 12:
571                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
572                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
573                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
574                                                  NUM_BANKS(ADDR_SURF_8_BANK));
575                                 break;
576                         case 13:
577                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
578                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
579                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
580                                                  NUM_BANKS(ADDR_SURF_4_BANK));
581                                 break;
582                         case 14:
583                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
584                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
585                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
586                                                  NUM_BANKS(ADDR_SURF_2_BANK));
587                                 break;
588                         default:
589                                 gb_tile_moden = 0;
590                                 break;
591                         }
592                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
593                 }
594         } else if (num_pipe_configs == 4) {
595                 if (num_rbs == 4) {
596                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
597                                 switch (reg_offset) {
598                                 case 0:
599                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
600                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
601                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
602                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
603                                         break;
604                                 case 1:
605                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
606                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
607                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
608                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
609                                         break;
610                                 case 2:
611                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
612                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
613                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
614                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
615                                         break;
616                                 case 3:
617                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
618                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
619                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
620                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
621                                         break;
622                                 case 4:
623                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
624                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
625                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
626                                                          TILE_SPLIT(split_equal_to_row_size));
627                                         break;
628                                 case 5:
629                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
630                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
631                                         break;
632                                 case 6:
633                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
634                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
635                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
636                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
637                                         break;
638                                 case 7:
639                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
640                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
641                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
642                                                          TILE_SPLIT(split_equal_to_row_size));
643                                         break;
644                                 case 8:
645                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
646                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
647                                         break;
648                                 case 9:
649                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
650                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
651                                         break;
652                                 case 10:
653                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
654                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
655                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
656                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
657                                         break;
658                                 case 11:
659                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
660                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
661                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
662                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
663                                         break;
664                                 case 12:
665                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
666                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
667                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
668                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
669                                         break;
670                                 case 13:
671                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
672                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
673                                         break;
674                                 case 14:
675                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
676                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
677                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
678                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
679                                         break;
680                                 case 16:
681                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
682                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
683                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
684                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
685                                         break;
686                                 case 17:
687                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
688                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
689                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
690                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
691                                         break;
692                                 case 27:
693                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
694                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
695                                         break;
696                                 case 28:
697                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
698                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
699                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
700                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
701                                         break;
702                                 case 29:
703                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
704                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
705                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
706                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
707                                         break;
708                                 case 30:
709                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
710                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
711                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
712                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
713                                         break;
714                                 default:
715                                         gb_tile_moden = 0;
716                                         break;
717                                 }
718                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
719                         }
720                 } else if (num_rbs < 4) {
721                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
722                                 switch (reg_offset) {
723                                 case 0:
724                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
725                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
726                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
727                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
728                                         break;
729                                 case 1:
730                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
731                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
732                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
733                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
734                                         break;
735                                 case 2:
736                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
737                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
738                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
739                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
740                                         break;
741                                 case 3:
742                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
743                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
744                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
745                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
746                                         break;
747                                 case 4:
748                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
749                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
750                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
751                                                          TILE_SPLIT(split_equal_to_row_size));
752                                         break;
753                                 case 5:
754                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
755                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
756                                         break;
757                                 case 6:
758                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
759                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
760                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
761                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
762                                         break;
763                                 case 7:
764                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
765                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
766                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
767                                                          TILE_SPLIT(split_equal_to_row_size));
768                                         break;
769                                 case 8:
770                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
771                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
772                                         break;
773                                 case 9:
774                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
775                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
776                                         break;
777                                 case 10:
778                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
779                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
780                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
781                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
782                                         break;
783                                 case 11:
784                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
785                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
786                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
787                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
788                                         break;
789                                 case 12:
790                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
791                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
792                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
793                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
794                                         break;
795                                 case 13:
796                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
797                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
798                                         break;
799                                 case 14:
800                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
801                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
802                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
803                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
804                                         break;
805                                 case 16:
806                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
807                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
808                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
809                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
810                                         break;
811                                 case 17:
812                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
813                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
814                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
815                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
816                                         break;
817                                 case 27:
818                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
819                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
820                                         break;
821                                 case 28:
822                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
823                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
824                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
825                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
826                                         break;
827                                 case 29:
828                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
829                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
830                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
831                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
832                                         break;
833                                 case 30:
834                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
835                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
836                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
837                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
838                                         break;
839                                 default:
840                                         gb_tile_moden = 0;
841                                         break;
842                                 }
843                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
844                         }
845                 }
846                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
847                         switch (reg_offset) {
848                         case 0:
849                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
850                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
851                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
852                                                  NUM_BANKS(ADDR_SURF_16_BANK));
853                                 break;
854                         case 1:
855                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
856                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
857                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
858                                                  NUM_BANKS(ADDR_SURF_16_BANK));
859                                 break;
860                         case 2:
861                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
862                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
863                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
864                                                  NUM_BANKS(ADDR_SURF_16_BANK));
865                                 break;
866                         case 3:
867                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
868                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
869                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
870                                                  NUM_BANKS(ADDR_SURF_16_BANK));
871                                 break;
872                         case 4:
873                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
874                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
875                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
876                                                  NUM_BANKS(ADDR_SURF_16_BANK));
877                                 break;
878                         case 5:
879                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
880                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
881                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
882                                                  NUM_BANKS(ADDR_SURF_8_BANK));
883                                 break;
884                         case 6:
885                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
886                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
887                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
888                                                  NUM_BANKS(ADDR_SURF_4_BANK));
889                                 break;
890                         case 8:
891                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
892                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
893                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
894                                                  NUM_BANKS(ADDR_SURF_16_BANK));
895                                 break;
896                         case 9:
897                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
898                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
899                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
900                                                  NUM_BANKS(ADDR_SURF_16_BANK));
901                                 break;
902                         case 10:
903                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
904                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
905                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
906                                                  NUM_BANKS(ADDR_SURF_16_BANK));
907                                 break;
908                         case 11:
909                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
910                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
911                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
912                                                  NUM_BANKS(ADDR_SURF_16_BANK));
913                                 break;
914                         case 12:
915                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
916                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
917                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
918                                                  NUM_BANKS(ADDR_SURF_16_BANK));
919                                 break;
920                         case 13:
921                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
922                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
923                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
924                                                  NUM_BANKS(ADDR_SURF_8_BANK));
925                                 break;
926                         case 14:
927                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
928                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
929                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
930                                                  NUM_BANKS(ADDR_SURF_4_BANK));
931                                 break;
932                         default:
933                                 gb_tile_moden = 0;
934                                 break;
935                         }
936                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
937                 }
938         } else if (num_pipe_configs == 2) {
939                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
940                         switch (reg_offset) {
941                         case 0:
942                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
943                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
944                                                  PIPE_CONFIG(ADDR_SURF_P2) |
945                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
946                                 break;
947                         case 1:
948                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
949                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
950                                                  PIPE_CONFIG(ADDR_SURF_P2) |
951                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
952                                 break;
953                         case 2:
954                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
955                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
956                                                  PIPE_CONFIG(ADDR_SURF_P2) |
957                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
958                                 break;
959                         case 3:
960                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
961                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
962                                                  PIPE_CONFIG(ADDR_SURF_P2) |
963                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
964                                 break;
965                         case 4:
966                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
967                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
968                                                  PIPE_CONFIG(ADDR_SURF_P2) |
969                                                  TILE_SPLIT(split_equal_to_row_size));
970                                 break;
971                         case 5:
972                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
973                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
974                                 break;
975                         case 6:
976                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
977                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
978                                                  PIPE_CONFIG(ADDR_SURF_P2) |
979                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
980                                 break;
981                         case 7:
982                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
983                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
984                                                  PIPE_CONFIG(ADDR_SURF_P2) |
985                                                  TILE_SPLIT(split_equal_to_row_size));
986                                 break;
987                         case 8:
988                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
989                                 break;
990                         case 9:
991                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
992                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
993                                 break;
994                         case 10:
995                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
996                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
997                                                  PIPE_CONFIG(ADDR_SURF_P2) |
998                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
999                                 break;
1000                         case 11:
1001                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1002                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1003                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1004                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1005                                 break;
1006                         case 12:
1007                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1008                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1009                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1010                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1011                                 break;
1012                         case 13:
1013                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1014                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1015                                 break;
1016                         case 14:
1017                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1018                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1019                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1020                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1021                                 break;
1022                         case 16:
1023                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1024                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1025                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1026                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1027                                 break;
1028                         case 17:
1029                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1030                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1031                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1032                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1033                                 break;
1034                         case 27:
1035                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1036                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1037                                 break;
1038                         case 28:
1039                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1040                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1041                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1042                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1043                                 break;
1044                         case 29:
1045                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1046                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1047                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1048                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1049                                 break;
1050                         case 30:
1051                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1052                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1053                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1054                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1055                                 break;
1056                         default:
1057                                 gb_tile_moden = 0;
1058                                 break;
1059                         }
1060                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1061                 }
1062                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1063                         switch (reg_offset) {
1064                         case 0:
1065                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1066                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1067                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1068                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1069                                 break;
1070                         case 1:
1071                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1072                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1073                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1074                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1075                                 break;
1076                         case 2:
1077                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1078                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1079                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1080                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1081                                 break;
1082                         case 3:
1083                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1084                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1085                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1086                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1087                                 break;
1088                         case 4:
1089                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1090                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1091                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1092                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1093                                 break;
1094                         case 5:
1095                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1096                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1097                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1098                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1099                                 break;
1100                         case 6:
1101                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1102                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1103                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1104                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1105                                 break;
1106                         case 8:
1107                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1108                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1109                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1110                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1111                                 break;
1112                         case 9:
1113                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1114                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1115                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1116                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1117                                 break;
1118                         case 10:
1119                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1120                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1121                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1122                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1123                                 break;
1124                         case 11:
1125                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1126                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1127                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1128                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1129                                 break;
1130                         case 12:
1131                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1132                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1133                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1134                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1135                                 break;
1136                         case 13:
1137                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1138                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1139                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1140                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1141                                 break;
1142                         case 14:
1143                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1144                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1145                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1146                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1147                                 break;
1148                         default:
1149                                 gb_tile_moden = 0;
1150                                 break;
1151                         }
1152                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1153                 }
1154         } else
1155                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1156 }
1157
1158 /**
1159  * cik_select_se_sh - select which SE, SH to address
1160  *
1161  * @rdev: radeon_device pointer
1162  * @se_num: shader engine to address
1163  * @sh_num: sh block to address
1164  *
1165  * Select which SE, SH combinations to address. Certain
1166  * registers are instanced per SE or SH.  0xffffffff means
1167  * broadcast to all SEs or SHs (CIK).
1168  */
1169 static void cik_select_se_sh(struct radeon_device *rdev,
1170                              u32 se_num, u32 sh_num)
1171 {
1172         u32 data = INSTANCE_BROADCAST_WRITES;
1173
1174         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1175                 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1176         else if (se_num == 0xffffffff)
1177                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1178         else if (sh_num == 0xffffffff)
1179                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1180         else
1181                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1182         WREG32(GRBM_GFX_INDEX, data);
1183 }
1184
1185 /**
1186  * cik_create_bitmask - create a bitmask
1187  *
1188  * @bit_width: length of the mask
1189  *
1190  * create a variable length bit mask (CIK).
1191  * Returns the bitmask.
1192  */
1193 static u32 cik_create_bitmask(u32 bit_width)
1194 {
1195         u32 i, mask = 0;
1196
1197         for (i = 0; i < bit_width; i++) {
1198                 mask <<= 1;
1199                 mask |= 1;
1200         }
1201         return mask;
1202 }
1203
/**
 * cik_get_rb_disabled - compute the disabled RB bitmask for one SH
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
                              u32 max_rb_num, u32 se_num,
                              u32 sh_per_se)
{
        u32 data, mask;

        /* hw-disabled backends; bit 0 appears to gate whether the
         * disable field is valid — TODO confirm against register spec */
        data = RREG32(CC_RB_BACKEND_DISABLE);
        if (data & 1)
                data &= BACKEND_DISABLE_MASK;
        else
                data = 0;
        /* merge in backends disabled by driver/user */
        data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

        data >>= BACKEND_DISABLE_SHIFT;

        /* keep only the bits covering the RBs of a single SH */
        mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

        return data & mask;
}
1234
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
                         u32 se_num, u32 sh_per_se,
                         u32 max_rb_num)
{
        int i, j;
        u32 data, mask;
        u32 disabled_rbs = 0;
        u32 enabled_rbs = 0;

        /* gather per-SE/SH disabled-RB bits into one packed bitmap,
         * CIK_RB_BITMAP_WIDTH_PER_SH bits per SH slot
         */
        for (i = 0; i < se_num; i++) {
                for (j = 0; j < sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
                        disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
                }
        }
        /* back to broadcast addressing */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        /* invert: an RB is enabled iff it is not in the disabled bitmap.
         * NOTE(review): the caller passes max_backends_per_se (a per-SE
         * count) as @max_rb_num, yet cik_get_rb_disabled() divides it by
         * se_num again — looks inconsistent; verify against later kernels.
         */
        mask = 1;
        for (i = 0; i < max_rb_num; i++) {
                if (!(disabled_rbs & mask))
                        enabled_rbs |= mask;
                mask <<= 1;
        }

        /* program PA_SC_RASTER_CONFIG per SE; 2 bits of RB map per SH */
        for (i = 0; i < se_num; i++) {
                cik_select_se_sh(rdev, i, 0xffffffff);
                data = 0;
                for (j = 0; j < sh_per_se; j++) {
                        switch (enabled_rbs & 3) {
                        case 1:
                                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
                                break;
                        case 2:
                                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
                                break;
                        case 3:
                        default:
                                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
                                break;
                        }
                        enabled_rbs >>= 2;
                }
                WREG32(PA_SC_RASTER_CONFIG, data);
        }
        /* leave the GRBM index in broadcast mode */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
1292
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 hdp_host_path_cntl;
        u32 tmp;
        int i, j;

        /* per-family shader engine / pipe / fifo limits and the
         * "golden" GB_ADDR_CONFIG value */
        switch (rdev->family) {
        case CHIP_BONAIRE:
                rdev->config.cik.max_shader_engines = 2;
                rdev->config.cik.max_tile_pipes = 4;
                rdev->config.cik.max_cu_per_sh = 7;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 2;
                rdev->config.cik.max_texture_channel_caches = 4;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 32;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KAVERI:
                /* TODO */
                break;
        case CHIP_KABINI:
        default:
                rdev->config.cik.max_shader_engines = 1;
                rdev->config.cik.max_tile_pipes = 2;
                rdev->config.cik.max_cu_per_sh = 2;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 1;
                rdev->config.cik.max_texture_channel_caches = 2;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 16;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP */
        /* 32 groups of 5 regs, stride 0x18 — raw offsets carried over
         * from prior asics; presumably HDP tiling config, confirm vs spec */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

        /* enable frame buffer reads/writes through the BIF */
        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

        /* NOTE(review): mc_shared_chmap is read here but never used below */
        mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
        rdev->config.cik.mem_max_burst_length_bytes = 256;
        /* derive DRAM row size (in KB) from the column count, capped at 4 */
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.cik.mem_row_size_in_kb > 4)
                rdev->config.cik.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.cik.shader_engine_tile_size = 32;
        rdev->config.cik.num_gpus = 1;
        rdev->config.cik.multi_gpu_tile_size = 64;

        /* fix up row size */
        gb_addr_config &= ~ROW_SIZE_MASK;
        switch (rdev->config.cik.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config |= ROW_SIZE(0);
                break;
        case 2:
                gb_addr_config |= ROW_SIZE(1);
                break;
        case 4:
                gb_addr_config |= ROW_SIZE(2);
                break;
        }

        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.cik.tile_config = 0;
        switch (rdev->config.cik.num_tile_pipes) {
        case 1:
                rdev->config.cik.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.cik.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.cik.tile_config |= (2 << 0);
                break;
        case 8:
        default:
                /* XXX what about 12? */
                rdev->config.cik.tile_config |= (3 << 0);
                break;
        }
        if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
                rdev->config.cik.tile_config |= 1 << 4;
        else
                rdev->config.cik.tile_config |= 0 << 4;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

        /* the same address config is mirrored into the HDP and DMIF blocks */
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CALC, gb_addr_config);

        cik_tiling_mode_table_init(rdev);

        cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
                     rdev->config.cik.max_sh_per_se,
                     rdev->config.cik.max_backends_per_se);

        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        WREG32(SX_DEBUG_1, 0x20);

        WREG32(TA_CNTL_AUX, 0x00010000);

        tmp = RREG32(SPI_CONFIG_CNTL);
        tmp |= 0x03000000;
        WREG32(SPI_CONFIG_CNTL, tmp);

        WREG32(SQ_CONFIG, 1);

        WREG32(DB_DEBUG, 0);

        /* magic debug values carried over from the hw team; meanings
         * are not documented here */
        tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
        tmp |= 0x00000400;
        WREG32(DB_DEBUG2, tmp);

        tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
        tmp |= 0x00020200;
        WREG32(DB_DEBUG3, tmp);

        tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
        tmp |= 0x00018208;
        WREG32(CB_HW_CONTROL, tmp);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_CONFIG, 0);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        /* read-modify-write with no change: write-back latches the value */
        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
        WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

        /* let the register writes settle before the engine is used */
        udelay(50);
}
1496
1497 /*
1498  * GPU scratch registers helpers function.
1499  */
1500 /**
1501  * cik_scratch_init - setup driver info for CP scratch regs
1502  *
1503  * @rdev: radeon_device pointer
1504  *
1505  * Set up the number and offset of the CP scratch registers.
1506  * NOTE: use of CP scratch registers is a legacy inferface and
1507  * is not used by default on newer asics (r6xx+).  On newer asics,
1508  * memory buffers are used for fences rather than scratch regs.
1509  */
1510 static void cik_scratch_init(struct radeon_device *rdev)
1511 {
1512         int i;
1513
1514         rdev->scratch.num_reg = 7;
1515         rdev->scratch.reg_base = SCRATCH_REG0;
1516         for (i = 0; i < rdev->scratch.num_reg; i++) {
1517                 rdev->scratch.free[i] = true;
1518                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1519         }
1520 }
1521
1522 /**
1523  * cik_ring_test - basic gfx ring test
1524  *
1525  * @rdev: radeon_device pointer
1526  * @ring: radeon_ring structure holding ring information
1527  *
1528  * Allocate a scratch register and write to it using the gfx ring (CIK).
1529  * Provides a basic gfx ring test to verify that the ring is working.
1530  * Used by cik_cp_gfx_resume();
1531  * Returns 0 on success, error on failure.
1532  */
1533 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1534 {
1535         uint32_t scratch;
1536         uint32_t tmp = 0;
1537         unsigned i;
1538         int r;
1539
1540         r = radeon_scratch_get(rdev, &scratch);
1541         if (r) {
1542                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1543                 return r;
1544         }
1545         WREG32(scratch, 0xCAFEDEAD);
1546         r = radeon_ring_lock(rdev, ring, 3);
1547         if (r) {
1548                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1549                 radeon_scratch_free(rdev, scratch);
1550                 return r;
1551         }
1552         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1553         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1554         radeon_ring_write(ring, 0xDEADBEEF);
1555         radeon_ring_unlock_commit(rdev, ring);
1556         for (i = 0; i < rdev->usec_timeout; i++) {
1557                 tmp = RREG32(scratch);
1558                 if (tmp == 0xDEADBEEF)
1559                         break;
1560                 DRM_UDELAY(1);
1561         }
1562         if (i < rdev->usec_timeout) {
1563                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1564         } else {
1565                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1566                           ring->idx, scratch, tmp);
1567                 r = -EINVAL;
1568         }
1569         radeon_scratch_free(rdev, scratch);
1570         return r;
1571 }
1572
/**
 * cik_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_ring_emit(struct radeon_device *rdev,
                         struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* EVENT_WRITE_EOP - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        /* low address bits masked: fence address must be dword aligned */
        radeon_ring_write(ring, addr & 0xfffffffc);
        /* DATA_SEL/INT_SEL: write fence->seq and raise an interrupt —
         * exact encoding per the CIK PM4 spec */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
        /* HDP flush */
        /* We should be using the new WAIT_REG_MEM special op packet here
         * but it causes the CP to hang
         */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        /* writing HDP_MEM_COHERENCY_FLUSH_CNTL triggers the HDP flush */
        radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
}
1609
/**
 * cik_semaphore_ring_emit - emit a semaphore signal/wait on a ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the packet on
 * @semaphore: semaphore object (supplies the GPU address)
 * @emit_wait: true to emit a wait on the semaphore, false to signal it
 *
 * Emits a MEM_SEMAPHORE packet targeting @semaphore->gpu_addr (CIK).
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
                             struct radeon_ring *ring,
                             struct radeon_semaphore *semaphore,
                             bool emit_wait)
{
        uint64_t addr = semaphore->gpu_addr;
        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
        radeon_ring_write(ring, addr & 0xffffffff);
        /* upper address limited to 16 bits; high bits carry the wait/signal select */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
1622
1623 /*
1624  * IB stuff
1625  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        u32 header, control = INDIRECT_BUFFER_VALID;

        if (ib->is_const_ib) {
                /* set switch buffer packet before const IB */
                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                radeon_ring_write(ring, 0);

                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        } else {
                u32 next_rptr;
                /* record the rptr the ring will have after this IB packet
                 * (offsets count the dwords emitted below plus the 4-dword
                 * IB packet itself) so GPU progress can be tracked */
                if (ring->rptr_save_reg) {
                        next_rptr = ring->wptr + 3 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
                        radeon_ring_write(ring, ((ring->rptr_save_reg -
                                                  PACKET3_SET_UCONFIG_REG_START) >> 2));
                        radeon_ring_write(ring, next_rptr);
                } else if (rdev->wb.enabled) {
                        /* no save reg: write next_rptr to the writeback buffer */
                        next_rptr = ring->wptr + 5 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
                        radeon_ring_write(ring, next_rptr);
                }

                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
        }

        /* length in dwords plus, when a VM is attached, its id in bits 31:24 */
        control |= ib->length_dw |
                (ib->vm ? (ib->vm->id << 24) : 0);

        radeon_ring_write(ring, header);
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        radeon_ring_write(ring, control);
}
1681
1682 /**
1683  * cik_ib_test - basic gfx ring IB test
1684  *
1685  * @rdev: radeon_device pointer
1686  * @ring: radeon_ring structure holding ring information
1687  *
1688  * Allocate an IB and execute it on the gfx ring (CIK).
1689  * Provides a basic gfx ring test to verify that IBs are working.
1690  * Returns 0 on success, error on failure.
1691  */
1692 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1693 {
1694         struct radeon_ib ib;
1695         uint32_t scratch;
1696         uint32_t tmp = 0;
1697         unsigned i;
1698         int r;
1699
1700         r = radeon_scratch_get(rdev, &scratch);
1701         if (r) {
1702                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1703                 return r;
1704         }
1705         WREG32(scratch, 0xCAFEDEAD);
1706         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1707         if (r) {
1708                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1709                 return r;
1710         }
1711         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1712         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1713         ib.ptr[2] = 0xDEADBEEF;
1714         ib.length_dw = 3;
1715         r = radeon_ib_schedule(rdev, &ib, NULL);
1716         if (r) {
1717                 radeon_scratch_free(rdev, scratch);
1718                 radeon_ib_free(rdev, &ib);
1719                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1720                 return r;
1721         }
1722         r = radeon_fence_wait(ib.fence, false);
1723         if (r) {
1724                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
1725                 return r;
1726         }
1727         for (i = 0; i < rdev->usec_timeout; i++) {
1728                 tmp = RREG32(scratch);
1729                 if (tmp == 0xDEADBEEF)
1730                         break;
1731                 DRM_UDELAY(1);
1732         }
1733         if (i < rdev->usec_timeout) {
1734                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1735         } else {
1736                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1737                           scratch, tmp);
1738                 r = -EINVAL;
1739         }
1740         radeon_scratch_free(rdev, scratch);
1741         radeon_ib_free(rdev, &ib);
1742         return r;
1743 }
1744
1745 /*
1746  * CP.
 * On CIK, gfx and compute now have independent command processors.
1748  *
1749  * GFX
1750  * Gfx consists of a single ring and can process both gfx jobs and
1751  * compute jobs.  The gfx CP consists of three microengines (ME):
1752  * PFP - Pre-Fetch Parser
1753  * ME - Micro Engine
1754  * CE - Constant Engine
1755  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
1757  * used by the DE so that they can be loaded into cache in parallel
1758  * while the DE is processing state update packets.
1759  *
1760  * Compute
1761  * The compute CP consists of two microengines (ME):
1762  * MEC1 - Compute MicroEngine 1
1763  * MEC2 - Compute MicroEngine 2
1764  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1765  * The queues are exposed to userspace and are programmed directly
1766  * by the compute runtime.
1767  */
1768 /**
1769  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1770  *
1771  * @rdev: radeon_device pointer
1772  * @enable: enable or disable the MEs
1773  *
1774  * Halts or unhalts the gfx MEs.
1775  */
1776 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1777 {
1778         if (enable)
1779                 WREG32(CP_ME_CNTL, 0);
1780         else {
1781                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1782                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1783         }
1784         udelay(50);
1785 }
1786
1787 /**
1788  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1789  *
1790  * @rdev: radeon_device pointer
1791  *
1792  * Loads the gfx PFP, ME, and CE ucode.
1793  * Returns 0 for success, -EINVAL if the ucode is not available.
1794  */
1795 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1796 {
1797         const __be32 *fw_data;
1798         int i;
1799
1800         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1801                 return -EINVAL;
1802
1803         cik_cp_gfx_enable(rdev, false);
1804
1805         /* PFP */
1806         fw_data = (const __be32 *)rdev->pfp_fw->data;
1807         WREG32(CP_PFP_UCODE_ADDR, 0);
1808         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1809                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1810         WREG32(CP_PFP_UCODE_ADDR, 0);
1811
1812         /* CE */
1813         fw_data = (const __be32 *)rdev->ce_fw->data;
1814         WREG32(CP_CE_UCODE_ADDR, 0);
1815         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1816                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1817         WREG32(CP_CE_UCODE_ADDR, 0);
1818
1819         /* ME */
1820         fw_data = (const __be32 *)rdev->me_fw->data;
1821         WREG32(CP_ME_RAM_WADDR, 0);
1822         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1823                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1824         WREG32(CP_ME_RAM_WADDR, 0);
1825
1826         WREG32(CP_PFP_UCODE_ADDR, 0);
1827         WREG32(CP_CE_UCODE_ADDR, 0);
1828         WREG32(CP_ME_RAM_WADDR, 0);
1829         WREG32(CP_ME_RAM_RADDR, 0);
1830         return 0;
1831 }
1832
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve room for the default golden state plus the 17 dwords
	 * of init packets emitted below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the golden register state between the preamble markers */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
1893
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the MEs before freeing the ring backing store */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
1907
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers.
	 * RB_RPTR_WR_ENA is set only transiently so the wptr write
	 * below resets both pointers; tmp itself is left untouched. */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* final CNTL write also clears the transient RB_RPTR_WR_ENA */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
1980
1981 /**
1982  * cik_cp_compute_enable - enable/disable the compute CP MEs
1983  *
1984  * @rdev: radeon_device pointer
1985  * @enable: enable or disable the MEs
1986  *
1987  * Halts or unhalts the compute MEs.
1988  */
1989 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
1990 {
1991         if (enable)
1992                 WREG32(CP_MEC_CNTL, 0);
1993         else
1994                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
1995         udelay(50);
1996 }
1997
1998 /**
1999  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2000  *
2001  * @rdev: radeon_device pointer
2002  *
2003  * Loads the compute MEC1&2 ucode.
2004  * Returns 0 for success, -EINVAL if the ucode is not available.
2005  */
2006 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2007 {
2008         const __be32 *fw_data;
2009         int i;
2010
2011         if (!rdev->mec_fw)
2012                 return -EINVAL;
2013
2014         cik_cp_compute_enable(rdev, false);
2015
2016         /* MEC1 */
2017         fw_data = (const __be32 *)rdev->mec_fw->data;
2018         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2019         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2020                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2021         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2022
2023         if (rdev->family == CHIP_KAVERI) {
2024                 /* MEC2 */
2025                 fw_data = (const __be32 *)rdev->mec_fw->data;
2026                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2027                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2028                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2029                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2030         }
2031
2032         return 0;
2033 }
2034
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* TODO: compute queue setup is not implemented yet; report
	 * success so the rest of the bring-up path can proceed */
	return 0;
}
2048
/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	/* halt the compute MECs */
	cik_cp_compute_enable(rdev, false);
	/* TODO: tear down compute queue state once it is allocated */
}
2062
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	/* TODO: program the compute queue registers; for now just run
	 * the (stub) start routine and propagate its result */
	return cik_cp_compute_start(rdev);
}
2082
/* XXX temporary wrappers to handle both compute and gfx */
/* XXX */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	/* gfx first, then compute */
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
2090
/* XXX temporary wrapper: load ucode for both the gfx and compute CPs */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int err;

	err = cik_cp_gfx_load_microcode(rdev);
	if (err)
		return err;
	return cik_cp_compute_load_microcode(rdev);
}
2105
/* XXX */
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* tear down gfx first, then compute */
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
2112
2113 /* XXX */
2114 static int cik_cp_resume(struct radeon_device *rdev)
2115 {
2116         int r;
2117
2118         /* Reset all cp blocks */
2119         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2120         RREG32(GRBM_SOFT_RESET);
2121         mdelay(15);
2122         WREG32(GRBM_SOFT_RESET, 0);
2123         RREG32(GRBM_SOFT_RESET);
2124
2125         r = cik_cp_load_microcode(rdev);
2126         if (r)
2127                 return r;
2128
2129         r = cik_cp_gfx_resume(rdev);
2130         if (r)
2131                 return r;
2132         r = cik_cp_compute_resume(rdev);
2133         if (r)
2134                 return r;
2135
2136         return 0;
2137 }
2138
/**
 * cik_gpu_is_lockup - check if the 3D engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the 3D engine is locked up (CIK).
 * Returns true if the engine is locked, false if not.
 */
bool cik_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 srbm_status, srbm_status2;
	u32 grbm_status, grbm_status2;
	u32 grbm_status_se0, grbm_status_se1, grbm_status_se2, grbm_status_se3;

	/* NOTE(review): only grbm_status is consulted below; the other
	 * status reads are unused — presumably kept for debugging,
	 * confirm before removing */
	srbm_status = RREG32(SRBM_STATUS);
	srbm_status2 = RREG32(SRBM_STATUS2);
	grbm_status = RREG32(GRBM_STATUS);
	grbm_status2 = RREG32(GRBM_STATUS2);
	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
	grbm_status_se2 = RREG32(GRBM_STATUS_SE2);
	grbm_status_se3 = RREG32(GRBM_STATUS_SE3);
	if (!(grbm_status & GUI_ACTIVE)) {
		/* engine is idle: refresh lockup tracking and report no hang */
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}
2170
/**
 * cik_gfx_gpu_soft_reset - soft reset the 3D engine and CPG
 *
 * @rdev: radeon_device pointer
 *
 * Soft reset the GFX engine and CPG blocks (CIK).
 * XXX: deal with resetting RLC and CPF
 * Returns 0 for success.
 */
static int cik_gfx_gpu_soft_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 grbm_reset = 0;

	/* nothing to do if the GFX engine is already idle */
	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
		return 0;

	/* dump engine status before the reset for debugging */
	dev_info(rdev->dev, "GPU GFX softreset \n");
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* quiesce memory traffic while resetting */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* reset all the gfx block and all CPG blocks */
	grbm_reset = SOFT_RESET_CPG | SOFT_RESET_GFX;

	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
	WREG32(GRBM_SOFT_RESET, grbm_reset);
	(void)RREG32(GRBM_SOFT_RESET);	/* read back to post the write */
	udelay(50);
	WREG32(GRBM_SOFT_RESET, 0);
	(void)RREG32(GRBM_SOFT_RESET);
	/* Wait a little for things to settle down */
	udelay(50);
	/* dump the status again so before/after can be compared */
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	evergreen_mc_resume(rdev, &save);
	return 0;
}
2242
2243 /**
2244  * cik_compute_gpu_soft_reset - soft reset CPC
2245  *
2246  * @rdev: radeon_device pointer
2247  *
2248  * Soft reset the CPC blocks (CIK).
2249  * XXX: deal with reseting RLC and CPF
2250  * Returns 0 for success.
2251  */
2252 static int cik_compute_gpu_soft_reset(struct radeon_device *rdev)
2253 {
2254         struct evergreen_mc_save save;
2255         u32 grbm_reset = 0;
2256
2257         dev_info(rdev->dev, "GPU compute softreset \n");
2258         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2259                 RREG32(GRBM_STATUS));
2260         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2261                 RREG32(GRBM_STATUS2));
2262         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2263                 RREG32(GRBM_STATUS_SE0));
2264         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2265                 RREG32(GRBM_STATUS_SE1));
2266         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
2267                 RREG32(GRBM_STATUS_SE2));
2268         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
2269                 RREG32(GRBM_STATUS_SE3));
2270         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2271                 RREG32(SRBM_STATUS));
2272         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
2273                 RREG32(SRBM_STATUS2));
2274         evergreen_mc_stop(rdev, &save);
2275         if (radeon_mc_wait_for_idle(rdev)) {
2276                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2277         }
2278         /* Disable CP parsing/prefetching */
2279         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2280
2281         /* reset all the CPC blocks */
2282         grbm_reset = SOFT_RESET_CPG;
2283
2284         dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2285         WREG32(GRBM_SOFT_RESET, grbm_reset);
2286         (void)RREG32(GRBM_SOFT_RESET);
2287         udelay(50);
2288         WREG32(GRBM_SOFT_RESET, 0);
2289         (void)RREG32(GRBM_SOFT_RESET);
2290         /* Wait a little for things to settle down */
2291         udelay(50);
2292         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2293                 RREG32(GRBM_STATUS));
2294         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2295                 RREG32(GRBM_STATUS2));
2296         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2297                 RREG32(GRBM_STATUS_SE0));
2298         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2299                 RREG32(GRBM_STATUS_SE1));
2300         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
2301                 RREG32(GRBM_STATUS_SE2));
2302         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
2303                 RREG32(GRBM_STATUS_SE3));
2304         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2305                 RREG32(SRBM_STATUS));
2306         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
2307                 RREG32(SRBM_STATUS2));
2308         evergreen_mc_resume(rdev, &save);
2309         return 0;
2310 }
2311
2312 /**
2313  * cik_asic_reset - soft reset compute and gfx
2314  *
2315  * @rdev: radeon_device pointer
2316  *
2317  * Soft reset the CPC blocks (CIK).
2318  * XXX: make this more fine grained and only reset
2319  * what is necessary.
2320  * Returns 0 for success.
2321  */
2322 int cik_asic_reset(struct radeon_device *rdev)
2323 {
2324         int r;
2325
2326         r = cik_compute_gpu_soft_reset(rdev);
2327         if (r)
2328                 dev_info(rdev->dev, "Compute reset failed!\n");
2329
2330         return cik_gfx_gpu_soft_reset(rdev);
2331 }
2332
2333 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* quiesce MC/display traffic while the apertures are moved */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	/* unmapped system-aperture accesses land on the scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs vram top (upper 16 bits) and base */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture — presumably unused on CIK; confirm defaults */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
2389
2390 /**
2391  * cik_mc_init - initialize the memory controller driver params
2392  *
2393  * @rdev: radeon_device pointer
2394  *
2395  * Look up the amount of vram, vram width, and decide how to place
2396  * vram and gart within the GPU's physical address space (CIK).
2397  * Returns 0 for success.
2398  */
2399 static int cik_mc_init(struct radeon_device *rdev)
2400 {
2401         u32 tmp;
2402         int chansize, numchan;
2403
2404         /* Get VRAM informations */
2405         rdev->mc.vram_is_ddr = true;
2406         tmp = RREG32(MC_ARB_RAMCFG);
2407         if (tmp & CHANSIZE_MASK) {
2408                 chansize = 64;
2409         } else {
2410                 chansize = 32;
2411         }
2412         tmp = RREG32(MC_SHARED_CHMAP);
2413         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2414         case 0:
2415         default:
2416                 numchan = 1;
2417                 break;
2418         case 1:
2419                 numchan = 2;
2420                 break;
2421         case 2:
2422                 numchan = 4;
2423                 break;
2424         case 3:
2425                 numchan = 8;
2426                 break;
2427         case 4:
2428                 numchan = 3;
2429                 break;
2430         case 5:
2431                 numchan = 6;
2432                 break;
2433         case 6:
2434                 numchan = 10;
2435                 break;
2436         case 7:
2437                 numchan = 12;
2438                 break;
2439         case 8:
2440                 numchan = 16;
2441                 break;
2442         }
2443         rdev->mc.vram_width = numchan * chansize;
2444         /* Could aper size report 0 ? */
2445         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2446         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2447         /* size in MB on si */
2448         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2449         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2450         rdev->mc.visible_vram_size = rdev->mc.aper_size;
2451         si_vram_gtt_location(rdev, &rdev->mc);
2452         radeon_update_bandwidth_info(rdev);
2453
2454         return 0;
2455 }
2456
2457 /*
2458  * GART
2459  * VMID 0 is the physical GPU addresses as used by the kernel.
2460  * VMIDs 1-15 are used for userspace clients and are handled
2461  * by the radeon vm/hsa code.
2462  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 select VM contexts 0-15; only context 0
	 * (the kernel GART) is invalidated here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
2478
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: the kernel's GART mapping, flat page table */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faulting accesses are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 0-7 and 8-15 live in two separate register banks;
	 * point all user contexts at the kernel page table for now */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 with interrupts on every fault class */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* clear BYPASS_VM so translation actually happens on KV */
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	for (i = 0; i < 16; i++) {
		/* SRBM_GFX_CNTL selects which VMID's SH_MEM regs we write */
		WREG32(SRBM_GFX_CNTL, VMID(i));
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
	}
	WREG32(SRBM_GFX_CNTL, 0);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
2605
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page tables, reverts the TLB/L2 setup to a
 * minimal pass-through configuration and unpins the GART table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
        /* Disable all tables */
        WREG32(VM_CONTEXT0_CNTL, 0);
        WREG32(VM_CONTEXT1_CNTL, 0);
        /* Setup TLB control: unmapped system-aperture accesses pass
         * through so physical accesses keep working with VM off
         */
        WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL,
               ENABLE_L2_FRAGMENT_PROCESSING |
               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, 0);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
        radeon_gart_table_vram_unpin(rdev);
}
2633
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 * Order matters: the hw must stop using the table before the table
 * memory is freed.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
        cik_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
2647
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
        return 0;
}
2661
2662 /*
2663  * vm
2664  * VMID 0 is the physical GPU addresses as used by the kernel.
2665  * VMIDs 1-15 are used for userspace clients and are handled
2666  * by the radeon vm/hsa code.
2667  */
2668 /**
2669  * cik_vm_init - cik vm init callback
2670  *
2671  * @rdev: radeon_device pointer
2672  *
2673  * Inits cik specific vm parameters (number of VMs, base of vram for
2674  * VMIDs 1-15) (CIK).
2675  * Returns 0 for success.
2676  */
2677 int cik_vm_init(struct radeon_device *rdev)
2678 {
2679         /* number of VMs */
2680         rdev->vm_manager.nvm = 16;
2681         /* base offset of vram pages */
2682         if (rdev->flags & RADEON_IS_IGP) {
2683                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
2684                 tmp <<= 22;
2685                 rdev->vm_manager.vram_base_offset = tmp;
2686         } else
2687                 rdev->vm_manager.vram_base_offset = 0;
2688
2689         return 0;
2690 }
2691
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Intentionally empty: CIK has no per-asic VM state to release.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
2702
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: VM whose page table base is updated and whose TLB is flushed
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).  All register updates are emitted as
 * WRITE_DATA packets on the ring rather than direct MMIO.
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        /* write the new page directory base; VMIDs 0-7 and 8-15
         * live in two separate register ranges
         */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        if (vm->id < 8) {
                radeon_ring_write(ring,
                                  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
        } else {
                radeon_ring_write(ring,
                                  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
        }
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* update SH_MEM_* regs */
        /* select the VMID's register bank via SRBM_GFX_CNTL */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, VMID(vm->id));

        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, SH_MEM_BASES >> 2);
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, 0); /* SH_MEM_BASES */
        radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
        radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
        radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

        /* restore the VMID0 register bank */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, VMID(0));

        /* HDP flush */
        /* We should be using the WAIT_REG_MEM packet here like in
         * cik_fence_ring_emit(), but it causes the CP to hang in this
         * context...
         */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);

        /* bits 0-15 are the VM contexts0-15 */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 1 << vm->id);

        /* sync PFP to ME, otherwise we might get invalid PFP reads */
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
}
2781
2782 /*
2783  * RLC
2784  * The RLC is a multi-purpose microengine that handles a
2785  * variety of functions, the most important of which is
2786  * the interrupt controller.
2787  */
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK) and wait for the
 * serdes masters to go idle.
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
        int i, j, k;
        u32 mask, tmp;

        /* mask the GUI idle (context busy/empty) interrupts while the
         * RLC is down; cik_rlc_start() re-enables them
         */
        tmp = RREG32(CP_INT_CNTL_RING0);
        tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);

        /* NOTE(review): repeated dummy reads, presumably to flush/settle
         * prior writes before halting — intent not documented
         */
        RREG32(CB_CGTT_SCLK_CTRL);
        RREG32(CB_CGTT_SCLK_CTRL);
        RREG32(CB_CGTT_SCLK_CTRL);
        RREG32(CB_CGTT_SCLK_CTRL);

        /* clear the low two control bits (per register name, the
         * CGCG/CGLS enables)
         */
        tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
        WREG32(RLC_CGCG_CGLS_CTRL, tmp);

        /* halt the RLC */
        WREG32(RLC_CNTL, 0);

        /* wait for the CU serdes master on every SE/SH to go idle */
        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        for (k = 0; k < rdev->usec_timeout; k++) {
                                if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                                udelay(1);
                        }
                }
        }
        /* restore broadcast SE/SH selection */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        /* wait for the non-CU serdes masters as well */
        mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
        for (k = 0; k < rdev->usec_timeout; k++) {
                if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
                        break;
                udelay(1);
        }
}
2833
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
        u32 tmp;

        WREG32(RLC_CNTL, RLC_ENABLE);

        /* re-enable the GUI idle interrupts masked in cik_rlc_stop() */
        tmp = RREG32(CP_INT_CNTL_RING0);
        tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);

        /* give the RLC a moment to come up */
        udelay(50);
}
2853
2854 /**
2855  * cik_rlc_resume - setup the RLC hw
2856  *
2857  * @rdev: radeon_device pointer
2858  *
2859  * Initialize the RLC registers, load the ucode,
2860  * and start the RLC (CIK).
2861  * Returns 0 for success, -EINVAL if the ucode is not available.
2862  */
2863 static int cik_rlc_resume(struct radeon_device *rdev)
2864 {
2865         u32 i, size;
2866         u32 clear_state_info[3];
2867         const __be32 *fw_data;
2868
2869         if (!rdev->rlc_fw)
2870                 return -EINVAL;
2871
2872         switch (rdev->family) {
2873         case CHIP_BONAIRE:
2874         default:
2875                 size = BONAIRE_RLC_UCODE_SIZE;
2876                 break;
2877         case CHIP_KAVERI:
2878                 size = KV_RLC_UCODE_SIZE;
2879                 break;
2880         case CHIP_KABINI:
2881                 size = KB_RLC_UCODE_SIZE;
2882                 break;
2883         }
2884
2885         cik_rlc_stop(rdev);
2886
2887         WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
2888         RREG32(GRBM_SOFT_RESET);
2889         udelay(50);
2890         WREG32(GRBM_SOFT_RESET, 0);
2891         RREG32(GRBM_SOFT_RESET);
2892         udelay(50);
2893
2894         WREG32(RLC_LB_CNTR_INIT, 0);
2895         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
2896
2897         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2898         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
2899         WREG32(RLC_LB_PARAMS, 0x00600408);
2900         WREG32(RLC_LB_CNTL, 0x80000004);
2901
2902         WREG32(RLC_MC_CNTL, 0);
2903         WREG32(RLC_UCODE_CNTL, 0);
2904
2905         fw_data = (const __be32 *)rdev->rlc_fw->data;
2906                 WREG32(RLC_GPM_UCODE_ADDR, 0);
2907         for (i = 0; i < size; i++)
2908                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
2909         WREG32(RLC_GPM_UCODE_ADDR, 0);
2910
2911         /* XXX */
2912         clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
2913         clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
2914         clear_state_info[2] = 0;//cik_default_size;
2915         WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
2916         for (i = 0; i < 3; i++)
2917                 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
2918         WREG32(RLC_DRIVER_DMA_STATUS, 0);
2919
2920         cik_rlc_start(rdev);
2921
2922         return 0;
2923 }
2924
2925 /*
2926  * Interrupts
2927  * Starting with r6xx, interrupts are handled via a ring buffer.
2928  * Ring buffers are areas of GPU accessible memory that the GPU
2929  * writes interrupt vectors into and the host reads vectors out of.
2930  * There is a rptr (read pointer) that determines where the
2931  * host is currently reading, and a wptr (write pointer)
2932  * which determines where the GPU has written.  When the
2933  * pointers are equal, the ring is idle.  When the GPU
2934  * writes vectors to the ring buffer, it increments the
2935  * wptr.  When there is an interrupt, the host then starts
2936  * fetching commands and processing them until the pointers are
2937  * equal again at which point it updates the rptr.
2938  */
2939
2940 /**
2941  * cik_enable_interrupts - Enable the interrupt ring buffer
2942  *
2943  * @rdev: radeon_device pointer
2944  *
2945  * Enable the interrupt ring buffer (CIK).
2946  */
2947 static void cik_enable_interrupts(struct radeon_device *rdev)
2948 {
2949         u32 ih_cntl = RREG32(IH_CNTL);
2950         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
2951
2952         ih_cntl |= ENABLE_INTR;
2953         ih_rb_cntl |= IH_RB_ENABLE;
2954         WREG32(IH_CNTL, ih_cntl);
2955         WREG32(IH_RB_CNTL, ih_rb_cntl);
2956         rdev->ih.enabled = true;
2957 }
2958
2959 /**
2960  * cik_disable_interrupts - Disable the interrupt ring buffer
2961  *
2962  * @rdev: radeon_device pointer
2963  *
2964  * Disable the interrupt ring buffer (CIK).
2965  */
2966 static void cik_disable_interrupts(struct radeon_device *rdev)
2967 {
2968         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
2969         u32 ih_cntl = RREG32(IH_CNTL);
2970
2971         ih_rb_cntl &= ~IH_RB_ENABLE;
2972         ih_cntl &= ~ENABLE_INTR;
2973         WREG32(IH_RB_CNTL, ih_rb_cntl);
2974         WREG32(IH_CNTL, ih_cntl);
2975         /* set rptr, wptr to 0 */
2976         WREG32(IH_RB_RPTR, 0);
2977         WREG32(IH_RB_WPTR, 0);
2978         rdev->ih.enabled = false;
2979         rdev->ih.rptr = 0;
2980 }
2981
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        /* gfx ring: keep only the context busy/empty enables */
        WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        /* compute queues */
        WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
        /* grbm */
        WREG32(GRBM_INT_CNTL, 0);
        /* vline/vblank, etc. */
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        if (rdev->num_crtc >= 4) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* dac hotplug */
        WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

        /* digital hotplug: clear enables but preserve the configured
         * hpd polarity bit
         */
        tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD1_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD2_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD3_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD4_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD5_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD6_INT_CONTROL, tmp);

}
3036
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
        int ret = 0;
        int rb_bufsz;
        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

        /* allocate ring */
        ret = r600_ih_ring_alloc(rdev);
        if (ret)
                return ret;

        /* disable irqs */
        cik_disable_interrupts(rdev);

        /* init rlc */
        ret = cik_rlc_resume(rdev);
        if (ret) {
                r600_ih_ring_fini(rdev);
                return ret;
        }

        /* setup interrupt control */
        /* XXX this should actually be a bus address, not an MC address. same on older asics */
        WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
        interrupt_cntl = RREG32(INTERRUPT_CNTL);
        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
         */
        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
        WREG32(INTERRUPT_CNTL, interrupt_cntl);

        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
        /* ring size field is log2 of the size in dwords */
        rb_bufsz = drm_order(rdev->ih.ring_size / 4);

        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
                      (rb_bufsz << 1));

        if (rdev->wb.enabled)
                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

        /* set the writeback address whether it's enabled or not */
        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

        WREG32(IH_RB_CNTL, ih_rb_cntl);

        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);

        /* Default settings for IH_CNTL (disabled at first) */
        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
        /* RPTR_REARM only works if msi's are enabled */
        if (rdev->msi_enabled)
                ih_cntl |= RPTR_REARM;
        WREG32(IH_CNTL, ih_cntl);

        /* force the active interrupt state to all disabled */
        cik_disable_interrupt_state(rdev);

        pci_set_master(rdev->pdev);

        /* enable irqs */
        cik_enable_interrupts(rdev);

        return ret;
}
3118
/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK) based on the enable state tracked in rdev->irq.
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
        u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
                PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
        u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
        u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
        u32 grbm_int_cntl = 0;

        if (!rdev->irq.installed) {
                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
                return -EINVAL;
        }
        /* don't enable anything if the ih is disabled */
        if (!rdev->ih.enabled) {
                cik_disable_interrupts(rdev);
                /* force the active interrupt state to all disabled */
                cik_disable_interrupt_state(rdev);
                return 0;
        }

        /* start from the current hpd control values with the enable
         * bit cleared, so configured polarity is preserved
         */
        hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
        hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
        hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
        hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
        hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
        hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

        /* enable CP interrupts on all rings */
        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
                DRM_DEBUG("cik_irq_set: sw int gfx\n");
                cp_int_cntl |= TIME_STAMP_INT_ENABLE;
        }
        /* TODO: compute queues! */
        /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */

        /* vblank interrupts: wanted either for a vblank wait or a
         * pending page flip
         */
        if (rdev->irq.crtc_vblank_int[0] ||
            atomic_read(&rdev->irq.pflip[0])) {
                DRM_DEBUG("cik_irq_set: vblank 0\n");
                crtc1 |= VBLANK_INTERRUPT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[1] ||
            atomic_read(&rdev->irq.pflip[1])) {
                DRM_DEBUG("cik_irq_set: vblank 1\n");
                crtc2 |= VBLANK_INTERRUPT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[2] ||
            atomic_read(&rdev->irq.pflip[2])) {
                DRM_DEBUG("cik_irq_set: vblank 2\n");
                crtc3 |= VBLANK_INTERRUPT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[3] ||
            atomic_read(&rdev->irq.pflip[3])) {
                DRM_DEBUG("cik_irq_set: vblank 3\n");
                crtc4 |= VBLANK_INTERRUPT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[4] ||
            atomic_read(&rdev->irq.pflip[4])) {
                DRM_DEBUG("cik_irq_set: vblank 4\n");
                crtc5 |= VBLANK_INTERRUPT_MASK;
        }
        if (rdev->irq.crtc_vblank_int[5] ||
            atomic_read(&rdev->irq.pflip[5])) {
                DRM_DEBUG("cik_irq_set: vblank 5\n");
                crtc6 |= VBLANK_INTERRUPT_MASK;
        }
        if (rdev->irq.hpd[0]) {
                DRM_DEBUG("cik_irq_set: hpd 1\n");
                hpd1 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[1]) {
                DRM_DEBUG("cik_irq_set: hpd 2\n");
                hpd2 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[2]) {
                DRM_DEBUG("cik_irq_set: hpd 3\n");
                hpd3 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[3]) {
                DRM_DEBUG("cik_irq_set: hpd 4\n");
                hpd4 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[4]) {
                DRM_DEBUG("cik_irq_set: hpd 5\n");
                hpd5 |= DC_HPDx_INT_EN;
        }
        if (rdev->irq.hpd[5]) {
                DRM_DEBUG("cik_irq_set: hpd 6\n");
                hpd6 |= DC_HPDx_INT_EN;
        }

        /* commit the accumulated enable masks to the hw */
        WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

        WREG32(GRBM_INT_CNTL, grbm_int_cntl);

        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
        if (rdev->num_crtc >= 4) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
        }

        WREG32(DC_HPD1_INT_CONTROL, hpd1);
        WREG32(DC_HPD2_INT_CONTROL, hpd2);
        WREG32(DC_HPD3_INT_CONTROL, hpd3);
        WREG32(DC_HPD4_INT_CONTROL, hpd4);
        WREG32(DC_HPD5_INT_CONTROL, hpd5);
        WREG32(DC_HPD6_INT_CONTROL, hpd6);

        return 0;
}
3242
3243 /**
3244  * cik_irq_ack - ack interrupt sources
3245  *
3246  * @rdev: radeon_device pointer
3247  *
3248  * Ack interrupt sources on the GPU (vblanks, hpd,
3249  * etc.) (CIK).  Certain interrupts sources are sw
3250  * generated and do not require an explicit ack.
3251  */
3252 static inline void cik_irq_ack(struct radeon_device *rdev)
3253 {
3254         u32 tmp;
3255
3256         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3257         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3258         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3259         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3260         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3261         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3262         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
3263
3264         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
3265                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
3266         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
3267                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
3268         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3269                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
3270         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3271                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
3272
3273         if (rdev->num_crtc >= 4) {
3274                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3275                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
3276                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3277                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
3278                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3279                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
3280                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3281                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
3282         }
3283
3284         if (rdev->num_crtc >= 6) {
3285                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3286                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
3287                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3288                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
3289                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3290                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
3291                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3292                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
3293         }
3294
3295         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
3296                 tmp = RREG32(DC_HPD1_INT_CONTROL);
3297                 tmp |= DC_HPDx_INT_ACK;
3298                 WREG32(DC_HPD1_INT_CONTROL, tmp);
3299         }
3300         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
3301                 tmp = RREG32(DC_HPD2_INT_CONTROL);
3302                 tmp |= DC_HPDx_INT_ACK;
3303                 WREG32(DC_HPD2_INT_CONTROL, tmp);
3304         }
3305         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3306                 tmp = RREG32(DC_HPD3_INT_CONTROL);
3307                 tmp |= DC_HPDx_INT_ACK;
3308                 WREG32(DC_HPD3_INT_CONTROL, tmp);
3309         }
3310         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3311                 tmp = RREG32(DC_HPD4_INT_CONTROL);
3312                 tmp |= DC_HPDx_INT_ACK;
3313                 WREG32(DC_HPD4_INT_CONTROL, tmp);
3314         }
3315         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3316                 tmp = RREG32(DC_HPD5_INT_CONTROL);
3317                 tmp |= DC_HPDx_INT_ACK;
3318                 WREG32(DC_HPD5_INT_CONTROL, tmp);
3319         }
3320         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3321                 tmp = RREG32(DC_HPD5_INT_CONTROL);
3322                 tmp |= DC_HPDx_INT_ACK;
3323                 WREG32(DC_HPD6_INT_CONTROL, tmp);
3324         }
3325 }
3326
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
        cik_disable_interrupts(rdev);
        /* Wait for any in-flight interrupt to land, then acknowledge
         * it so nothing is left pending
         */
        mdelay(1);
        cik_irq_ack(rdev);
        cik_disable_interrupt_state(rdev);
}
3342
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
        cik_irq_disable(rdev);
        cik_rlc_stop(rdev);
}
3356
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
        cik_irq_suspend(rdev);
        r600_ih_ring_fini(rdev);
}
3371
3372 /**
3373  * cik_get_ih_wptr - get the IH ring buffer wptr
3374  *
3375  * @rdev: radeon_device pointer
3376  *
3377  * Get the IH ring buffer wptr from either the register
3378  * or the writeback memory buffer (CIK).  Also check for
3379  * ring buffer overflow and deal with it.
3380  * Used by cik_irq_process().
3381  * Returns the value of the wptr.
3382  */
3383 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
3384 {
3385         u32 wptr, tmp;
3386
3387         if (rdev->wb.enabled)
3388                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3389         else
3390                 wptr = RREG32(IH_RB_WPTR);
3391
3392         if (wptr & RB_OVERFLOW) {
3393                 /* When a ring buffer overflow happen start parsing interrupt
3394                  * from the last not overwritten vector (wptr + 16). Hopefully
3395                  * this should allow us to catchup.
3396                  */
3397                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
3398                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
3399                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
3400                 tmp = RREG32(IH_RB_CNTL);
3401                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
3402                 WREG32(IH_RB_CNTL, tmp);
3403         }
3404         return (wptr & rdev->ih.ptr_mask);
3405 }
3406
3407 /*        CIK IV Ring
3408  * Each IV ring entry is 128 bits:
3409  * [7:0]    - interrupt source id
3410  * [31:8]   - reserved
3411  * [59:32]  - interrupt source data
3412  * [63:60]  - reserved
3413  * [71:64]  - RINGID: ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
3414  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
3415  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
3416  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
3417  *            PIPE_ID - ME0 0=3D
3418  *                    - ME1&2 compute dispatcher (4 pipes each)
3419  * [79:72]  - VMID
3420  * [95:80]  - PASID
3421  * [127:96] - reserved
3422  */
3423 /**
3424  * cik_irq_process - interrupt handler
3425  *
3426  * @rdev: radeon_device pointer
3427  *
3428  * Interrupt hander (CIK).  Walk the IH ring,
3429  * ack interrupts and schedule work to handle
3430  * interrupt events.
3431  * Returns irq process return code.
3432  */
3433 int cik_irq_process(struct radeon_device *rdev)
3434 {
3435         u32 wptr;
3436         u32 rptr;
3437         u32 src_id, src_data, ring_id;
3438         u8 me_id, pipe_id, queue_id;
3439         u32 ring_index;
3440         bool queue_hotplug = false;
3441         bool queue_reset = false;
3442
3443         if (!rdev->ih.enabled || rdev->shutdown)
3444                 return IRQ_NONE;
3445
3446         wptr = cik_get_ih_wptr(rdev);
3447
3448 restart_ih:
3449         /* is somebody else already processing irqs? */
3450         if (atomic_xchg(&rdev->ih.lock, 1))
3451                 return IRQ_NONE;
3452
3453         rptr = rdev->ih.rptr;
3454         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3455
3456         /* Order reading of wptr vs. reading of IH ring data */
3457         rmb();
3458
3459         /* display interrupts */
3460         cik_irq_ack(rdev);
3461
3462         while (rptr != wptr) {
3463                 /* wptr/rptr are in bytes! */
3464                 ring_index = rptr / 4;
3465                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3466                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3467                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3468                 /* XXX check the bitfield order! */
3469                 me_id = (ring_id & 0x60) >> 5;
3470                 pipe_id = (ring_id & 0x18) >> 3;
3471                 queue_id = (ring_id & 0x7) >> 0;
3472
3473                 switch (src_id) {
3474                 case 1: /* D1 vblank/vline */
3475                         switch (src_data) {
3476                         case 0: /* D1 vblank */
3477                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
3478                                         if (rdev->irq.crtc_vblank_int[0]) {
3479                                                 drm_handle_vblank(rdev->ddev, 0);
3480                                                 rdev->pm.vblank_sync = true;
3481                                                 wake_up(&rdev->irq.vblank_queue);
3482                                         }
3483                                         if (atomic_read(&rdev->irq.pflip[0]))
3484                                                 radeon_crtc_handle_flip(rdev, 0);
3485                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
3486                                         DRM_DEBUG("IH: D1 vblank\n");
3487                                 }
3488                                 break;
3489                         case 1: /* D1 vline */
3490                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
3491                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
3492                                         DRM_DEBUG("IH: D1 vline\n");
3493                                 }
3494                                 break;
3495                         default:
3496                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3497                                 break;
3498                         }
3499                         break;
3500                 case 2: /* D2 vblank/vline */
3501                         switch (src_data) {
3502                         case 0: /* D2 vblank */
3503                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
3504                                         if (rdev->irq.crtc_vblank_int[1]) {
3505                                                 drm_handle_vblank(rdev->ddev, 1);
3506                                                 rdev->pm.vblank_sync = true;
3507                                                 wake_up(&rdev->irq.vblank_queue);
3508                                         }
3509                                         if (atomic_read(&rdev->irq.pflip[1]))
3510                                                 radeon_crtc_handle_flip(rdev, 1);
3511                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
3512                                         DRM_DEBUG("IH: D2 vblank\n");
3513                                 }
3514                                 break;
3515                         case 1: /* D2 vline */
3516                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
3517                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
3518                                         DRM_DEBUG("IH: D2 vline\n");
3519                                 }
3520                                 break;
3521                         default:
3522                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3523                                 break;
3524                         }
3525                         break;
3526                 case 3: /* D3 vblank/vline */
3527                         switch (src_data) {
3528                         case 0: /* D3 vblank */
3529                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
3530                                         if (rdev->irq.crtc_vblank_int[2]) {
3531                                                 drm_handle_vblank(rdev->ddev, 2);
3532                                                 rdev->pm.vblank_sync = true;
3533                                                 wake_up(&rdev->irq.vblank_queue);
3534                                         }
3535                                         if (atomic_read(&rdev->irq.pflip[2]))
3536                                                 radeon_crtc_handle_flip(rdev, 2);
3537                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
3538                                         DRM_DEBUG("IH: D3 vblank\n");
3539                                 }
3540                                 break;
3541                         case 1: /* D3 vline */
3542                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
3543                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
3544                                         DRM_DEBUG("IH: D3 vline\n");
3545                                 }
3546                                 break;
3547                         default:
3548                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3549                                 break;
3550                         }
3551                         break;
3552                 case 4: /* D4 vblank/vline */
3553                         switch (src_data) {
3554                         case 0: /* D4 vblank */
3555                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
3556                                         if (rdev->irq.crtc_vblank_int[3]) {
3557                                                 drm_handle_vblank(rdev->ddev, 3);
3558                                                 rdev->pm.vblank_sync = true;
3559                                                 wake_up(&rdev->irq.vblank_queue);
3560                                         }
3561                                         if (atomic_read(&rdev->irq.pflip[3]))
3562                                                 radeon_crtc_handle_flip(rdev, 3);
3563                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
3564                                         DRM_DEBUG("IH: D4 vblank\n");
3565                                 }
3566                                 break;
3567                         case 1: /* D4 vline */
3568                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
3569                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
3570                                         DRM_DEBUG("IH: D4 vline\n");
3571                                 }
3572                                 break;
3573                         default:
3574                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3575                                 break;
3576                         }
3577                         break;
3578                 case 5: /* D5 vblank/vline */
3579                         switch (src_data) {
3580                         case 0: /* D5 vblank */
3581                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
3582                                         if (rdev->irq.crtc_vblank_int[4]) {
3583                                                 drm_handle_vblank(rdev->ddev, 4);
3584                                                 rdev->pm.vblank_sync = true;
3585                                                 wake_up(&rdev->irq.vblank_queue);
3586                                         }
3587                                         if (atomic_read(&rdev->irq.pflip[4]))
3588                                                 radeon_crtc_handle_flip(rdev, 4);
3589                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
3590                                         DRM_DEBUG("IH: D5 vblank\n");
3591                                 }
3592                                 break;
3593                         case 1: /* D5 vline */
3594                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
3595                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
3596                                         DRM_DEBUG("IH: D5 vline\n");
3597                                 }
3598                                 break;
3599                         default:
3600                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3601                                 break;
3602                         }
3603                         break;
3604                 case 6: /* D6 vblank/vline */
3605                         switch (src_data) {
3606                         case 0: /* D6 vblank */
3607                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
3608                                         if (rdev->irq.crtc_vblank_int[5]) {
3609                                                 drm_handle_vblank(rdev->ddev, 5);
3610                                                 rdev->pm.vblank_sync = true;
3611                                                 wake_up(&rdev->irq.vblank_queue);
3612                                         }
3613                                         if (atomic_read(&rdev->irq.pflip[5]))
3614                                                 radeon_crtc_handle_flip(rdev, 5);
3615                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
3616                                         DRM_DEBUG("IH: D6 vblank\n");
3617                                 }
3618                                 break;
3619                         case 1: /* D6 vline */
3620                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
3621                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
3622                                         DRM_DEBUG("IH: D6 vline\n");
3623                                 }
3624                                 break;
3625                         default:
3626                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3627                                 break;
3628                         }
3629                         break;
3630                 case 42: /* HPD hotplug */
3631                         switch (src_data) {
3632                         case 0:
3633                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
3634                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
3635                                         queue_hotplug = true;
3636                                         DRM_DEBUG("IH: HPD1\n");
3637                                 }
3638                                 break;
3639                         case 1:
3640                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
3641                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
3642                                         queue_hotplug = true;
3643                                         DRM_DEBUG("IH: HPD2\n");
3644                                 }
3645                                 break;
3646                         case 2:
3647                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3648                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
3649                                         queue_hotplug = true;
3650                                         DRM_DEBUG("IH: HPD3\n");
3651                                 }
3652                                 break;
3653                         case 3:
3654                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3655                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
3656                                         queue_hotplug = true;
3657                                         DRM_DEBUG("IH: HPD4\n");
3658                                 }
3659                                 break;
3660                         case 4:
3661                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3662                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
3663                                         queue_hotplug = true;
3664                                         DRM_DEBUG("IH: HPD5\n");
3665                                 }
3666                                 break;
3667                         case 5:
3668                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3669                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
3670                                         queue_hotplug = true;
3671                                         DRM_DEBUG("IH: HPD6\n");
3672                                 }
3673                                 break;
3674                         default:
3675                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3676                                 break;
3677                         }
3678                         break;
3679                 case 146:
3680                 case 147:
3681                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
3682                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3683                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3684                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3685                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3686                         /* reset addr and status */
3687                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
3688                         break;
3689                 case 176: /* GFX RB CP_INT */
3690                 case 177: /* GFX IB CP_INT */
3691                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3692                         break;
3693                 case 181: /* CP EOP event */
3694                         DRM_DEBUG("IH: CP EOP\n");
3695                         switch (me_id) {
3696                         case 0:
3697                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3698                                 break;
3699                         case 1:
3700                                 /* XXX compute */
3701                                 break;
3702                         case 2:
3703                                 /* XXX compute */
3704                                 break;
3705                         }
3706                         break;
3707                 case 184: /* CP Privileged reg access */
3708                         DRM_ERROR("Illegal register access in command stream\n");
3709                         /* XXX check the bitfield order! */
3710                         me_id = (ring_id & 0x60) >> 5;
3711                         pipe_id = (ring_id & 0x18) >> 3;
3712                         queue_id = (ring_id & 0x7) >> 0;
3713                         switch (me_id) {
3714                         case 0:
3715                                 /* This results in a full GPU reset, but all we need to do is soft
3716                                  * reset the CP for gfx
3717                                  */
3718                                 queue_reset = true;
3719                                 break;
3720                         case 1:
3721                                 /* XXX compute */
3722                                 break;
3723                         case 2:
3724                                 /* XXX compute */
3725                                 break;
3726                         }
3727                         break;
3728                 case 185: /* CP Privileged inst */
3729                         DRM_ERROR("Illegal instruction in command stream\n");
3730                         switch (me_id) {
3731                         case 0:
3732                                 /* This results in a full GPU reset, but all we need to do is soft
3733                                  * reset the CP for gfx
3734                                  */
3735                                 queue_reset = true;
3736                                 break;
3737                         case 1:
3738                                 /* XXX compute */
3739                                 break;
3740                         case 2:
3741                                 /* XXX compute */
3742                                 break;
3743                         }
3744                         break;
3745                 case 233: /* GUI IDLE */
3746                         DRM_DEBUG("IH: GUI idle\n");
3747                         break;
3748                 default:
3749                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3750                         break;
3751                 }
3752
3753                 /* wptr/rptr are in bytes! */
3754                 rptr += 16;
3755                 rptr &= rdev->ih.ptr_mask;
3756         }
3757         if (queue_hotplug)
3758                 schedule_work(&rdev->hotplug_work);
3759         if (queue_reset)
3760                 schedule_work(&rdev->reset_work);
3761         rdev->ih.rptr = rptr;
3762         WREG32(IH_RB_RPTR, rdev->ih.rptr);
3763         atomic_set(&rdev->ih.lock, 0);
3764
3765         /* make sure wptr hasn't changed while processing */
3766         wptr = cik_get_ih_wptr(rdev);
3767         if (wptr != rptr)
3768                 goto restart_ih;
3769
3770         return IRQ_HANDLED;
3771 }