drm/radeon: add get_xclk() callback for CIK
drivers/gpu/drm/radeon/cik.c
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

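/*
 * Note: the *_UCODE_SIZE values above are in dwords, not bytes;
 * cik_init_microcode() below multiplies each of them by 4 when
 * validating the byte length reported by request_firmware().
 */
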
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
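
/*
 * Illustrative sketch (not part of the driver): radeon stores its
 * ATOM-derived clocks, including spll.reference_freq, in 10 kHz units,
 * so a caller wanting kHz scales the result by 10.  The helper below is
 * a hypothetical example of that, assuming the usual 10 kHz convention
 * holds here.
 */
static inline u32 cik_get_xclk_khz(struct radeon_device *rdev)
{
	/* xclk is in 10 kHz units (e.g. 2700 == 27 MHz) */
	return cik_get_xclk(rdev) * 10;
}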

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
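
/*
 * Illustration only: each row of bonaire_io_mc_regs is an {index, data}
 * pair.  ci_mc_load_microcode() below walks the table through a flat
 * u32 pointer, which is why it indexes with (i << 1) and (i << 1) + 1.
 * A hypothetical helper making that access pattern explicit:
 */
static void __maybe_unused ci_program_io_mc_regs(struct radeon_device *rdev,
						 const u32 (*regs)[2],
						 u32 count)
{
	u32 i;

	for (i = 0; i < count; i++) {
		WREG32(MC_SEQ_IO_DEBUG_INDEX, regs[i][0]);
		WREG32(MC_SEQ_IO_DEBUG_DATA, regs[i][1]);
	}
}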

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}
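
/*
 * Usage sketch (hypothetical, not the driver's actual init path):
 * cik_init_microcode() only pulls the images into driver memory; the MC
 * image is pushed to the hardware separately via ci_mc_load_microcode(),
 * and only on dGPUs, since APUs carry no MC ucode (see the
 * RADEON_IS_IGP check above).
 */
static int __maybe_unused cik_load_ucode_example(struct radeon_device *rdev)
{
	int r;

	r = cik_init_microcode(rdev);
	if (r)
		return r;
	if (!(rdev->flags & RADEON_IS_IGP))
		r = ci_mc_load_microcode(rdev);
	return r;
}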

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
998                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
999                                 break;
1000                         case 2:
1001                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1002                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1003                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1004                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1005                                 break;
1006                         case 3:
1007                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1008                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1009                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1010                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1011                                 break;
1012                         case 4:
1013                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1014                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1015                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1016                                                  TILE_SPLIT(split_equal_to_row_size));
1017                                 break;
1018                         case 5:
1019                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1020                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1021                                 break;
1022                         case 6:
1023                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1024                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1025                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1026                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1027                                 break;
1028                         case 7:
1029                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1030                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1031                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1032                                                  TILE_SPLIT(split_equal_to_row_size));
1033                                 break;
1034                         case 8:
1035                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1036                                 break;
1037                         case 9:
1038                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1039                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1040                                 break;
1041                         case 10:
1042                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1043                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1044                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1045                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1046                                 break;
1047                         case 11:
1048                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1049                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1050                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1051                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1052                                 break;
1053                         case 12:
1054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1055                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1056                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1057                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1058                                 break;
1059                         case 13:
1060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1061                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1062                                 break;
1063                         case 14:
1064                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1065                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1066                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1067                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1068                                 break;
1069                         case 16:
1070                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1071                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1072                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1073                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1074                                 break;
1075                         case 17:
1076                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1077                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1078                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1079                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1080                                 break;
1081                         case 27:
1082                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1083                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1084                                 break;
1085                         case 28:
1086                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1087                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1088                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1089                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1090                                 break;
1091                         case 29:
1092                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1093                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1094                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1095                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1096                                 break;
1097                         case 30:
1098                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1099                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1100                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1101                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1102                                 break;
1103                         default:
1104                                 gb_tile_moden = 0;
1105                                 break;
1106                         }
1107                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1108                 }
1109                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1110                         switch (reg_offset) {
1111                         case 0:
1112                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1113                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1114                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1115                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1116                                 break;
1117                         case 1:
1118                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1119                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1120                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1121                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1122                                 break;
1123                         case 2:
1124                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1125                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1126                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1127                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1128                                 break;
1129                         case 3:
1130                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1131                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1132                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1133                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1134                                 break;
1135                         case 4:
1136                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1137                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1138                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1139                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1140                                 break;
1141                         case 5:
1142                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1143                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1144                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1145                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1146                                 break;
1147                         case 6:
1148                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1149                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1150                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1151                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1152                                 break;
1153                         case 8:
1154                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1155                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1156                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1157                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1158                                 break;
1159                         case 9:
1160                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1161                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1162                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1163                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1164                                 break;
1165                         case 10:
1166                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1167                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1168                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1169                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1170                                 break;
1171                         case 11:
1172                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1173                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1174                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1175                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1176                                 break;
1177                         case 12:
1178                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1179                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1180                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1181                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1182                                 break;
1183                         case 13:
1184                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1185                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1186                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1187                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1188                                 break;
1189                         case 14:
1190                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1191                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1192                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1193                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1194                                 break;
1195                         default:
1196                                 gb_tile_moden = 0;
1197                                 break;
1198                         }
1199                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1200                 }
1201         } else
1202                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1203 }
1204
1205 /**
1206  * cik_select_se_sh - select which SE, SH to address
1207  *
1208  * @rdev: radeon_device pointer
1209  * @se_num: shader engine to address
1210  * @sh_num: sh block to address
1211  *
1212  * Select which SE, SH combinations to address. Certain
1213  * registers are instanced per SE or SH.  0xffffffff means
1214  * broadcast to all SEs or SHs (CIK).
1215  */
1216 static void cik_select_se_sh(struct radeon_device *rdev,
1217                              u32 se_num, u32 sh_num)
1218 {
1219         u32 data = INSTANCE_BROADCAST_WRITES;
1220
1221         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1222                 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1223         else if (se_num == 0xffffffff)
1224                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1225         else if (sh_num == 0xffffffff)
1226                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1227         else
1228                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1229         WREG32(GRBM_GFX_INDEX, data);
1230 }
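/*
 * Illustrative sketch only, not part of the driver: the usual pattern
 * for touching a per-SE/SH instanced register is select, access, then
 * restore broadcast so later register accesses hit every instance
 * again.  The helper name is hypothetical.
 */
static inline u32 cik_read_instanced_reg_sketch(struct radeon_device *rdev,
                                                u32 se, u32 sh, u32 reg)
{
        u32 val;

        cik_select_se_sh(rdev, se, sh);                 /* address one SE/SH pair */
        val = RREG32(reg);                              /* instanced read */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); /* back to broadcast */
        return val;
}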
1231
1232 /**
1233  * cik_create_bitmask - create a bitmask
1234  *
1235  * @bit_width: length of the mask
1236  *
1237  * create a variable length bit mask (CIK).
1238  * Returns the bitmask.
1239  */
1240 static u32 cik_create_bitmask(u32 bit_width)
1241 {
1242         u32 i, mask = 0;
1243
1244         for (i = 0; i < bit_width; i++) {
1245                 mask <<= 1;
1246                 mask |= 1;
1247         }
1248         return mask;
1249 }
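/*
 * Note: for bit_width < 32 the loop above is equivalent to
 * ((1 << bit_width) - 1), e.g. cik_create_bitmask(4) == 0xf.  The loop
 * form also keeps bit_width == 32 well defined (0xffffffff), where a
 * single 32-bit shift would not be.
 */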
1250
1251 /**
1252  * cik_get_rb_disabled - get the mask of disabled render backends (RBs)
1253  *
1254  * @rdev: radeon_device pointer
1255  * @max_rb_num: max RBs (render backends) for the asic
1256  * @se_num: number of SEs (shader engines) for the asic
1257  * @sh_per_se: number of SH blocks per SE for the asic
1258  *
1259  * Calculates the bitmask of disabled RBs (CIK).
1260  * Returns the disabled RB bitmask.
1261  */
1262 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1263                               u32 max_rb_num, u32 se_num,
1264                               u32 sh_per_se)
1265 {
1266         u32 data, mask;
1267
1268         data = RREG32(CC_RB_BACKEND_DISABLE);
1269         if (data & 1)
1270                 data &= BACKEND_DISABLE_MASK;
1271         else
1272                 data = 0;
1273         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1274
1275         data >>= BACKEND_DISABLE_SHIFT;
1276
1277         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1278
1279         return data & mask;
1280 }
1281
1282 /**
1283  * cik_setup_rb - setup the RBs on the asic
1284  *
1285  * @rdev: radeon_device pointer
1286  * @se_num: number of SEs (shader engines) for the asic
1287  * @sh_per_se: number of SH blocks per SE for the asic
1288  * @max_rb_num: max RBs (render backends) for the asic
1289  *
1290  * Configures per-SE/SH RB registers (CIK).
1291  */
1292 static void cik_setup_rb(struct radeon_device *rdev,
1293                          u32 se_num, u32 sh_per_se,
1294                          u32 max_rb_num)
1295 {
1296         int i, j;
1297         u32 data, mask;
1298         u32 disabled_rbs = 0;
1299         u32 enabled_rbs = 0;
1300
1301         for (i = 0; i < se_num; i++) {
1302                 for (j = 0; j < sh_per_se; j++) {
1303                         cik_select_se_sh(rdev, i, j);
1304                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1305                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1306                 }
1307         }
1308         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1309
1310         mask = 1;
1311         for (i = 0; i < max_rb_num; i++) {
1312                 if (!(disabled_rbs & mask))
1313                         enabled_rbs |= mask;
1314                 mask <<= 1;
1315         }
1316
1317         for (i = 0; i < se_num; i++) {
1318                 cik_select_se_sh(rdev, i, 0xffffffff);
1319                 data = 0;
1320                 for (j = 0; j < sh_per_se; j++) {
1321                         switch (enabled_rbs & 3) {
1322                         case 1:
1323                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1324                                 break;
1325                         case 2:
1326                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1327                                 break;
1328                         case 3:
1329                         default:
1330                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1331                                 break;
1332                         }
1333                         enabled_rbs >>= 2;
1334                 }
1335                 WREG32(PA_SC_RASTER_CONFIG, data);
1336         }
1337         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1338 }
1339
1340 /**
1341  * cik_gpu_init - setup the 3D engine
1342  *
1343  * @rdev: radeon_device pointer
1344  *
1345  * Configures the 3D engine and tiling configuration
1346  * registers so that the 3D engine is usable.
1347  */
1348 static void cik_gpu_init(struct radeon_device *rdev)
1349 {
1350         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1351         u32 mc_shared_chmap, mc_arb_ramcfg;
1352         u32 hdp_host_path_cntl;
1353         u32 tmp;
1354         int i, j;
1355
1356         switch (rdev->family) {
1357         case CHIP_BONAIRE:
1358                 rdev->config.cik.max_shader_engines = 2;
1359                 rdev->config.cik.max_tile_pipes = 4;
1360                 rdev->config.cik.max_cu_per_sh = 7;
1361                 rdev->config.cik.max_sh_per_se = 1;
1362                 rdev->config.cik.max_backends_per_se = 2;
1363                 rdev->config.cik.max_texture_channel_caches = 4;
1364                 rdev->config.cik.max_gprs = 256;
1365                 rdev->config.cik.max_gs_threads = 32;
1366                 rdev->config.cik.max_hw_contexts = 8;
1367
1368                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1369                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1370                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1371                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1372                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1373                 break;
1374         case CHIP_KAVERI:
1375                 /* TODO */
1376                 break;
1377         case CHIP_KABINI:
1378         default:
1379                 rdev->config.cik.max_shader_engines = 1;
1380                 rdev->config.cik.max_tile_pipes = 2;
1381                 rdev->config.cik.max_cu_per_sh = 2;
1382                 rdev->config.cik.max_sh_per_se = 1;
1383                 rdev->config.cik.max_backends_per_se = 1;
1384                 rdev->config.cik.max_texture_channel_caches = 2;
1385                 rdev->config.cik.max_gprs = 256;
1386                 rdev->config.cik.max_gs_threads = 16;
1387                 rdev->config.cik.max_hw_contexts = 8;
1388
1389                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1390                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1391                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1392                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1393                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1394                 break;
1395         }
1396
1397         /* Initialize HDP */
1398         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1399                 WREG32((0x2c14 + j), 0x00000000);
1400                 WREG32((0x2c18 + j), 0x00000000);
1401                 WREG32((0x2c1c + j), 0x00000000);
1402                 WREG32((0x2c20 + j), 0x00000000);
1403                 WREG32((0x2c24 + j), 0x00000000);
1404         }
1405
1406         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1407
1408         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1409
1410         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1411         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1412
1413         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1414         rdev->config.cik.mem_max_burst_length_bytes = 256;
1415         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1416         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1417         if (rdev->config.cik.mem_row_size_in_kb > 4)
1418                 rdev->config.cik.mem_row_size_in_kb = 4;
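        /* e.g. tmp = 0/1/2/3 -> 1/2/4/8 KB by the formula above, with
         * anything larger than 4 KB clamped to 4 KB
         */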
1419         /* XXX use MC settings? */
1420         rdev->config.cik.shader_engine_tile_size = 32;
1421         rdev->config.cik.num_gpus = 1;
1422         rdev->config.cik.multi_gpu_tile_size = 64;
1423
1424         /* fix up row size */
1425         gb_addr_config &= ~ROW_SIZE_MASK;
1426         switch (rdev->config.cik.mem_row_size_in_kb) {
1427         case 1:
1428         default:
1429                 gb_addr_config |= ROW_SIZE(0);
1430                 break;
1431         case 2:
1432                 gb_addr_config |= ROW_SIZE(1);
1433                 break;
1434         case 4:
1435                 gb_addr_config |= ROW_SIZE(2);
1436                 break;
1437         }
1438
1439         /* setup tiling info dword.  gb_addr_config is not adequate since it does
1440          * not have bank info, so create a custom tiling dword.
1441          * bits 3:0   num_pipes
1442          * bits 7:4   num_banks
1443          * bits 11:8  group_size
1444          * bits 15:12 row_size
1445          */
1446         rdev->config.cik.tile_config = 0;
1447         switch (rdev->config.cik.num_tile_pipes) {
1448         case 1:
1449                 rdev->config.cik.tile_config |= (0 << 0);
1450                 break;
1451         case 2:
1452                 rdev->config.cik.tile_config |= (1 << 0);
1453                 break;
1454         case 4:
1455                 rdev->config.cik.tile_config |= (2 << 0);
1456                 break;
1457         case 8:
1458         default:
1459                 /* XXX what about 12? */
1460                 rdev->config.cik.tile_config |= (3 << 0);
1461                 break;
1462         }
1463         if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1464                 rdev->config.cik.tile_config |= 1 << 4;
1465         else
1466                 rdev->config.cik.tile_config |= 0 << 4;
1467         rdev->config.cik.tile_config |=
1468                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1469         rdev->config.cik.tile_config |=
1470                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
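        /* consumers can unpack the dword built above with simple shifts,
         * along the lines of:
         *   num_pipes  = tile_config & 0xf;
         *   num_banks  = (tile_config >> 4) & 0xf;
         *   group_size = (tile_config >> 8) & 0xf;
         *   row_size   = (tile_config >> 12) & 0xf;
         */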
1471
1472         WREG32(GB_ADDR_CONFIG, gb_addr_config);
1473         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1474         WREG32(DMIF_ADDR_CALC, gb_addr_config);
1475         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1476         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1477
1478         cik_tiling_mode_table_init(rdev);
1479
1480         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1481                      rdev->config.cik.max_sh_per_se,
1482                      rdev->config.cik.max_backends_per_se);
1483
1484         /* set HW defaults for 3D engine */
1485         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1486
1487         WREG32(SX_DEBUG_1, 0x20);
1488
1489         WREG32(TA_CNTL_AUX, 0x00010000);
1490
1491         tmp = RREG32(SPI_CONFIG_CNTL);
1492         tmp |= 0x03000000;
1493         WREG32(SPI_CONFIG_CNTL, tmp);
1494
1495         WREG32(SQ_CONFIG, 1);
1496
1497         WREG32(DB_DEBUG, 0);
1498
1499         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1500         tmp |= 0x00000400;
1501         WREG32(DB_DEBUG2, tmp);
1502
1503         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1504         tmp |= 0x00020200;
1505         WREG32(DB_DEBUG3, tmp);
1506
1507         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1508         tmp |= 0x00018208;
1509         WREG32(CB_HW_CONTROL, tmp);
1510
1511         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1512
1513         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1514                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1515                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1516                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1517
1518         WREG32(VGT_NUM_INSTANCES, 1);
1519
1520         WREG32(CP_PERFMON_CNTL, 0);
1521
1522         WREG32(SQ_CONFIG, 0);
1523
1524         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1525                                           FORCE_EOV_MAX_REZ_CNT(255)));
1526
1527         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1528                AUTO_INVLD_EN(ES_AND_GS_AUTO));
1529
1530         WREG32(VGT_GS_VERTEX_REUSE, 16);
1531         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1532
1533         tmp = RREG32(HDP_MISC_CNTL);
1534         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1535         WREG32(HDP_MISC_CNTL, tmp);
1536
1537         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1538         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1539
1540         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1541         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1542
1543         udelay(50);
1544 }
1545
1546 /*
1547  * GPU scratch register helper functions.
1548  */
1549 /**
1550  * cik_scratch_init - setup driver info for CP scratch regs
1551  *
1552  * @rdev: radeon_device pointer
1553  *
1554  * Set up the number and offset of the CP scratch registers.
1555  * NOTE: use of CP scratch registers is a legacy interface and
1556  * is not used by default on newer asics (r6xx+).  On newer asics,
1557  * memory buffers are used for fences rather than scratch regs.
1558  */
1559 static void cik_scratch_init(struct radeon_device *rdev)
1560 {
1561         int i;
1562
1563         rdev->scratch.num_reg = 7;
1564         rdev->scratch.reg_base = SCRATCH_REG0;
1565         for (i = 0; i < rdev->scratch.num_reg; i++) {
1566                 rdev->scratch.free[i] = true;
1567                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1568         }
1569 }
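/*
 * See cik_ring_test() below for the canonical usage pattern:
 * radeon_scratch_get(), write a token through the ring, poll the
 * register with RREG32(), then radeon_scratch_free().
 */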
1570
1571 /**
1572  * cik_ring_test - basic gfx ring test
1573  *
1574  * @rdev: radeon_device pointer
1575  * @ring: radeon_ring structure holding ring information
1576  *
1577  * Allocate a scratch register and write to it using the gfx ring (CIK).
1578  * Provides a basic gfx ring test to verify that the ring is working.
1579  * Used by cik_cp_gfx_resume().
1580  * Returns 0 on success, error on failure.
1581  */
1582 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1583 {
1584         uint32_t scratch;
1585         uint32_t tmp = 0;
1586         unsigned i;
1587         int r;
1588
1589         r = radeon_scratch_get(rdev, &scratch);
1590         if (r) {
1591                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1592                 return r;
1593         }
1594         WREG32(scratch, 0xCAFEDEAD);
1595         r = radeon_ring_lock(rdev, ring, 3);
1596         if (r) {
1597                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1598                 radeon_scratch_free(rdev, scratch);
1599                 return r;
1600         }
1601         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1602         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1603         radeon_ring_write(ring, 0xDEADBEEF);
1604         radeon_ring_unlock_commit(rdev, ring);
1605         for (i = 0; i < rdev->usec_timeout; i++) {
1606                 tmp = RREG32(scratch);
1607                 if (tmp == 0xDEADBEEF)
1608                         break;
1609                 DRM_UDELAY(1);
1610         }
1611         if (i < rdev->usec_timeout) {
1612                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1613         } else {
1614                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1615                           ring->idx, scratch, tmp);
1616                 r = -EINVAL;
1617         }
1618         radeon_scratch_free(rdev, scratch);
1619         return r;
1620 }
1621
1622 /**
1623  * cik_fence_ring_emit - emit a fence on the gfx ring
1624  *
1625  * @rdev: radeon_device pointer
1626  * @fence: radeon fence object
1627  *
1628  * Emits a fence sequence number on the gfx ring and flushes
1629  * GPU caches.
1630  */
1631 void cik_fence_ring_emit(struct radeon_device *rdev,
1632                          struct radeon_fence *fence)
1633 {
1634         struct radeon_ring *ring = &rdev->ring[fence->ring];
1635         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1636
1637         /* EVENT_WRITE_EOP - flush caches, send int */
1638         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1639         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1640                                  EOP_TC_ACTION_EN |
1641                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1642                                  EVENT_INDEX(5)));
1643         radeon_ring_write(ring, addr & 0xfffffffc);
1644         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1645         radeon_ring_write(ring, fence->seq);
1646         radeon_ring_write(ring, 0);
1647         /* HDP flush */
1648         /* We should be using the new WAIT_REG_MEM special op packet here
1649          * but it causes the CP to hang
1650          */
1651         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1652         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1653                                  WRITE_DATA_DST_SEL(0)));
1654         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1655         radeon_ring_write(ring, 0);
1656         radeon_ring_write(ring, 0);
1657 }
1658
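/**
 * cik_semaphore_ring_emit - emit a semaphore on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: true to emit a wait, false to emit a signal
 *
 * Emits a MEM_SEMAPHORE packet that either signals the semaphore or
 * waits on it, depending on @emit_wait (CIK).
 */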
1659 void cik_semaphore_ring_emit(struct radeon_device *rdev,
1660                              struct radeon_ring *ring,
1661                              struct radeon_semaphore *semaphore,
1662                              bool emit_wait)
1663 {
1664         uint64_t addr = semaphore->gpu_addr;
1665         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1666
1667         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1668         radeon_ring_write(ring, addr & 0xffffffff);
1669         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1670 }
1671
1672 /*
1673  * IB stuff
1674  */
1675 /**
1676  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1677  *
1678  * @rdev: radeon_device pointer
1679  * @ib: radeon indirect buffer object
1680  *
1681  * Emits a DE (drawing engine) or CE (constant engine) IB
1682  * on the gfx ring.  IBs are usually generated by userspace
1683  * acceleration drivers and submitted to the kernel for
1684  * scheduling on the ring.  This function schedules the IB
1685  * on the gfx ring for execution by the GPU.
1686  */
1687 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1688 {
1689         struct radeon_ring *ring = &rdev->ring[ib->ring];
1690         u32 header, control = INDIRECT_BUFFER_VALID;
1691
1692         if (ib->is_const_ib) {
1693                 /* set switch buffer packet before const IB */
1694                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1695                 radeon_ring_write(ring, 0);
1696
1697                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1698         } else {
1699                 u32 next_rptr;
1700                 if (ring->rptr_save_reg) {
1701                         next_rptr = ring->wptr + 3 + 4;
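                        /* 3 DWs for the SET_UCONFIG_REG write just below
                         * plus 4 DWs for the IB packet itself
                         */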
1702                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1703                         radeon_ring_write(ring, ((ring->rptr_save_reg -
1704                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
1705                         radeon_ring_write(ring, next_rptr);
1706                 } else if (rdev->wb.enabled) {
1707                         next_rptr = ring->wptr + 5 + 4;
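                        /* 5 DWs for the WRITE_DATA packet just below
                         * plus 4 DWs for the IB packet itself
                         */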
1708                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1709                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1710                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1711                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1712                         radeon_ring_write(ring, next_rptr);
1713                 }
1714
1715                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1716         }
1717
1718         control |= ib->length_dw |
1719                 (ib->vm ? (ib->vm->id << 24) : 0);
1720
1721         radeon_ring_write(ring, header);
1722         radeon_ring_write(ring,
1723 #ifdef __BIG_ENDIAN
1724                           (2 << 0) |
1725 #endif
1726                           (ib->gpu_addr & 0xFFFFFFFC));
1727         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1728         radeon_ring_write(ring, control);
1729 }
1730
1731 /**
1732  * cik_ib_test - basic gfx ring IB test
1733  *
1734  * @rdev: radeon_device pointer
1735  * @ring: radeon_ring structure holding ring information
1736  *
1737  * Allocate an IB and execute it on the gfx ring (CIK).
1738  * Provides a basic gfx ring test to verify that IBs are working.
1739  * Returns 0 on success, error on failure.
1740  */
1741 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1742 {
1743         struct radeon_ib ib;
1744         uint32_t scratch;
1745         uint32_t tmp = 0;
1746         unsigned i;
1747         int r;
1748
1749         r = radeon_scratch_get(rdev, &scratch);
1750         if (r) {
1751                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1752                 return r;
1753         }
1754         WREG32(scratch, 0xCAFEDEAD);
1755         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1756         if (r) {
1757                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1758                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
                     radeon_scratch_free(rdev, scratch);
1759                 return r;
1760         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1761         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1762         ib.ptr[2] = 0xDEADBEEF;
1763         ib.length_dw = 3;
1764         r = radeon_ib_schedule(rdev, &ib, NULL);
1765         if (r) {
1766                 radeon_scratch_free(rdev, scratch);
1767                 radeon_ib_free(rdev, &ib);
1768                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1769                 return r;
1770         }
1771         r = radeon_fence_wait(ib.fence, false);
1772         if (r) {
1773                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                     radeon_scratch_free(rdev, scratch);
                     radeon_ib_free(rdev, &ib);
1774                 return r;
1775         }
1776         for (i = 0; i < rdev->usec_timeout; i++) {
1777                 tmp = RREG32(scratch);
1778                 if (tmp == 0xDEADBEEF)
1779                         break;
1780                 DRM_UDELAY(1);
1781         }
1782         if (i < rdev->usec_timeout) {
1783                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1784         } else {
1785                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1786                           scratch, tmp);
1787                 r = -EINVAL;
1788         }
1789         radeon_scratch_free(rdev, scratch);
1790         radeon_ib_free(rdev, &ib);
1791         return r;
1792 }
1793
1794 /*
1795  * CP.
1796  * On CIK, gfx and compute now have independent command processors.
1797  *
1798  * GFX
1799  * Gfx consists of a single ring and can process both gfx jobs and
1800  * compute jobs.  The gfx CP consists of three microengines (ME):
1801  * PFP - Pre-Fetch Parser
1802  * ME - Micro Engine
1803  * CE - Constant Engine
1804  * The PFP and ME make up what is considered the Drawing Engine (DE).
1805  * The CE is an asynchronous engine used for updating buffer descriptors
1806  * used by the DE so that they can be loaded into cache in parallel
1807  * while the DE is processing state update packets.
1808  *
1809  * Compute
1810  * The compute CP consists of two microengines (ME):
1811  * MEC1 - Compute MicroEngine 1
1812  * MEC2 - Compute MicroEngine 2
1813  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1814  * The queues are exposed to userspace and are programmed directly
1815  * by the compute runtime.
1816  */
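/*
 * Putting the numbers above together: 2 MECs x 4 pipes x 8 queues gives
 * up to 64 compute queues, half that on parts with a single MEC (see
 * cik_cp_compute_load_microcode(), which only loads MEC2 ucode on
 * KAVERI).
 */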
1817 /**
1818  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1819  *
1820  * @rdev: radeon_device pointer
1821  * @enable: enable or disable the MEs
1822  *
1823  * Halts or unhalts the gfx MEs.
1824  */
1825 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1826 {
1827         if (enable)
1828                 WREG32(CP_ME_CNTL, 0);
1829         else {
1830                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1831                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1832         }
1833         udelay(50);
1834 }
1835
1836 /**
1837  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1838  *
1839  * @rdev: radeon_device pointer
1840  *
1841  * Loads the gfx PFP, ME, and CE ucode.
1842  * Returns 0 for success, -EINVAL if the ucode is not available.
1843  */
1844 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1845 {
1846         const __be32 *fw_data;
1847         int i;
1848
1849         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1850                 return -EINVAL;
1851
1852         cik_cp_gfx_enable(rdev, false);
1853
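        /* each ucode image is streamed through an address/data register
         * pair: zero the write address, push the big-endian words through
         * the data port, then zero the address again
         */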
1854         /* PFP */
1855         fw_data = (const __be32 *)rdev->pfp_fw->data;
1856         WREG32(CP_PFP_UCODE_ADDR, 0);
1857         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1858                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1859         WREG32(CP_PFP_UCODE_ADDR, 0);
1860
1861         /* CE */
1862         fw_data = (const __be32 *)rdev->ce_fw->data;
1863         WREG32(CP_CE_UCODE_ADDR, 0);
1864         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1865                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1866         WREG32(CP_CE_UCODE_ADDR, 0);
1867
1868         /* ME */
1869         fw_data = (const __be32 *)rdev->me_fw->data;
1870         WREG32(CP_ME_RAM_WADDR, 0);
1871         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1872                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1873         WREG32(CP_ME_RAM_WADDR, 0);
1874
1875         WREG32(CP_PFP_UCODE_ADDR, 0);
1876         WREG32(CP_CE_UCODE_ADDR, 0);
1877         WREG32(CP_ME_RAM_WADDR, 0);
1878         WREG32(CP_ME_RAM_RADDR, 0);
1879         return 0;
1880 }
1881
1882 /**
1883  * cik_cp_gfx_start - start the gfx ring
1884  *
1885  * @rdev: radeon_device pointer
1886  *
1887  * Enables the ring and loads the clear state context and other
1888  * packets required to init the ring.
1889  * Returns 0 for success, error for failure.
1890  */
1891 static int cik_cp_gfx_start(struct radeon_device *rdev)
1892 {
1893         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1894         int r, i;
1895
1896         /* init the CP */
1897         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1898         WREG32(CP_ENDIAN_SWAP, 0);
1899         WREG32(CP_DEVICE_ID, 1);
1900
1901         cik_cp_gfx_enable(rdev, true);
1902
1903         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1904         if (r) {
1905                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1906                 return r;
1907         }
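        /* the 17 extra DWs cover the fixed packets emitted around the
         * clear state below: SET_BASE (4) + two PREAMBLE_CNTLs (2 + 2) +
         * CONTEXT_CONTROL (3) + CLEAR_STATE (2) + SET_CONTEXT_REG (4)
         */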
1908
1909         /* init the CE partitions.  CE only used for gfx on CIK */
1910         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1911         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1912         radeon_ring_write(ring, 0xc000);
1913         radeon_ring_write(ring, 0xc000);
1914
1915         /* setup clear context state */
1916         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1917         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1918
1919         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1920         radeon_ring_write(ring, 0x80000000);
1921         radeon_ring_write(ring, 0x80000000);
1922
1923         for (i = 0; i < cik_default_size; i++)
1924                 radeon_ring_write(ring, cik_default_state[i]);
1925
1926         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1927         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1928
1929         /* set clear context state */
1930         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1931         radeon_ring_write(ring, 0);
1932
1933         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1934         radeon_ring_write(ring, 0x00000316);
1935         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1936         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1937
1938         radeon_ring_unlock_commit(rdev, ring);
1939
1940         return 0;
1941 }
1942
1943 /**
1944  * cik_cp_gfx_fini - stop the gfx ring
1945  *
1946  * @rdev: radeon_device pointer
1947  *
1948  * Stop the gfx ring and tear down the driver ring
1949  * info.
1950  */
1951 static void cik_cp_gfx_fini(struct radeon_device *rdev)
1952 {
1953         cik_cp_gfx_enable(rdev, false);
1954         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1955 }
1956
1957 /**
1958  * cik_cp_gfx_resume - setup the gfx ring buffer registers
1959  *
1960  * @rdev: radeon_device pointer
1961  *
1962  * Program the location and size of the gfx ring buffer
1963  * and test it to make sure it's working.
1964  * Returns 0 for success, error for failure.
1965  */
1966 static int cik_cp_gfx_resume(struct radeon_device *rdev)
1967 {
1968         struct radeon_ring *ring;
1969         u32 tmp;
1970         u32 rb_bufsz;
1971         u64 rb_addr;
1972         int r;
1973
1974         WREG32(CP_SEM_WAIT_TIMER, 0x0);
1975         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1976
1977         /* Set the write pointer delay */
1978         WREG32(CP_RB_WPTR_DELAY, 0);
1979
1980         /* set the RB to use vmid 0 */
1981         WREG32(CP_RB_VMID, 0);
1982
1983         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1984
1985         /* ring 0 - compute and gfx */
1986         /* Set ring buffer size */
1987         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1988         rb_bufsz = drm_order(ring->ring_size / 8);
1989         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1990 #ifdef __BIG_ENDIAN
1991         tmp |= BUF_SWAP_32BIT;
1992 #endif
1993         WREG32(CP_RB0_CNTL, tmp);
1994
1995         /* Initialize the ring buffer's read and write pointers */
1996         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
1997         ring->wptr = 0;
1998         WREG32(CP_RB0_WPTR, ring->wptr);
1999
2000         /* set the wb address whether it's enabled or not */
2001         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2002         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2003
2004         /* scratch register shadowing is no longer supported */
2005         WREG32(SCRATCH_UMSK, 0);
2006
2007         if (!rdev->wb.enabled)
2008                 tmp |= RB_NO_UPDATE;
2009
2010         mdelay(1);
2011         WREG32(CP_RB0_CNTL, tmp);
2012
2013         rb_addr = ring->gpu_addr >> 8;
2014         WREG32(CP_RB0_BASE, rb_addr);
2015         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2016
2017         ring->rptr = RREG32(CP_RB0_RPTR);
2018
2019         /* start the ring */
2020         cik_cp_gfx_start(rdev);
2021         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2022         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2023         if (r) {
2024                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2025                 return r;
2026         }
2027         return 0;
2028 }
2029
2030 /**
2031  * cik_cp_compute_enable - enable/disable the compute CP MEs
2032  *
2033  * @rdev: radeon_device pointer
2034  * @enable: enable or disable the MEs
2035  *
2036  * Halts or unhalts the compute MEs.
2037  */
2038 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2039 {
2040         if (enable)
2041                 WREG32(CP_MEC_CNTL, 0);
2042         else
2043                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2044         udelay(50);
2045 }
2046
2047 /**
2048  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2049  *
2050  * @rdev: radeon_device pointer
2051  *
2052  * Loads the compute MEC1 and MEC2 ucode.
2053  * Returns 0 for success, -EINVAL if the ucode is not available.
2054  */
2055 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2056 {
2057         const __be32 *fw_data;
2058         int i;
2059
2060         if (!rdev->mec_fw)
2061                 return -EINVAL;
2062
2063         cik_cp_compute_enable(rdev, false);
2064
2065         /* MEC1 */
2066         fw_data = (const __be32 *)rdev->mec_fw->data;
2067         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2068         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2069                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2070         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2071
2072         if (rdev->family == CHIP_KAVERI) {
2073                 /* MEC2 */
2074                 fw_data = (const __be32 *)rdev->mec_fw->data;
2075                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2076                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2077                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2078                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2079         }
2080
2081         return 0;
2082 }
2083
2084 /**
2085  * cik_cp_compute_start - start the compute queues
2086  *
2087  * @rdev: radeon_device pointer
2088  *
2089  * Enable the compute queues.
2090  * Returns 0 for success, error for failure.
2091  */
2092 static int cik_cp_compute_start(struct radeon_device *rdev)
2093 {
2094         /* TODO */
2095         return 0;
2096 }
2097
2098 /**
2099  * cik_cp_compute_fini - stop the compute queues
2100  *
2101  * @rdev: radeon_device pointer
2102  *
2103  * Stop the compute queues and tear down the driver queue
2104  * info.
2105  */
2106 static void cik_cp_compute_fini(struct radeon_device *rdev)
2107 {
2108         cik_cp_compute_enable(rdev, false);
2109         /* TODO */
2110 }
2111
2112 /**
2113  * cik_cp_compute_resume - setup the compute queue registers
2114  *
2115  * @rdev: radeon_device pointer
2116  *
2117  * Program the compute queues and test them to make sure they
2118  * are working.
2119  * Returns 0 for success, error for failure.
2120  */
2121 static int cik_cp_compute_resume(struct radeon_device *rdev)
2122 {
2123         int r;
2124
2125         /* TODO */
2126         r = cik_cp_compute_start(rdev);
2127         if (r)
2128                 return r;
2129         return 0;
2130 }
2131
2132 /* XXX temporary wrappers to handle both compute and gfx */
2133 /* XXX */
2134 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2135 {
2136         cik_cp_gfx_enable(rdev, enable);
2137         cik_cp_compute_enable(rdev, enable);
2138 }
2139
2140 /* XXX */
2141 static int cik_cp_load_microcode(struct radeon_device *rdev)
2142 {
2143         int r;
2144
2145         r = cik_cp_gfx_load_microcode(rdev);
2146         if (r)
2147                 return r;
2148         r = cik_cp_compute_load_microcode(rdev);
2149         if (r)
2150                 return r;
2151
2152         return 0;
2153 }
2154
2155 /* XXX */
2156 static void cik_cp_fini(struct radeon_device *rdev)
2157 {
2158         cik_cp_gfx_fini(rdev);
2159         cik_cp_compute_fini(rdev);
2160 }
2161
2162 /* XXX */
2163 static int cik_cp_resume(struct radeon_device *rdev)
2164 {
2165         int r;
2166
2167         /* Reset all cp blocks */
2168         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2169         RREG32(GRBM_SOFT_RESET);
2170         mdelay(15);
2171         WREG32(GRBM_SOFT_RESET, 0);
2172         RREG32(GRBM_SOFT_RESET);
2173
2174         r = cik_cp_load_microcode(rdev);
2175         if (r)
2176                 return r;
2177
2178         r = cik_cp_gfx_resume(rdev);
2179         if (r)
2180                 return r;
2181         r = cik_cp_compute_resume(rdev);
2182         if (r)
2183                 return r;
2184
2185         return 0;
2186 }
2187
2188 /*
2189  * sDMA - System DMA
2190  * Starting with CIK, the GPU has new asynchronous
2191  * DMA engines.  These engines are used for compute
2192  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
2193  * and each one supports 1 ring buffer used for gfx
2194  * and 2 queues used for compute.
2195  *
2196  * The programming model is very similar to the CP
2197  * (ring buffer, IBs, etc.), but sDMA has its own
2198  * packet format that is different from the PM4 format
2199  * used by the CP. sDMA supports copying data, writing
2200  * embedded data, solid fills, and a number of other
2201  * things.  It also has support for tiling/detiling of
2202  * buffers.
2203  */
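/*
 * Every sDMA packet below begins with a header built by SDMA_PACKET()
 * from cikd.h, which packs, roughly:
 *
 *   [31:16] op-specific extra bits | [15:8] sub-opcode | [7:0] opcode
 */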
2204 /**
2205  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2206  *
2207  * @rdev: radeon_device pointer
2208  * @ib: IB object to schedule
2209  *
2210  * Schedule an IB in the DMA ring (CIK).
2211  */
2212 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2213                               struct radeon_ib *ib)
2214 {
2215         struct radeon_ring *ring = &rdev->ring[ib->ring];
2216         u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2217
2218         if (rdev->wb.enabled) {
2219                 u32 next_rptr = ring->wptr + 5;
2220                 while ((next_rptr & 7) != 4)
2221                         next_rptr++;
2222                 next_rptr += 4;
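                /* next_rptr now matches the wptr after the 5-DW write
                 * packet emitted below, the NOP padding, and the 4-DW
                 * INDIRECT_BUFFER packet
                 */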
2223                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2224                 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2225                 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2226                 radeon_ring_write(ring, 1); /* number of DWs to follow */
2227                 radeon_ring_write(ring, next_rptr);
2228         }
2229
2230         /* IB packet must end on an 8 DW boundary */
2231         while ((ring->wptr & 7) != 4)
2232                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2233         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2234         radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2235         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2236         radeon_ring_write(ring, ib->length_dw);
2237
2238 }
2239
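/*
 * Worked example of the next_rptr math above: the 4 DW
 * INDIRECT_BUFFER packet must end on an 8 DW boundary, so it must
 * start at a ring offset congruent to 4 mod 8.  Starting from
 * wptr + 5 (the write-back packet emitted first), the value is
 * rounded up to the next offset that is 4 mod 8, mirroring the NOP
 * padding loop below, and then advanced by the 4 IB packet DWs.
 * next_rptr therefore equals the wptr the ring will reach once the
 * IB packet itself has been fetched.
 */
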
2240 /**
2241  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2242  *
2243  * @rdev: radeon_device pointer
2244  * @fence: radeon fence object
2245  *
2246  * Add a DMA fence packet to the ring to write
2247  * Add a DMA fence packet to the ring to write the fence
2248  * seq number, and a DMA trap packet to generate an
2249  * interrupt if needed (CIK).
2250 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2251                               struct radeon_fence *fence)
2252 {
2253         struct radeon_ring *ring = &rdev->ring[fence->ring];
2254         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2255         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2256                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2257         u32 ref_and_mask;
2258
2259         if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2260                 ref_and_mask = SDMA0;
2261         else
2262                 ref_and_mask = SDMA1;
2263
2264         /* write the fence */
2265         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2266         radeon_ring_write(ring, addr & 0xffffffff);
2267         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2268         radeon_ring_write(ring, fence->seq);
2269         /* generate an interrupt */
2270         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2271         /* flush HDP */
2272         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2273         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2274         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2275         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2276         radeon_ring_write(ring, ref_and_mask); /* MASK */
2277         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2278 }
2279
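/*
 * A note on the trailing POLL_REG_MEM packet above, assuming
 * EXTRA_OP(1) selects the write-then-poll form suggested by the
 * register pair: the engine writes the reference value to
 * GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE until
 * (value & MASK) == REFERENCE (EXTRA_FUNC(3) is "equal"), i.e. until
 * its own SDMA0/SDMA1 bit latches.  The last DW packs the retry
 * count (high half) and the poll interval (low half).
 */
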
2280 /**
2281  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2282  *
2283  * @rdev: radeon_device pointer
2284  * @ring: radeon_ring structure holding ring information
2285  * @semaphore: radeon semaphore object
2286  * @emit_wait: wait or signal semaphore
2287  *
2288  * Add a DMA semaphore packet to the ring to wait on or
2289  * signal other rings (CIK).
2290  */
2291 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2292                                   struct radeon_ring *ring,
2293                                   struct radeon_semaphore *semaphore,
2294                                   bool emit_wait)
2295 {
2296         u64 addr = semaphore->gpu_addr;
2297         u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2298
2299         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2300         radeon_ring_write(ring, addr & 0xfffffff8);
2301         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2302 }
2303
2304 /**
2305  * cik_sdma_gfx_stop - stop the gfx async dma engines
2306  *
2307  * @rdev: radeon_device pointer
2308  *
2309  * Stop the gfx async dma ring buffers (CIK).
2310  */
2311 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2312 {
2313         u32 rb_cntl, reg_offset;
2314         int i;
2315
2316         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2317
2318         for (i = 0; i < 2; i++) {
2319                 if (i == 0)
2320                         reg_offset = SDMA0_REGISTER_OFFSET;
2321                 else
2322                         reg_offset = SDMA1_REGISTER_OFFSET;
2323                 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2324                 rb_cntl &= ~SDMA_RB_ENABLE;
2325                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2326                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2327         }
2328 }
2329
2330 /**
2331  * cik_sdma_rlc_stop - stop the compute async dma engines
2332  *
2333  * @rdev: radeon_device pointer
2334  *
2335  * Stop the compute async dma queues (CIK).
2336  */
2337 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
2338 {
2339         /* XXX todo */
2340 }
2341
2342 /**
2343  * cik_sdma_enable - stop the async dma engines
2344  *
2345  * @rdev: radeon_device pointer
2346  * @enable: enable/disable the DMA MEs.
2347  *
2348  * Halt or unhalt the async dma engines (CIK).
2349  */
2350 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2351 {
2352         u32 me_cntl, reg_offset;
2353         int i;
2354
2355         for (i = 0; i < 2; i++) {
2356                 if (i == 0)
2357                         reg_offset = SDMA0_REGISTER_OFFSET;
2358                 else
2359                         reg_offset = SDMA1_REGISTER_OFFSET;
2360                 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2361                 if (enable)
2362                         me_cntl &= ~SDMA_HALT;
2363                 else
2364                         me_cntl |= SDMA_HALT;
2365                 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2366         }
2367 }
2368
2369 /**
2370  * cik_sdma_gfx_resume - setup and start the async dma engines
2371  *
2372  * @rdev: radeon_device pointer
2373  *
2374  * Set up the gfx DMA ring buffers and enable them (CIK).
2375  * Returns 0 for success, error for failure.
2376  */
2377 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2378 {
2379         struct radeon_ring *ring;
2380         u32 rb_cntl, ib_cntl;
2381         u32 rb_bufsz;
2382         u32 reg_offset, wb_offset;
2383         int i, r;
2384
2385         for (i = 0; i < 2; i++) {
2386                 if (i == 0) {
2387                         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2388                         reg_offset = SDMA0_REGISTER_OFFSET;
2389                         wb_offset = R600_WB_DMA_RPTR_OFFSET;
2390                 } else {
2391                         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2392                         reg_offset = SDMA1_REGISTER_OFFSET;
2393                         wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2394                 }
2395
2396                 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2397                 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2398
2399                 /* Set ring buffer size in dwords */
2400                 rb_bufsz = drm_order(ring->ring_size / 4);
2401                 rb_cntl = rb_bufsz << 1;
2402 #ifdef __BIG_ENDIAN
2403                 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2404 #endif
2405                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2406
2407                 /* Initialize the ring buffer's read and write pointers */
2408                 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2409                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2410
2411                 /* set the wb address whether it's enabled or not */
2412                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2413                        upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2414                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2415                        ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2416
2417                 if (rdev->wb.enabled)
2418                         rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2419
2420                 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2421                 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2422
2423                 ring->wptr = 0;
2424                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2425
2426                 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2427
2428                 /* enable DMA RB */
2429                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2430
2431                 ib_cntl = SDMA_IB_ENABLE;
2432 #ifdef __BIG_ENDIAN
2433                 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2434 #endif
2435                 /* enable DMA IBs */
2436                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2437
2438                 ring->ready = true;
2439
2440                 r = radeon_ring_test(rdev, ring->idx, ring);
2441                 if (r) {
2442                         ring->ready = false;
2443                         return r;
2444                 }
2445         }
2446
2447         radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2448
2449         return 0;
2450 }
2451
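/*
 * Sizing example for the RB_CNTL programming above, for a 256KB gfx
 * DMA ring: ring_size / 4 = 65536 dwords and drm_order(65536) = 16,
 * so the log2 ring size is shifted in above the enable bit as
 * 16 << 1 = 0x20.  SDMA_RB_ENABLE (bit 0) is only OR'ed in at the
 * end, after the rptr/wptr, write-back address, and base registers
 * have been programmed.
 */
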
2452 /**
2453  * cik_sdma_rlc_resume - setup and start the async dma engines
2454  *
2455  * @rdev: radeon_device pointer
2456  *
2457  * Set up the compute DMA queues and enable them (CIK).
2458  * Returns 0 for success, error for failure.
2459  */
2460 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
2461 {
2462         /* XXX todo */
2463         return 0;
2464 }
2465
2466 /**
2467  * cik_sdma_load_microcode - load the sDMA ME ucode
2468  *
2469  * @rdev: radeon_device pointer
2470  *
2471  * Loads the sDMA0/1 ucode.
2472  * Returns 0 for success, -EINVAL if the ucode is not available.
2473  */
2474 static int cik_sdma_load_microcode(struct radeon_device *rdev)
2475 {
2476         const __be32 *fw_data;
2477         int i;
2478
2479         if (!rdev->sdma_fw)
2480                 return -EINVAL;
2481
2482         /* stop the gfx rings and rlc compute queues */
2483         cik_sdma_gfx_stop(rdev);
2484         cik_sdma_rlc_stop(rdev);
2485
2486         /* halt the MEs */
2487         cik_sdma_enable(rdev, false);
2488
2489         /* sdma0 */
2490         fw_data = (const __be32 *)rdev->sdma_fw->data;
2491         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2492         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2493                 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2494         WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2495
2496         /* sdma1 */
2497         fw_data = (const __be32 *)rdev->sdma_fw->data;
2498         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2499         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2500                 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2501         WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2502
2503         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2504         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2505         return 0;
2506 }
2507
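/*
 * A note on the loader above, assuming the usual auto-incrementing
 * ucode interface used elsewhere in radeon: SDMA0_UCODE_ADDR is set
 * to 0 once and each SDMA0_UCODE_DATA write advances it, so the loop
 * streams the whole CIK_SDMA_UCODE_SIZE dword image from
 * <chip>_sdma.bin into each engine, appends the version dword, and
 * finally resets the address so the engine starts from the top when
 * unhalted.
 */
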
2508 /**
2509  * cik_sdma_resume - setup and start the async dma engines
2510  *
2511  * @rdev: radeon_device pointer
2512  *
2513  * Set up the DMA engines and enable them (CIK).
2514  * Returns 0 for success, error for failure.
2515  */
2516 static int cik_sdma_resume(struct radeon_device *rdev)
2517 {
2518         int r;
2519
2520         /* Reset dma */
2521         WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
2522         RREG32(SRBM_SOFT_RESET);
2523         udelay(50);
2524         WREG32(SRBM_SOFT_RESET, 0);
2525         RREG32(SRBM_SOFT_RESET);
2526
2527         r = cik_sdma_load_microcode(rdev);
2528         if (r)
2529                 return r;
2530
2531         /* unhalt the MEs */
2532         cik_sdma_enable(rdev, true);
2533
2534         /* start the gfx rings and rlc compute queues */
2535         r = cik_sdma_gfx_resume(rdev);
2536         if (r)
2537                 return r;
2538         r = cik_sdma_rlc_resume(rdev);
2539         if (r)
2540                 return r;
2541
2542         return 0;
2543 }
2544
2545 /**
2546  * cik_sdma_fini - tear down the async dma engines
2547  *
2548  * @rdev: radeon_device pointer
2549  *
2550  * Stop the async dma engines and free the rings (CIK).
2551  */
2552 static void cik_sdma_fini(struct radeon_device *rdev)
2553 {
2554         /* stop the gfx rings and rlc compute queues */
2555         cik_sdma_gfx_stop(rdev);
2556         cik_sdma_rlc_stop(rdev);
2557         /* halt the MEs */
2558         cik_sdma_enable(rdev, false);
2559         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2560         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
2561         /* XXX - compute dma queue tear down */
2562 }
2563
2564 /**
2565  * cik_copy_dma - copy pages using the DMA engine
2566  *
2567  * @rdev: radeon_device pointer
2568  * @src_offset: src GPU address
2569  * @dst_offset: dst GPU address
2570  * @num_gpu_pages: number of GPU pages to xfer
2571  * @fence: radeon fence object
2572  *
2573  * Copy GPU pages using the DMA engine (CIK).
2574  * Used by the radeon ttm implementation to move pages if
2575  * registered as the asic copy callback.
2576  */
2577 int cik_copy_dma(struct radeon_device *rdev,
2578                  uint64_t src_offset, uint64_t dst_offset,
2579                  unsigned num_gpu_pages,
2580                  struct radeon_fence **fence)
2581 {
2582         struct radeon_semaphore *sem = NULL;
2583         int ring_index = rdev->asic->copy.dma_ring_index;
2584         struct radeon_ring *ring = &rdev->ring[ring_index];
2585         u32 size_in_bytes, cur_size_in_bytes;
2586         int i, num_loops;
2587         int r = 0;
2588
2589         r = radeon_semaphore_create(rdev, &sem);
2590         if (r) {
2591                 DRM_ERROR("radeon: moving bo (%d).\n", r);
2592                 return r;
2593         }
2594
2595         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
2596         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
2597         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
2598         if (r) {
2599                 DRM_ERROR("radeon: moving bo (%d).\n", r);
2600                 radeon_semaphore_free(rdev, &sem, NULL);
2601                 return r;
2602         }
2603
2604         if (radeon_fence_need_sync(*fence, ring->idx)) {
2605                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
2606                                             ring->idx);
2607                 radeon_fence_note_sync(*fence, ring->idx);
2608         } else {
2609                 radeon_semaphore_free(rdev, &sem, NULL);
2610         }
2611
2612         for (i = 0; i < num_loops; i++) {
2613                 cur_size_in_bytes = size_in_bytes;
2614                 if (cur_size_in_bytes > 0x1fffff)
2615                         cur_size_in_bytes = 0x1fffff;
2616                 size_in_bytes -= cur_size_in_bytes;
2617                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
2618                 radeon_ring_write(ring, cur_size_in_bytes);
2619                 radeon_ring_write(ring, 0); /* src/dst endian swap */
2620                 radeon_ring_write(ring, src_offset & 0xffffffff);
2621                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
2622                 radeon_ring_write(ring, dst_offset & 0xfffffffc);
2623                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
2624                 src_offset += cur_size_in_bytes;
2625                 dst_offset += cur_size_in_bytes;
2626         }
2627
2628         r = radeon_fence_emit(rdev, fence, ring->idx);
2629         if (r) {
2630                 radeon_ring_unlock_undo(rdev, ring);
2631                 return r;
2632         }
2633
2634         radeon_ring_unlock_commit(rdev, ring);
2635         radeon_semaphore_free(rdev, &sem, *fence);
2636
2637         return r;
2638 }
2639
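/*
 * Worked example of the chunking above: copying 1024 GPU pages is
 * 1024 << RADEON_GPU_PAGE_SHIFT = 0x400000 bytes, and with at most
 * 0x1fffff bytes per COPY_LINEAR packet that gives
 * DIV_ROUND_UP(0x400000, 0x1fffff) = 3 loops of 7 DWs each.  The
 * "+ 14" in the ring lock reserves space for the optional semaphore
 * sync (3 DWs) plus the fence, trap, and HDP flush packets (11 DWs)
 * emitted by cik_sdma_fence_ring_emit().
 */
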
2640 /**
2641  * cik_sdma_ring_test - simple async dma engine test
2642  *
2643  * @rdev: radeon_device pointer
2644  * @ring: radeon_ring structure holding ring information
2645  *
2646  * Test the DMA engine by using it to write a value
2647  * to memory (CIK).
2648  * Returns 0 for success, error for failure.
2649  */
2650 int cik_sdma_ring_test(struct radeon_device *rdev,
2651                        struct radeon_ring *ring)
2652 {
2653         unsigned i;
2654         int r;
2655         void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2656         u32 tmp;
2657
2658         if (!ptr) {
2659                 DRM_ERROR("invalid vram scratch pointer\n");
2660                 return -EINVAL;
2661         }
2662
2663         tmp = 0xCAFEDEAD;
2664         writel(tmp, ptr);
2665
2666         r = radeon_ring_lock(rdev, ring, 4);
2667         if (r) {
2668                 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2669                 return r;
2670         }
2671         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2672         radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2673         radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
2674         radeon_ring_write(ring, 1); /* number of DWs to follow */
2675         radeon_ring_write(ring, 0xDEADBEEF);
2676         radeon_ring_unlock_commit(rdev, ring);
2677
2678         for (i = 0; i < rdev->usec_timeout; i++) {
2679                 tmp = readl(ptr);
2680                 if (tmp == 0xDEADBEEF)
2681                         break;
2682                 DRM_UDELAY(1);
2683         }
2684
2685         if (i < rdev->usec_timeout) {
2686                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2687         } else {
2688                 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2689                           ring->idx, tmp);
2690                 r = -EINVAL;
2691         }
2692         return r;
2693 }
2694
2695 /**
2696  * cik_sdma_ib_test - test an IB on the DMA engine
2697  *
2698  * @rdev: radeon_device pointer
2699  * @ring: radeon_ring structure holding ring information
2700  *
2701  * Test a simple IB in the DMA ring (CIK).
2702  * Returns 0 on success, error on failure.
2703  */
2704 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2705 {
2706         struct radeon_ib ib;
2707         unsigned i;
2708         int r;
2709         void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2710         u32 tmp = 0;
2711
2712         if (!ptr) {
2713                 DRM_ERROR("invalid vram scratch pointer\n");
2714                 return -EINVAL;
2715         }
2716
2717         tmp = 0xCAFEDEAD;
2718         writel(tmp, ptr);
2719
2720         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2721         if (r) {
2722                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2723                 return r;
2724         }
2725
2726         ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2727         ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
2728         ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
2729         ib.ptr[3] = 1;
2730         ib.ptr[4] = 0xDEADBEEF;
2731         ib.length_dw = 5;
2732
2733         r = radeon_ib_schedule(rdev, &ib, NULL);
2734         if (r) {
2735                 radeon_ib_free(rdev, &ib);
2736                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2737                 return r;
2738         }
2739         r = radeon_fence_wait(ib.fence, false);
2740         if (r) {
2741                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                radeon_ib_free(rdev, &ib); /* don't leak the IB on error */
2742                 return r;
2743         }
2744         for (i = 0; i < rdev->usec_timeout; i++) {
2745                 tmp = readl(ptr);
2746                 if (tmp == 0xDEADBEEF)
2747                         break;
2748                 DRM_UDELAY(1);
2749         }
2750         if (i < rdev->usec_timeout) {
2751                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2752         } else {
2753                 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
2754                 r = -EINVAL;
2755         }
2756         radeon_ib_free(rdev, &ib);
2757         return r;
2758 }
2759
2761 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
2762 {
2763         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2764                 RREG32(GRBM_STATUS));
2765         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2766                 RREG32(GRBM_STATUS2));
2767         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2768                 RREG32(GRBM_STATUS_SE0));
2769         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2770                 RREG32(GRBM_STATUS_SE1));
2771         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
2772                 RREG32(GRBM_STATUS_SE2));
2773         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
2774                 RREG32(GRBM_STATUS_SE3));
2775         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2776                 RREG32(SRBM_STATUS));
2777         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
2778                 RREG32(SRBM_STATUS2));
2779         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
2780                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
2781         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
2782                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
2783 }
2784
2785 /**
2786  * cik_gpu_check_soft_reset - check which blocks are busy
2787  *
2788  * @rdev: radeon_device pointer
2789  *
2790  * Check which blocks are busy and return the relevant reset
2791  * mask to be used by cik_gpu_soft_reset().
2792  * Returns a mask of the blocks to be reset.
2793  */
2794 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
2795 {
2796         u32 reset_mask = 0;
2797         u32 tmp;
2798
2799         /* GRBM_STATUS */
2800         tmp = RREG32(GRBM_STATUS);
2801         if (tmp & (PA_BUSY | SC_BUSY |
2802                    BCI_BUSY | SX_BUSY |
2803                    TA_BUSY | VGT_BUSY |
2804                    DB_BUSY | CB_BUSY |
2805                    GDS_BUSY | SPI_BUSY |
2806                    IA_BUSY | IA_BUSY_NO_DMA))
2807                 reset_mask |= RADEON_RESET_GFX;
2808
2809         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
2810                 reset_mask |= RADEON_RESET_CP;
2811
2812         /* GRBM_STATUS2 */
2813         tmp = RREG32(GRBM_STATUS2);
2814         if (tmp & RLC_BUSY)
2815                 reset_mask |= RADEON_RESET_RLC;
2816
2817         /* SDMA0_STATUS_REG */
2818         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
2819         if (!(tmp & SDMA_IDLE))
2820                 reset_mask |= RADEON_RESET_DMA;
2821
2822         /* SDMA1_STATUS_REG */
2823         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
2824         if (!(tmp & SDMA_IDLE))
2825                 reset_mask |= RADEON_RESET_DMA1;
2826
2827         /* SRBM_STATUS2 */
2828         tmp = RREG32(SRBM_STATUS2);
2829         if (tmp & SDMA_BUSY)
2830                 reset_mask |= RADEON_RESET_DMA;
2831
2832         if (tmp & SDMA1_BUSY)
2833                 reset_mask |= RADEON_RESET_DMA1;
2834
2835         /* SRBM_STATUS */
2836         tmp = RREG32(SRBM_STATUS);
2837
2838         if (tmp & IH_BUSY)
2839                 reset_mask |= RADEON_RESET_IH;
2840
2841         if (tmp & SEM_BUSY)
2842                 reset_mask |= RADEON_RESET_SEM;
2843
2844         if (tmp & GRBM_RQ_PENDING)
2845                 reset_mask |= RADEON_RESET_GRBM;
2846
2847         if (tmp & VMC_BUSY)
2848                 reset_mask |= RADEON_RESET_VMC;
2849
2850         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
2851                    MCC_BUSY | MCD_BUSY))
2852                 reset_mask |= RADEON_RESET_MC;
2853
2854         if (evergreen_is_display_hung(rdev))
2855                 reset_mask |= RADEON_RESET_DISPLAY;
2856
2857         /* Skip MC reset as it's most likely not hung, just busy */
2858         if (reset_mask & RADEON_RESET_MC) {
2859                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
2860                 reset_mask &= ~RADEON_RESET_MC;
2861         }
2862
2863         return reset_mask;
2864 }
2865
2866 /**
2867  * cik_gpu_soft_reset - soft reset GPU
2868  *
2869  * @rdev: radeon_device pointer
2870  * @reset_mask: mask of which blocks to reset
2871  *
2872  * Soft reset the blocks specified in @reset_mask.
2873  */
2874 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
2875 {
2876         struct evergreen_mc_save save;
2877         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
2878         u32 tmp;
2879
2880         if (reset_mask == 0)
2881                 return;
2882
2883         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
2884
2885         cik_print_gpu_status_regs(rdev);
2886         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
2887                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
2888         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
2889                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
2890
2891         /* stop the rlc */
2892         cik_rlc_stop(rdev);
2893
2894         /* Disable GFX parsing/prefetching */
2895         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2896
2897         /* Disable MEC parsing/prefetching */
2898         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2899
2900         if (reset_mask & RADEON_RESET_DMA) {
2901                 /* sdma0 */
2902                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
2903                 tmp |= SDMA_HALT;
2904                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
2905         }
2906         if (reset_mask & RADEON_RESET_DMA1) {
2907                 /* sdma1 */
2908                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
2909                 tmp |= SDMA_HALT;
2910                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
2911         }
2912
2913         evergreen_mc_stop(rdev, &save);
2914         if (evergreen_mc_wait_for_idle(rdev)) {
2915                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2916         }
2917
2918         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
2919                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
2920
2921         if (reset_mask & RADEON_RESET_CP) {
2922                 grbm_soft_reset |= SOFT_RESET_CP;
2923
2924                 srbm_soft_reset |= SOFT_RESET_GRBM;
2925         }
2926
2927         if (reset_mask & RADEON_RESET_DMA)
2928                 srbm_soft_reset |= SOFT_RESET_SDMA;
2929
2930         if (reset_mask & RADEON_RESET_DMA1)
2931                 srbm_soft_reset |= SOFT_RESET_SDMA1;
2932
2933         if (reset_mask & RADEON_RESET_DISPLAY)
2934                 srbm_soft_reset |= SOFT_RESET_DC;
2935
2936         if (reset_mask & RADEON_RESET_RLC)
2937                 grbm_soft_reset |= SOFT_RESET_RLC;
2938
2939         if (reset_mask & RADEON_RESET_SEM)
2940                 srbm_soft_reset |= SOFT_RESET_SEM;
2941
2942         if (reset_mask & RADEON_RESET_IH)
2943                 srbm_soft_reset |= SOFT_RESET_IH;
2944
2945         if (reset_mask & RADEON_RESET_GRBM)
2946                 srbm_soft_reset |= SOFT_RESET_GRBM;
2947
2948         if (reset_mask & RADEON_RESET_VMC)
2949                 srbm_soft_reset |= SOFT_RESET_VMC;
2950
2951         if (!(rdev->flags & RADEON_IS_IGP)) {
2952                 if (reset_mask & RADEON_RESET_MC)
2953                         srbm_soft_reset |= SOFT_RESET_MC;
2954         }
2955
2956         if (grbm_soft_reset) {
2957                 tmp = RREG32(GRBM_SOFT_RESET);
2958                 tmp |= grbm_soft_reset;
2959                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
2960                 WREG32(GRBM_SOFT_RESET, tmp);
2961                 tmp = RREG32(GRBM_SOFT_RESET);
2962
2963                 udelay(50);
2964
2965                 tmp &= ~grbm_soft_reset;
2966                 WREG32(GRBM_SOFT_RESET, tmp);
2967                 tmp = RREG32(GRBM_SOFT_RESET);
2968         }
2969
2970         if (srbm_soft_reset) {
2971                 tmp = RREG32(SRBM_SOFT_RESET);
2972                 tmp |= srbm_soft_reset;
2973                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
2974                 WREG32(SRBM_SOFT_RESET, tmp);
2975                 tmp = RREG32(SRBM_SOFT_RESET);
2976
2977                 udelay(50);
2978
2979                 tmp &= ~srbm_soft_reset;
2980                 WREG32(SRBM_SOFT_RESET, tmp);
2981                 tmp = RREG32(SRBM_SOFT_RESET);
2982         }
2983
2984         /* Wait a little for things to settle down */
2985         udelay(50);
2986
2987         evergreen_mc_resume(rdev, &save);
2988         udelay(50);
2989
2990         cik_print_gpu_status_regs(rdev);
2991 }
2992
2993 /**
2994  * cik_asic_reset - soft reset GPU
2995  *
2996  * @rdev: radeon_device pointer
2997  *
2998  * Look up which blocks are hung and attempt
2999  * to reset them.
3000  * Returns 0 for success.
3001  */
3002 int cik_asic_reset(struct radeon_device *rdev)
3003 {
3004         u32 reset_mask;
3005
3006         reset_mask = cik_gpu_check_soft_reset(rdev);
3007
3008         if (reset_mask)
3009                 r600_set_bios_scratch_engine_hung(rdev, true);
3010
3011         cik_gpu_soft_reset(rdev, reset_mask);
3012
3013         reset_mask = cik_gpu_check_soft_reset(rdev);
3014
3015         if (!reset_mask)
3016                 r600_set_bios_scratch_engine_hung(rdev, false);
3017
3018         return 0;
3019 }
3020
3021 /**
3022  * cik_gfx_is_lockup - check if the 3D engine is locked up
3023  *
3024  * @rdev: radeon_device pointer
3025  * @ring: radeon_ring structure holding ring information
3026  *
3027  * Check if the 3D engine is locked up (CIK).
3028  * Returns true if the engine is locked, false if not.
3029  */
3030 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3031 {
3032         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3033
3034         if (!(reset_mask & (RADEON_RESET_GFX |
3035                             RADEON_RESET_COMPUTE |
3036                             RADEON_RESET_CP))) {
3037                 radeon_ring_lockup_update(ring);
3038                 return false;
3039         }
3040         /* force CP activity */
3041         radeon_ring_force_activity(rdev, ring);
3042         return radeon_ring_test_lockup(rdev, ring);
3043 }
3044
3045 /**
3046  * cik_sdma_is_lockup - Check if the DMA engine is locked up
3047  *
3048  * @rdev: radeon_device pointer
3049  * @ring: radeon_ring structure holding ring information
3050  *
3051  * Check if the async DMA engine is locked up (CIK).
3052  * Returns true if the engine appears to be locked up, false if not.
3053  */
3054 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3055 {
3056         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3057         u32 mask;
3058
3059         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3060                 mask = RADEON_RESET_DMA;
3061         else
3062                 mask = RADEON_RESET_DMA1;
3063
3064         if (!(reset_mask & mask)) {
3065                 radeon_ring_lockup_update(ring);
3066                 return false;
3067         }
3068         /* force ring activity */
3069         radeon_ring_force_activity(rdev, ring);
3070         return radeon_ring_test_lockup(rdev, ring);
3071 }
3072
3073 /* MC */
3074 /**
3075  * cik_mc_program - program the GPU memory controller
3076  *
3077  * @rdev: radeon_device pointer
3078  *
3079  * Set the location of vram, gart, and AGP in the GPU's
3080  * physical address space (CIK).
3081  */
3082 static void cik_mc_program(struct radeon_device *rdev)
3083 {
3084         struct evergreen_mc_save save;
3085         u32 tmp;
3086         int i, j;
3087
3088         /* Initialize HDP */
3089         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3090                 WREG32((0x2c14 + j), 0x00000000);
3091                 WREG32((0x2c18 + j), 0x00000000);
3092                 WREG32((0x2c1c + j), 0x00000000);
3093                 WREG32((0x2c20 + j), 0x00000000);
3094                 WREG32((0x2c24 + j), 0x00000000);
3095         }
3096         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3097
3098         evergreen_mc_stop(rdev, &save);
3099         if (radeon_mc_wait_for_idle(rdev)) {
3100                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3101         }
3102         /* Lock out access through the VGA aperture */
3103         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3104         /* Update configuration */
3105         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3106                rdev->mc.vram_start >> 12);
3107         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3108                rdev->mc.vram_end >> 12);
3109         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3110                rdev->vram_scratch.gpu_addr >> 12);
3111         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3112         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3113         WREG32(MC_VM_FB_LOCATION, tmp);
3114         /* XXX double check these! */
3115         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3116         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3117         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3118         WREG32(MC_VM_AGP_BASE, 0);
3119         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3120         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3121         if (radeon_mc_wait_for_idle(rdev)) {
3122                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3123         }
3124         evergreen_mc_resume(rdev, &save);
3125         /* we need to own VRAM, so turn off the VGA renderer here
3126          * to stop it overwriting our objects */
3127         rv515_vga_render_disable(rdev);
3128 }
3129
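/*
 * Encoding example for the MC_VM_FB_LOCATION value above: base and
 * top are stored in 16MB units (address >> 24), top in bits 31:16
 * and base in bits 15:0.  A 2GB framebuffer mapped at GPU address 0
 * has vram_start = 0 and vram_end = 0x7FFFFFFF, giving
 * tmp = (0x7F << 16) | 0x00 = 0x007F0000.
 */
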
3130 /**
3131  * cik_mc_init - initialize the memory controller driver params
3132  *
3133  * @rdev: radeon_device pointer
3134  *
3135  * Look up the amount of vram, vram width, and decide how to place
3136  * vram and gart within the GPU's physical address space (CIK).
3137  * Returns 0 for success.
3138  */
3139 static int cik_mc_init(struct radeon_device *rdev)
3140 {
3141         u32 tmp;
3142         int chansize, numchan;
3143
3144         /* Get VRAM information */
3145         rdev->mc.vram_is_ddr = true;
3146         tmp = RREG32(MC_ARB_RAMCFG);
3147         if (tmp & CHANSIZE_MASK) {
3148                 chansize = 64;
3149         } else {
3150                 chansize = 32;
3151         }
3152         tmp = RREG32(MC_SHARED_CHMAP);
3153         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3154         case 0:
3155         default:
3156                 numchan = 1;
3157                 break;
3158         case 1:
3159                 numchan = 2;
3160                 break;
3161         case 2:
3162                 numchan = 4;
3163                 break;
3164         case 3:
3165                 numchan = 8;
3166                 break;
3167         case 4:
3168                 numchan = 3;
3169                 break;
3170         case 5:
3171                 numchan = 6;
3172                 break;
3173         case 6:
3174                 numchan = 10;
3175                 break;
3176         case 7:
3177                 numchan = 12;
3178                 break;
3179         case 8:
3180                 numchan = 16;
3181                 break;
3182         }
3183         rdev->mc.vram_width = numchan * chansize;
3184         /* Could aper size report 0? */
3185         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3186         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3187         /* size in MB on CIK */
3188         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3189         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3190         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3191         si_vram_gtt_location(rdev, &rdev->mc);
3192         radeon_update_bandwidth_info(rdev);
3193
3194         return 0;
3195 }
3196
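/*
 * Example of the width math above: a 256-bit GDDR5 board reports a
 * NOOFCHAN field decoding to numchan = 4 and has CHANSIZE set
 * (chansize = 64), so rdev->mc.vram_width = 4 * 64 = 256 bits.
 */
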
3197 /*
3198  * GART
3199  * VMID 0 covers the physical GPU address space used by the kernel.
3200  * VMIDs 1-15 are used for userspace clients and are handled
3201  * by the radeon vm/hsa code.
3202  */
3203 /**
3204  * cik_pcie_gart_tlb_flush - gart tlb flush callback
3205  *
3206  * @rdev: radeon_device pointer
3207  *
3208  * Flush the TLB for the VMID 0 page table (CIK).
3209  */
3210 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3211 {
3212         /* flush hdp cache */
3213         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3214
3215         /* bits 0-15 are the VM contexts0-15 */
3216         WREG32(VM_INVALIDATE_REQUEST, 0x1);
3217 }
3218
3219 /**
3220  * cik_pcie_gart_enable - gart enable
3221  *
3222  * @rdev: radeon_device pointer
3223  *
3224  * This sets up the TLBs, programs the page tables for VMID0,
3225  * sets up the hw for VMIDs 1-15 which are allocated on
3226  * demand, and sets up the global locations for the LDS, GDS,
3227  * and GPUVM for FSA64 clients (CIK).
3228  * Returns 0 for success, errors for failure.
3229  */
3230 static int cik_pcie_gart_enable(struct radeon_device *rdev)
3231 {
3232         int r, i;
3233
3234         if (rdev->gart.robj == NULL) {
3235                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3236                 return -EINVAL;
3237         }
3238         r = radeon_gart_table_vram_pin(rdev);
3239         if (r)
3240                 return r;
3241         radeon_gart_restore(rdev);
3242         /* Setup TLB control */
3243         WREG32(MC_VM_MX_L1_TLB_CNTL,
3244                (0xA << 7) |
3245                ENABLE_L1_TLB |
3246                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3247                ENABLE_ADVANCED_DRIVER_MODEL |
3248                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3249         /* Setup L2 cache */
3250         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3251                ENABLE_L2_FRAGMENT_PROCESSING |
3252                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3253                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3254                EFFECTIVE_L2_QUEUE_SIZE(7) |
3255                CONTEXT1_IDENTITY_ACCESS_MODE(1));
3256         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3257         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3258                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3259         /* setup context0 */
3260         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3261         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3262         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3263         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3264                         (u32)(rdev->dummy_page.addr >> 12));
3265         WREG32(VM_CONTEXT0_CNTL2, 0);
3266         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3267                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3268
3269         WREG32(0x15D4, 0);
3270         WREG32(0x15D8, 0);
3271         WREG32(0x15DC, 0);
3272
3273         /* empty context1-15 */
3274         /* FIXME start with 4G, once using 2 level pt switch to full
3275          * vm size space
3276          */
3277         /* set vm size, must be a multiple of 4 */
3278         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3279         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3280         for (i = 1; i < 16; i++) {
3281                 if (i < 8)
3282                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3283                                rdev->gart.table_addr >> 12);
3284                 else
3285                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3286                                rdev->gart.table_addr >> 12);
3287         }
3288
3289         /* enable context1-15 */
3290         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3291                (u32)(rdev->dummy_page.addr >> 12));
3292         WREG32(VM_CONTEXT1_CNTL2, 4);
3293         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3294                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3295                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3296                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3297                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3298                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3299                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3300                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3301                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3302                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3303                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3304                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3305                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3306
3307         /* TC cache setup ??? */
3308         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3309         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3310         WREG32(TC_CFG_L1_STORE_POLICY, 0);
3311
3312         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3313         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3314         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3315         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3316         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3317
3318         WREG32(TC_CFG_L1_VOLATILE, 0);
3319         WREG32(TC_CFG_L2_VOLATILE, 0);
3320
3321         if (rdev->family == CHIP_KAVERI) {
3322                 u32 tmp = RREG32(CHUB_CONTROL);
3323                 tmp &= ~BYPASS_VM;
3324                 WREG32(CHUB_CONTROL, tmp);
3325         }
3326
3327         /* XXX SH_MEM regs */
3328         /* where to put LDS, scratch, GPUVM in FSA64 space */
3329         for (i = 0; i < 16; i++) {
3330                 WREG32(SRBM_GFX_CNTL, VMID(i));
3331                 /* CP and shaders */
3332                 WREG32(SH_MEM_CONFIG, 0);
3333                 WREG32(SH_MEM_APE1_BASE, 1);
3334                 WREG32(SH_MEM_APE1_LIMIT, 0);
3335                 WREG32(SH_MEM_BASES, 0);
3336                 /* SDMA GFX */
3337                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3338                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3339                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3340                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3341                 /* XXX SDMA RLC - todo */
3342         }
3343         WREG32(SRBM_GFX_CNTL, 0);
3344
3345         cik_pcie_gart_tlb_flush(rdev);
3346         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3347                  (unsigned)(rdev->mc.gtt_size >> 20),
3348                  (unsigned long long)rdev->gart.table_addr);
3349         rdev->gart.ready = true;
3350         return 0;
3351 }
3352
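/*
 * Sizing example for the context0 (GART) setup above: the START,
 * END, and BASE registers take 4KB page numbers (address >> 12), so
 * a 1GB GTT aperture at gtt_start = 0x80000000 programs
 * START = 0x80000 and END = 0xBFFFF.  The flat VMID0 page table
 * pinned in vram then needs one 8-byte PTE per page, i.e. 2MB for
 * this example.
 */
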
3353 /**
3354  * cik_pcie_gart_disable - gart disable
3355  *
3356  * @rdev: radeon_device pointer
3357  *
3358  * This disables all VM page tables (CIK).
3359  */
3360 static void cik_pcie_gart_disable(struct radeon_device *rdev)
3361 {
3362         /* Disable all tables */
3363         WREG32(VM_CONTEXT0_CNTL, 0);
3364         WREG32(VM_CONTEXT1_CNTL, 0);
3365         /* Setup TLB control */
3366         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3367                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3368         /* Setup L2 cache */
3369         WREG32(VM_L2_CNTL,
3370                ENABLE_L2_FRAGMENT_PROCESSING |
3371                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3372                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3373                EFFECTIVE_L2_QUEUE_SIZE(7) |
3374                CONTEXT1_IDENTITY_ACCESS_MODE(1));
3375         WREG32(VM_L2_CNTL2, 0);
3376         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3377                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3378         radeon_gart_table_vram_unpin(rdev);
3379 }
3380
3381 /**
3382  * cik_pcie_gart_fini - vm fini callback
3383  *
3384  * @rdev: radeon_device pointer
3385  *
3386  * Tears down the driver GART/VM setup (CIK).
3387  */
3388 static void cik_pcie_gart_fini(struct radeon_device *rdev)
3389 {
3390         cik_pcie_gart_disable(rdev);
3391         radeon_gart_table_vram_free(rdev);
3392         radeon_gart_fini(rdev);
3393 }
3394
3395 /* vm parser */
3396 /**
3397  * cik_ib_parse - vm ib_parse callback
3398  *
3399  * @rdev: radeon_device pointer
3400  * @ib: indirect buffer pointer
3401  *
3402  * CIK uses hw IB checking, so this is a nop (CIK).
3403  */
3404 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3405 {
3406         return 0;
3407 }
3408
3409 /*
3410  * vm
3411  * VMID 0 covers the physical GPU address space used by the kernel.
3412  * VMIDs 1-15 are used for userspace clients and are handled
3413  * by the radeon vm/hsa code.
3414  */
3415 /**
3416  * cik_vm_init - cik vm init callback
3417  *
3418  * @rdev: radeon_device pointer
3419  *
3420  * Inits cik specific vm parameters (number of VMs, base of vram for
3421  * VMIDs 1-15) (CIK).
3422  * Returns 0 for success.
3423  */
3424 int cik_vm_init(struct radeon_device *rdev)
3425 {
3426         /* number of VMs */
3427         rdev->vm_manager.nvm = 16;
3428         /* base offset of vram pages */
3429         if (rdev->flags & RADEON_IS_IGP) {
3430                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
3431                 tmp <<= 22;
3432                 rdev->vm_manager.vram_base_offset = tmp;
3433         } else
3434                 rdev->vm_manager.vram_base_offset = 0;
3435
3436         return 0;
3437 }
3438
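/*
 * A note on the IGP branch above: MC_VM_FB_OFFSET appears to hold
 * the carve-out offset in 4MB units, which is why it is shifted
 * left by 22 to yield the byte offset that the VM code adds when
 * translating vram page addresses for VMIDs 1-15.
 */
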
3439 /**
3440  * cik_vm_fini - cik vm fini callback
3441  *
3442  * @rdev: radeon_device pointer
3443  *
3444  * Tear down any asic specific VM setup (CIK).
3445  */
3446 void cik_vm_fini(struct radeon_device *rdev)
3447 {
3448 }
3449
3450 /**
3451  * cik_vm_flush - cik vm flush using the CP
3452  *
3453  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
3454  *
3455  * Update the page table base and flush the VM TLB
3456  * using the CP (CIK).
3457  */
3458 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3459 {
3460         struct radeon_ring *ring = &rdev->ring[ridx];
3461
3462         if (vm == NULL)
3463                 return;
3464
3465         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3466         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3467                                  WRITE_DATA_DST_SEL(0)));
3468         if (vm->id < 8) {
3469                 radeon_ring_write(ring,
3470                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3471         } else {
3472                 radeon_ring_write(ring,
3473                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3474         }
3475         radeon_ring_write(ring, 0);
3476         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3477
3478         /* update SH_MEM_* regs */
3479         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3480         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3481                                  WRITE_DATA_DST_SEL(0)));
3482         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3483         radeon_ring_write(ring, 0);
3484         radeon_ring_write(ring, VMID(vm->id));
3485
3486         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
3487         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3488                                  WRITE_DATA_DST_SEL(0)));
3489         radeon_ring_write(ring, SH_MEM_BASES >> 2);
3490         radeon_ring_write(ring, 0);
3491
3492         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
3493         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
3494         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
3495         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
3496
3497         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3498         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3499                                  WRITE_DATA_DST_SEL(0)));
3500         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3501         radeon_ring_write(ring, 0);
3502         radeon_ring_write(ring, VMID(0));
3503
3504         /* HDP flush */
3505         /* We should be using the WAIT_REG_MEM packet here like in
3506          * cik_fence_ring_emit(), but it causes the CP to hang in this
3507          * context...
3508          */
3509         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3510         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3511                                  WRITE_DATA_DST_SEL(0)));
3512         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3513         radeon_ring_write(ring, 0);
3514         radeon_ring_write(ring, 0);
3515
3516         /* bits 0-15 are the VM contexts0-15 */
3517         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3518         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3519                                  WRITE_DATA_DST_SEL(0)));
3520         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3521         radeon_ring_write(ring, 0);
3522         radeon_ring_write(ring, 1 << vm->id);
3523
3524         /* sync PFP to ME, otherwise we might get invalid PFP reads */
3525         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3526         radeon_ring_write(ring, 0x0);
3527 }
3528
3529 /**
3530  * cik_vm_set_page - update the page tables using CP or sDMA
3531  *
3532  * @rdev: radeon_device pointer
3533  * @ib: indirect buffer to fill with commands
3534  * @pe: addr of the page entry
3535  * @addr: dst addr to write into pe
3536  * @count: number of page entries to update
3537  * @incr: increase next addr by incr bytes
3538  * @flags: access flags
3539  *
3540  * Update the page tables using CP or sDMA (CIK).
3541  */
3542 void cik_vm_set_page(struct radeon_device *rdev,
3543                      struct radeon_ib *ib,
3544                      uint64_t pe,
3545                      uint64_t addr, unsigned count,
3546                      uint32_t incr, uint32_t flags)
3547 {
3548         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3549         uint64_t value;
3550         unsigned ndw;
3551
3552         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3553                 /* CP */
3554                 while (count) {
3555                         ndw = 2 + count * 2;
3556                         if (ndw > 0x3FFE)
3557                                 ndw = 0x3FFE;
3558
3559                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3560                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3561                                                     WRITE_DATA_DST_SEL(1));
3562                         ib->ptr[ib->length_dw++] = pe;
3563                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3564                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3565                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
3566                                         value = radeon_vm_map_gart(rdev, addr);
3567                                         value &= 0xFFFFFFFFFFFFF000ULL;
3568                                 } else if (flags & RADEON_VM_PAGE_VALID) {
3569                                         value = addr;
3570                                 } else {
3571                                         value = 0;
3572                                 }
3573                                 addr += incr;
3574                                 value |= r600_flags;
3575                                 ib->ptr[ib->length_dw++] = value;
3576                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3577                         }
3578                 }
3579         } else {
3580                 /* DMA */
3581                 if (flags & RADEON_VM_PAGE_SYSTEM) {
3582                         while (count) {
3583                                 ndw = count * 2;
3584                                 if (ndw > 0xFFFFE)
3585                                         ndw = 0xFFFFE;
3586
3587                                 /* for non-physically contiguous pages (system) */
3588                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3589                                 ib->ptr[ib->length_dw++] = pe;
3590                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3591                                 ib->ptr[ib->length_dw++] = ndw;
3592                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3593                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
3594                                                 value = radeon_vm_map_gart(rdev, addr);
3595                                                 value &= 0xFFFFFFFFFFFFF000ULL;
3596                                         } else if (flags & RADEON_VM_PAGE_VALID) {
3597                                                 value = addr;
3598                                         } else {
3599                                                 value = 0;
3600                                         }
3601                                         addr += incr;
3602                                         value |= r600_flags;
3603                                         ib->ptr[ib->length_dw++] = value;
3604                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
3605                                 }
3606                         }
3607                 } else {
3608                         while (count) {
3609                                 ndw = count;
3610                                 if (ndw > 0x7FFFF)
3611                                         ndw = 0x7FFFF;
3612
3613                                 if (flags & RADEON_VM_PAGE_VALID)
3614                                         value = addr;
3615                                 else
3616                                         value = 0;
3617                                 /* for physically contiguous pages (vram) */
3618                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
3619                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
3620                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3621                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3622                                 ib->ptr[ib->length_dw++] = 0;
3623                                 ib->ptr[ib->length_dw++] = value; /* value */
3624                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3625                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
3626                                 ib->ptr[ib->length_dw++] = 0;
3627                                 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
3628                                 pe += ndw * 8;
3629                                 addr += ndw * incr;
3630                                 count -= ndw;
3631                         }
3632                 }
3633                 while (ib->length_dw & 0x7)
3634                         ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
3635         }
3636 }
3637
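/*
 * Example of the contiguous (vram) path above: mapping 512
 * physically contiguous 4KB pages (incr = 0x1000) fits in a single
 * GENERATE_PTE_PDE packet since ndw = 512 <= 0x7FFFF.  The engine
 * writes 512 8-byte entries starting at pe, stepping the address
 * part of value by incr for each one, after which the driver
 * advances pe by 512 * 8 and addr by 512 * 0x1000 before the next
 * chunk, if any.
 */
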
3638 /**
3639  * cik_dma_vm_flush - cik vm flush using sDMA
3640  *
3641  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
3642  *
3643  * Update the page table base and flush the VM TLB
3644  * using sDMA (CIK).
3645  */
3646 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3647 {
3648         struct radeon_ring *ring = &rdev->ring[ridx];
3649         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3650                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3651         u32 ref_and_mask;
3652
3653         if (vm == NULL)
3654                 return;
3655
3656         if (ridx == R600_RING_TYPE_DMA_INDEX)
3657                 ref_and_mask = SDMA0;
3658         else
3659                 ref_and_mask = SDMA1;
3660
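        /* each SRBM_WRITE below is 3 dwords: header, register dword offset,
         * value; the 0xf000 in the header lands in bits 31:28 of the packet,
         * which appears to be the byte-enable for the write (0xf = all bytes)
         */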
3661         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3662         if (vm->id < 8) {
3663                 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3664         } else {
3665                 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3666         }
3667         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3668
3669         /* update SH_MEM_* regs */
3670         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3671         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3672         radeon_ring_write(ring, VMID(vm->id));
3673
3674         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3675         radeon_ring_write(ring, SH_MEM_BASES >> 2);
3676         radeon_ring_write(ring, 0);
3677
3678         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3679         radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
3680         radeon_ring_write(ring, 0);
3681
3682         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3683         radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
3684         radeon_ring_write(ring, 1);
3685
3686         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3687         radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
3688         radeon_ring_write(ring, 0);
3689
3690         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3691         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3692         radeon_ring_write(ring, VMID(0));
3693
3694         /* flush HDP */
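        /* as programmed here, the engine writes REFERENCE to GPU_HDP_FLUSH_REQ
         * and then polls GPU_HDP_FLUSH_DONE until (value & MASK) == REFERENCE;
         * EXTRA_FUNC(3) selects the "==" compare
         */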
3695         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3696         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3697         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3698         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3699         radeon_ring_write(ring, ref_and_mask); /* MASK */
3700         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3701
3702         /* flush TLB */
3703         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3704         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3705         radeon_ring_write(ring, 1 << vm->id);
3706 }
3707
3708 /*
3709  * RLC
3710  * The RLC is a multi-purpose microengine that handles a
3711  * variety of functions, the most important of which is
3712  * the interrupt controller.
3713  */
3714 /**
3715  * cik_rlc_stop - stop the RLC ME
3716  *
3717  * @rdev: radeon_device pointer
3718  *
3719  * Halt the RLC ME (MicroEngine) (CIK).
3720  */
3721 static void cik_rlc_stop(struct radeon_device *rdev)
3722 {
3723         int i, j, k;
3724         u32 mask, tmp;
3725
3726         tmp = RREG32(CP_INT_CNTL_RING0);
3727         tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3728         WREG32(CP_INT_CNTL_RING0, tmp);
3729
3730         RREG32(CB_CGTT_SCLK_CTRL);
3731         RREG32(CB_CGTT_SCLK_CTRL);
3732         RREG32(CB_CGTT_SCLK_CTRL);
3733         RREG32(CB_CGTT_SCLK_CTRL);
3734
3735         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3736         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
3737
3738         WREG32(RLC_CNTL, 0);
3739
3740         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3741                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3742                         cik_select_se_sh(rdev, i, j);
3743                         for (k = 0; k < rdev->usec_timeout; k++) {
3744                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
3745                                         break;
3746                                 udelay(1);
3747                         }
3748                 }
3749         }
3750         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3751
3752         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
3753         for (k = 0; k < rdev->usec_timeout; k++) {
3754                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3755                         break;
3756                 udelay(1);
3757         }
3758 }
3759
3760 /**
3761  * cik_rlc_start - start the RLC ME
3762  *
3763  * @rdev: radeon_device pointer
3764  *
3765  * Unhalt the RLC ME (MicroEngine) (CIK).
3766  */
3767 static void cik_rlc_start(struct radeon_device *rdev)
3768 {
3769         u32 tmp;
3770
3771         WREG32(RLC_CNTL, RLC_ENABLE);
3772
3773         tmp = RREG32(CP_INT_CNTL_RING0);
3774         tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3775         WREG32(CP_INT_CNTL_RING0, tmp);
3776
3777         udelay(50);
3778 }
3779
3780 /**
3781  * cik_rlc_resume - setup the RLC hw
3782  *
3783  * @rdev: radeon_device pointer
3784  *
3785  * Initialize the RLC registers, load the ucode,
3786  * and start the RLC (CIK).
3787  * Returns 0 for success, -EINVAL if the ucode is not available.
3788  */
3789 static int cik_rlc_resume(struct radeon_device *rdev)
3790 {
3791         u32 i, size;
3792         u32 clear_state_info[3];
3793         const __be32 *fw_data;
3794
3795         if (!rdev->rlc_fw)
3796                 return -EINVAL;
3797
3798         switch (rdev->family) {
3799         case CHIP_BONAIRE:
3800         default:
3801                 size = BONAIRE_RLC_UCODE_SIZE;
3802                 break;
3803         case CHIP_KAVERI:
3804                 size = KV_RLC_UCODE_SIZE;
3805                 break;
3806         case CHIP_KABINI:
3807                 size = KB_RLC_UCODE_SIZE;
3808                 break;
3809         }
3810
3811         cik_rlc_stop(rdev);
3812
3813         WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
3814         RREG32(GRBM_SOFT_RESET);
3815         udelay(50);
3816         WREG32(GRBM_SOFT_RESET, 0);
3817         RREG32(GRBM_SOFT_RESET);
3818         udelay(50);
3819
3820         WREG32(RLC_LB_CNTR_INIT, 0);
3821         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
3822
3823         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3824         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
3825         WREG32(RLC_LB_PARAMS, 0x00600408);
3826         WREG32(RLC_LB_CNTL, 0x80000004);
3827
3828         WREG32(RLC_MC_CNTL, 0);
3829         WREG32(RLC_UCODE_CNTL, 0);
3830
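        /* load the RLC ucode: RLC_GPM_UCODE_ADDR appears to autoincrement on
         * each RLC_GPM_UCODE_DATA write, so it is zeroed, the big-endian words
         * are streamed in, and it is zeroed again when done
         */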
3831         fw_data = (const __be32 *)rdev->rlc_fw->data;
3832         WREG32(RLC_GPM_UCODE_ADDR, 0);
3833         for (i = 0; i < size; i++)
3834                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
3835         WREG32(RLC_GPM_UCODE_ADDR, 0);
3836
3837         /* XXX */
3838         clear_state_info[0] = 0; /* upper_32_bits(rdev->rlc.save_restore_gpu_addr) */
3839         clear_state_info[1] = 0; /* rdev->rlc.save_restore_gpu_addr */
3840         clear_state_info[2] = 0; /* cik_default_size */
3841         WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
3842         for (i = 0; i < 3; i++)
3843                 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
3844         WREG32(RLC_DRIVER_DMA_STATUS, 0);
3845
3846         cik_rlc_start(rdev);
3847
3848         return 0;
3849 }
3850
3851 /*
3852  * Interrupts
3853  * Starting with r6xx, interrupts are handled via a ring buffer.
3854  * Ring buffers are areas of GPU accessible memory that the GPU
3855  * writes interrupt vectors into and the host reads vectors out of.
3856  * There is a rptr (read pointer) that determines where the
3857  * host is currently reading, and a wptr (write pointer)
3858  * which determines where the GPU has written.  When the
3859  * pointers are equal, the ring is idle.  When the GPU
3860  * writes vectors to the ring buffer, it increments the
3861  * wptr.  When there is an interrupt, the host then starts
3862  * fetching commands and processing them until the pointers are
3863  * equal again at which point it updates the rptr.
3864  */
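
/*
 * A rough sketch of the host-side consume loop described above (see
 * cik_irq_process() below for the real implementation):
 *
 *   wptr = cik_get_ih_wptr(rdev);
 *   while (rptr != wptr) {
 *           handle the 16-byte vector at rptr;
 *           rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *   }
 *   rdev->ih.rptr = rptr;
 *   WREG32(IH_RB_RPTR, rptr);
 */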
3865
3866 /**
3867  * cik_enable_interrupts - Enable the interrupt ring buffer
3868  *
3869  * @rdev: radeon_device pointer
3870  *
3871  * Enable the interrupt ring buffer (CIK).
3872  */
3873 static void cik_enable_interrupts(struct radeon_device *rdev)
3874 {
3875         u32 ih_cntl = RREG32(IH_CNTL);
3876         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3877
3878         ih_cntl |= ENABLE_INTR;
3879         ih_rb_cntl |= IH_RB_ENABLE;
3880         WREG32(IH_CNTL, ih_cntl);
3881         WREG32(IH_RB_CNTL, ih_rb_cntl);
3882         rdev->ih.enabled = true;
3883 }
3884
3885 /**
3886  * cik_disable_interrupts - Disable the interrupt ring buffer
3887  *
3888  * @rdev: radeon_device pointer
3889  *
3890  * Disable the interrupt ring buffer (CIK).
3891  */
3892 static void cik_disable_interrupts(struct radeon_device *rdev)
3893 {
3894         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3895         u32 ih_cntl = RREG32(IH_CNTL);
3896
3897         ih_rb_cntl &= ~IH_RB_ENABLE;
3898         ih_cntl &= ~ENABLE_INTR;
3899         WREG32(IH_RB_CNTL, ih_rb_cntl);
3900         WREG32(IH_CNTL, ih_cntl);
3901         /* set rptr, wptr to 0 */
3902         WREG32(IH_RB_RPTR, 0);
3903         WREG32(IH_RB_WPTR, 0);
3904         rdev->ih.enabled = false;
3905         rdev->ih.rptr = 0;
3906 }
3907
3908 /**
3909  * cik_disable_interrupt_state - Disable all interrupt sources
3910  *
3911  * @rdev: radeon_device pointer
3912  *
3913  * Clear all interrupt enable bits used by the driver (CIK).
3914  */
3915 static void cik_disable_interrupt_state(struct radeon_device *rdev)
3916 {
3917         u32 tmp;
3918
3919         /* gfx ring */
3920         WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3921         /* sdma */
3922         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3923         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3924         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3925         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3926         /* compute queues */
3927         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
3928         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
3929         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
3930         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
3931         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
3932         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
3933         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
3934         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
3935         /* grbm */
3936         WREG32(GRBM_INT_CNTL, 0);
3937         /* vline/vblank, etc. */
3938         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3939         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3940         if (rdev->num_crtc >= 4) {
3941                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3942                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3943         }
3944         if (rdev->num_crtc >= 6) {
3945                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3946                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3947         }
3948
3949         /* dac hotplug */
3950         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
3951
3952         /* digital hotplug */
3953         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3954         WREG32(DC_HPD1_INT_CONTROL, tmp);
3955         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3956         WREG32(DC_HPD2_INT_CONTROL, tmp);
3957         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3958         WREG32(DC_HPD3_INT_CONTROL, tmp);
3959         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3960         WREG32(DC_HPD4_INT_CONTROL, tmp);
3961         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3962         WREG32(DC_HPD5_INT_CONTROL, tmp);
3963         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3964         WREG32(DC_HPD6_INT_CONTROL, tmp);
3966 }
3967
3968 /**
3969  * cik_irq_init - init and enable the interrupt ring
3970  *
3971  * @rdev: radeon_device pointer
3972  *
3973  * Allocate a ring buffer for the interrupt controller,
3974  * enable the RLC, disable interrupts, then set up
3975  * and enable the IH ring buffer (CIK).
3976  * Called at device load and resume.
3977  * Returns 0 for success, errors for failure.
3978  */
3979 static int cik_irq_init(struct radeon_device *rdev)
3980 {
3981         int ret = 0;
3982         int rb_bufsz;
3983         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3984
3985         /* allocate ring */
3986         ret = r600_ih_ring_alloc(rdev);
3987         if (ret)
3988                 return ret;
3989
3990         /* disable irqs */
3991         cik_disable_interrupts(rdev);
3992
3993         /* init rlc */
3994         ret = cik_rlc_resume(rdev);
3995         if (ret) {
3996                 r600_ih_ring_fini(rdev);
3997                 return ret;
3998         }
3999
4000         /* setup interrupt control */
4001         /* XXX this should actually be a bus address, not an MC address. same on older asics */
4002         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4003         interrupt_cntl = RREG32(INTERRUPT_CNTL);
4004         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4005          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4006          */
4007         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4008         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4009         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4010         WREG32(INTERRUPT_CNTL, interrupt_cntl);
4011
4012         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
4013         rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4014
4015         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4016                       IH_WPTR_OVERFLOW_CLEAR |
4017                       (rb_bufsz << 1));
4018
4019         if (rdev->wb.enabled)
4020                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4021
4022         /* set the writeback address whether it's enabled or not */
4023         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4024         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4025
4026         WREG32(IH_RB_CNTL, ih_rb_cntl);
4027
4028         /* set rptr, wptr to 0 */
4029         WREG32(IH_RB_RPTR, 0);
4030         WREG32(IH_RB_WPTR, 0);
4031
4032         /* Default settings for IH_CNTL (disabled at first) */
4033         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4034         /* RPTR_REARM only works if msi's are enabled */
4035         if (rdev->msi_enabled)
4036                 ih_cntl |= RPTR_REARM;
4037         WREG32(IH_CNTL, ih_cntl);
4038
4039         /* force the active interrupt state to all disabled */
4040         cik_disable_interrupt_state(rdev);
4041
4042         pci_set_master(rdev->pdev);
4043
4044         /* enable irqs */
4045         cik_enable_interrupts(rdev);
4046
4047         return ret;
4048 }
4049
4050 /**
4051  * cik_irq_set - enable/disable interrupt sources
4052  *
4053  * @rdev: radeon_device pointer
4054  *
4055  * Enable interrupt sources on the GPU (vblanks, hpd,
4056  * etc.) (CIK).
4057  * Returns 0 for success, errors for failure.
4058  */
4059 int cik_irq_set(struct radeon_device *rdev)
4060 {
4061         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
4062                 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
4063         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4064         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4065         u32 grbm_int_cntl = 0;
4066         u32 dma_cntl, dma_cntl1;
4067
4068         if (!rdev->irq.installed) {
4069                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4070                 return -EINVAL;
4071         }
4072         /* don't enable anything if the ih is disabled */
4073         if (!rdev->ih.enabled) {
4074                 cik_disable_interrupts(rdev);
4075                 /* force the active interrupt state to all disabled */
4076                 cik_disable_interrupt_state(rdev);
4077                 return 0;
4078         }
4079
4080         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4081         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4082         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4083         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4084         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4085         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4086
4087         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4088         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4089
4090         /* enable CP interrupts on all rings */
4091         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4092                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
4093                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4094         }
4095         /* TODO: compute queues! */
4096         /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
4097
4098         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4099                 DRM_DEBUG("cik_irq_set: sw int dma\n");
4100                 dma_cntl |= TRAP_ENABLE;
4101         }
4102
4103         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4104                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
4105                 dma_cntl1 |= TRAP_ENABLE;
4106         }
4107
4108         if (rdev->irq.crtc_vblank_int[0] ||
4109             atomic_read(&rdev->irq.pflip[0])) {
4110                 DRM_DEBUG("cik_irq_set: vblank 0\n");
4111                 crtc1 |= VBLANK_INTERRUPT_MASK;
4112         }
4113         if (rdev->irq.crtc_vblank_int[1] ||
4114             atomic_read(&rdev->irq.pflip[1])) {
4115                 DRM_DEBUG("cik_irq_set: vblank 1\n");
4116                 crtc2 |= VBLANK_INTERRUPT_MASK;
4117         }
4118         if (rdev->irq.crtc_vblank_int[2] ||
4119             atomic_read(&rdev->irq.pflip[2])) {
4120                 DRM_DEBUG("cik_irq_set: vblank 2\n");
4121                 crtc3 |= VBLANK_INTERRUPT_MASK;
4122         }
4123         if (rdev->irq.crtc_vblank_int[3] ||
4124             atomic_read(&rdev->irq.pflip[3])) {
4125                 DRM_DEBUG("cik_irq_set: vblank 3\n");
4126                 crtc4 |= VBLANK_INTERRUPT_MASK;
4127         }
4128         if (rdev->irq.crtc_vblank_int[4] ||
4129             atomic_read(&rdev->irq.pflip[4])) {
4130                 DRM_DEBUG("cik_irq_set: vblank 4\n");
4131                 crtc5 |= VBLANK_INTERRUPT_MASK;
4132         }
4133         if (rdev->irq.crtc_vblank_int[5] ||
4134             atomic_read(&rdev->irq.pflip[5])) {
4135                 DRM_DEBUG("cik_irq_set: vblank 5\n");
4136                 crtc6 |= VBLANK_INTERRUPT_MASK;
4137         }
4138         if (rdev->irq.hpd[0]) {
4139                 DRM_DEBUG("cik_irq_set: hpd 1\n");
4140                 hpd1 |= DC_HPDx_INT_EN;
4141         }
4142         if (rdev->irq.hpd[1]) {
4143                 DRM_DEBUG("cik_irq_set: hpd 2\n");
4144                 hpd2 |= DC_HPDx_INT_EN;
4145         }
4146         if (rdev->irq.hpd[2]) {
4147                 DRM_DEBUG("cik_irq_set: hpd 3\n");
4148                 hpd3 |= DC_HPDx_INT_EN;
4149         }
4150         if (rdev->irq.hpd[3]) {
4151                 DRM_DEBUG("cik_irq_set: hpd 4\n");
4152                 hpd4 |= DC_HPDx_INT_EN;
4153         }
4154         if (rdev->irq.hpd[4]) {
4155                 DRM_DEBUG("cik_irq_set: hpd 5\n");
4156                 hpd5 |= DC_HPDx_INT_EN;
4157         }
4158         if (rdev->irq.hpd[5]) {
4159                 DRM_DEBUG("cik_irq_set: hpd 6\n");
4160                 hpd6 |= DC_HPDx_INT_EN;
4161         }
4162
4163         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4164
4165         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4166         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4167
4168         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4169
4170         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4171         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4172         if (rdev->num_crtc >= 4) {
4173                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4174                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4175         }
4176         if (rdev->num_crtc >= 6) {
4177                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4178                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4179         }
4180
4181         WREG32(DC_HPD1_INT_CONTROL, hpd1);
4182         WREG32(DC_HPD2_INT_CONTROL, hpd2);
4183         WREG32(DC_HPD3_INT_CONTROL, hpd3);
4184         WREG32(DC_HPD4_INT_CONTROL, hpd4);
4185         WREG32(DC_HPD5_INT_CONTROL, hpd5);
4186         WREG32(DC_HPD6_INT_CONTROL, hpd6);
4187
4188         return 0;
4189 }
4190
4191 /**
4192  * cik_irq_ack - ack interrupt sources
4193  *
4194  * @rdev: radeon_device pointer
4195  *
4196  * Ack interrupt sources on the GPU (vblanks, hpd,
4197  * etc.) (CIK).  Certain interrupt sources are sw
4198  * generated and do not require an explicit ack.
4199  */
4200 static inline void cik_irq_ack(struct radeon_device *rdev)
4201 {
4202         u32 tmp;
4203
4204         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4205         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4206         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4207         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4208         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4209         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4210         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4211
4212         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4213                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4214         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4215                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4216         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4217                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4218         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4219                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4220
4221         if (rdev->num_crtc >= 4) {
4222                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4223                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4224                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4225                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4226                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4227                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4228                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4229                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4230         }
4231
4232         if (rdev->num_crtc >= 6) {
4233                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4234                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4235                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4236                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4237                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4238                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4239                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4240                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4241         }
4242
4243         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4244                 tmp = RREG32(DC_HPD1_INT_CONTROL);
4245                 tmp |= DC_HPDx_INT_ACK;
4246                 WREG32(DC_HPD1_INT_CONTROL, tmp);
4247         }
4248         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4249                 tmp = RREG32(DC_HPD2_INT_CONTROL);
4250                 tmp |= DC_HPDx_INT_ACK;
4251                 WREG32(DC_HPD2_INT_CONTROL, tmp);
4252         }
4253         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4254                 tmp = RREG32(DC_HPD3_INT_CONTROL);
4255                 tmp |= DC_HPDx_INT_ACK;
4256                 WREG32(DC_HPD3_INT_CONTROL, tmp);
4257         }
4258         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4259                 tmp = RREG32(DC_HPD4_INT_CONTROL);
4260                 tmp |= DC_HPDx_INT_ACK;
4261                 WREG32(DC_HPD4_INT_CONTROL, tmp);
4262         }
4263         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4264                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4265                 tmp |= DC_HPDx_INT_ACK;
4266                 WREG32(DC_HPD5_INT_CONTROL, tmp);
4267         }
4268         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4269                 tmp = RREG32(DC_HPD6_INT_CONTROL);
4270                 tmp |= DC_HPDx_INT_ACK;
4271                 WREG32(DC_HPD6_INT_CONTROL, tmp);
4272         }
4273 }
4274
4275 /**
4276  * cik_irq_disable - disable interrupts
4277  *
4278  * @rdev: radeon_device pointer
4279  *
4280  * Disable interrupts on the hw (CIK).
4281  */
4282 static void cik_irq_disable(struct radeon_device *rdev)
4283 {
4284         cik_disable_interrupts(rdev);
4285         /* Wait and acknowledge irq */
4286         mdelay(1);
4287         cik_irq_ack(rdev);
4288         cik_disable_interrupt_state(rdev);
4289 }
4290
4291 /**
4292  * cik_irq_suspend - disable interrupts for suspend
4293  *
4294  * @rdev: radeon_device pointer
4295  *
4296  * Disable interrupts and stop the RLC (CIK).
4297  * Used for suspend.
4298  */
4299 static void cik_irq_suspend(struct radeon_device *rdev)
4300 {
4301         cik_irq_disable(rdev);
4302         cik_rlc_stop(rdev);
4303 }
4304
4305 /**
4306  * cik_irq_fini - tear down interrupt support
4307  *
4308  * @rdev: radeon_device pointer
4309  *
4310  * Disable interrupts on the hw and free the IH ring
4311  * buffer (CIK).
4312  * Used for driver unload.
4313  */
4314 static void cik_irq_fini(struct radeon_device *rdev)
4315 {
4316         cik_irq_suspend(rdev);
4317         r600_ih_ring_fini(rdev);
4318 }
4319
4320 /**
4321  * cik_get_ih_wptr - get the IH ring buffer wptr
4322  *
4323  * @rdev: radeon_device pointer
4324  *
4325  * Get the IH ring buffer wptr from either the register
4326  * or the writeback memory buffer (CIK).  Also check for
4327  * ring buffer overflow and deal with it.
4328  * Used by cik_irq_process().
4329  * Returns the value of the wptr.
4330  */
4331 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
4332 {
4333         u32 wptr, tmp;
4334
4335         if (rdev->wb.enabled)
4336                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4337         else
4338                 wptr = RREG32(IH_RB_WPTR);
4339
4340         if (wptr & RB_OVERFLOW) {
4341                 /* When a ring buffer overflow happens, start parsing
4342                  * interrupts from the last vector that was not overwritten
4343                  * (wptr + 16). Hopefully this allows us to catch up.
4344                  */
4345                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4346                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
4347                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4348                 tmp = RREG32(IH_RB_CNTL);
4349                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4350                 WREG32(IH_RB_CNTL, tmp);
4351         }
4352         return (wptr & rdev->ih.ptr_mask);
4353 }
4354
4355 /* CIK IV Ring
4356  * Each IV ring entry is 128 bits:
4357  * [7:0]    - interrupt source id
4358  * [31:8]   - reserved
4359  * [59:32]  - interrupt source data
4360  * [63:60]  - reserved
4361  * [71:64]  - RINGID
4362  *            CP:
4363  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
4364  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
4365  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
4366  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
4367  *            PIPE_ID - ME0 0=3D
4368  *                    - ME1&2 compute dispatcher (4 pipes each)
4369  *            SDMA:
4370  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
4371  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
4372  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
4373  * [79:72]  - VMID
4374  * [95:80]  - PASID
4375  * [127:96] - reserved
4376  */
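/*
 * A minimal decode sketch for one 4-dword IV entry, assuming the layout
 * above (dw[] holds the little-endian dwords of the entry):
 *
 *   src_id   = le32_to_cpu(dw[0]) & 0xff;
 *   src_data = le32_to_cpu(dw[1]) & 0xfffffff;
 *   ring_id  = le32_to_cpu(dw[2]) & 0xff;
 *   vm_id    = (le32_to_cpu(dw[2]) >> 8) & 0xff;
 *   pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff;
 */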
4377 /**
4378  * cik_irq_process - interrupt handler
4379  *
4380  * @rdev: radeon_device pointer
4381  *
4382  * Interrupt handler (CIK).  Walk the IH ring,
4383  * ack interrupts and schedule work to handle
4384  * interrupt events.
4385  * Returns irq process return code.
4386  */
4387 int cik_irq_process(struct radeon_device *rdev)
4388 {
4389         u32 wptr;
4390         u32 rptr;
4391         u32 src_id, src_data, ring_id;
4392         u8 me_id, pipe_id, queue_id;
4393         u32 ring_index;
4394         bool queue_hotplug = false;
4395         bool queue_reset = false;
4396
4397         if (!rdev->ih.enabled || rdev->shutdown)
4398                 return IRQ_NONE;
4399
4400         wptr = cik_get_ih_wptr(rdev);
4401
4402 restart_ih:
4403         /* is somebody else already processing irqs? */
4404         if (atomic_xchg(&rdev->ih.lock, 1))
4405                 return IRQ_NONE;
4406
4407         rptr = rdev->ih.rptr;
4408         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4409
4410         /* Order reading of wptr vs. reading of IH ring data */
4411         rmb();
4412
4413         /* display interrupts */
4414         cik_irq_ack(rdev);
4415
4416         while (rptr != wptr) {
4417                 /* wptr/rptr are in bytes! */
4418                 ring_index = rptr / 4;
4419                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4420                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4421                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
4422
4423                 switch (src_id) {
4424                 case 1: /* D1 vblank/vline */
4425                         switch (src_data) {
4426                         case 0: /* D1 vblank */
4427                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
4428                                         if (rdev->irq.crtc_vblank_int[0]) {
4429                                                 drm_handle_vblank(rdev->ddev, 0);
4430                                                 rdev->pm.vblank_sync = true;
4431                                                 wake_up(&rdev->irq.vblank_queue);
4432                                         }
4433                                         if (atomic_read(&rdev->irq.pflip[0]))
4434                                                 radeon_crtc_handle_flip(rdev, 0);
4435                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4436                                         DRM_DEBUG("IH: D1 vblank\n");
4437                                 }
4438                                 break;
4439                         case 1: /* D1 vline */
4440                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
4441                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4442                                         DRM_DEBUG("IH: D1 vline\n");
4443                                 }
4444                                 break;
4445                         default:
4446                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4447                                 break;
4448                         }
4449                         break;
4450                 case 2: /* D2 vblank/vline */
4451                         switch (src_data) {
4452                         case 0: /* D2 vblank */
4453                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4454                                         if (rdev->irq.crtc_vblank_int[1]) {
4455                                                 drm_handle_vblank(rdev->ddev, 1);
4456                                                 rdev->pm.vblank_sync = true;
4457                                                 wake_up(&rdev->irq.vblank_queue);
4458                                         }
4459                                         if (atomic_read(&rdev->irq.pflip[1]))
4460                                                 radeon_crtc_handle_flip(rdev, 1);
4461                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4462                                         DRM_DEBUG("IH: D2 vblank\n");
4463                                 }
4464                                 break;
4465                         case 1: /* D2 vline */
4466                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4467                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4468                                         DRM_DEBUG("IH: D2 vline\n");
4469                                 }
4470                                 break;
4471                         default:
4472                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4473                                 break;
4474                         }
4475                         break;
4476                 case 3: /* D3 vblank/vline */
4477                         switch (src_data) {
4478                         case 0: /* D3 vblank */
4479                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4480                                         if (rdev->irq.crtc_vblank_int[2]) {
4481                                                 drm_handle_vblank(rdev->ddev, 2);
4482                                                 rdev->pm.vblank_sync = true;
4483                                                 wake_up(&rdev->irq.vblank_queue);
4484                                         }
4485                                         if (atomic_read(&rdev->irq.pflip[2]))
4486                                                 radeon_crtc_handle_flip(rdev, 2);
4487                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4488                                         DRM_DEBUG("IH: D3 vblank\n");
4489                                 }
4490                                 break;
4491                         case 1: /* D3 vline */
4492                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4493                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4494                                         DRM_DEBUG("IH: D3 vline\n");
4495                                 }
4496                                 break;
4497                         default:
4498                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4499                                 break;
4500                         }
4501                         break;
4502                 case 4: /* D4 vblank/vline */
4503                         switch (src_data) {
4504                         case 0: /* D4 vblank */
4505                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4506                                         if (rdev->irq.crtc_vblank_int[3]) {
4507                                                 drm_handle_vblank(rdev->ddev, 3);
4508                                                 rdev->pm.vblank_sync = true;
4509                                                 wake_up(&rdev->irq.vblank_queue);
4510                                         }
4511                                         if (atomic_read(&rdev->irq.pflip[3]))
4512                                                 radeon_crtc_handle_flip(rdev, 3);
4513                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4514                                         DRM_DEBUG("IH: D4 vblank\n");
4515                                 }
4516                                 break;
4517                         case 1: /* D4 vline */
4518                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4519                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4520                                         DRM_DEBUG("IH: D4 vline\n");
4521                                 }
4522                                 break;
4523                         default:
4524                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4525                                 break;
4526                         }
4527                         break;
4528                 case 5: /* D5 vblank/vline */
4529                         switch (src_data) {
4530                         case 0: /* D5 vblank */
4531                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4532                                         if (rdev->irq.crtc_vblank_int[4]) {
4533                                                 drm_handle_vblank(rdev->ddev, 4);
4534                                                 rdev->pm.vblank_sync = true;
4535                                                 wake_up(&rdev->irq.vblank_queue);
4536                                         }
4537                                         if (atomic_read(&rdev->irq.pflip[4]))
4538                                                 radeon_crtc_handle_flip(rdev, 4);
4539                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4540                                         DRM_DEBUG("IH: D5 vblank\n");
4541                                 }
4542                                 break;
4543                         case 1: /* D5 vline */
4544                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4545                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4546                                         DRM_DEBUG("IH: D5 vline\n");
4547                                 }
4548                                 break;
4549                         default:
4550                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4551                                 break;
4552                         }
4553                         break;
4554                 case 6: /* D6 vblank/vline */
4555                         switch (src_data) {
4556                         case 0: /* D6 vblank */
4557                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4558                                         if (rdev->irq.crtc_vblank_int[5]) {
4559                                                 drm_handle_vblank(rdev->ddev, 5);
4560                                                 rdev->pm.vblank_sync = true;
4561                                                 wake_up(&rdev->irq.vblank_queue);
4562                                         }
4563                                         if (atomic_read(&rdev->irq.pflip[5]))
4564                                                 radeon_crtc_handle_flip(rdev, 5);
4565                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4566                                         DRM_DEBUG("IH: D6 vblank\n");
4567                                 }
4568                                 break;
4569                         case 1: /* D6 vline */
4570                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4571                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4572                                         DRM_DEBUG("IH: D6 vline\n");
4573                                 }
4574                                 break;
4575                         default:
4576                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4577                                 break;
4578                         }
4579                         break;
4580                 case 42: /* HPD hotplug */
4581                         switch (src_data) {
4582                         case 0:
4583                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4584                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
4585                                         queue_hotplug = true;
4586                                         DRM_DEBUG("IH: HPD1\n");
4587                                 }
4588                                 break;
4589                         case 1:
4590                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4591                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4592                                         queue_hotplug = true;
4593                                         DRM_DEBUG("IH: HPD2\n");
4594                                 }
4595                                 break;
4596                         case 2:
4597                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4598                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4599                                         queue_hotplug = true;
4600                                         DRM_DEBUG("IH: HPD3\n");
4601                                 }
4602                                 break;
4603                         case 3:
4604                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4605                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4606                                         queue_hotplug = true;
4607                                         DRM_DEBUG("IH: HPD4\n");
4608                                 }
4609                                 break;
4610                         case 4:
4611                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4612                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4613                                         queue_hotplug = true;
4614                                         DRM_DEBUG("IH: HPD5\n");
4615                                 }
4616                                 break;
4617                         case 5:
4618                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4619                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4620                                         queue_hotplug = true;
4621                                         DRM_DEBUG("IH: HPD6\n");
4622                                 }
4623                                 break;
4624                         default:
4625                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4626                                 break;
4627                         }
4628                         break;
4629                 case 146:
4630                 case 147:
4631                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4632                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4633                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4634                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4635                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4636                         /* reset addr and status */
4637                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4638                         break;
4639                 case 176: /* GFX RB CP_INT */
4640                 case 177: /* GFX IB CP_INT */
4641                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4642                         break;
4643                 case 181: /* CP EOP event */
4644                         DRM_DEBUG("IH: CP EOP\n");
4645                         /* XXX check the bitfield order! */
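                        /* as decoded here: ring_id[6:5] = ME_ID,
                         * ring_id[4:3] = PIPE_ID, ring_id[2:0] = QUEUE_ID
                         */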
4646                         me_id = (ring_id & 0x60) >> 5;
4647                         pipe_id = (ring_id & 0x18) >> 3;
4648                         queue_id = (ring_id & 0x7) >> 0;
4649                         switch (me_id) {
4650                         case 0:
4651                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4652                                 break;
4653                         case 1:
4654                                 /* XXX compute */
4655                                 break;
4656                         case 2:
4657                                 /* XXX compute */
4658                                 break;
4659                         }
4660                         break;
4661                 case 184: /* CP Privileged reg access */
4662                         DRM_ERROR("Illegal register access in command stream\n");
4663                         /* XXX check the bitfield order! */
4664                         me_id = (ring_id & 0x60) >> 5;
4665                         pipe_id = (ring_id & 0x18) >> 3;
4666                         queue_id = (ring_id & 0x7) >> 0;
4667                         switch (me_id) {
4668                         case 0:
4669                                 /* This results in a full GPU reset, but all we need to do is soft
4670                                  * reset the CP for gfx
4671                                  */
4672                                 queue_reset = true;
4673                                 break;
4674                         case 1:
4675                                 /* XXX compute */
4676                                 break;
4677                         case 2:
4678                                 /* XXX compute */
4679                                 break;
4680                         }
4681                         break;
4682                 case 185: /* CP Privileged inst */
4683                         DRM_ERROR("Illegal instruction in command stream\n");
4684                         /* XXX check the bitfield order! */
4685                         me_id = (ring_id & 0x60) >> 5;
4686                         pipe_id = (ring_id & 0x18) >> 3;
4687                         queue_id = (ring_id & 0x7) >> 0;
4688                         switch (me_id) {
4689                         case 0:
4690                                 /* This results in a full GPU reset, but all we need to do is soft
4691                                  * reset the CP for gfx
4692                                  */
4693                                 queue_reset = true;
4694                                 break;
4695                         case 1:
4696                                 /* XXX compute */
4697                                 break;
4698                         case 2:
4699                                 /* XXX compute */
4700                                 break;
4701                         }
4702                         break;
4703                 case 224: /* SDMA trap event */
4704                         /* XXX check the bitfield order! */
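                        /* as decoded here: ring_id[1:0] = INSTANCE_ID,
                         * ring_id[3:2] = QUEUE_ID
                         */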
4705                         me_id = (ring_id & 0x3) >> 0;
4706                         queue_id = (ring_id & 0xc) >> 2;
4707                         DRM_DEBUG("IH: SDMA trap\n");
4708                         switch (me_id) {
4709                         case 0:
4710                                 switch (queue_id) {
4711                                 case 0:
4712                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4713                                         break;
4714                                 case 1:
4715                                         /* XXX compute */
4716                                         break;
4717                                 case 2:
4718                                         /* XXX compute */
4719                                         break;
4720                                 }
4721                                 break;
4722                         case 1:
4723                                 switch (queue_id) {
4724                                 case 0:
4725                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4726                                         break;
4727                                 case 1:
4728                                         /* XXX compute */
4729                                         break;
4730                                 case 2:
4731                                         /* XXX compute */
4732                                         break;
4733                                 }
4734                                 break;
4735                         }
4736                         break;
4737                 case 241: /* SDMA Privileged inst */
4738                 case 247: /* SDMA Privileged inst */
4739                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
4740                         /* XXX check the bitfield order! */
4741                         me_id = (ring_id & 0x3) >> 0;
4742                         queue_id = (ring_id & 0xc) >> 2;
4743                         switch (me_id) {
4744                         case 0:
4745                                 switch (queue_id) {
4746                                 case 0:
4747                                         queue_reset = true;
4748                                         break;
4749                                 case 1:
4750                                         /* XXX compute */
4751                                         queue_reset = true;
4752                                         break;
4753                                 case 2:
4754                                         /* XXX compute */
4755                                         queue_reset = true;
4756                                         break;
4757                                 }
4758                                 break;
4759                         case 1:
4760                                 switch (queue_id) {
4761                                 case 0:
4762                                         queue_reset = true;
4763                                         break;
4764                                 case 1:
4765                                         /* XXX compute */
4766                                         queue_reset = true;
4767                                         break;
4768                                 case 2:
4769                                         /* XXX compute */
4770                                         queue_reset = true;
4771                                         break;
4772                                 }
4773                                 break;
4774                         }
4775                         break;
4776                 case 233: /* GUI IDLE */
4777                         DRM_DEBUG("IH: GUI idle\n");
4778                         break;
4779                 default:
4780                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4781                         break;
4782                 }
4783
4784                 /* wptr/rptr are in bytes! */
4785                 rptr += 16;
4786                 rptr &= rdev->ih.ptr_mask;
4787         }
4788         if (queue_hotplug)
4789                 schedule_work(&rdev->hotplug_work);
4790         if (queue_reset)
4791                 schedule_work(&rdev->reset_work);
4792         rdev->ih.rptr = rptr;
4793         WREG32(IH_RB_RPTR, rdev->ih.rptr);
4794         atomic_set(&rdev->ih.lock, 0);
4795
4796         /* make sure wptr hasn't changed while processing */
4797         wptr = cik_get_ih_wptr(rdev);
4798         if (wptr != rptr)
4799                 goto restart_ih;
4800
4801         return IRQ_HANDLED;
4802 }
4803
4804 /*
4805  * startup/shutdown callbacks
4806  */
4807 /**
4808  * cik_startup - program the asic to a functional state
4809  *
4810  * @rdev: radeon_device pointer
4811  *
4812  * Programs the asic to a functional state (CIK).
4813  * Called by cik_init() and cik_resume().
4814  * Returns 0 for success, error for failure.
4815  */
4816 static int cik_startup(struct radeon_device *rdev)
4817 {
4818         struct radeon_ring *ring;
4819         int r;
4820
4821         if (rdev->flags & RADEON_IS_IGP) {
4822                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4823                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
4824                         r = cik_init_microcode(rdev);
4825                         if (r) {
4826                                 DRM_ERROR("Failed to load firmware!\n");
4827                                 return r;
4828                         }
4829                 }
4830         } else {
4831                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4832                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
4833                     !rdev->mc_fw) {
4834                         r = cik_init_microcode(rdev);
4835                         if (r) {
4836                                 DRM_ERROR("Failed to load firmware!\n");
4837                                 return r;
4838                         }
4839                 }
4840
4841                 r = ci_mc_load_microcode(rdev);
4842                 if (r) {
4843                         DRM_ERROR("Failed to load MC firmware!\n");
4844                         return r;
4845                 }
4846         }
4847
4848         r = r600_vram_scratch_init(rdev);
4849         if (r)
4850                 return r;
4851
4852         cik_mc_program(rdev);
4853         r = cik_pcie_gart_enable(rdev);
4854         if (r)
4855                 return r;
4856         cik_gpu_init(rdev);
4857
4858         /* allocate rlc buffers */
4859         r = si_rlc_init(rdev);
4860         if (r) {
4861                 DRM_ERROR("Failed to init rlc BOs!\n");
4862                 return r;
4863         }
4864
4865         /* allocate wb buffer */
4866         r = radeon_wb_init(rdev);
4867         if (r)
4868                 return r;
4869
4870         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4871         if (r) {
4872                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4873                 return r;
4874         }
4875
4876         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4877         if (r) {
4878                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4879                 return r;
4880         }
4881
4882         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4883         if (r) {
4884                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4885                 return r;
4886         }
4887
4888         /* Enable IRQ */
4889         if (!rdev->irq.installed) {
4890                 r = radeon_irq_kms_init(rdev);
4891                 if (r)
4892                         return r;
4893         }
4894
4895         r = cik_irq_init(rdev);
4896         if (r) {
4897                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
4898                 radeon_irq_kms_fini(rdev);
4899                 return r;
4900         }
4901         cik_irq_set(rdev);
4902
4903         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4904         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
4905                              CP_RB0_RPTR, CP_RB0_WPTR,
4906                              0, 0xfffff, RADEON_CP_PACKET2);
4907         if (r)
4908                 return r;
4909
4910         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4911         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4912                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
4913                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
4914                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4915         if (r)
4916                 return r;
4917
4918         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4919         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4920                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
4921                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
4922                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4923         if (r)
4924                 return r;
4925
4926         r = cik_cp_resume(rdev);
4927         if (r)
4928                 return r;
4929
4930         r = cik_sdma_resume(rdev);
4931         if (r)
4932                 return r;
4933
4934         r = radeon_ib_pool_init(rdev);
4935         if (r) {
4936                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
4937                 return r;
4938         }
4939
4940         r = radeon_vm_manager_init(rdev);
4941         if (r) {
4942                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
4943                 return r;
4944         }
4945
4946         return 0;
4947 }
4948
4949 /**
4950  * cik_resume - resume the asic to a functional state
4951  *
4952  * @rdev: radeon_device pointer
4953  *
4954  * Programs the asic to a functional state (CIK).
4955  * Called at resume.
4956  * Returns 0 for success, error for failure.
4957  */
4958 int cik_resume(struct radeon_device *rdev)
4959 {
4960         int r;
4961
4962         /* post card */
4963         atom_asic_init(rdev->mode_info.atom_context);
4964
4965         rdev->accel_working = true;
4966         r = cik_startup(rdev);
4967         if (r) {
4968                 DRM_ERROR("cik startup failed on resume\n");
4969                 rdev->accel_working = false;
4970                 return r;
4971         }
4972
4973         return r;
4975 }
4976
4977 /**
4978  * cik_suspend - suspend the asic
4979  *
4980  * @rdev: radeon_device pointer
4981  *
4982  * Bring the chip into a state suitable for suspend (CIK).
4983  * Called at suspend.
4984  * Returns 0 for success.
4985  */
4986 int cik_suspend(struct radeon_device *rdev)
4987 {
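             /* Teardown is roughly the reverse of cik_startup(): stop the
              * VM manager and the CP/SDMA engines before disabling
              * interrupts, writeback and the GART.
              */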
4988         radeon_vm_manager_fini(rdev);
4989         cik_cp_enable(rdev, false);
4990         cik_sdma_enable(rdev, false);
4991         cik_irq_suspend(rdev);
4992         radeon_wb_disable(rdev);
4993         cik_pcie_gart_disable(rdev);
4994         return 0;
4995 }
4996
4997 /* The plan is to move initialization into this function and to
4998  * use helper functions, so that radeon_device_init does little
4999  * more than call asic-specific functions.  This should also
5000  * allow us to remove a bunch of callback functions, such as
5001  * vram_info.
5002  */
5003 /**
5004  * cik_init - asic specific driver and hw init
5005  *
5006  * @rdev: radeon_device pointer
5007  *
5008  * Setup asic specific driver variables and program the hw
5009  * to a functional state (CIK).
5010  * Called at driver startup.
5011  * Returns 0 for success, errors for failure.
5012  */
5013 int cik_init(struct radeon_device *rdev)
5014 {
5015         struct radeon_ring *ring;
5016         int r;
5017
5018         /* Read BIOS */
5019         if (!radeon_get_bios(rdev)) {
5020                 if (ASIC_IS_AVIVO(rdev))
5021                         return -EINVAL;
5022         }
5023         /* Must be an ATOMBIOS */
5024         if (!rdev->is_atom_bios) {
5025                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
5026                 return -EINVAL;
5027         }
5028         r = radeon_atombios_init(rdev);
5029         if (r)
5030                 return r;
5031
5032         /* Post card if necessary */
5033         if (!radeon_card_posted(rdev)) {
5034                 if (!rdev->bios) {
5035                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5036                         return -EINVAL;
5037                 }
5038                 DRM_INFO("GPU not posted. Posting now...\n");
5039                 atom_asic_init(rdev->mode_info.atom_context);
5040         }
5041         /* Initialize scratch registers */
5042         cik_scratch_init(rdev);
5043         /* Initialize surface registers */
5044         radeon_surface_init(rdev);
5045         /* Initialize clocks */
5046         radeon_get_clock_info(rdev->ddev);
5047
5048         /* Fence driver */
5049         r = radeon_fence_driver_init(rdev);
5050         if (r)
5051                 return r;
5052
5053         /* initialize memory controller */
5054         r = cik_mc_init(rdev);
5055         if (r)
5056                 return r;
5057         /* Memory manager */
5058         r = radeon_bo_init(rdev);
5059         if (r)
5060                 return r;
5061
5062         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5063         ring->ring_obj = NULL;
5064         r600_ring_init(rdev, ring, 1024 * 1024);
5065
5066         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5067         ring->ring_obj = NULL;
5068         r600_ring_init(rdev, ring, 256 * 1024);
5069
5070         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5071         ring->ring_obj = NULL;
5072         r600_ring_init(rdev, ring, 256 * 1024);
5073
5074         rdev->ih.ring_obj = NULL;
5075         r600_ih_ring_init(rdev, 64 * 1024);
5076
5077         r = r600_pcie_gart_init(rdev);
5078         if (r)
5079                 return r;
5080
5081         rdev->accel_working = true;
5082         r = cik_startup(rdev);
5083         if (r) {
5084                 dev_err(rdev->dev, "disabling GPU acceleration\n");
5085                 cik_cp_fini(rdev);
5086                 cik_sdma_fini(rdev);
5087                 cik_irq_fini(rdev);
5088                 si_rlc_fini(rdev);
5089                 radeon_wb_fini(rdev);
5090                 radeon_ib_pool_fini(rdev);
5091                 radeon_vm_manager_fini(rdev);
5092                 radeon_irq_kms_fini(rdev);
5093                 cik_pcie_gart_fini(rdev);
5094                 rdev->accel_working = false;
5095         }
5096
5097         /* Don't start up if the MC ucode is missing.
5098          * The default clocks and voltages before the MC ucode
5099          * is loaded are not sufficient for advanced operations.
5100          */
5101         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
5102                 DRM_ERROR("radeon: MC ucode required for CIK+.\n");
5103                 return -EINVAL;
5104         }
5105
5106         return 0;
5107 }
5108
5109 /**
5110  * cik_fini - asic specific driver and hw fini
5111  *
5112  * @rdev: radeon_device pointer
5113  *
5114  * Tear down the asic specific driver variables and program the hw
5115  * to an idle state (CIK).
5116  * Called at driver unload.
5117  */
5118 void cik_fini(struct radeon_device *rdev)
5119 {
5120         cik_cp_fini(rdev);
5121         cik_sdma_fini(rdev);
5122         cik_irq_fini(rdev);
5123         si_rlc_fini(rdev);
5124         radeon_wb_fini(rdev);
5125         radeon_vm_manager_fini(rdev);
5126         radeon_ib_pool_fini(rdev);
5127         radeon_irq_kms_fini(rdev);
5128         cik_pcie_gart_fini(rdev);
5129         r600_vram_scratch_fini(rdev);
5130         radeon_gem_fini(rdev);
5131         radeon_fence_driver_fini(rdev);
5132         radeon_bo_fini(rdev);
5133         radeon_atombios_fini(rdev);
5134         kfree(rdev->bios);
5135         rdev->bios = NULL;
5136 }
5137
5138 /* display watermark setup */
5139 /**
5140  * dce8_line_buffer_adjust - Set up the line buffer
5141  *
5142  * @rdev: radeon_device pointer
5143  * @radeon_crtc: the selected display controller
5144  * @mode: the current display mode on the selected display
5145  * controller
5146  *
5147  * Set up the line buffer allocation for
5148  * the selected display controller (CIK).
5149  * Returns the line buffer size in pixels.
5150  */
5151 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5152                                    struct radeon_crtc *radeon_crtc,
5153                                    struct drm_display_mode *mode)
5154 {
5155         u32 tmp;
5156
5157         /*
5158          * Line Buffer Setup
5159          * There are 6 line buffers, one for each display controller.
5160          * There are 3 partitions per LB. Select the number of partitions
5161          * to enable based on the display width.  For display widths larger
5162          * than 4096, you need to use 2 display controllers and combine
5163          * them using the stereo blender.
5164          */
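             /* Example: for a 1920x1080 mode, crtc_hdisplay == 1920 falls
              * into the [1920, 2560) range below, so tmp = 2 and the
              * function returns 2560 * 2 = 5120 pixels of line buffer.
              */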
5165         if (radeon_crtc->base.enabled && mode) {
5166                 if (mode->crtc_hdisplay < 1920)
5167                         tmp = 1;
5168                 else if (mode->crtc_hdisplay < 2560)
5169                         tmp = 2;
5170                 else if (mode->crtc_hdisplay < 4096)
5171                         tmp = 0;
5172                 else {
5173                         DRM_DEBUG_KMS("Mode too big for LB!\n");
5174                         tmp = 0;
5175                 }
5176         } else
5177                 tmp = 1;
5178
5179         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5180                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5181
5182         if (radeon_crtc->base.enabled && mode) {
5183                 switch (tmp) {
5184                 case 0:
5185                 default:
5186                         return 4096 * 2;
5187                 case 1:
5188                         return 1920 * 2;
5189                 case 2:
5190                         return 2560 * 2;
5191                 }
5192         }
5193
5194         /* controller not enabled, so no lb used */
5195         return 0;
5196 }
5197
5198 /**
5199  * cik_get_number_of_dram_channels - get the number of dram channels
5200  *
5201  * @rdev: radeon_device pointer
5202  *
5203  * Look up the number of video ram channels (CIK).
5204  * Used for display watermark bandwidth calculations
5205  * Returns the number of dram channels
5206  */
5207 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5208 {
5209         u32 tmp = RREG32(MC_SHARED_CHMAP);
5210
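             /* The NOOFCHAN field is an encoding, not a raw count: e.g. a
              * field value of 3 means 8 channels and 4 means 3 channels.
              * The bandwidth helpers below treat each channel as 4 bytes
              * (32 bits) wide.
              */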
5211         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5212         case 0:
5213         default:
5214                 return 1;
5215         case 1:
5216                 return 2;
5217         case 2:
5218                 return 4;
5219         case 3:
5220                 return 8;
5221         case 4:
5222                 return 3;
5223         case 5:
5224                 return 6;
5225         case 6:
5226                 return 10;
5227         case 7:
5228                 return 12;
5229         case 8:
5230                 return 16;
5231         }
5232 }
5233
5234 struct dce8_wm_params {
5235         u32 dram_channels; /* number of dram channels */
5236         u32 yclk;          /* bandwidth per dram data pin in kHz */
5237         u32 sclk;          /* engine clock in kHz */
5238         u32 disp_clk;      /* display clock in kHz */
5239         u32 src_width;     /* viewport width */
5240         u32 active_time;   /* active display time in ns */
5241         u32 blank_time;    /* blank time in ns */
5242         bool interlaced;    /* mode is interlaced */
5243         fixed20_12 vsc;    /* vertical scale ratio */
5244         u32 num_heads;     /* number of active crtcs */
5245         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
5246         u32 lb_size;       /* line buffer allocated to pipe */
5247         u32 vtaps;         /* vertical scaler taps */
5248 };
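     /* Illustrative fill for a single 1080p head (example values only,
      * assuming a 1 GHz effective memory clock and an 800 MHz engine
      * clock; with the integer math in dce8_program_watermarks() a
      * 148500 kHz pixel clock gives pixel_period = 6 ns):
      *   .dram_channels = 2, .yclk = 1000000, .sclk = 800000,
      *   .disp_clk = 148500, .src_width = 1920, .active_time = 11520,
      *   .blank_time = 1680, .bytes_per_pixel = 4, .num_heads = 1
      */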
5249
5250 /**
5251  * dce8_dram_bandwidth - get the dram bandwidth
5252  *
5253  * @wm: watermark calculation data
5254  *
5255  * Calculate the raw dram bandwidth (CIK).
5256  * Used for display watermark bandwidth calculations
5257  * Returns the dram bandwidth in MBytes/s
5258  */
5259 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
5260 {
5261         /* Calculate raw DRAM Bandwidth */
5262         fixed20_12 dram_efficiency; /* 0.7 */
5263         fixed20_12 yclk, dram_channels, bandwidth;
5264         fixed20_12 a;
5265
5266         a.full = dfixed_const(1000);
5267         yclk.full = dfixed_const(wm->yclk);
5268         yclk.full = dfixed_div(yclk, a);
5269         dram_channels.full = dfixed_const(wm->dram_channels * 4);
5270         a.full = dfixed_const(10);
5271         dram_efficiency.full = dfixed_const(7);
5272         dram_efficiency.full = dfixed_div(dram_efficiency, a);
5273         bandwidth.full = dfixed_mul(dram_channels, yclk);
5274         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
5275
5276         return dfixed_trunc(bandwidth);
5277 }
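     /* Example with the math above: yclk = 1000000 kHz (1 GHz per pin)
      * and 2 channels gives 1000 MHz * (2 * 4) bytes * 0.7 = 5600 MB/s.
      */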
5278
5279 /**
5280  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
5281  *
5282  * @wm: watermark calculation data
5283  *
5284  * Calculate the dram bandwidth used for display (CIK).
5285  * Used for display watermark bandwidth calculations
5286  * Returns the dram bandwidth for display in MBytes/s
5287  */
5288 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5289 {
5290         /* Calculate DRAM Bandwidth and the part allocated to display. */
5291         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
5292         fixed20_12 yclk, dram_channels, bandwidth;
5293         fixed20_12 a;
5294
5295         a.full = dfixed_const(1000);
5296         yclk.full = dfixed_const(wm->yclk);
5297         yclk.full = dfixed_div(yclk, a);
5298         dram_channels.full = dfixed_const(wm->dram_channels * 4);
5299         a.full = dfixed_const(10);
5300         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
5301         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
5302         bandwidth.full = dfixed_mul(dram_channels, yclk);
5303         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
5304
5305         return dfixed_trunc(bandwidth);
5306 }
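     /* Same example as dce8_dram_bandwidth(), but with the 0.3 worst-case
      * display allocation: 1000 MHz * 8 bytes * 0.3 = 2400 MB/s.
      */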
5307
5308 /**
5309  * dce8_data_return_bandwidth - get the data return bandwidth
5310  *
5311  * @wm: watermark calculation data
5312  *
5313  * Calculate the data return bandwidth used for display (CIK).
5314  * Used for display watermark bandwidth calculations
5315  * Returns the data return bandwidth in MBytes/s
5316  */
5317 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
5318 {
5319         /* Calculate the display Data return Bandwidth */
5320         fixed20_12 return_efficiency; /* 0.8 */
5321         fixed20_12 sclk, bandwidth;
5322         fixed20_12 a;
5323
5324         a.full = dfixed_const(1000);
5325         sclk.full = dfixed_const(wm->sclk);
5326         sclk.full = dfixed_div(sclk, a);
5327         a.full = dfixed_const(10);
5328         return_efficiency.full = dfixed_const(8);
5329         return_efficiency.full = dfixed_div(return_efficiency, a);
5330         a.full = dfixed_const(32);
5331         bandwidth.full = dfixed_mul(a, sclk);
5332         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
5333
5334         return dfixed_trunc(bandwidth);
5335 }
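     /* Example: sclk = 800000 kHz gives 800 MHz * 32 bytes * 0.8 =
      * 20480 MB/s of display data return bandwidth.
      */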
5336
5337 /**
5338  * dce8_dmif_request_bandwidth - get the dmif bandwidth
5339  *
5340  * @wm: watermark calculation data
5341  *
5342  * Calculate the dmif bandwidth used for display (CIK).
5343  * Used for display watermark bandwidth calculations
5344  * Returns the dmif bandwidth in MBytes/s
5345  */
5346 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
5347 {
5348         /* Calculate the DMIF Request Bandwidth */
5349         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
5350         fixed20_12 disp_clk, bandwidth;
5351         fixed20_12 a, b;
5352
5353         a.full = dfixed_const(1000);
5354         disp_clk.full = dfixed_const(wm->disp_clk);
5355         disp_clk.full = dfixed_div(disp_clk, a);
5356         a.full = dfixed_const(32);
5357         b.full = dfixed_mul(a, disp_clk);
5358
5359         a.full = dfixed_const(10);
5360         disp_clk_request_efficiency.full = dfixed_const(8);
5361         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
5362
5363         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
5364
5365         return dfixed_trunc(bandwidth);
5366 }
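     /* Example: disp_clk = 148500 kHz gives 148.5 MHz * 32 bytes * 0.8,
      * which truncates to 3801 MB/s of DMIF request bandwidth.
      */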
5367
5368 /**
5369  * dce8_available_bandwidth - get the min available bandwidth
5370  *
5371  * @wm: watermark calculation data
5372  *
5373  * Calculate the min available bandwidth used for display (CIK).
5374  * Used for display watermark bandwidth calculations
5375  * Returns the min available bandwidth in MBytes/s
5376  */
5377 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
5378 {
5379         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
5380         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
5381         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
5382         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
5383
5384         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
5385 }
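     /* Continuing the examples above: min(5600, 20480, 3801) = 3801 MB/s,
      * i.e. the DMIF request path would be the limiting factor there.
      */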
5386
5387 /**
5388  * dce8_average_bandwidth - get the average available bandwidth
5389  *
5390  * @wm: watermark calculation data
5391  *
5392  * Calculate the average available bandwidth used for display (CIK).
5393  * Used for display watermark bandwidth calculations
5394  * Returns the average available bandwidth in MBytes/s
5395  */
5396 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
5397 {
5398         /* Calculate the display mode Average Bandwidth
5399          * DisplayMode should contain the source and destination dimensions,
5400          * timing, etc.
5401          */
5402         fixed20_12 bpp;
5403         fixed20_12 line_time;
5404         fixed20_12 src_width;
5405         fixed20_12 bandwidth;
5406         fixed20_12 a;
5407
5408         a.full = dfixed_const(1000);
5409         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
5410         line_time.full = dfixed_div(line_time, a);
5411         bpp.full = dfixed_const(wm->bytes_per_pixel);
5412         src_width.full = dfixed_const(wm->src_width);
5413         bandwidth.full = dfixed_mul(src_width, bpp);
5414         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
5415         bandwidth.full = dfixed_div(bandwidth, line_time);
5416
5417         return dfixed_trunc(bandwidth);
5418 }
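     /* Example: a 1920-wide, 4-byte-per-pixel source with vsc = 1 and a
      * 13200 ns line time averages 1920 * 4 / 13.2 us ~= 581 MB/s.
      */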
5419
5420 /**
5421  * dce8_latency_watermark - get the latency watermark
5422  *
5423  * @wm: watermark calculation data
5424  *
5425  * Calculate the latency watermark (CIK).
5426  * Used for display watermark bandwidth calculations
5427  * Returns the latency watermark in ns
5428  */
5429 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
5430 {
5431         /* First calculate the latency in ns */
5432         u32 mc_latency = 2000; /* 2000 ns. */
5433         u32 available_bandwidth = dce8_available_bandwidth(wm);
5434         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
5435         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
5436         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
5437         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
5438                 (wm->num_heads * cursor_line_pair_return_time);
5439         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
5440         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
5441         u32 tmp, dmif_size = 12288;
5442         fixed20_12 a, b, c;
5443
5444         if (wm->num_heads == 0)
5445                 return 0;
5446
5447         a.full = dfixed_const(2);
5448         b.full = dfixed_const(1);
5449         if ((wm->vsc.full > a.full) ||
5450             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
5451             (wm->vtaps >= 5) ||
5452             ((wm->vsc.full >= a.full) && wm->interlaced))
5453                 max_src_lines_per_dst_line = 4;
5454         else
5455                 max_src_lines_per_dst_line = 2;
5456
5457         a.full = dfixed_const(available_bandwidth);
5458         b.full = dfixed_const(wm->num_heads);
5459         a.full = dfixed_div(a, b);
5460
5461         b.full = dfixed_const(mc_latency + 512);
5462         c.full = dfixed_const(wm->disp_clk);
5463         b.full = dfixed_div(b, c);
5464
5465         c.full = dfixed_const(dmif_size);
5466         b.full = dfixed_div(c, b);
5467
5468         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
5469
5470         b.full = dfixed_const(1000);
5471         c.full = dfixed_const(wm->disp_clk);
5472         b.full = dfixed_div(c, b);
5473         c.full = dfixed_const(wm->bytes_per_pixel);
5474         b.full = dfixed_mul(b, c);
5475
5476         lb_fill_bw = min(tmp, dfixed_trunc(b));
5477
5478         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
5479         b.full = dfixed_const(1000);
5480         c.full = dfixed_const(lb_fill_bw);
5481         b.full = dfixed_div(c, b);
5482         a.full = dfixed_div(a, b);
5483         line_fill_time = dfixed_trunc(a);
5484
5485         if (line_fill_time < wm->active_time)
5486                 return latency;
5487         else
5488                 return latency + (line_fill_time - wm->active_time);
5489
5490 }
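     /* In short: the total latency is the MC latency plus the DC pipe
      * latency plus the time the other heads spend returning their chunks
      * and cursor line pairs; if the line buffer cannot be refilled within
      * the active display time, the shortfall is added on top.
      */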
5491
5492 /**
5493  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
5494  * average and available dram bandwidth
5495  *
5496  * @wm: watermark calculation data
5497  *
5498  * Check if the display average bandwidth fits in the display
5499  * dram bandwidth (CIK).
5500  * Used for display watermark bandwidth calculations
5501  * Returns true if the display fits, false if not.
5502  */
5503 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5504 {
5505         if (dce8_average_bandwidth(wm) <=
5506             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
5507                 return true;
5508         else
5509                 return false;
5510 }
5511
5512 /**
5513  * dce8_average_bandwidth_vs_available_bandwidth - check
5514  * average and available bandwidth
5515  *
5516  * @wm: watermark calculation data
5517  *
5518  * Check if the display average bandwidth fits in the display
5519  * available bandwidth (CIK).
5520  * Used for display watermark bandwidth calculations
5521  * Returns true if the display fits, false if not.
5522  */
5523 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
5524 {
5525         if (dce8_average_bandwidth(wm) <=
5526             (dce8_available_bandwidth(wm) / wm->num_heads))
5527                 return true;
5528         else
5529                 return false;
5530 }
5531
5532 /**
5533  * dce8_check_latency_hiding - check latency hiding
5534  *
5535  * @wm: watermark calculation data
5536  *
5537  * Check latency hiding (CIK).
5538  * Used for display watermark bandwidth calculations
5539  * Returns true if the display fits, false if not.
5540  */
5541 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
5542 {
5543         u32 lb_partitions = wm->lb_size / wm->src_width;
5544         u32 line_time = wm->active_time + wm->blank_time;
5545         u32 latency_tolerant_lines;
5546         u32 latency_hiding;
5547         fixed20_12 a;
5548
5549         a.full = dfixed_const(1);
5550         if (wm->vsc.full > a.full)
5551                 latency_tolerant_lines = 1;
5552         else {
5553                 if (lb_partitions <= (wm->vtaps + 1))
5554                         latency_tolerant_lines = 1;
5555                 else
5556                         latency_tolerant_lines = 2;
5557         }
5558
5559         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
5560
5561         if (dce8_latency_watermark(wm) <= latency_hiding)
5562                 return true;
5563         else
5564                 return false;
5565 }
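     /* Example: for the 1080p case used earlier, lb_partitions =
      * 5120 / 1920 = 2, which is <= vtaps + 1, so latency_tolerant_lines
      * = 1 and the budget is 1 * 13200 + 1680 = 14880 ns of hideable
      * latency.
      */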
5566
5567 /**
5568  * dce8_program_watermarks - program display watermarks
5569  *
5570  * @rdev: radeon_device pointer
5571  * @radeon_crtc: the selected display controller
5572  * @lb_size: line buffer size
5573  * @num_heads: number of display controllers in use
5574  *
5575  * Calculate and program the display watermarks for the
5576  * selected display controller (CIK).
5577  */
5578 static void dce8_program_watermarks(struct radeon_device *rdev,
5579                                     struct radeon_crtc *radeon_crtc,
5580                                     u32 lb_size, u32 num_heads)
5581 {
5582         struct drm_display_mode *mode = &radeon_crtc->base.mode;
5583         struct dce8_wm_params wm;
5584         u32 pixel_period;
5585         u32 line_time = 0;
5586         u32 latency_watermark_a = 0, latency_watermark_b = 0;
5587         u32 tmp, wm_mask;
5588
5589         if (radeon_crtc->base.enabled && num_heads && mode) {
5590                 pixel_period = 1000000 / (u32)mode->clock;
5591                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
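                     /* Illustrative numbers: a 148500 kHz pixel clock gives
                      * pixel_period = 1000000 / 148500 = 6 ns, and with
                      * crtc_htotal = 2200 a line_time of 13200 ns.
                      */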
5592
5593                 wm.yclk = rdev->pm.current_mclk * 10;
5594                 wm.sclk = rdev->pm.current_sclk * 10;
5595                 wm.disp_clk = mode->clock;
5596                 wm.src_width = mode->crtc_hdisplay;
5597                 wm.active_time = mode->crtc_hdisplay * pixel_period;
5598                 wm.blank_time = line_time - wm.active_time;
5599                 wm.interlaced = false;
5600                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
5601                         wm.interlaced = true;
5602                 wm.vsc = radeon_crtc->vsc;
5603                 wm.vtaps = 1;
5604                 if (radeon_crtc->rmx_type != RMX_OFF)
5605                         wm.vtaps = 2;
5606                 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
5607                 wm.lb_size = lb_size;
5608                 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
5609                 wm.num_heads = num_heads;
5610
5611                 /* set for high clocks */
5612                 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
5613                 /* set for low clocks */
5614                 /* XXX: wm.yclk/wm.sclk should be set to the low clocks here; until then wm B matches wm A */
5615                 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
5616
5617                 /* possibly force display priority to high */
5618                 /* should really do this at mode validation time... */
5619                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
5620                     !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
5621                     !dce8_check_latency_hiding(&wm) ||
5622                     (rdev->disp_priority == 2)) {
5623                         DRM_DEBUG_KMS("force priority to high\n");
5624                 }
5625         }
5626
5627         /* select wm A */
5628         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5629         tmp = wm_mask;
5630         tmp &= ~LATENCY_WATERMARK_MASK(3);
5631         tmp |= LATENCY_WATERMARK_MASK(1);
5632         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5633         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5634                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
5635                 LATENCY_HIGH_WATERMARK(line_time)));
5636         /* select wm B */
5637         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5638         tmp &= ~LATENCY_WATERMARK_MASK(3);
5639         tmp |= LATENCY_WATERMARK_MASK(2);
5640         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5641         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5642                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
5643                 LATENCY_HIGH_WATERMARK(line_time)));
5644         /* restore original selection */
5645         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
5646 }
5647
5648 /**
5649  * dce8_bandwidth_update - program display watermarks
5650  *
5651  * @rdev: radeon_device pointer
5652  *
5653  * Calculate and program the display watermarks and line
5654  * buffer allocation (CIK).
5655  */
5656 void dce8_bandwidth_update(struct radeon_device *rdev)
5657 {
5658         struct drm_display_mode *mode = NULL;
5659         u32 num_heads = 0, lb_size;
5660         int i;
5661
5662         radeon_update_display_priority(rdev);
5663
5664         for (i = 0; i < rdev->num_crtc; i++) {
5665                 if (rdev->mode_info.crtcs[i]->base.enabled)
5666                         num_heads++;
5667         }
5668         for (i = 0; i < rdev->num_crtc; i++) {
5669                 mode = &rdev->mode_info.crtcs[i]->base.mode;
5670                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
5671                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
5672         }
5673 }
5674
5675 /**
5676  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
5677  *
5678  * @rdev: radeon_device pointer
5679  *
5680  * Fetches a GPU clock counter snapshot (CIK).
5681  * Returns the 64 bit clock counter snapshot.
5682  */
5683 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
5684 {
5685         uint64_t clock;
5686
5687         mutex_lock(&rdev->gpu_clock_mutex);
5688         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5689         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5690                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5691         mutex_unlock(&rdev->gpu_clock_mutex);
5692         return clock;
5693 }
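     /* Illustrative usage sketch (not part of this driver): sampling the
      * counter twice around a known delay gives a rough GPU clock
      * estimate.
      *
      *   u64 t0 = cik_get_gpu_clock_counter(rdev);
      *   msleep(100);
      *   u64 t1 = cik_get_gpu_clock_counter(rdev);
      *   u64 gpu_hz = (t1 - t0) * 10;  // counts per 100 ms -> Hz
      */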
5694