rtime.felk.cvut.cz Git - linux-imx.git/blob - drivers/gpu/drm/radeon/cik.c
drm/radeon: implement async vm_flush for the CP (v7)
[linux-imx.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include "drmP.h"
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34
/*
 * Expected ucode image sizes, in 32-bit words.  cik_init_microcode()
 * multiplies each value by 4 before comparing it against the size of
 * the firmware file, and ci_mc_load_microcode() writes
 * CIK_MC_UCODE_SIZE words to the hw.
 */
35 /* GFX */
36 #define CIK_PFP_UCODE_SIZE 2144
37 #define CIK_ME_UCODE_SIZE 2144
38 #define CIK_CE_UCODE_SIZE 2144
39 /* compute */
40 #define CIK_MEC_UCODE_SIZE 4192
41 /* interrupts */
42 #define BONAIRE_RLC_UCODE_SIZE 2048
43 #define KB_RLC_UCODE_SIZE 2560
44 #define KV_RLC_UCODE_SIZE 2560
45 /* gddr controller */
46 #define CIK_MC_UCODE_SIZE 7866
47
/*
 * Firmware images this module may request.  The names follow the
 * "radeon/<CHIP>_<block>.bin" pattern that cik_init_microcode() builds
 * at runtime; only BONAIRE lists an MC image here.
 */
48 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
59 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
60 MODULE_FIRMWARE("radeon/KABINI_me.bin");
61 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
62 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
63 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
64
65 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
66 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
67 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
68
69 #define BONAIRE_IO_MC_REGS_SIZE 36
70
/*
 * MC IO register settings for Bonaire, stored as {index, data} pairs.
 * ci_mc_load_microcode() writes the first value of each pair to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA before
 * it loads the MC ucode.
 */
71 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
72 {
73         {0x00000070, 0x04400000},
74         {0x00000071, 0x80c01803},
75         {0x00000072, 0x00004004},
76         {0x00000073, 0x00000100},
77         {0x00000074, 0x00ff0000},
78         {0x00000075, 0x34000000},
79         {0x00000076, 0x08000014},
80         {0x00000077, 0x00cc08ec},
81         {0x00000078, 0x00000400},
82         {0x00000079, 0x00000000},
83         {0x0000007a, 0x04090000},
84         {0x0000007c, 0x00000000},
85         {0x0000007e, 0x4408a8e8},
86         {0x0000007f, 0x00000304},
87         {0x00000080, 0x00000000},
88         {0x00000082, 0x00000001},
89         {0x00000083, 0x00000002},
90         {0x00000084, 0xf3e4f400},
91         {0x00000085, 0x052024e3},
92         {0x00000087, 0x00000000},
93         {0x00000088, 0x01000000},
94         {0x0000008a, 0x1c0a0000},
95         {0x0000008b, 0xff010000},
96         {0x0000008d, 0xffffefff},
97         {0x0000008e, 0xfff3efff},
98         {0x0000008f, 0xfff3efbf},
99         {0x00000092, 0xf7ffffff},
100         {0x00000093, 0xffffff7f},
101         {0x00000095, 0x00101101},
102         {0x00000096, 0x00000fff},
103         {0x00000097, 0x00116fff},
104         {0x00000098, 0x60010000},
105         {0x00000099, 0x10010000},
106         {0x0000009a, 0x00006000},
107         {0x0000009b, 0x00001000},
108         {0x0000009f, 0x00b48000}
109 };
110
111 /* ucode loading */
112 /**
113  * ci_mc_load_microcode - load MC ucode into the hw
114  *
115  * @rdev: radeon_device pointer
116  *
117  * Load the GDDR MC ucode into the hw (CIK).
118  * Returns 0 on success, error on failure.
119  */
120 static int ci_mc_load_microcode(struct radeon_device *rdev)
121 {
122         const __be32 *fw_data;
123         u32 running, blackout = 0;
124         u32 *io_mc_regs;
125         int i, ucode_size, regs_size;
126
127         if (!rdev->mc_fw)
128                 return -EINVAL;
129
130         switch (rdev->family) {
131         case CHIP_BONAIRE:
132         default:
133                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
134                 ucode_size = CIK_MC_UCODE_SIZE;
135                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
136                 break;
137         }
138
139         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
140
141         if (running == 0) {
142                 if (running) {
143                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
144                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
145                 }
146
147                 /* reset the engine and set to writable */
148                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
149                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
150
151                 /* load mc io regs */
152                 for (i = 0; i < regs_size; i++) {
153                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
154                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
155                 }
156                 /* load the MC ucode */
157                 fw_data = (const __be32 *)rdev->mc_fw->data;
158                 for (i = 0; i < ucode_size; i++)
159                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
160
161                 /* put the engine back into the active state */
162                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
163                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
164                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
165
166                 /* wait for training to complete */
167                 for (i = 0; i < rdev->usec_timeout; i++) {
168                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
169                                 break;
170                         udelay(1);
171                 }
172                 for (i = 0; i < rdev->usec_timeout; i++) {
173                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
174                                 break;
175                         udelay(1);
176                 }
177
178                 if (running)
179                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
180         }
181
182         return 0;
183 }
184
185 /**
186  * cik_init_microcode - load ucode images from disk
187  *
188  * @rdev: radeon_device pointer
189  *
190  * Use the firmware interface to load the ucode images into
191  * the driver (not loaded into hw).
192  * Returns 0 on success, error on failure.
193  */
194 static int cik_init_microcode(struct radeon_device *rdev)
195 {
196         struct platform_device *pdev;
197         const char *chip_name;
198         size_t pfp_req_size, me_req_size, ce_req_size,
199                 mec_req_size, rlc_req_size, mc_req_size;
200         char fw_name[30];
201         int err;
202
203         DRM_DEBUG("\n");
204
205         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
206         err = IS_ERR(pdev);
207         if (err) {
208                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
209                 return -EINVAL;
210         }
211
212         switch (rdev->family) {
213         case CHIP_BONAIRE:
214                 chip_name = "BONAIRE";
215                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
216                 me_req_size = CIK_ME_UCODE_SIZE * 4;
217                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
218                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
219                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
220                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
221                 break;
222         case CHIP_KAVERI:
223                 chip_name = "KAVERI";
224                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
225                 me_req_size = CIK_ME_UCODE_SIZE * 4;
226                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
227                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
228                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
229                 break;
230         case CHIP_KABINI:
231                 chip_name = "KABINI";
232                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
233                 me_req_size = CIK_ME_UCODE_SIZE * 4;
234                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
235                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
236                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
237                 break;
238         default: BUG();
239         }
240
241         DRM_INFO("Loading %s Microcode\n", chip_name);
242
243         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
244         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
245         if (err)
246                 goto out;
247         if (rdev->pfp_fw->size != pfp_req_size) {
248                 printk(KERN_ERR
249                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
250                        rdev->pfp_fw->size, fw_name);
251                 err = -EINVAL;
252                 goto out;
253         }
254
255         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
256         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
257         if (err)
258                 goto out;
259         if (rdev->me_fw->size != me_req_size) {
260                 printk(KERN_ERR
261                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
262                        rdev->me_fw->size, fw_name);
263                 err = -EINVAL;
264         }
265
266         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
267         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
268         if (err)
269                 goto out;
270         if (rdev->ce_fw->size != ce_req_size) {
271                 printk(KERN_ERR
272                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
273                        rdev->ce_fw->size, fw_name);
274                 err = -EINVAL;
275         }
276
277         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
278         err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
279         if (err)
280                 goto out;
281         if (rdev->mec_fw->size != mec_req_size) {
282                 printk(KERN_ERR
283                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
284                        rdev->mec_fw->size, fw_name);
285                 err = -EINVAL;
286         }
287
288         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
289         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
290         if (err)
291                 goto out;
292         if (rdev->rlc_fw->size != rlc_req_size) {
293                 printk(KERN_ERR
294                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
295                        rdev->rlc_fw->size, fw_name);
296                 err = -EINVAL;
297         }
298
299         /* No MC ucode on APUs */
300         if (!(rdev->flags & RADEON_IS_IGP)) {
301                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
302                 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
303                 if (err)
304                         goto out;
305                 if (rdev->mc_fw->size != mc_req_size) {
306                         printk(KERN_ERR
307                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
308                                rdev->mc_fw->size, fw_name);
309                         err = -EINVAL;
310                 }
311         }
312
313 out:
314         platform_device_unregister(pdev);
315
316         if (err) {
317                 if (err != -EINVAL)
318                         printk(KERN_ERR
319                                "cik_cp: Failed to load firmware \"%s\"\n",
320                                fw_name);
321                 release_firmware(rdev->pfp_fw);
322                 rdev->pfp_fw = NULL;
323                 release_firmware(rdev->me_fw);
324                 rdev->me_fw = NULL;
325                 release_firmware(rdev->ce_fw);
326                 rdev->ce_fw = NULL;
327                 release_firmware(rdev->rlc_fw);
328                 rdev->rlc_fw = NULL;
329                 release_firmware(rdev->mc_fw);
330                 rdev->mc_fw = NULL;
331         }
332         return err;
333 }
334
335 /*
336  * Core functions
337  */
338 /**
339  * cik_tiling_mode_table_init - init the hw tiling table
340  *
341  * @rdev: radeon_device pointer
342  *
343  * Starting with SI, the tiling setup is done globally in a
344  * set of 32 tiling modes.  Rather than selecting each set of
345  * parameters per surface as on older asics, we just select
346  * which index in the tiling table we want to use, and the
347  * surface uses those parameters (CIK).
348  */
349 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
350 {
351         const u32 num_tile_mode_states = 32;
352         const u32 num_secondary_tile_mode_states = 16;
353         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
354         u32 num_pipe_configs;
355         u32 num_rbs = rdev->config.cik.max_backends_per_se *
356                 rdev->config.cik.max_shader_engines;
357
358         switch (rdev->config.cik.mem_row_size_in_kb) {
359         case 1:
360                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
361                 break;
362         case 2:
363         default:
364                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
365                 break;
366         case 4:
367                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
368                 break;
369         }
370
371         num_pipe_configs = rdev->config.cik.max_tile_pipes;
372         if (num_pipe_configs > 8)
373                 num_pipe_configs = 8; /* ??? */
374
375         if (num_pipe_configs == 8) {
376                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
377                         switch (reg_offset) {
378                         case 0:
379                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
380                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
381                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
382                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
383                                 break;
384                         case 1:
385                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
386                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
387                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
388                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
389                                 break;
390                         case 2:
391                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
392                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
393                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
394                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
395                                 break;
396                         case 3:
397                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
398                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
399                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
400                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
401                                 break;
402                         case 4:
403                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
404                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
405                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
406                                                  TILE_SPLIT(split_equal_to_row_size));
407                                 break;
408                         case 5:
409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
410                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
411                                 break;
412                         case 6:
413                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
414                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
415                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
416                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
417                                 break;
418                         case 7:
419                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
420                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
421                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
422                                                  TILE_SPLIT(split_equal_to_row_size));
423                                 break;
424                         case 8:
425                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
426                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
427                                 break;
428                         case 9:
429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
430                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
431                                 break;
432                         case 10:
433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
434                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
435                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
436                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
437                                 break;
438                         case 11:
439                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
440                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
441                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
442                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
443                                 break;
444                         case 12:
445                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
446                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
447                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
448                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
449                                 break;
450                         case 13:
451                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
452                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
453                                 break;
454                         case 14:
455                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
456                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
457                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
458                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
459                                 break;
460                         case 16:
461                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
462                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
463                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
464                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
465                                 break;
466                         case 17:
467                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
468                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
469                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
470                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
471                                 break;
472                         case 27:
473                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
474                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
475                                 break;
476                         case 28:
477                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
478                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
479                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
480                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
481                                 break;
482                         case 29:
483                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
484                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
485                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
486                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
487                                 break;
488                         case 30:
489                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
490                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
491                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
492                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
493                                 break;
494                         default:
495                                 gb_tile_moden = 0;
496                                 break;
497                         }
498                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
499                 }
500                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
501                         switch (reg_offset) {
502                         case 0:
503                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
504                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
505                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
506                                                  NUM_BANKS(ADDR_SURF_16_BANK));
507                                 break;
508                         case 1:
509                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
512                                                  NUM_BANKS(ADDR_SURF_16_BANK));
513                                 break;
514                         case 2:
515                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
516                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
517                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
518                                                  NUM_BANKS(ADDR_SURF_16_BANK));
519                                 break;
520                         case 3:
521                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
522                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
523                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
524                                                  NUM_BANKS(ADDR_SURF_16_BANK));
525                                 break;
526                         case 4:
527                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
528                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
529                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
530                                                  NUM_BANKS(ADDR_SURF_8_BANK));
531                                 break;
532                         case 5:
533                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
534                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
535                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
536                                                  NUM_BANKS(ADDR_SURF_4_BANK));
537                                 break;
538                         case 6:
539                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
540                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
541                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
542                                                  NUM_BANKS(ADDR_SURF_2_BANK));
543                                 break;
544                         case 8:
545                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
546                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
547                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
548                                                  NUM_BANKS(ADDR_SURF_16_BANK));
549                                 break;
550                         case 9:
551                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
552                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
553                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
554                                                  NUM_BANKS(ADDR_SURF_16_BANK));
555                                 break;
556                         case 10:
557                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
558                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
559                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
560                                                  NUM_BANKS(ADDR_SURF_16_BANK));
561                                 break;
562                         case 11:
563                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
564                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
565                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
566                                                  NUM_BANKS(ADDR_SURF_16_BANK));
567                                 break;
568                         case 12:
569                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
570                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
571                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
572                                                  NUM_BANKS(ADDR_SURF_8_BANK));
573                                 break;
574                         case 13:
575                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
576                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
577                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
578                                                  NUM_BANKS(ADDR_SURF_4_BANK));
579                                 break;
580                         case 14:
581                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
582                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
583                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
584                                                  NUM_BANKS(ADDR_SURF_2_BANK));
585                                 break;
586                         default:
587                                 gb_tile_moden = 0;
588                                 break;
589                         }
590                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
591                 }
592         } else if (num_pipe_configs == 4) {
593                 if (num_rbs == 4) {
594                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
595                                 switch (reg_offset) {
596                                 case 0:
597                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
598                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
599                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
600                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
601                                         break;
602                                 case 1:
603                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
604                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
605                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
606                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
607                                         break;
608                                 case 2:
609                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
610                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
611                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
612                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
613                                         break;
614                                 case 3:
615                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
616                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
617                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
618                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
619                                         break;
620                                 case 4:
621                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
622                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
623                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
624                                                          TILE_SPLIT(split_equal_to_row_size));
625                                         break;
626                                 case 5:
627                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
628                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
629                                         break;
630                                 case 6:
631                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
632                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
633                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
634                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
635                                         break;
636                                 case 7:
637                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
638                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
639                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
640                                                          TILE_SPLIT(split_equal_to_row_size));
641                                         break;
642                                 case 8:
643                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
644                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
645                                         break;
646                                 case 9:
647                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
648                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
649                                         break;
650                                 case 10:
651                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
652                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
653                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
654                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
655                                         break;
656                                 case 11:
657                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
658                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
659                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
660                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
661                                         break;
662                                 case 12:
663                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
664                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
665                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
666                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
667                                         break;
668                                 case 13:
669                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
670                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
671                                         break;
672                                 case 14:
673                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
674                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
675                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
676                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
677                                         break;
678                                 case 16:
679                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
680                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
681                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
682                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
683                                         break;
684                                 case 17:
685                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
686                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
687                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
688                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
689                                         break;
690                                 case 27:
691                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
692                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
693                                         break;
694                                 case 28:
695                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
696                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
697                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
698                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
699                                         break;
700                                 case 29:
701                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
702                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
703                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
704                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
705                                         break;
706                                 case 30:
707                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
708                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
709                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
710                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
711                                         break;
712                                 default:
713                                         gb_tile_moden = 0;
714                                         break;
715                                 }
716                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
717                         }
718                 } else if (num_rbs < 4) {
719                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
720                                 switch (reg_offset) {
721                                 case 0:
722                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
723                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
724                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
725                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
726                                         break;
727                                 case 1:
728                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
729                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
730                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
731                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
732                                         break;
733                                 case 2:
734                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
735                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
736                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
737                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
738                                         break;
739                                 case 3:
740                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
741                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
742                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
743                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
744                                         break;
745                                 case 4:
746                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
747                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
748                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
749                                                          TILE_SPLIT(split_equal_to_row_size));
750                                         break;
751                                 case 5:
752                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
753                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
754                                         break;
755                                 case 6:
756                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
757                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
758                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
759                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
760                                         break;
761                                 case 7:
762                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
763                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
764                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
765                                                          TILE_SPLIT(split_equal_to_row_size));
766                                         break;
767                                 case 8:
768                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
769                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
770                                         break;
771                                 case 9:
772                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
773                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
774                                         break;
775                                 case 10:
776                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
777                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
778                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
779                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
780                                         break;
781                                 case 11:
782                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
783                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
784                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
785                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
786                                         break;
787                                 case 12:
788                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
789                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
790                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
791                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
792                                         break;
793                                 case 13:
794                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
795                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
796                                         break;
797                                 case 14:
798                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
799                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
800                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
801                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
802                                         break;
803                                 case 16:
804                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
805                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
806                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
807                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
808                                         break;
809                                 case 17:
810                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
811                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
812                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
813                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
814                                         break;
815                                 case 27:
816                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
817                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
818                                         break;
819                                 case 28:
820                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
821                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
822                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
823                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
824                                         break;
825                                 case 29:
826                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
827                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
828                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
829                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
830                                         break;
831                                 case 30:
832                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
833                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
834                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
835                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
836                                         break;
837                                 default:
838                                         gb_tile_moden = 0;
839                                         break;
840                                 }
841                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
842                         }
843                 }
844                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
845                         switch (reg_offset) {
846                         case 0:
847                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
848                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
849                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
850                                                  NUM_BANKS(ADDR_SURF_16_BANK));
851                                 break;
852                         case 1:
853                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
854                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
855                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
856                                                  NUM_BANKS(ADDR_SURF_16_BANK));
857                                 break;
858                         case 2:
859                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
860                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
861                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
862                                                  NUM_BANKS(ADDR_SURF_16_BANK));
863                                 break;
864                         case 3:
865                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
866                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
867                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
868                                                  NUM_BANKS(ADDR_SURF_16_BANK));
869                                 break;
870                         case 4:
871                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
872                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
873                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
874                                                  NUM_BANKS(ADDR_SURF_16_BANK));
875                                 break;
876                         case 5:
877                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
878                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
879                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
880                                                  NUM_BANKS(ADDR_SURF_8_BANK));
881                                 break;
882                         case 6:
883                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
884                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
885                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
886                                                  NUM_BANKS(ADDR_SURF_4_BANK));
887                                 break;
888                         case 8:
889                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
890                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
891                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
892                                                  NUM_BANKS(ADDR_SURF_16_BANK));
893                                 break;
894                         case 9:
895                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
896                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
897                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
898                                                  NUM_BANKS(ADDR_SURF_16_BANK));
899                                 break;
900                         case 10:
901                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
902                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
903                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
904                                                  NUM_BANKS(ADDR_SURF_16_BANK));
905                                 break;
906                         case 11:
907                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
908                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
909                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
910                                                  NUM_BANKS(ADDR_SURF_16_BANK));
911                                 break;
912                         case 12:
913                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
914                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
915                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
916                                                  NUM_BANKS(ADDR_SURF_16_BANK));
917                                 break;
918                         case 13:
919                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
920                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
921                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
922                                                  NUM_BANKS(ADDR_SURF_8_BANK));
923                                 break;
924                         case 14:
925                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
926                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
927                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
928                                                  NUM_BANKS(ADDR_SURF_4_BANK));
929                                 break;
930                         default:
931                                 gb_tile_moden = 0;
932                                 break;
933                         }
934                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
935                 }
936         } else if (num_pipe_configs == 2) {
937                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
938                         switch (reg_offset) {
939                         case 0:
940                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
941                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
942                                                  PIPE_CONFIG(ADDR_SURF_P2) |
943                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
944                                 break;
945                         case 1:
946                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
947                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
948                                                  PIPE_CONFIG(ADDR_SURF_P2) |
949                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
950                                 break;
951                         case 2:
952                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
953                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
954                                                  PIPE_CONFIG(ADDR_SURF_P2) |
955                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
956                                 break;
957                         case 3:
958                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
959                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
960                                                  PIPE_CONFIG(ADDR_SURF_P2) |
961                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
962                                 break;
963                         case 4:
964                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
965                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
966                                                  PIPE_CONFIG(ADDR_SURF_P2) |
967                                                  TILE_SPLIT(split_equal_to_row_size));
968                                 break;
969                         case 5:
970                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
971                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
972                                 break;
973                         case 6:
974                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
975                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
976                                                  PIPE_CONFIG(ADDR_SURF_P2) |
977                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
978                                 break;
979                         case 7:
980                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
981                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
982                                                  PIPE_CONFIG(ADDR_SURF_P2) |
983                                                  TILE_SPLIT(split_equal_to_row_size));
984                                 break;
985                         case 8:
986                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
987                                 break;
988                         case 9:
989                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
990                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
991                                 break;
992                         case 10:
993                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
994                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
995                                                  PIPE_CONFIG(ADDR_SURF_P2) |
996                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
997                                 break;
998                         case 11:
999                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1000                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1001                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1002                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1003                                 break;
1004                         case 12:
1005                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1006                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1007                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1008                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1009                                 break;
1010                         case 13:
1011                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1012                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1013                                 break;
1014                         case 14:
1015                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1016                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1017                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1018                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1019                                 break;
1020                         case 16:
1021                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1022                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1023                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1024                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1025                                 break;
1026                         case 17:
1027                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1028                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1029                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1030                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1031                                 break;
1032                         case 27:
1033                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1034                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1035                                 break;
1036                         case 28:
1037                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1038                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1039                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1040                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1041                                 break;
1042                         case 29:
1043                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1044                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1045                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1046                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1047                                 break;
1048                         case 30:
1049                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1050                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1051                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1052                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1053                                 break;
1054                         default:
1055                                 gb_tile_moden = 0;
1056                                 break;
1057                         }
1058                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1059                 }
1060                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1061                         switch (reg_offset) {
1062                         case 0:
1063                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1064                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1065                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1066                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1067                                 break;
1068                         case 1:
1069                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1070                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1071                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1072                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1073                                 break;
1074                         case 2:
1075                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1076                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1077                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1078                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1079                                 break;
1080                         case 3:
1081                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1082                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1083                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1084                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1085                                 break;
1086                         case 4:
1087                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1088                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1089                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1090                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1091                                 break;
1092                         case 5:
1093                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1095                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1096                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1097                                 break;
1098                         case 6:
1099                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1100                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1101                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1102                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1103                                 break;
1104                         case 8:
1105                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1106                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1107                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1108                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1109                                 break;
1110                         case 9:
1111                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1112                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1113                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1114                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1115                                 break;
1116                         case 10:
1117                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1118                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1119                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1120                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1121                                 break;
1122                         case 11:
1123                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1124                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1125                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1126                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1127                                 break;
1128                         case 12:
1129                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1130                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1131                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1132                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1133                                 break;
1134                         case 13:
1135                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1136                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1137                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1138                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1139                                 break;
1140                         case 14:
1141                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1142                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1143                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1144                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1145                                 break;
1146                         default:
1147                                 gb_tile_moden = 0;
1148                                 break;
1149                         }
1150                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1151                 }
1152         } else
1153                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1154 }
1155
1156 /**
1157  * cik_select_se_sh - select which SE, SH to address
1158  *
1159  * @rdev: radeon_device pointer
1160  * @se_num: shader engine to address
1161  * @sh_num: sh block to address
1162  *
1163  * Select which SE, SH combinations to address. Certain
1164  * registers are instanced per SE or SH.  0xffffffff means
1165  * broadcast to all SEs or SHs (CIK).
1166  */
1167 static void cik_select_se_sh(struct radeon_device *rdev,
1168                              u32 se_num, u32 sh_num)
1169 {
1170         u32 data = INSTANCE_BROADCAST_WRITES;
1171
1172         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1173                 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1174         else if (se_num == 0xffffffff)
1175                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1176         else if (sh_num == 0xffffffff)
1177                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1178         else
1179                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1180         WREG32(GRBM_GFX_INDEX, data);
1181 }
1182
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: number of low-order bits to set
 *
 * Builds a mask with the lowest @bit_width bits set (CIK).
 * Widths of 32 or more yield a mask with all 32 bits set.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* shifting by the full type width is undefined, so saturate first */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
1201
1202 /**
1203  * cik_select_se_sh - select which SE, SH to address
1204  *
1205  * @rdev: radeon_device pointer
1206  * @max_rb_num: max RBs (render backends) for the asic
1207  * @se_num: number of SEs (shader engines) for the asic
1208  * @sh_per_se: number of SH blocks per SE for the asic
1209  *
1210  * Calculates the bitmask of disabled RBs (CIK).
1211  * Returns the disabled RB bitmask.
1212  */
1213 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1214                               u32 max_rb_num, u32 se_num,
1215                               u32 sh_per_se)
1216 {
1217         u32 data, mask;
1218
1219         data = RREG32(CC_RB_BACKEND_DISABLE);
1220         if (data & 1)
1221                 data &= BACKEND_DISABLE_MASK;
1222         else
1223                 data = 0;
1224         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1225
1226         data >>= BACKEND_DISABLE_SHIFT;
1227
1228         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1229
1230         return data & mask;
1231 }
1232
1233 /**
1234  * cik_setup_rb - setup the RBs on the asic
1235  *
1236  * @rdev: radeon_device pointer
1237  * @se_num: number of SEs (shader engines) for the asic
1238  * @sh_per_se: number of SH blocks per SE for the asic
1239  * @max_rb_num: max RBs (render backends) for the asic
1240  *
1241  * Configures per-SE/SH RB registers (CIK).
1242  */
1243 static void cik_setup_rb(struct radeon_device *rdev,
1244                          u32 se_num, u32 sh_per_se,
1245                          u32 max_rb_num)
1246 {
1247         int i, j;
1248         u32 data, mask;
1249         u32 disabled_rbs = 0;
1250         u32 enabled_rbs = 0;
1251
1252         for (i = 0; i < se_num; i++) {
1253                 for (j = 0; j < sh_per_se; j++) {
1254                         cik_select_se_sh(rdev, i, j);
1255                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1256                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1257                 }
1258         }
1259         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1260
1261         mask = 1;
1262         for (i = 0; i < max_rb_num; i++) {
1263                 if (!(disabled_rbs & mask))
1264                         enabled_rbs |= mask;
1265                 mask <<= 1;
1266         }
1267
1268         for (i = 0; i < se_num; i++) {
1269                 cik_select_se_sh(rdev, i, 0xffffffff);
1270                 data = 0;
1271                 for (j = 0; j < sh_per_se; j++) {
1272                         switch (enabled_rbs & 3) {
1273                         case 1:
1274                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1275                                 break;
1276                         case 2:
1277                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1278                                 break;
1279                         case 3:
1280                         default:
1281                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1282                                 break;
1283                         }
1284                         enabled_rbs >>= 2;
1285                 }
1286                 WREG32(PA_SC_RASTER_CONFIG, data);
1287         }
1288         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1289 }
1290
1291 /**
1292  * cik_gpu_init - setup the 3D engine
1293  *
1294  * @rdev: radeon_device pointer
1295  *
1296  * Configures the 3D engine and tiling configuration
1297  * registers so that the 3D engine is usable.
1298  */
1299 static void cik_gpu_init(struct radeon_device *rdev)
1300 {
1301         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1302         u32 mc_shared_chmap, mc_arb_ramcfg;
1303         u32 hdp_host_path_cntl;
1304         u32 tmp;
1305         int i, j;
1306
1307         switch (rdev->family) {
1308         case CHIP_BONAIRE:
1309                 rdev->config.cik.max_shader_engines = 2;
1310                 rdev->config.cik.max_tile_pipes = 4;
1311                 rdev->config.cik.max_cu_per_sh = 7;
1312                 rdev->config.cik.max_sh_per_se = 1;
1313                 rdev->config.cik.max_backends_per_se = 2;
1314                 rdev->config.cik.max_texture_channel_caches = 4;
1315                 rdev->config.cik.max_gprs = 256;
1316                 rdev->config.cik.max_gs_threads = 32;
1317                 rdev->config.cik.max_hw_contexts = 8;
1318
1319                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1320                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1321                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1322                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1323                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1324                 break;
1325         case CHIP_KAVERI:
1326                 /* TODO */
1327                 break;
1328         case CHIP_KABINI:
1329         default:
1330                 rdev->config.cik.max_shader_engines = 1;
1331                 rdev->config.cik.max_tile_pipes = 2;
1332                 rdev->config.cik.max_cu_per_sh = 2;
1333                 rdev->config.cik.max_sh_per_se = 1;
1334                 rdev->config.cik.max_backends_per_se = 1;
1335                 rdev->config.cik.max_texture_channel_caches = 2;
1336                 rdev->config.cik.max_gprs = 256;
1337                 rdev->config.cik.max_gs_threads = 16;
1338                 rdev->config.cik.max_hw_contexts = 8;
1339
1340                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1341                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1342                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1343                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1344                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1345                 break;
1346         }
1347
1348         /* Initialize HDP */
1349         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1350                 WREG32((0x2c14 + j), 0x00000000);
1351                 WREG32((0x2c18 + j), 0x00000000);
1352                 WREG32((0x2c1c + j), 0x00000000);
1353                 WREG32((0x2c20 + j), 0x00000000);
1354                 WREG32((0x2c24 + j), 0x00000000);
1355         }
1356
1357         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1358
1359         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1360
1361         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1362         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1363
1364         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1365         rdev->config.cik.mem_max_burst_length_bytes = 256;
1366         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1367         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1368         if (rdev->config.cik.mem_row_size_in_kb > 4)
1369                 rdev->config.cik.mem_row_size_in_kb = 4;
1370         /* XXX use MC settings? */
1371         rdev->config.cik.shader_engine_tile_size = 32;
1372         rdev->config.cik.num_gpus = 1;
1373         rdev->config.cik.multi_gpu_tile_size = 64;
1374
1375         /* fix up row size */
1376         gb_addr_config &= ~ROW_SIZE_MASK;
1377         switch (rdev->config.cik.mem_row_size_in_kb) {
1378         case 1:
1379         default:
1380                 gb_addr_config |= ROW_SIZE(0);
1381                 break;
1382         case 2:
1383                 gb_addr_config |= ROW_SIZE(1);
1384                 break;
1385         case 4:
1386                 gb_addr_config |= ROW_SIZE(2);
1387                 break;
1388         }
1389
1390         /* setup tiling info dword.  gb_addr_config is not adequate since it does
1391          * not have bank info, so create a custom tiling dword.
1392          * bits 3:0   num_pipes
1393          * bits 7:4   num_banks
1394          * bits 11:8  group_size
1395          * bits 15:12 row_size
1396          */
1397         rdev->config.cik.tile_config = 0;
1398         switch (rdev->config.cik.num_tile_pipes) {
1399         case 1:
1400                 rdev->config.cik.tile_config |= (0 << 0);
1401                 break;
1402         case 2:
1403                 rdev->config.cik.tile_config |= (1 << 0);
1404                 break;
1405         case 4:
1406                 rdev->config.cik.tile_config |= (2 << 0);
1407                 break;
1408         case 8:
1409         default:
1410                 /* XXX what about 12? */
1411                 rdev->config.cik.tile_config |= (3 << 0);
1412                 break;
1413         }
1414         if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1415                 rdev->config.cik.tile_config |= 1 << 4;
1416         else
1417                 rdev->config.cik.tile_config |= 0 << 4;
1418         rdev->config.cik.tile_config |=
1419                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1420         rdev->config.cik.tile_config |=
1421                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1422
1423         WREG32(GB_ADDR_CONFIG, gb_addr_config);
1424         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1425         WREG32(DMIF_ADDR_CALC, gb_addr_config);
1426
1427         cik_tiling_mode_table_init(rdev);
1428
1429         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1430                      rdev->config.cik.max_sh_per_se,
1431                      rdev->config.cik.max_backends_per_se);
1432
1433         /* set HW defaults for 3D engine */
1434         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1435
1436         WREG32(SX_DEBUG_1, 0x20);
1437
1438         WREG32(TA_CNTL_AUX, 0x00010000);
1439
1440         tmp = RREG32(SPI_CONFIG_CNTL);
1441         tmp |= 0x03000000;
1442         WREG32(SPI_CONFIG_CNTL, tmp);
1443
1444         WREG32(SQ_CONFIG, 1);
1445
1446         WREG32(DB_DEBUG, 0);
1447
1448         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1449         tmp |= 0x00000400;
1450         WREG32(DB_DEBUG2, tmp);
1451
1452         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1453         tmp |= 0x00020200;
1454         WREG32(DB_DEBUG3, tmp);
1455
1456         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1457         tmp |= 0x00018208;
1458         WREG32(CB_HW_CONTROL, tmp);
1459
1460         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1461
1462         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1463                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1464                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1465                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1466
1467         WREG32(VGT_NUM_INSTANCES, 1);
1468
1469         WREG32(CP_PERFMON_CNTL, 0);
1470
1471         WREG32(SQ_CONFIG, 0);
1472
1473         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1474                                           FORCE_EOV_MAX_REZ_CNT(255)));
1475
1476         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1477                AUTO_INVLD_EN(ES_AND_GS_AUTO));
1478
1479         WREG32(VGT_GS_VERTEX_REUSE, 16);
1480         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1481
1482         tmp = RREG32(HDP_MISC_CNTL);
1483         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1484         WREG32(HDP_MISC_CNTL, tmp);
1485
1486         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1487         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1488
1489         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1490         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1491
1492         udelay(50);
1493 }
1494
1495 /*
1496  * GPU scratch register helper functions.
1497  */
1498 /**
1499  * cik_scratch_init - setup driver info for CP scratch regs
1500  *
1501  * @rdev: radeon_device pointer
1502  *
1503  * Set up the number and offset of the CP scratch registers.
1504  * NOTE: use of CP scratch registers is a legacy inferface and
1505  * is not used by default on newer asics (r6xx+).  On newer asics,
1506  * memory buffers are used for fences rather than scratch regs.
1507  */
1508 static void cik_scratch_init(struct radeon_device *rdev)
1509 {
1510         int i;
1511
1512         rdev->scratch.num_reg = 7;
1513         rdev->scratch.reg_base = SCRATCH_REG0;
1514         for (i = 0; i < rdev->scratch.num_reg; i++) {
1515                 rdev->scratch.free[i] = true;
1516                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1517         }
1518 }
1519
1520 /**
1521  * cik_ring_test - basic gfx ring test
1522  *
1523  * @rdev: radeon_device pointer
1524  * @ring: radeon_ring structure holding ring information
1525  *
1526  * Allocate a scratch register and write to it using the gfx ring (CIK).
1527  * Provides a basic gfx ring test to verify that the ring is working.
1528  * Used by cik_cp_gfx_resume();
1529  * Returns 0 on success, error on failure.
1530  */
1531 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1532 {
1533         uint32_t scratch;
1534         uint32_t tmp = 0;
1535         unsigned i;
1536         int r;
1537
1538         r = radeon_scratch_get(rdev, &scratch);
1539         if (r) {
1540                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1541                 return r;
1542         }
1543         WREG32(scratch, 0xCAFEDEAD);
1544         r = radeon_ring_lock(rdev, ring, 3);
1545         if (r) {
1546                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1547                 radeon_scratch_free(rdev, scratch);
1548                 return r;
1549         }
1550         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1551         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1552         radeon_ring_write(ring, 0xDEADBEEF);
1553         radeon_ring_unlock_commit(rdev, ring);
1554         for (i = 0; i < rdev->usec_timeout; i++) {
1555                 tmp = RREG32(scratch);
1556                 if (tmp == 0xDEADBEEF)
1557                         break;
1558                 DRM_UDELAY(1);
1559         }
1560         if (i < rdev->usec_timeout) {
1561                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1562         } else {
1563                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1564                           ring->idx, scratch, tmp);
1565                 r = -EINVAL;
1566         }
1567         radeon_scratch_free(rdev, scratch);
1568         return r;
1569 }
1570
1571 /**
1572  * cik_fence_ring_emit - emit a fence on the gfx ring
1573  *
1574  * @rdev: radeon_device pointer
1575  * @fence: radeon fence object
1576  *
1577  * Emits a fence sequnce number on the gfx ring and flushes
1578  * GPU caches.
1579  */
1580 void cik_fence_ring_emit(struct radeon_device *rdev,
1581                          struct radeon_fence *fence)
1582 {
1583         struct radeon_ring *ring = &rdev->ring[fence->ring];
1584         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1585
1586         /* EVENT_WRITE_EOP - flush caches, send int */
1587         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1588         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1589                                  EOP_TC_ACTION_EN |
1590                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1591                                  EVENT_INDEX(5)));
1592         radeon_ring_write(ring, addr & 0xfffffffc);
1593         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1594         radeon_ring_write(ring, fence->seq);
1595         radeon_ring_write(ring, 0);
1596         /* HDP flush */
1597         /* We should be using the new WAIT_REG_MEM special op packet here
1598          * but it causes the CP to hang
1599          */
1600         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1601         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1602                                  WRITE_DATA_DST_SEL(0)));
1603         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1604         radeon_ring_write(ring, 0);
1605         radeon_ring_write(ring, 0);
1606 }
1607
1608 void cik_semaphore_ring_emit(struct radeon_device *rdev,
1609                              struct radeon_ring *ring,
1610                              struct radeon_semaphore *semaphore,
1611                              bool emit_wait)
1612 {
1613         uint64_t addr = semaphore->gpu_addr;
1614         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1615
1616         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1617         radeon_ring_write(ring, addr & 0xffffffff);
1618         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1619 }
1620
1621 /*
1622  * IB stuff
1623  */
1624 /**
1625  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1626  *
1627  * @rdev: radeon_device pointer
1628  * @ib: radeon indirect buffer object
1629  *
1630  * Emits an DE (drawing engine) or CE (constant engine) IB
1631  * on the gfx ring.  IBs are usually generated by userspace
1632  * acceleration drivers and submitted to the kernel for
1633  * sheduling on the ring.  This function schedules the IB
1634  * on the gfx ring for execution by the GPU.
1635  */
1636 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1637 {
1638         struct radeon_ring *ring = &rdev->ring[ib->ring];
1639         u32 header, control = INDIRECT_BUFFER_VALID;
1640
1641         if (ib->is_const_ib) {
1642                 /* set switch buffer packet before const IB */
1643                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1644                 radeon_ring_write(ring, 0);
1645
1646                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1647         } else {
1648                 u32 next_rptr;
1649                 if (ring->rptr_save_reg) {
1650                         next_rptr = ring->wptr + 3 + 4;
1651                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1652                         radeon_ring_write(ring, ((ring->rptr_save_reg -
1653                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
1654                         radeon_ring_write(ring, next_rptr);
1655                 } else if (rdev->wb.enabled) {
1656                         next_rptr = ring->wptr + 5 + 4;
1657                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1658                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1659                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1660                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1661                         radeon_ring_write(ring, next_rptr);
1662                 }
1663
1664                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1665         }
1666
1667         control |= ib->length_dw |
1668                 (ib->vm ? (ib->vm->id << 24) : 0);
1669
1670         radeon_ring_write(ring, header);
1671         radeon_ring_write(ring,
1672 #ifdef __BIG_ENDIAN
1673                           (2 << 0) |
1674 #endif
1675                           (ib->gpu_addr & 0xFFFFFFFC));
1676         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1677         radeon_ring_write(ring, control);
1678 }
1679
1680 /**
1681  * cik_ib_test - basic gfx ring IB test
1682  *
1683  * @rdev: radeon_device pointer
1684  * @ring: radeon_ring structure holding ring information
1685  *
1686  * Allocate an IB and execute it on the gfx ring (CIK).
1687  * Provides a basic gfx ring test to verify that IBs are working.
1688  * Returns 0 on success, error on failure.
1689  */
1690 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1691 {
1692         struct radeon_ib ib;
1693         uint32_t scratch;
1694         uint32_t tmp = 0;
1695         unsigned i;
1696         int r;
1697
1698         r = radeon_scratch_get(rdev, &scratch);
1699         if (r) {
1700                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1701                 return r;
1702         }
1703         WREG32(scratch, 0xCAFEDEAD);
1704         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1705         if (r) {
1706                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1707                 return r;
1708         }
1709         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1710         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1711         ib.ptr[2] = 0xDEADBEEF;
1712         ib.length_dw = 3;
1713         r = radeon_ib_schedule(rdev, &ib, NULL);
1714         if (r) {
1715                 radeon_scratch_free(rdev, scratch);
1716                 radeon_ib_free(rdev, &ib);
1717                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1718                 return r;
1719         }
1720         r = radeon_fence_wait(ib.fence, false);
1721         if (r) {
1722                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
1723                 return r;
1724         }
1725         for (i = 0; i < rdev->usec_timeout; i++) {
1726                 tmp = RREG32(scratch);
1727                 if (tmp == 0xDEADBEEF)
1728                         break;
1729                 DRM_UDELAY(1);
1730         }
1731         if (i < rdev->usec_timeout) {
1732                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1733         } else {
1734                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1735                           scratch, tmp);
1736                 r = -EINVAL;
1737         }
1738         radeon_scratch_free(rdev, scratch);
1739         radeon_ib_free(rdev, &ib);
1740         return r;
1741 }
1742
1743 /*
1744  * CP.
1745  * On CIK, gfx and compute now have independent command processors.
1746  *
1747  * GFX
1748  * Gfx consists of a single ring and can process both gfx jobs and
1749  * compute jobs.  The gfx CP consists of three microengines (ME):
1750  * PFP - Pre-Fetch Parser
1751  * ME - Micro Engine
1752  * CE - Constant Engine
1753  * The PFP and ME make up what is considered the Drawing Engine (DE).
1754  * The CE is an asynchronous engine used for updating buffer descriptors
1755  * used by the DE so that they can be loaded into cache in parallel
1756  * while the DE is processing state update packets.
1757  *
1758  * Compute
1759  * The compute CP consists of two microengines (ME):
1760  * MEC1 - Compute MicroEngine 1
1761  * MEC2 - Compute MicroEngine 2
1762  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1763  * The queues are exposed to userspace and are programmed directly
1764  * by the compute runtime.
1765  */
1766 /**
1767  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1768  *
1769  * @rdev: radeon_device pointer
1770  * @enable: enable or disable the MEs
1771  *
1772  * Halts or unhalts the gfx MEs.
1773  */
1774 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1775 {
1776         if (enable)
1777                 WREG32(CP_ME_CNTL, 0);
1778         else {
1779                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1780                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1781         }
1782         udelay(50);
1783 }
1784
1785 /**
1786  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1787  *
1788  * @rdev: radeon_device pointer
1789  *
1790  * Loads the gfx PFP, ME, and CE ucode.
1791  * Returns 0 for success, -EINVAL if the ucode is not available.
1792  */
1793 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1794 {
1795         const __be32 *fw_data;
1796         int i;
1797
1798         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1799                 return -EINVAL;
1800
1801         cik_cp_gfx_enable(rdev, false);
1802
1803         /* PFP */
1804         fw_data = (const __be32 *)rdev->pfp_fw->data;
1805         WREG32(CP_PFP_UCODE_ADDR, 0);
1806         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1807                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1808         WREG32(CP_PFP_UCODE_ADDR, 0);
1809
1810         /* CE */
1811         fw_data = (const __be32 *)rdev->ce_fw->data;
1812         WREG32(CP_CE_UCODE_ADDR, 0);
1813         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1814                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1815         WREG32(CP_CE_UCODE_ADDR, 0);
1816
1817         /* ME */
1818         fw_data = (const __be32 *)rdev->me_fw->data;
1819         WREG32(CP_ME_RAM_WADDR, 0);
1820         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1821                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1822         WREG32(CP_ME_RAM_WADDR, 0);
1823
1824         WREG32(CP_PFP_UCODE_ADDR, 0);
1825         WREG32(CP_CE_UCODE_ADDR, 0);
1826         WREG32(CP_ME_RAM_WADDR, 0);
1827         WREG32(CP_ME_RAM_RADDR, 0);
1828         return 0;
1829 }
1830
1831 /**
1832  * cik_cp_gfx_start - start the gfx ring
1833  *
1834  * @rdev: radeon_device pointer
1835  *
1836  * Enables the ring and loads the clear state context and other
1837  * packets required to init the ring.
1838  * Returns 0 for success, error for failure.
1839  */
1840 static int cik_cp_gfx_start(struct radeon_device *rdev)
1841 {
1842         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1843         int r, i;
1844
1845         /* init the CP */
1846         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1847         WREG32(CP_ENDIAN_SWAP, 0);
1848         WREG32(CP_DEVICE_ID, 1);
1849
1850         cik_cp_gfx_enable(rdev, true);
1851
1852         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1853         if (r) {
1854                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1855                 return r;
1856         }
1857
1858         /* init the CE partitions.  CE only used for gfx on CIK */
1859         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1860         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1861         radeon_ring_write(ring, 0xc000);
1862         radeon_ring_write(ring, 0xc000);
1863
1864         /* setup clear context state */
1865         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1866         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1867
1868         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1869         radeon_ring_write(ring, 0x80000000);
1870         radeon_ring_write(ring, 0x80000000);
1871
1872         for (i = 0; i < cik_default_size; i++)
1873                 radeon_ring_write(ring, cik_default_state[i]);
1874
1875         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1876         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1877
1878         /* set clear context state */
1879         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1880         radeon_ring_write(ring, 0);
1881
1882         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1883         radeon_ring_write(ring, 0x00000316);
1884         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1885         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1886
1887         radeon_ring_unlock_commit(rdev, ring);
1888
1889         return 0;
1890 }
1891
1892 /**
1893  * cik_cp_gfx_fini - stop the gfx ring
1894  *
1895  * @rdev: radeon_device pointer
1896  *
1897  * Stop the gfx ring and tear down the driver ring
1898  * info.
1899  */
1900 static void cik_cp_gfx_fini(struct radeon_device *rdev)
1901 {
1902         cik_cp_gfx_enable(rdev, false);
1903         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1904 }
1905
1906 /**
1907  * cik_cp_gfx_resume - setup the gfx ring buffer registers
1908  *
1909  * @rdev: radeon_device pointer
1910  *
1911  * Program the location and size of the gfx ring buffer
1912  * and test it to make sure it's working.
1913  * Returns 0 for success, error for failure.
1914  */
1915 static int cik_cp_gfx_resume(struct radeon_device *rdev)
1916 {
1917         struct radeon_ring *ring;
1918         u32 tmp;
1919         u32 rb_bufsz;
1920         u64 rb_addr;
1921         int r;
1922
1923         WREG32(CP_SEM_WAIT_TIMER, 0x0);
1924         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1925
1926         /* Set the write pointer delay */
1927         WREG32(CP_RB_WPTR_DELAY, 0);
1928
1929         /* set the RB to use vmid 0 */
1930         WREG32(CP_RB_VMID, 0);
1931
1932         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1933
1934         /* ring 0 - compute and gfx */
1935         /* Set ring buffer size */
1936         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1937         rb_bufsz = drm_order(ring->ring_size / 8);
1938         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1939 #ifdef __BIG_ENDIAN
1940         tmp |= BUF_SWAP_32BIT;
1941 #endif
1942         WREG32(CP_RB0_CNTL, tmp);
1943
1944         /* Initialize the ring buffer's read and write pointers */
1945         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
1946         ring->wptr = 0;
1947         WREG32(CP_RB0_WPTR, ring->wptr);
1948
1949         /* set the wb address wether it's enabled or not */
1950         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
1951         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
1952
1953         /* scratch register shadowing is no longer supported */
1954         WREG32(SCRATCH_UMSK, 0);
1955
1956         if (!rdev->wb.enabled)
1957                 tmp |= RB_NO_UPDATE;
1958
1959         mdelay(1);
1960         WREG32(CP_RB0_CNTL, tmp);
1961
1962         rb_addr = ring->gpu_addr >> 8;
1963         WREG32(CP_RB0_BASE, rb_addr);
1964         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
1965
1966         ring->rptr = RREG32(CP_RB0_RPTR);
1967
1968         /* start the ring */
1969         cik_cp_gfx_start(rdev);
1970         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
1971         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1972         if (r) {
1973                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1974                 return r;
1975         }
1976         return 0;
1977 }
1978
1979 /**
1980  * cik_cp_compute_enable - enable/disable the compute CP MEs
1981  *
1982  * @rdev: radeon_device pointer
1983  * @enable: enable or disable the MEs
1984  *
1985  * Halts or unhalts the compute MEs.
1986  */
1987 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
1988 {
1989         if (enable)
1990                 WREG32(CP_MEC_CNTL, 0);
1991         else
1992                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
1993         udelay(50);
1994 }
1995
1996 /**
1997  * cik_cp_compute_load_microcode - load the compute CP ME ucode
1998  *
1999  * @rdev: radeon_device pointer
2000  *
2001  * Loads the compute MEC1&2 ucode.
2002  * Returns 0 for success, -EINVAL if the ucode is not available.
2003  */
2004 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2005 {
2006         const __be32 *fw_data;
2007         int i;
2008
2009         if (!rdev->mec_fw)
2010                 return -EINVAL;
2011
2012         cik_cp_compute_enable(rdev, false);
2013
2014         /* MEC1 */
2015         fw_data = (const __be32 *)rdev->mec_fw->data;
2016         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2017         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2018                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2019         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2020
2021         if (rdev->family == CHIP_KAVERI) {
2022                 /* MEC2 */
2023                 fw_data = (const __be32 *)rdev->mec_fw->data;
2024                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2025                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2026                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2027                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2028         }
2029
2030         return 0;
2031 }
2032
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Placeholder: no compute queue is programmed yet, so this
 * currently does nothing and always returns 0.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* TODO: enable the compute queues */
	return 0;
}
2046
2047 /**
2048  * cik_cp_compute_fini - stop the compute queues
2049  *
2050  * @rdev: radeon_device pointer
2051  *
2052  * Stop the compute queues and tear down the driver queue
2053  * info.
2054  */
2055 static void cik_cp_compute_fini(struct radeon_device *rdev)
2056 {
2057         cik_cp_compute_enable(rdev, false);
2058         //todo
2059 }
2060
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Queue programming and testing are not implemented yet; for now
 * this only forwards to cik_cp_compute_start().
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	/* TODO: program and test the compute queues */
	return cik_cp_compute_start(rdev);
}
2080
2081 /* XXX temporary wrappers to handle both compute and gfx */
2082 /* XXX */
2083 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2084 {
2085         cik_cp_gfx_enable(rdev, enable);
2086         cik_cp_compute_enable(rdev, enable);
2087 }
2088
/*
 * XXX
 * cik_cp_load_microcode - load the gfx and the compute CP ucode
 *
 * The gfx images are loaded first; the compute image is only loaded
 * when that succeeded.  Returns 0 for success or the first error.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (!ret)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
2103
/*
 * XXX
 * cik_cp_fini - shut down both command processors
 *
 * Stops the gfx ring first, then the compute queues.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
2110
2111 /* XXX */
2112 static int cik_cp_resume(struct radeon_device *rdev)
2113 {
2114         int r;
2115
2116         /* Reset all cp blocks */
2117         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2118         RREG32(GRBM_SOFT_RESET);
2119         mdelay(15);
2120         WREG32(GRBM_SOFT_RESET, 0);
2121         RREG32(GRBM_SOFT_RESET);
2122
2123         r = cik_cp_load_microcode(rdev);
2124         if (r)
2125                 return r;
2126
2127         r = cik_cp_gfx_resume(rdev);
2128         if (r)
2129                 return r;
2130         r = cik_cp_compute_resume(rdev);
2131         if (r)
2132                 return r;
2133
2134         return 0;
2135 }
2136
2137 /**
2138  * cik_gpu_is_lockup - check if the 3D engine is locked up
2139  *
2140  * @rdev: radeon_device pointer
2141  * @ring: radeon_ring structure holding ring information
2142  *
2143  * Check if the 3D engine is locked up (CIK).
2144  * Returns true if the engine is locked, false if not.
2145  */
2146 bool cik_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2147 {
2148         u32 srbm_status, srbm_status2;
2149         u32 grbm_status, grbm_status2;
2150         u32 grbm_status_se0, grbm_status_se1, grbm_status_se2, grbm_status_se3;
2151
2152         srbm_status = RREG32(SRBM_STATUS);
2153         srbm_status2 = RREG32(SRBM_STATUS2);
2154         grbm_status = RREG32(GRBM_STATUS);
2155         grbm_status2 = RREG32(GRBM_STATUS2);
2156         grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2157         grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2158         grbm_status_se2 = RREG32(GRBM_STATUS_SE2);
2159         grbm_status_se3 = RREG32(GRBM_STATUS_SE3);
2160         if (!(grbm_status & GUI_ACTIVE)) {
2161                 radeon_ring_lockup_update(ring);
2162                 return false;
2163         }
2164         /* force CP activities */
2165         radeon_ring_force_activity(rdev, ring);
2166         return radeon_ring_test_lockup(rdev, ring);
2167 }
2168
2169 /**
2170  * cik_gfx_gpu_soft_reset - soft reset the 3D engine and CPG
2171  *
2172  * @rdev: radeon_device pointer
2173  *
2174  * Soft reset the GFX engine and CPG blocks (CIK).
2175  * XXX: deal with reseting RLC and CPF
2176  * Returns 0 for success.
2177  */
2178 static int cik_gfx_gpu_soft_reset(struct radeon_device *rdev)
2179 {
2180         struct evergreen_mc_save save;
2181         u32 grbm_reset = 0;
2182
2183         if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2184                 return 0;
2185
2186         dev_info(rdev->dev, "GPU GFX softreset \n");
2187         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2188                 RREG32(GRBM_STATUS));
2189         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2190                 RREG32(GRBM_STATUS2));
2191         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2192                 RREG32(GRBM_STATUS_SE0));
2193         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2194                 RREG32(GRBM_STATUS_SE1));
2195         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
2196                 RREG32(GRBM_STATUS_SE2));
2197         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
2198                 RREG32(GRBM_STATUS_SE3));
2199         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2200                 RREG32(SRBM_STATUS));
2201         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
2202                 RREG32(SRBM_STATUS2));
2203         evergreen_mc_stop(rdev, &save);
2204         if (radeon_mc_wait_for_idle(rdev)) {
2205                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2206         }
2207         /* Disable CP parsing/prefetching */
2208         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2209
2210         /* reset all the gfx block and all CPG blocks */
2211         grbm_reset = SOFT_RESET_CPG | SOFT_RESET_GFX;
2212
2213         dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2214         WREG32(GRBM_SOFT_RESET, grbm_reset);
2215         (void)RREG32(GRBM_SOFT_RESET);
2216         udelay(50);
2217         WREG32(GRBM_SOFT_RESET, 0);
2218         (void)RREG32(GRBM_SOFT_RESET);
2219         /* Wait a little for things to settle down */
2220         udelay(50);
2221         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2222                 RREG32(GRBM_STATUS));
2223         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2224                 RREG32(GRBM_STATUS2));
2225         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2226                 RREG32(GRBM_STATUS_SE0));
2227         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2228                 RREG32(GRBM_STATUS_SE1));
2229         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
2230                 RREG32(GRBM_STATUS_SE2));
2231         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
2232                 RREG32(GRBM_STATUS_SE3));
2233         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2234                 RREG32(SRBM_STATUS));
2235         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
2236                 RREG32(SRBM_STATUS2));
2237         evergreen_mc_resume(rdev, &save);
2238         return 0;
2239 }
2240
2241 /**
2242  * cik_compute_gpu_soft_reset - soft reset CPC
2243  *
2244  * @rdev: radeon_device pointer
2245  *
2246  * Soft reset the CPC blocks (CIK).
2247  * XXX: deal with reseting RLC and CPF
2248  * Returns 0 for success.
2249  */
2250 static int cik_compute_gpu_soft_reset(struct radeon_device *rdev)
2251 {
2252         struct evergreen_mc_save save;
2253         u32 grbm_reset = 0;
2254
2255         dev_info(rdev->dev, "GPU compute softreset \n");
2256         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2257                 RREG32(GRBM_STATUS));
2258         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2259                 RREG32(GRBM_STATUS2));
2260         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2261                 RREG32(GRBM_STATUS_SE0));
2262         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2263                 RREG32(GRBM_STATUS_SE1));
2264         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
2265                 RREG32(GRBM_STATUS_SE2));
2266         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
2267                 RREG32(GRBM_STATUS_SE3));
2268         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2269                 RREG32(SRBM_STATUS));
2270         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
2271                 RREG32(SRBM_STATUS2));
2272         evergreen_mc_stop(rdev, &save);
2273         if (radeon_mc_wait_for_idle(rdev)) {
2274                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2275         }
2276         /* Disable CP parsing/prefetching */
2277         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2278
2279         /* reset all the CPC blocks */
2280         grbm_reset = SOFT_RESET_CPG;
2281
2282         dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2283         WREG32(GRBM_SOFT_RESET, grbm_reset);
2284         (void)RREG32(GRBM_SOFT_RESET);
2285         udelay(50);
2286         WREG32(GRBM_SOFT_RESET, 0);
2287         (void)RREG32(GRBM_SOFT_RESET);
2288         /* Wait a little for things to settle down */
2289         udelay(50);
2290         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2291                 RREG32(GRBM_STATUS));
2292         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2293                 RREG32(GRBM_STATUS2));
2294         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2295                 RREG32(GRBM_STATUS_SE0));
2296         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2297                 RREG32(GRBM_STATUS_SE1));
2298         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
2299                 RREG32(GRBM_STATUS_SE2));
2300         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
2301                 RREG32(GRBM_STATUS_SE3));
2302         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2303                 RREG32(SRBM_STATUS));
2304         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
2305                 RREG32(SRBM_STATUS2));
2306         evergreen_mc_resume(rdev, &save);
2307         return 0;
2308 }
2309
2310 /**
2311  * cik_asic_reset - soft reset compute and gfx
2312  *
2313  * @rdev: radeon_device pointer
2314  *
2315  * Soft reset the CPC blocks (CIK).
2316  * XXX: make this more fine grained and only reset
2317  * what is necessary.
2318  * Returns 0 for success.
2319  */
2320 int cik_asic_reset(struct radeon_device *rdev)
2321 {
2322         int r;
2323
2324         r = cik_compute_gpu_soft_reset(rdev);
2325         if (r)
2326                 dev_info(rdev->dev, "Compute reset failed!\n");
2327
2328         return cik_gfx_gpu_soft_reset(rdev);
2329 }
2330
2331 /* MC */
2332 /**
2333  * cik_mc_program - program the GPU memory controller
2334  *
2335  * @rdev: radeon_device pointer
2336  *
2337  * Set the location of vram, gart, and AGP in the GPU's
2338  * physical address space (CIK).
2339  */
2340 static void cik_mc_program(struct radeon_device *rdev)
2341 {
2342         struct evergreen_mc_save save;
2343         u32 tmp;
2344         int i, j;
2345
2346         /* Initialize HDP */
2347         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2348                 WREG32((0x2c14 + j), 0x00000000);
2349                 WREG32((0x2c18 + j), 0x00000000);
2350                 WREG32((0x2c1c + j), 0x00000000);
2351                 WREG32((0x2c20 + j), 0x00000000);
2352                 WREG32((0x2c24 + j), 0x00000000);
2353         }
2354         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2355
2356         evergreen_mc_stop(rdev, &save);
2357         if (radeon_mc_wait_for_idle(rdev)) {
2358                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2359         }
2360         /* Lockout access through VGA aperture*/
2361         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2362         /* Update configuration */
2363         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2364                rdev->mc.vram_start >> 12);
2365         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2366                rdev->mc.vram_end >> 12);
2367         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2368                rdev->vram_scratch.gpu_addr >> 12);
2369         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2370         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2371         WREG32(MC_VM_FB_LOCATION, tmp);
2372         /* XXX double check these! */
2373         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2374         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2375         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
2376         WREG32(MC_VM_AGP_BASE, 0);
2377         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2378         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2379         if (radeon_mc_wait_for_idle(rdev)) {
2380                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2381         }
2382         evergreen_mc_resume(rdev, &save);
2383         /* we need to own VRAM, so turn off the VGA renderer here
2384          * to stop it overwriting our objects */
2385         rv515_vga_render_disable(rdev);
2386 }
2387
2388 /**
2389  * cik_mc_init - initialize the memory controller driver params
2390  *
2391  * @rdev: radeon_device pointer
2392  *
2393  * Look up the amount of vram, vram width, and decide how to place
2394  * vram and gart within the GPU's physical address space (CIK).
2395  * Returns 0 for success.
2396  */
2397 static int cik_mc_init(struct radeon_device *rdev)
2398 {
2399         u32 tmp;
2400         int chansize, numchan;
2401
2402         /* Get VRAM informations */
2403         rdev->mc.vram_is_ddr = true;
2404         tmp = RREG32(MC_ARB_RAMCFG);
2405         if (tmp & CHANSIZE_MASK) {
2406                 chansize = 64;
2407         } else {
2408                 chansize = 32;
2409         }
2410         tmp = RREG32(MC_SHARED_CHMAP);
2411         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2412         case 0:
2413         default:
2414                 numchan = 1;
2415                 break;
2416         case 1:
2417                 numchan = 2;
2418                 break;
2419         case 2:
2420                 numchan = 4;
2421                 break;
2422         case 3:
2423                 numchan = 8;
2424                 break;
2425         case 4:
2426                 numchan = 3;
2427                 break;
2428         case 5:
2429                 numchan = 6;
2430                 break;
2431         case 6:
2432                 numchan = 10;
2433                 break;
2434         case 7:
2435                 numchan = 12;
2436                 break;
2437         case 8:
2438                 numchan = 16;
2439                 break;
2440         }
2441         rdev->mc.vram_width = numchan * chansize;
2442         /* Could aper size report 0 ? */
2443         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2444         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2445         /* size in MB on si */
2446         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2447         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2448         rdev->mc.visible_vram_size = rdev->mc.aper_size;
2449         si_vram_gtt_location(rdev, &rdev->mc);
2450         radeon_update_bandwidth_info(rdev);
2451
2452         return 0;
2453 }
2454
2455 /*
2456  * GART
2457  * VMID 0 is the physical GPU addresses as used by the kernel.
2458  * VMIDs 1-15 are used for userspace clients and are handled
2459  * by the radeon vm/hsa code.
2460  */
2461 /**
2462  * cik_pcie_gart_tlb_flush - gart tlb flush callback
2463  *
2464  * @rdev: radeon_device pointer
2465  *
2466  * Flush the TLB for the VMID 0 page table (CIK).
2467  */
2468 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
2469 {
2470         /* flush hdp cache */
2471         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
2472
2473         /* bits 0-15 are the VM contexts0-15 */
2474         WREG32(VM_INVALIDATE_REQUEST, 0x1);
2475 }
2476
2477 /**
2478  * cik_pcie_gart_enable - gart enable
2479  *
2480  * @rdev: radeon_device pointer
2481  *
2482  * This sets up the TLBs, programs the page tables for VMID0,
2483  * sets up the hw for VMIDs 1-15 which are allocated on
2484  * demand, and sets up the global locations for the LDS, GDS,
2485  * and GPUVM for FSA64 clients (CIK).
2486  * Returns 0 for success, errors for failure.
2487  */
2488 static int cik_pcie_gart_enable(struct radeon_device *rdev)
2489 {
2490         int r, i;
2491
2492         if (rdev->gart.robj == NULL) {
2493                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2494                 return -EINVAL;
2495         }
2496         r = radeon_gart_table_vram_pin(rdev);
2497         if (r)
2498                 return r;
2499         radeon_gart_restore(rdev);
2500         /* Setup TLB control */
2501         WREG32(MC_VM_MX_L1_TLB_CNTL,
2502                (0xA << 7) |
2503                ENABLE_L1_TLB |
2504                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2505                ENABLE_ADVANCED_DRIVER_MODEL |
2506                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2507         /* Setup L2 cache */
2508         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
2509                ENABLE_L2_FRAGMENT_PROCESSING |
2510                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2511                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2512                EFFECTIVE_L2_QUEUE_SIZE(7) |
2513                CONTEXT1_IDENTITY_ACCESS_MODE(1));
2514         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
2515         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2516                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
2517         /* setup context0 */
2518         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2519         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2520         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2521         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
2522                         (u32)(rdev->dummy_page.addr >> 12));
2523         WREG32(VM_CONTEXT0_CNTL2, 0);
2524         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2525                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
2526
2527         WREG32(0x15D4, 0);
2528         WREG32(0x15D8, 0);
2529         WREG32(0x15DC, 0);
2530
2531         /* empty context1-15 */
2532         /* FIXME start with 4G, once using 2 level pt switch to full
2533          * vm size space
2534          */
2535         /* set vm size, must be a multiple of 4 */
2536         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
2537         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
2538         for (i = 1; i < 16; i++) {
2539                 if (i < 8)
2540                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
2541                                rdev->gart.table_addr >> 12);
2542                 else
2543                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2544                                rdev->gart.table_addr >> 12);
2545         }
2546
2547         /* enable context1-15 */
2548         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2549                (u32)(rdev->dummy_page.addr >> 12));
2550         WREG32(VM_CONTEXT1_CNTL2, 4);
2551         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
2552                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2553                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2554                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2555                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2556                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
2557                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
2558                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
2559                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
2560                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
2561                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
2562                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2563                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
2564
2565         /* TC cache setup ??? */
2566         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
2567         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
2568         WREG32(TC_CFG_L1_STORE_POLICY, 0);
2569
2570         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
2571         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
2572         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
2573         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
2574         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
2575
2576         WREG32(TC_CFG_L1_VOLATILE, 0);
2577         WREG32(TC_CFG_L2_VOLATILE, 0);
2578
2579         if (rdev->family == CHIP_KAVERI) {
2580                 u32 tmp = RREG32(CHUB_CONTROL);
2581                 tmp &= ~BYPASS_VM;
2582                 WREG32(CHUB_CONTROL, tmp);
2583         }
2584
2585         /* XXX SH_MEM regs */
2586         /* where to put LDS, scratch, GPUVM in FSA64 space */
2587         for (i = 0; i < 16; i++) {
2588                 WREG32(SRBM_GFX_CNTL, VMID(i));
2589                 WREG32(SH_MEM_CONFIG, 0);
2590                 WREG32(SH_MEM_APE1_BASE, 1);
2591                 WREG32(SH_MEM_APE1_LIMIT, 0);
2592                 WREG32(SH_MEM_BASES, 0);
2593         }
2594         WREG32(SRBM_GFX_CNTL, 0);
2595
2596         cik_pcie_gart_tlb_flush(rdev);
2597         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2598                  (unsigned)(rdev->mc.gtt_size >> 20),
2599                  (unsigned long long)rdev->gart.table_addr);
2600         rdev->gart.ready = true;
2601         return 0;
2602 }
2603
2604 /**
2605  * cik_pcie_gart_disable - gart disable
2606  *
2607  * @rdev: radeon_device pointer
2608  *
2609  * This disables all VM page table (CIK).
2610  */
2611 static void cik_pcie_gart_disable(struct radeon_device *rdev)
2612 {
2613         /* Disable all tables */
2614         WREG32(VM_CONTEXT0_CNTL, 0);
2615         WREG32(VM_CONTEXT1_CNTL, 0);
2616         /* Setup TLB control */
2617         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2618                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2619         /* Setup L2 cache */
2620         WREG32(VM_L2_CNTL,
2621                ENABLE_L2_FRAGMENT_PROCESSING |
2622                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2623                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2624                EFFECTIVE_L2_QUEUE_SIZE(7) |
2625                CONTEXT1_IDENTITY_ACCESS_MODE(1));
2626         WREG32(VM_L2_CNTL2, 0);
2627         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2628                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
2629         radeon_gart_table_vram_unpin(rdev);
2630 }
2631
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): the GART is disabled
 * first, then the GART table is freed, and last the common GART
 * teardown runs.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* step 1: disable the page tables (this also unpins the table) */
	cik_pcie_gart_disable(rdev);

	/* step 2: free the GART table */
	radeon_gart_table_vram_free(rdev);

	/* step 3: common GART teardown */
	radeon_gart_fini(rdev);
}
2645
2646 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (not used)
 * @ib: indirect buffer pointer (not used)
 *
 * CIK relies on hw IB checking, so there is nothing to do in
 * software here and this callback is a nop (CIK).
 * Returns 0 unconditionally.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software */
	return 0;
}
2659
2660 /*
2661  * vm
2662  * VMID 0 is the physical GPU addresses as used by the kernel.
2663  * VMIDs 1-15 are used for userspace clients and are handled
2664  * by the radeon vm/hsa code.
2665  */
2666 /**
2667  * cik_vm_init - cik vm init callback
2668  *
2669  * @rdev: radeon_device pointer
2670  *
2671  * Inits cik specific vm parameters (number of VMs, base of vram for
2672  * VMIDs 1-15) (CIK).
2673  * Returns 0 for success.
2674  */
2675 int cik_vm_init(struct radeon_device *rdev)
2676 {
2677         /* number of VMs */
2678         rdev->vm_manager.nvm = 16;
2679         /* base offset of vram pages */
2680         if (rdev->flags & RADEON_IS_IGP) {
2681                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
2682                 tmp <<= 22;
2683                 rdev->vm_manager.vram_base_offset = tmp;
2684         } else
2685                 rdev->vm_manager.vram_base_offset = 0;
2686
2687         return 0;
2688 }
2689
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer (not used)
 *
 * Tear down any asic specific VM setup (CIK).
 * Currently there is nothing to undo, so the body is empty.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
2700
2701 /**
2702  * cik_vm_flush - cik vm flush using the CP
2703  *
2704  * @rdev: radeon_device pointer
2705  *
2706  * Update the page table base and flush the VM TLB
2707  * using the CP (CIK).
2708  */
2709 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2710 {
2711         struct radeon_ring *ring = &rdev->ring[ridx];
2712
2713         if (vm == NULL)
2714                 return;
2715
2716         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2717         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2718                                  WRITE_DATA_DST_SEL(0)));
2719         if (vm->id < 8) {
2720                 radeon_ring_write(ring,
2721                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
2722         } else {
2723                 radeon_ring_write(ring,
2724                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
2725         }
2726         radeon_ring_write(ring, 0);
2727         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2728
2729         /* update SH_MEM_* regs */
2730         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2731         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2732                                  WRITE_DATA_DST_SEL(0)));
2733         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
2734         radeon_ring_write(ring, 0);
2735         radeon_ring_write(ring, VMID(vm->id));
2736
2737         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
2738         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2739                                  WRITE_DATA_DST_SEL(0)));
2740         radeon_ring_write(ring, SH_MEM_BASES >> 2);
2741         radeon_ring_write(ring, 0);
2742
2743         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
2744         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
2745         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
2746         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
2747
2748         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2749         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2750                                  WRITE_DATA_DST_SEL(0)));
2751         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
2752         radeon_ring_write(ring, 0);
2753         radeon_ring_write(ring, VMID(0));
2754
2755         /* HDP flush */
2756         /* We should be using the WAIT_REG_MEM packet here like in
2757          * cik_fence_ring_emit(), but it causes the CP to hang in this
2758          * context...
2759          */
2760         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2761         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2762                                  WRITE_DATA_DST_SEL(0)));
2763         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2764         radeon_ring_write(ring, 0);
2765         radeon_ring_write(ring, 0);
2766
2767         /* bits 0-15 are the VM contexts0-15 */
2768         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2769         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2770                                  WRITE_DATA_DST_SEL(0)));
2771         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
2772         radeon_ring_write(ring, 0);
2773         radeon_ring_write(ring, 1 << vm->id);
2774
2775         /* sync PFP to ME, otherwise we might get invalid PFP reads */
2776         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2777         radeon_ring_write(ring, 0x0);
2778 }
2779