2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
30 #include "radeon_asic.h"
33 #include "cik_blit_shaders.h"
36 #define CIK_PFP_UCODE_SIZE 2144
37 #define CIK_ME_UCODE_SIZE 2144
38 #define CIK_CE_UCODE_SIZE 2144
40 #define CIK_MEC_UCODE_SIZE 4192
42 #define BONAIRE_RLC_UCODE_SIZE 2048
43 #define KB_RLC_UCODE_SIZE 2560
44 #define KV_RLC_UCODE_SIZE 2560
46 #define CIK_MC_UCODE_SIZE 7866
48 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
59 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
60 MODULE_FIRMWARE("radeon/KABINI_me.bin");
61 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
62 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
63 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
71 #define BONAIRE_IO_MC_REGS_SIZE 36
/* BONAIRE MC (memory controller) I/O register init table.
 * Each pair is { MC_SEQ_IO_DEBUG index, value }; ci_mc_load_microcode()
 * writes the whole table through MC_SEQ_IO_DEBUG_INDEX/DATA before
 * uploading the MC ucode.
 * NOTE(review): the initializer's opening '{' and closing '};' are not
 * visible in this chunk -- the extraction appears lossy; verify against
 * the upstream driver source before compiling. */
73 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
75 {0x00000070, 0x04400000},
76 {0x00000071, 0x80c01803},
77 {0x00000072, 0x00004004},
78 {0x00000073, 0x00000100},
79 {0x00000074, 0x00ff0000},
80 {0x00000075, 0x34000000},
81 {0x00000076, 0x08000014},
82 {0x00000077, 0x00cc08ec},
83 {0x00000078, 0x00000400},
84 {0x00000079, 0x00000000},
85 {0x0000007a, 0x04090000},
86 {0x0000007c, 0x00000000},
87 {0x0000007e, 0x4408a8e8},
88 {0x0000007f, 0x00000304},
89 {0x00000080, 0x00000000},
90 {0x00000082, 0x00000001},
91 {0x00000083, 0x00000002},
92 {0x00000084, 0xf3e4f400},
93 {0x00000085, 0x052024e3},
94 {0x00000087, 0x00000000},
95 {0x00000088, 0x01000000},
96 {0x0000008a, 0x1c0a0000},
97 {0x0000008b, 0xff010000},
98 {0x0000008d, 0xffffefff},
99 {0x0000008e, 0xfff3efff},
100 {0x0000008f, 0xfff3efbf},
101 {0x00000092, 0xf7ffffff},
102 {0x00000093, 0xffffff7f},
103 {0x00000095, 0x00101101},
104 {0x00000096, 0x00000fff},
105 {0x00000097, 0x00116fff},
106 {0x00000098, 0x60010000},
107 {0x00000099, 0x10010000},
108 {0x0000009a, 0x00006000},
109 {0x0000009b, 0x00001000},
110 {0x0000009f, 0x00b48000}
115 * ci_mc_load_microcode - load MC ucode into the hw
117 * @rdev: radeon_device pointer
119 * Load the GDDR MC ucode into the hw (CIK).
120 * Returns 0 on success, error on failure.
122 static int ci_mc_load_microcode(struct radeon_device *rdev)
/* Uploads the GDDR memory-controller ucode (rdev->mc_fw) into the hw:
 * selects the per-ASIC io-reg table and ucode size, blacks out the MC,
 * resets the MC_SEQ engine, writes the io regs and ucode words, then
 * re-activates the engine and polls for link training on both channels.
 * NOTE(review): this chunk is missing lines dropped by extraction --
 * the opening brace, the 'u32 *io_mc_regs' declaration, the early
 * 'if (running) return 0;' style check, the switch case labels/breaks,
 * loop closing braces and the final 'return 0;' are all absent.
 * Verify against the upstream driver before building. */
124 const __be32 *fw_data;
125 u32 running, blackout = 0;
127 int i, ucode_size, regs_size;
/* per-family io-reg table / sizes (case labels not visible here) */
132 switch (rdev->family) {
135 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
136 ucode_size = CIK_MC_UCODE_SIZE;
137 regs_size = BONAIRE_IO_MC_REGS_SIZE;
141 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
/* black out the MC while the sequencer is reprogrammed */
145 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
146 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
149 /* reset the engine and set to writable */
150 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
151 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
153 /* load mc io regs */
154 for (i = 0; i < regs_size; i++) {
155 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
156 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
158 /* load the MC ucode */
159 fw_data = (const __be32 *)rdev->mc_fw->data;
/* firmware blobs are stored big-endian; convert per word */
160 for (i = 0; i < ucode_size; i++)
161 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
163 /* put the engine back into the active state */
164 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
165 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
166 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
168 /* wait for training to complete */
169 for (i = 0; i < rdev->usec_timeout; i++) {
170 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
/* second poll covers the D1 channel */
174 for (i = 0; i < rdev->usec_timeout; i++) {
175 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
/* restore the pre-upload blackout setting */
181 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
188 * cik_init_microcode - load ucode images from disk
190 * @rdev: radeon_device pointer
192 * Use the firmware interface to load the ucode images into
193 * the driver (not loaded into hw).
194 * Returns 0 on success, error on failure.
196 static int cik_init_microcode(struct radeon_device *rdev)
/* Fetches all CIK ucode images (pfp/me/ce/mec/rlc and, on dGPUs, mc)
 * from userspace via request_firmware() into rdev->*_fw, validating
 * each blob's size against the per-family expected length.  MC ucode
 * is skipped on APUs (RADEON_IS_IGP).
 * NOTE(review): extraction dropped lines here -- the opening brace,
 * 'char fw_name[30]; int err;' declarations, the 'if (pdev == NULL)'
 * guards, case labels/breaks, 'if (err) goto out;' checks, 'err =
 * -EINVAL; goto out;' error paths, the 'out:' label with the error
 * printk, the conditional release_firmware() cleanup chain and the
 * final 'return err;' are all absent.  Verify against upstream. */
198 struct platform_device *pdev;
199 const char *chip_name;
200 size_t pfp_req_size, me_req_size, ce_req_size,
201 mec_req_size, rlc_req_size, mc_req_size;
/* dummy platform device so request_firmware() has a struct device */
207 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
210 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
/* expected blob sizes (in bytes, hence * 4) per ASIC family */
214 switch (rdev->family) {
216 chip_name = "BONAIRE";
217 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
218 me_req_size = CIK_ME_UCODE_SIZE * 4;
219 ce_req_size = CIK_CE_UCODE_SIZE * 4;
220 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
221 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
222 mc_req_size = CIK_MC_UCODE_SIZE * 4;
225 chip_name = "KAVERI";
226 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
227 me_req_size = CIK_ME_UCODE_SIZE * 4;
228 ce_req_size = CIK_CE_UCODE_SIZE * 4;
229 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
230 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
233 chip_name = "KABINI";
234 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
235 me_req_size = CIK_ME_UCODE_SIZE * 4;
236 ce_req_size = CIK_CE_UCODE_SIZE * 4;
237 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
238 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
243 DRM_INFO("Loading %s Microcode\n", chip_name);
/* prefetch parser (PFP) ucode */
245 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
246 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
249 if (rdev->pfp_fw->size != pfp_req_size) {
251 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
252 rdev->pfp_fw->size, fw_name);
/* micro engine (ME) ucode */
257 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
258 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
261 if (rdev->me_fw->size != me_req_size) {
263 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
264 rdev->me_fw->size, fw_name);
/* constant engine (CE) ucode */
268 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
269 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
272 if (rdev->ce_fw->size != ce_req_size) {
274 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
275 rdev->ce_fw->size, fw_name);
/* micro engine compute (MEC) ucode */
279 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
280 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
283 if (rdev->mec_fw->size != mec_req_size) {
285 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
286 rdev->mec_fw->size, fw_name);
/* run list controller (RLC) ucode */
290 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
291 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
294 if (rdev->rlc_fw->size != rlc_req_size) {
296 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
297 rdev->rlc_fw->size, fw_name);
301 /* No MC ucode on APUs */
302 if (!(rdev->flags & RADEON_IS_IGP)) {
303 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
304 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
307 if (rdev->mc_fw->size != mc_req_size) {
309 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
310 rdev->mc_fw->size, fw_name);
/* the dummy device is only needed for the duration of the loads */
316 platform_device_unregister(pdev);
/* error path: drop any blobs already fetched (labels not visible) */
321 "cik_cp: Failed to load firmware \"%s\"\n",
323 release_firmware(rdev->pfp_fw);
325 release_firmware(rdev->me_fw);
327 release_firmware(rdev->ce_fw);
329 release_firmware(rdev->rlc_fw);
331 release_firmware(rdev->mc_fw);
341 * cik_tiling_mode_table_init - init the hw tiling table
343 * @rdev: radeon_device pointer
345 * Starting with SI, the tiling setup is done globally in a
346 * set of 32 tiling modes. Rather than selecting each set of
347 * parameters per surface as on older asics, we just select
348 * which index in the tiling table we want to use, and the
349 * surface uses those parameters (CIK).
351 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
/* Programs the 32 GB_TILE_MODEn and 16 GB_MACROTILE_MODEn registers.
 * The table variant is chosen by pipe count (8 / 4 / 2), with the
 * 4-pipe case further split on render backend count (num_rbs < 4
 * selects the P4_8x16 pipe config instead of P4_16x16).
 * NOTE(review): extraction dropped lines throughout -- the opening
 * brace, every 'case N:' label and 'break;', the 'default:' arms,
 * loop/switch closing braces and the trailing '}' are absent.  The
 * gb_tile_moden assignments below cannot be matched to register
 * indices without the missing case labels; verify against upstream. */
353 const u32 num_tile_mode_states = 32;
354 const u32 num_secondary_tile_mode_states = 16;
355 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
356 u32 num_pipe_configs;
357 u32 num_rbs = rdev->config.cik.max_backends_per_se *
358 rdev->config.cik.max_shader_engines;
/* map DRAM row size (KB) to the matching tile-split encoding */
360 switch (rdev->config.cik.mem_row_size_in_kb) {
362 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
366 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
369 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
373 num_pipe_configs = rdev->config.cik.max_tile_pipes;
374 if (num_pipe_configs > 8)
375 num_pipe_configs = 8; /* ??? */
/* ---- 8-pipe variant: P8_32x32 pipe configs ---- */
377 if (num_pipe_configs == 8) {
378 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
379 switch (reg_offset) {
381 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
382 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
383 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
384 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
387 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
388 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
389 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
390 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
393 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
394 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
395 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
396 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
399 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
401 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
402 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
405 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
406 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
407 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
408 TILE_SPLIT(split_equal_to_row_size));
411 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
415 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
417 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
421 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
422 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
423 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
424 TILE_SPLIT(split_equal_to_row_size));
427 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
428 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
431 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
432 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
435 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
436 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
437 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
441 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
442 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
443 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
447 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
448 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
449 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
453 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
454 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
457 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
458 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
459 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
463 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
464 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
465 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
469 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
470 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
471 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
475 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
476 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
479 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
480 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
481 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
485 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
486 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
487 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
488 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
491 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
492 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
493 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
500 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* 8-pipe macrotile (bank) table */
502 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
503 switch (reg_offset) {
505 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
508 NUM_BANKS(ADDR_SURF_16_BANK));
511 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
514 NUM_BANKS(ADDR_SURF_16_BANK));
517 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
520 NUM_BANKS(ADDR_SURF_16_BANK));
523 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
526 NUM_BANKS(ADDR_SURF_16_BANK));
529 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
532 NUM_BANKS(ADDR_SURF_8_BANK));
535 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
538 NUM_BANKS(ADDR_SURF_4_BANK));
541 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
544 NUM_BANKS(ADDR_SURF_2_BANK));
547 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
550 NUM_BANKS(ADDR_SURF_16_BANK));
553 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
556 NUM_BANKS(ADDR_SURF_16_BANK));
559 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
562 NUM_BANKS(ADDR_SURF_16_BANK));
565 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
568 NUM_BANKS(ADDR_SURF_16_BANK));
571 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
572 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
573 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
574 NUM_BANKS(ADDR_SURF_8_BANK));
577 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
580 NUM_BANKS(ADDR_SURF_4_BANK));
583 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
584 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
585 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
586 NUM_BANKS(ADDR_SURF_2_BANK));
592 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* ---- 4-pipe variant: P4_16x16 pipe configs ---- */
594 } else if (num_pipe_configs == 4) {
596 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
597 switch (reg_offset) {
599 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
600 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
601 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
602 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
605 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
606 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
607 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
608 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
611 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
612 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
613 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
614 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
617 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
618 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
619 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
620 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
623 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
624 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
625 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
626 TILE_SPLIT(split_equal_to_row_size));
629 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
630 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
633 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
634 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
635 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
636 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
639 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
640 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
641 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
642 TILE_SPLIT(split_equal_to_row_size));
645 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
646 PIPE_CONFIG(ADDR_SURF_P4_16x16));
649 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
650 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
653 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
654 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
655 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
659 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
660 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
661 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
662 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
665 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
666 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
667 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
671 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
672 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
675 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
676 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
677 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
678 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
681 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
682 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
683 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
687 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
688 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
689 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
690 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
693 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
694 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
697 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
698 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
699 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
703 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
704 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
705 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
709 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
710 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
711 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
718 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* ---- 4-pipe with < 4 render backends: P4_8x16 pipe configs ---- */
720 } else if (num_rbs < 4) {
721 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
722 switch (reg_offset) {
724 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
725 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
726 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
727 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
730 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
731 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
732 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
733 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
736 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
737 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
738 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
739 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
742 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
743 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
744 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
745 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
748 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
749 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
750 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
751 TILE_SPLIT(split_equal_to_row_size));
754 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
755 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
758 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
759 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
760 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
761 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
764 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
765 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
766 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
767 TILE_SPLIT(split_equal_to_row_size));
770 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
771 PIPE_CONFIG(ADDR_SURF_P4_8x16));
774 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
775 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
778 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
779 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
780 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
781 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
784 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
785 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
786 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
787 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
790 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
791 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
792 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
793 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
796 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
797 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
800 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
801 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
802 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
803 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
806 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
807 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
808 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
812 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
813 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
814 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
815 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
818 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
819 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
822 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
823 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
824 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
825 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
828 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
829 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
830 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
831 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
834 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
835 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
836 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
837 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
843 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* macrotile (bank) table shared by the 4-pipe variants */
846 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
847 switch (reg_offset) {
849 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
852 NUM_BANKS(ADDR_SURF_16_BANK));
855 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
856 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
857 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
858 NUM_BANKS(ADDR_SURF_16_BANK));
861 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
864 NUM_BANKS(ADDR_SURF_16_BANK));
867 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
868 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
869 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
870 NUM_BANKS(ADDR_SURF_16_BANK));
873 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
874 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
875 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
876 NUM_BANKS(ADDR_SURF_16_BANK));
879 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
880 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
881 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
882 NUM_BANKS(ADDR_SURF_8_BANK));
885 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
888 NUM_BANKS(ADDR_SURF_4_BANK));
891 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
894 NUM_BANKS(ADDR_SURF_16_BANK));
897 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
898 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
899 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
900 NUM_BANKS(ADDR_SURF_16_BANK));
903 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
906 NUM_BANKS(ADDR_SURF_16_BANK));
909 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
910 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
911 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
912 NUM_BANKS(ADDR_SURF_16_BANK));
915 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
918 NUM_BANKS(ADDR_SURF_16_BANK));
921 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
924 NUM_BANKS(ADDR_SURF_8_BANK));
927 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
928 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
929 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
930 NUM_BANKS(ADDR_SURF_4_BANK));
936 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* ---- 2-pipe variant: P2 pipe configs ---- */
938 } else if (num_pipe_configs == 2) {
939 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
940 switch (reg_offset) {
942 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
943 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
944 PIPE_CONFIG(ADDR_SURF_P2) |
945 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
948 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
949 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
950 PIPE_CONFIG(ADDR_SURF_P2) |
951 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
954 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
955 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
956 PIPE_CONFIG(ADDR_SURF_P2) |
957 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
960 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
961 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
962 PIPE_CONFIG(ADDR_SURF_P2) |
963 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
966 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
967 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
968 PIPE_CONFIG(ADDR_SURF_P2) |
969 TILE_SPLIT(split_equal_to_row_size));
972 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
973 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
976 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
977 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
978 PIPE_CONFIG(ADDR_SURF_P2) |
979 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
982 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
983 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
984 PIPE_CONFIG(ADDR_SURF_P2) |
985 TILE_SPLIT(split_equal_to_row_size));
988 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
991 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
992 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
995 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
996 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
997 PIPE_CONFIG(ADDR_SURF_P2) |
998 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1001 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1002 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1003 PIPE_CONFIG(ADDR_SURF_P2) |
1004 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1007 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1008 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1009 PIPE_CONFIG(ADDR_SURF_P2) |
1010 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1013 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1014 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1017 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1018 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1019 PIPE_CONFIG(ADDR_SURF_P2) |
1020 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1023 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1024 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1025 PIPE_CONFIG(ADDR_SURF_P2) |
1026 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1029 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1030 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1031 PIPE_CONFIG(ADDR_SURF_P2) |
1032 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1035 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1036 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1039 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1040 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1041 PIPE_CONFIG(ADDR_SURF_P2) |
1042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1045 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1046 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1047 PIPE_CONFIG(ADDR_SURF_P2) |
1048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1051 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1052 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1053 PIPE_CONFIG(ADDR_SURF_P2) |
1054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1060 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* 2-pipe macrotile (bank) table */
1062 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1063 switch (reg_offset) {
1065 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1066 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1067 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1068 NUM_BANKS(ADDR_SURF_16_BANK));
1071 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1072 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1073 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1074 NUM_BANKS(ADDR_SURF_16_BANK));
1077 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1078 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1079 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1080 NUM_BANKS(ADDR_SURF_16_BANK));
1083 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1084 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1085 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1086 NUM_BANKS(ADDR_SURF_16_BANK));
1089 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1090 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1091 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1092 NUM_BANKS(ADDR_SURF_16_BANK));
1095 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1096 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1097 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1098 NUM_BANKS(ADDR_SURF_16_BANK));
1101 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1102 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1103 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1104 NUM_BANKS(ADDR_SURF_8_BANK));
1107 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1108 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1109 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1110 NUM_BANKS(ADDR_SURF_16_BANK));
1113 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1114 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1115 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1116 NUM_BANKS(ADDR_SURF_16_BANK));
1119 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1120 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1121 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1122 NUM_BANKS(ADDR_SURF_16_BANK));
1125 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1128 NUM_BANKS(ADDR_SURF_16_BANK));
1131 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1132 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1133 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1134 NUM_BANKS(ADDR_SURF_16_BANK));
1137 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1138 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1139 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1140 NUM_BANKS(ADDR_SURF_16_BANK));
1143 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1146 NUM_BANKS(ADDR_SURF_8_BANK));
1152 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* unsupported pipe count -- tables are left unprogrammed */
1155 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1159 * cik_select_se_sh - select which SE, SH to address
1161 * @rdev: radeon_device pointer
1162 * @se_num: shader engine to address
1163 * @sh_num: sh block to address
1165 * Select which SE, SH combinations to address. Certain
1166 * registers are instanced per SE or SH. 0xffffffff means
1167 * broadcast to all SEs or SHs (CIK).
1169 static void cik_select_se_sh(struct radeon_device *rdev,
1170 u32 se_num, u32 sh_num)
1172 u32 data = INSTANCE_BROADCAST_WRITES;
1174 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1175 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1176 else if (se_num == 0xffffffff)
1177 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1178 else if (sh_num == 0xffffffff)
1179 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1181 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1182 WREG32(GRBM_GFX_INDEX, data);
1186 * cik_create_bitmask - create a bitmask
1188 * @bit_width: length of the mask
1190 * create a variable length bit mask (CIK).
1191 * Returns the bitmask.
1193 static u32 cik_create_bitmask(u32 bit_width)
1197 for (i = 0; i < bit_width; i++) {
 * cik_get_rb_disabled - get the disabled RB (render backend) bitmask
1207 * @rdev: radeon_device pointer
1208 * @max_rb_num: max RBs (render backends) for the asic
1209 * @se_num: number of SEs (shader engines) for the asic
1210 * @sh_per_se: number of SH blocks per SE for the asic
1212 * Calculates the bitmask of disabled RBs (CIK).
1213 * Returns the disabled RB bitmask.
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
	/* hard (fuse/strap) disabled RBs for the currently selected SE/SH */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	/* keep only the backend-disable field */
	data &= BACKEND_DISABLE_MASK;
	/* merge in the user/driver-disabled RBs */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
	/* shift the combined field down to bit 0 */
	data >>= BACKEND_DISABLE_SHIFT;
	/* one bit per RB in this SH: max_rb_num / se_num / sh_per_se bits */
	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1236 * cik_setup_rb - setup the RBs on the asic
1238 * @rdev: radeon_device pointer
1239 * @se_num: number of SEs (shader engines) for the asic
1240 * @sh_per_se: number of SH blocks per SE for the asic
1241 * @max_rb_num: max RBs (render backends) for the asic
1243 * Configures per-SE/SH RB registers (CIK).
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather the disabled-RB bits of every SE/SH into one global mask */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
	/* back to broadcast so later writes hit all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: set a bit in enabled_rbs for every RB not disabled */
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;

	/* program the raster config per SE, two bits per RB pair */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
		WREG32(PA_SC_RASTER_CONFIG, data);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1294 * cik_gpu_init - setup the 3D engine
1296 * @rdev: radeon_device pointer
1298 * Configures the 3D engine and tiling configuration
1299 * registers so that the 3D engine is usable.
static void cik_gpu_init(struct radeon_device *rdev)
	/* current tiling setup; the row-size bits are fixed up below */
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;

	/* per-family shader engine / backend limits.
	 * NOTE(review): the CHIP_* case labels are not visible in this
	 * extract; the two blocks below appear to be the BONAIRE and the
	 * small-APU configurations -- confirm against the full file.
	 */
	switch (rdev->family) {
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* let the BIF read/write the frame buffer */
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size in KB from the column count, cap at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
		gb_addr_config |= ROW_SIZE(0);
		gb_addr_config |= ROW_SIZE(1);
		gb_addr_config |= ROW_SIZE(2);

	/* setup tiling info dword. gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
		rdev->config.cik.tile_config |= (0 << 0);
		rdev->config.cik.tile_config |= (1 << 0);
		rdev->config.cik.tile_config |= (2 << 0);
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* FIFO sizes come from the per-family config set above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* make HDP flushes also invalidate its cache */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no change: posts/settles the register */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1498 * GPU scratch registers helpers function.
1501 * cik_scratch_init - setup driver info for CP scratch regs
1503 * @rdev: radeon_device pointer
1505 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
1507 * is not used by default on newer asics (r6xx+). On newer asics,
1508 * memory buffers are used for fences rather than scratch regs.
1510 static void cik_scratch_init(struct radeon_device *rdev)
1514 rdev->scratch.num_reg = 7;
1515 rdev->scratch.reg_base = SCRATCH_REG0;
1516 for (i = 0; i < rdev->scratch.num_reg; i++) {
1517 rdev->scratch.free[i] = true;
1518 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1523 * cik_ring_test - basic gfx ring test
1525 * @rdev: radeon_device pointer
1526 * @ring: radeon_ring structure holding ring information
1528 * Allocate a scratch register and write to it using the gfx ring (CIK).
1529 * Provides a basic gfx ring test to verify that the ring is working.
1530 * Used by cik_cp_gfx_resume();
1531 * Returns 0 on success, error on failure.
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
	/* grab a scratch register and seed it with a sentinel value */
	r = radeon_scratch_get(rdev, &scratch);
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
	WREG32(scratch, 0xCAFEDEAD);
	/* 3 dwords: SET_UCONFIG_REG packet writing 0xDEADBEEF to scratch */
	r = radeon_ring_lock(rdev, ring, 3);
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);
	/* poll until the CP lands the write or the timeout expires */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
	radeon_scratch_free(rdev, scratch);
1574 * cik_fence_ring_emit - emit a fence on the gfx ring
1576 * @rdev: radeon_device pointer
1577 * @fence: radeon fence object
 * Emits a fence sequence number on the gfx ring and flushes
void cik_fence_ring_emit(struct radeon_device *rdev,
			 struct radeon_fence *fence)
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1): write the 32-bit seq; INT_SEL(2): raise an interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush via WRITE_DATA to HDP_MEM_COHERENCY_FLUSH_CNTL. */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
1610 void cik_semaphore_ring_emit(struct radeon_device *rdev,
1611 struct radeon_ring *ring,
1612 struct radeon_semaphore *semaphore,
1615 uint64_t addr = semaphore->gpu_addr;
1616 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1618 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1619 radeon_ring_write(ring, addr & 0xffffffff);
1620 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1627 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1629 * @rdev: radeon_device pointer
1630 * @ib: radeon indirect buffer object
1632 * Emits an DE (drawing engine) or CE (constant engine) IB
1633 * on the gfx ring. IBs are usually generated by userspace
1634 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring. This function schedules the IB
1636 * on the gfx ring for execution by the GPU.
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	if (ring->rptr_save_reg) {
		/* 3 dwords for this write plus 4 for the IB packet below */
		next_rptr = ring->wptr + 3 + 4;
		radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		radeon_ring_write(ring, ((ring->rptr_save_reg -
					  PACKET3_SET_UCONFIG_REG_START) >> 2));
		radeon_ring_write(ring, next_rptr);
	} else if (rdev->wb.enabled) {
		/* 5 dwords for the WRITE_DATA plus 4 for the IB packet */
		next_rptr = ring->wptr + 5 + 4;
		radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
		radeon_ring_write(ring, next_rptr);
	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	/* IB length in dwords plus the VM id in bits 31:24 (0 = no VM) */
	control |= ib->length_dw |
		   (ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
1683 * cik_ib_test - basic gfx ring IB test
1685 * @rdev: radeon_device pointer
1686 * @ring: radeon_ring structure holding ring information
1688 * Allocate an IB and execute it on the gfx ring (CIK).
1689 * Provides a basic gfx ring test to verify that IBs are working.
1690 * Returns 0 on success, error on failure.
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
	struct radeon_ib ib;
	/* seed a scratch register with a sentinel */
	r = radeon_scratch_get(rdev, &scratch);
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
	/* three-dword IB: write 0xDEADBEEF into the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	r = radeon_ib_schedule(rdev, &ib, NULL);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
	/* wait for the IB's fence, then poll for the scratch write */
	r = radeon_fence_wait(ib.fence, false);
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
 * On CIK, gfx and compute now have independent command processors.
1750 * Gfx consists of a single ring and can process both gfx jobs and
1751 * compute jobs. The gfx CP consists of three microengines (ME):
1752 * PFP - Pre-Fetch Parser
1754 * CE - Constant Engine
1755 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
1757 * used by the DE so that they can be loaded into cache in parallel
1758 * while the DE is processing state update packets.
1761 * The compute CP consists of two microengines (ME):
1762 * MEC1 - Compute MicroEngine 1
1763 * MEC2 - Compute MicroEngine 2
1764 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1765 * The queues are exposed to userspace and are programmed directly
1766 * by the compute runtime.
1769 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1771 * @rdev: radeon_device pointer
1772 * @enable: enable or disable the MEs
1774 * Halts or unhalts the gfx MEs.
1776 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1779 WREG32(CP_ME_CNTL, 0);
1781 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1782 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1788 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1790 * @rdev: radeon_device pointer
1792 * Loads the gfx PFP, ME, and CE ucode.
1793 * Returns 0 for success, -EINVAL if the ucode is not available.
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
	const __be32 *fw_data;

	/* all three gfx ucode images must have been fetched */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)

	/* halt the gfx MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* reset the load addresses */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
1834 * cik_cp_gfx_start - start the gfx ring
1836 * @rdev: radeon_device pointer
1838 * Enables the ring and loads the clear state context and other
1839 * packets required to init the ring.
1840 * Returns 0 for success, error for failure.
static int cik_cp_gfx_start(struct radeon_device *rdev)
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	/* basic CP setup */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the default state plus 17 setup dwords */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);

	/* init the CE partitions. CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the golden register state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);
1895 * cik_cp_gfx_fini - stop the gfx ring
1897 * @rdev: radeon_device pointer
1899 * Stop the gfx ring and tear down the driver ring
1902 static void cik_cp_gfx_fini(struct radeon_device *rdev)
1904 cik_cp_gfx_enable(rdev, false);
1905 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1909 * cik_cp_gfx_resume - setup the gfx ring buffer registers
1911 * @rdev: radeon_device pointer
1913 * Program the location and size of the gfx ring buffer
1914 * and test it to make sure it's working.
1915 * Returns 0 for success, error for failure.
static int cik_cp_gfx_resume(struct radeon_device *rdev)
	struct radeon_ring *ring;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
	/* big-endian byte swap (only reached on BE kernels) */
		tmp |= BUF_SWAP_32BIT;
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address, 256-byte aligned */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1982 * cik_cp_compute_enable - enable/disable the compute CP MEs
1984 * @rdev: radeon_device pointer
1985 * @enable: enable or disable the MEs
1987 * Halts or unhalts the compute MEs.
1989 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
1992 WREG32(CP_MEC_CNTL, 0);
1994 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
1999 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2001 * @rdev: radeon_device pointer
2003 * Loads the compute MEC1&2 ucode.
2004 * Returns 0 for success, -EINVAL if the ucode is not available.
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
	const __be32 *fw_data;

	/* halt the compute MEs before loading ucode */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	/* KAVERI has a second MEC; the same image is loaded into it */
	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2036 * cik_cp_compute_start - start the compute queues
2038 * @rdev: radeon_device pointer
2040 * Enable the compute queues.
2041 * Returns 0 for success, error for failure.
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* XXX: compute queue setup is not implemented yet */
	return 0;
}
2050 * cik_cp_compute_fini - stop the compute queues
2052 * @rdev: radeon_device pointer
2054 * Stop the compute queues and tear down the driver queue
2057 static void cik_cp_compute_fini(struct radeon_device *rdev)
2059 cik_cp_compute_enable(rdev, false);
2064 * cik_cp_compute_resume - setup the compute queue registers
2066 * @rdev: radeon_device pointer
2068 * Program the compute queues and test them to make sure they
2070 * Returns 0 for success, error for failure.
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r;

	/* queue programming is still a stub; just kick the start path */
	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	return 0;
}
2083 /* XXX temporary wrappers to handle both compute and gfx */
2085 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2087 cik_cp_gfx_enable(rdev, enable);
2088 cik_cp_compute_enable(rdev, enable);
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	/* load gfx ucode first, then compute; bail on the first failure */
	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* tear down gfx first, then the compute side */
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
static int cik_cp_resume(struct radeon_device *rdev)
	/* Reset all cp blocks */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
	RREG32(GRBM_SOFT_RESET);	/* read back to post the write */
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	/* reload ucode, then bring up the gfx ring and compute queues */
	r = cik_cp_load_microcode(rdev);

	r = cik_cp_gfx_resume(rdev);
	r = cik_cp_compute_resume(rdev);
2140 * cik_gpu_is_lockup - check if the 3D engine is locked up
2142 * @rdev: radeon_device pointer
2143 * @ring: radeon_ring structure holding ring information
2145 * Check if the 3D engine is locked up (CIK).
2146 * Returns true if the engine is locked, false if not.
bool cik_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
	u32 srbm_status, srbm_status2;
	u32 grbm_status, grbm_status2;
	u32 grbm_status_se0, grbm_status_se1, grbm_status_se2, grbm_status_se3;

	/* sample all the status registers; only grbm_status is actually
	 * tested below
	 */
	srbm_status = RREG32(SRBM_STATUS);
	srbm_status2 = RREG32(SRBM_STATUS2);
	grbm_status = RREG32(GRBM_STATUS);
	grbm_status2 = RREG32(GRBM_STATUS2);
	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
	grbm_status_se2 = RREG32(GRBM_STATUS_SE2);
	grbm_status_se3 = RREG32(GRBM_STATUS_SE3);
	if (!(grbm_status & GUI_ACTIVE)) {
		/* engine idle: refresh the lockup tracker, not locked up */
		radeon_ring_lockup_update(ring);
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
2172 * cik_gfx_gpu_soft_reset - soft reset the 3D engine and CPG
2174 * @rdev: radeon_device pointer
2176 * Soft reset the GFX engine and CPG blocks (CIK).
 * XXX: deal with resetting RLC and CPF
2178 * Returns 0 for success.
static int cik_gfx_gpu_soft_reset(struct radeon_device *rdev)
	struct evergreen_mc_save save;

	/* nothing to do if the gfx engine is already idle */
	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))

	dev_info(rdev->dev, "GPU GFX softreset \n");
	/* dump engine status before the reset */
	dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
		 RREG32(GRBM_STATUS));
	dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
		 RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
		 RREG32(SRBM_STATUS));
	dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
		 RREG32(SRBM_STATUS2));
	/* stop MC traffic while the blocks are in reset */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* reset all the gfx block and all CPG blocks */
	grbm_reset = SOFT_RESET_CPG | SOFT_RESET_GFX;

	dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
	WREG32(GRBM_SOFT_RESET, grbm_reset);
	(void)RREG32(GRBM_SOFT_RESET);	/* read back to post the write */
	WREG32(GRBM_SOFT_RESET, 0);
	(void)RREG32(GRBM_SOFT_RESET);
	/* Wait a little for things to settle down */
	/* dump engine status again after the reset */
	dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
		 RREG32(GRBM_STATUS));
	dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
		 RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
		 RREG32(SRBM_STATUS));
	dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
		 RREG32(SRBM_STATUS2));
	evergreen_mc_resume(rdev, &save);
2244 * cik_compute_gpu_soft_reset - soft reset CPC
2246 * @rdev: radeon_device pointer
2248 * Soft reset the CPC blocks (CIK).
 * XXX: deal with resetting RLC and CPF
2250 * Returns 0 for success.
2252 static int cik_compute_gpu_soft_reset(struct radeon_device *rdev)
2254 struct evergreen_mc_save save;
2257 dev_info(rdev->dev, "GPU compute softreset \n");
2258 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2259 RREG32(GRBM_STATUS));
2260 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2261 RREG32(GRBM_STATUS2));
2262 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2263 RREG32(GRBM_STATUS_SE0));
2264 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2265 RREG32(GRBM_STATUS_SE1));
2266 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2267 RREG32(GRBM_STATUS_SE2));
2268 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2269 RREG32(GRBM_STATUS_SE3));
2270 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2271 RREG32(SRBM_STATUS));
2272 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2273 RREG32(SRBM_STATUS2));
2274 evergreen_mc_stop(rdev, &save);
2275 if (radeon_mc_wait_for_idle(rdev)) {
2276 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2278 /* Disable CP parsing/prefetching */
2279 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2281 /* reset all the CPC blocks */
2282 grbm_reset = SOFT_RESET_CPG;
2284 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2285 WREG32(GRBM_SOFT_RESET, grbm_reset);
2286 (void)RREG32(GRBM_SOFT_RESET);
2288 WREG32(GRBM_SOFT_RESET, 0);
2289 (void)RREG32(GRBM_SOFT_RESET);
2290 /* Wait a little for things to settle down */
2292 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2293 RREG32(GRBM_STATUS));
2294 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2295 RREG32(GRBM_STATUS2));
2296 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2297 RREG32(GRBM_STATUS_SE0));
2298 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2299 RREG32(GRBM_STATUS_SE1));
2300 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2301 RREG32(GRBM_STATUS_SE2));
2302 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2303 RREG32(GRBM_STATUS_SE3));
2304 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2305 RREG32(SRBM_STATUS));
2306 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2307 RREG32(SRBM_STATUS2));
2308 evergreen_mc_resume(rdev, &save);
2313  * cik_asic_reset - soft reset compute and gfx
2315  * @rdev: radeon_device pointer
2317  * Soft reset the CPC blocks (CIK).
2318  * XXX: make this more fine grained and only reset
2319  * what is necessary.
2320  * Returns 0 for success.
2322 int cik_asic_reset(struct radeon_device *rdev)
/* Reset compute first; on failure only log — gfx reset is attempted anyway. */
2326 	r = cik_compute_gpu_soft_reset(rdev);
2328 		dev_info(rdev->dev, "Compute reset failed!\n");
/* Return value comes from the gfx reset, not the compute reset. */
2330 	return cik_gfx_gpu_soft_reset(rdev);
2335  * cik_mc_program - program the GPU memory controller
2337  * @rdev: radeon_device pointer
2339  * Set the location of vram, gart, and AGP in the GPU's
2340  * physical address space (CIK).
2342 static void cik_mc_program(struct radeon_device *rdev)
2344 	struct evergreen_mc_save save;
2348 	/* Initialize HDP */
/* Zero the 32 HDP tiling-config register sets (5 regs each, 0x18 apart). */
2349 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2350 		WREG32((0x2c14 + j), 0x00000000);
2351 		WREG32((0x2c18 + j), 0x00000000);
2352 		WREG32((0x2c1c + j), 0x00000000);
2353 		WREG32((0x2c20 + j), 0x00000000);
2354 		WREG32((0x2c24 + j), 0x00000000);
2356 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
/* Stop MC clients and wait for idle before moving the apertures. */
2358 	evergreen_mc_stop(rdev, &save);
2359 	if (radeon_mc_wait_for_idle(rdev)) {
2360 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2362 	/* Lockout access through VGA aperture*/
2363 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2364 	/* Update configuration */
2365 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2366 		rdev->mc.vram_start >> 12);
2367 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2368 		rdev->mc.vram_end >> 12);
2369 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2370 		rdev->vram_scratch.gpu_addr >> 12);
/* FB_LOCATION packs end (high 16 bits) and start (low 16) in 16MB units. */
2371 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2372 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2373 	WREG32(MC_VM_FB_LOCATION, tmp);
2374 	/* XXX double check these! */
2375 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2376 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2377 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* No AGP on CIK: base 0, top below bottom effectively disables the range. */
2378 	WREG32(MC_VM_AGP_BASE, 0);
2379 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2380 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2381 	if (radeon_mc_wait_for_idle(rdev)) {
2382 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2384 	evergreen_mc_resume(rdev, &save);
2385 	/* we need to own VRAM, so turn off the VGA renderer here
2386 	 * to stop it overwriting our objects */
2387 	rv515_vga_render_disable(rdev);
2391  * cik_mc_init - initialize the memory controller driver params
2393  * @rdev: radeon_device pointer
2395  * Look up the amount of vram, vram width, and decide how to place
2396  * vram and gart within the GPU's physical address space (CIK).
2397  * Returns 0 for success.
2399 static int cik_mc_init(struct radeon_device *rdev)
2402 	int chansize, numchan;
2404 	/* Get VRAM informations */
2405 	rdev->mc.vram_is_ddr = true;
2406 	tmp = RREG32(MC_ARB_RAMCFG);
2407 	if (tmp & CHANSIZE_MASK) {
/* NOTE(review): the chansize assignments and the NOOFCHAN switch cases are
 * not visible in this chunk — numchan is derived from MC_SHARED_CHMAP. */
2412 	tmp = RREG32(MC_SHARED_CHMAP);
2413 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2443 	rdev->mc.vram_width = numchan * chansize;
2444 	/* Could aper size report 0 ? */
2445 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2446 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2447 	/* size in MB on si */
/* CONFIG_MEMSIZE reports VRAM in MB; convert to bytes. */
2448 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2449 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2450 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* CIK reuses the SI placement logic for vram/gtt. */
2451 	si_vram_gtt_location(rdev, &rdev->mc);
2452 	radeon_update_bandwidth_info(rdev);
2459 * VMID 0 is the physical GPU addresses as used by the kernel.
2460 * VMIDs 1-15 are used for userspace clients and are handled
2461 * by the radeon vm/hsa code.
2464  * cik_pcie_gart_tlb_flush - gart tlb flush callback
2466  * @rdev: radeon_device pointer
2468  * Flush the TLB for the VMID 0 page table (CIK).
2470 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
2472 	/* flush hdp cache */
2473 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
2475 	/* bits 0-15 are the VM contexts0-15 */
/* Only bit 0 set: invalidate VM context 0 (the kernel's GART mapping). */
2476 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
2480  * cik_pcie_gart_enable - gart enable
2482  * @rdev: radeon_device pointer
2484  * This sets up the TLBs, programs the page tables for VMID0,
2485  * sets up the hw for VMIDs 1-15 which are allocated on
2486  * demand, and sets up the global locations for the LDS, GDS,
2487  * and GPUVM for FSA64 clients (CIK).
2488  * Returns 0 for success, errors for failure.
2490 static int cik_pcie_gart_enable(struct radeon_device *rdev)
2494 	if (rdev->gart.robj == NULL) {
2495 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2498 	r = radeon_gart_table_vram_pin(rdev);
2501 	radeon_gart_restore(rdev);
2502 	/* Setup TLB control */
2503 	WREG32(MC_VM_MX_L1_TLB_CNTL,
2506 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2507 	       ENABLE_ADVANCED_DRIVER_MODEL |
2508 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2509 	/* Setup L2 cache */
2510 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
2511 	       ENABLE_L2_FRAGMENT_PROCESSING |
2512 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2513 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2514 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2515 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2516 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
2517 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2518 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
2519 	/* setup context0 */
/* Context 0 maps the GART range using the pinned page table in VRAM;
 * unmapped accesses fault to the dummy page. */
2520 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2521 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2522 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2523 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
2524 			(u32)(rdev->dummy_page.addr >> 12));
2525 	WREG32(VM_CONTEXT0_CNTL2, 0);
2526 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2527 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
2533 	/* empty context1-15 */
2534 	/* FIXME start with 4G, once using 2 level pt switch to full
2537 	/* set vm size, must be a multiple of 4 */
2538 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
2539 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
/* Contexts 1-7 live in the CONTEXT0..7 base-addr bank, 8-15 in the
 * CONTEXT8..15 bank; all start out pointing at the GART table. */
2540 	for (i = 1; i < 16; i++) {
2542 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
2543 			       rdev->gart.table_addr >> 12);
2545 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2546 			       rdev->gart.table_addr >> 12);
2549 	/* enable context1-15 */
2550 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2551 	       (u32)(rdev->dummy_page.addr >> 12));
2552 	WREG32(VM_CONTEXT1_CNTL2, 4);
2553 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
2554 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2555 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2556 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2557 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2558 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
2559 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
2560 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
2561 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
2562 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
2563 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
2564 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2565 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
2567 	/* TC cache setup ??? */
2568 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
2569 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
2570 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
2572 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
2573 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
2574 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
2575 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
2576 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
2578 	WREG32(TC_CFG_L1_VOLATILE, 0);
2579 	WREG32(TC_CFG_L2_VOLATILE, 0);
2581 	if (rdev->family == CHIP_KAVERI) {
2582 		u32 tmp = RREG32(CHUB_CONTROL);
2584 		WREG32(CHUB_CONTROL, tmp);
2587 	/* XXX SH_MEM regs */
2588 	/* where to put LDS, scratch, GPUVM in FSA64 space */
/* Program the SH_MEM_* aperture registers once per VMID via SRBM_GFX_CNTL
 * banking, then restore VMID 0. */
2589 	for (i = 0; i < 16; i++) {
2590 		WREG32(SRBM_GFX_CNTL, VMID(i));
2591 		WREG32(SH_MEM_CONFIG, 0);
2592 		WREG32(SH_MEM_APE1_BASE, 1);
2593 		WREG32(SH_MEM_APE1_LIMIT, 0);
2594 		WREG32(SH_MEM_BASES, 0);
2596 	WREG32(SRBM_GFX_CNTL, 0);
2598 	cik_pcie_gart_tlb_flush(rdev);
2599 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2600 		 (unsigned)(rdev->mc.gtt_size >> 20),
2601 		 (unsigned long long)rdev->gart.table_addr);
2602 	rdev->gart.ready = true;
2607  * cik_pcie_gart_disable - gart disable
2609  * @rdev: radeon_device pointer
2611  * This disables all VM page table (CIK).
2613 static void cik_pcie_gart_disable(struct radeon_device *rdev)
2615 	/* Disable all tables */
2616 	WREG32(VM_CONTEXT0_CNTL, 0);
2617 	WREG32(VM_CONTEXT1_CNTL, 0);
2618 	/* Setup TLB control */
2619 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2620 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2621 	/* Setup L2 cache */
/* NOTE(review): the WREG32(VM_L2_CNTL, ...) opener for these flags is not
 * visible in this chunk — presumably written without ENABLE_L2_CACHE here. */
2623 	       ENABLE_L2_FRAGMENT_PROCESSING |
2624 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2625 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2626 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2627 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2628 	WREG32(VM_L2_CNTL2, 0);
2629 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2630 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
2631 	radeon_gart_table_vram_unpin(rdev);
2635  * cik_pcie_gart_fini - vm fini callback
2637  * @rdev: radeon_device pointer
2639  * Tears down the driver GART/VM setup (CIK).
2641 static void cik_pcie_gart_fini(struct radeon_device *rdev)
/* Disable first so the hw no longer references the table, then free it. */
2643 	cik_pcie_gart_disable(rdev);
2644 	radeon_gart_table_vram_free(rdev);
2645 	radeon_gart_fini(rdev);
2650  * cik_ib_parse - vm ib_parse callback
2652  * @rdev: radeon_device pointer
2653  * @ib: indirect buffer pointer
2655  * CIK uses hw IB checking so this is a nop (CIK).
/* No sw validation performed; body (return) is outside this chunk's view. */
2657 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2664 * VMID 0 is the physical GPU addresses as used by the kernel.
2665 * VMIDs 1-15 are used for userspace clients and are handled
2666 * by the radeon vm/hsa code.
2669  * cik_vm_init - cik vm init callback
2671  * @rdev: radeon_device pointer
2673  * Inits cik specific vm parameters (number of VMs, base of vram for
2674  * VMIDs 1-15) (CIK).
2675  * Returns 0 for success.
2677 int cik_vm_init(struct radeon_device *rdev)
/* 16 VM contexts total: VMID 0 is kernel, 1-15 are userspace. */
2680 	rdev->vm_manager.nvm = 16;
2681 	/* base offset of vram pages */
/* On APUs (IGP) VRAM is carved out of system memory at MC_VM_FB_OFFSET;
 * discrete parts start at 0. */
2682 	if (rdev->flags & RADEON_IS_IGP) {
2683 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
2685 		rdev->vm_manager.vram_base_offset = tmp;
2687 		rdev->vm_manager.vram_base_offset = 0;
2693  * cik_vm_fini - cik vm fini callback
2695  * @rdev: radeon_device pointer
2697  * Tear down any asic specific VM setup (CIK).
/* Nothing asic-specific to tear down on CIK; body is empty. */
2699 void cik_vm_fini(struct radeon_device *rdev)
2704  * cik_vm_flush - cik vm flush using the CP
2706  * @rdev: radeon_device pointer
2708  * Update the page table base and flush the VM TLB
2709  * using the CP (CIK).
2711 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2713 	struct radeon_ring *ring = &rdev->ring[ridx];
/* Write the new page directory base for this VMID.  VMIDs 0-7 and 8-15
 * use two separate register banks (CONTEXT0_ vs CONTEXT8_). */
2718 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2719 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2720 				 WRITE_DATA_DST_SEL(0)));
2722 		radeon_ring_write(ring,
2723 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
2725 		radeon_ring_write(ring,
2726 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
2728 	radeon_ring_write(ring, 0);
2729 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2731 	/* update SH_MEM_* regs */
/* Select this VMID's SH_MEM register bank via SRBM_GFX_CNTL... */
2732 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2733 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2734 				 WRITE_DATA_DST_SEL(0)));
2735 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
2736 	radeon_ring_write(ring, 0);
2737 	radeon_ring_write(ring, VMID(vm->id));
/* ...write the four consecutive SH_MEM_* registers in one packet... */
2739 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
2740 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2741 				 WRITE_DATA_DST_SEL(0)));
2742 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
2743 	radeon_ring_write(ring, 0);
2745 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
2746 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
2747 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
2748 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
/* ...and restore the bank selection back to VMID 0. */
2750 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2751 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2752 				 WRITE_DATA_DST_SEL(0)));
2753 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
2754 	radeon_ring_write(ring, 0);
2755 	radeon_ring_write(ring, VMID(0));
2758 	/* We should be using the WAIT_REG_MEM packet here like in
2759 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
/* Flush the HDP cache before invalidating the TLB. */
2762 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2763 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2764 				 WRITE_DATA_DST_SEL(0)));
2765 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2766 	radeon_ring_write(ring, 0);
2767 	radeon_ring_write(ring, 0);
2769 	/* bits 0-15 are the VM contexts0-15 */
2770 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2771 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2772 				 WRITE_DATA_DST_SEL(0)));
2773 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
2774 	radeon_ring_write(ring, 0);
2775 	radeon_ring_write(ring, 1 << vm->id);
2777 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
2778 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2779 	radeon_ring_write(ring, 0x0);
2784 * The RLC is a multi-purpose microengine that handles a
2785 * variety of functions, the most important of which is
2786 * the interrupt controller.
2789  * cik_rlc_stop - stop the RLC ME
2791  * @rdev: radeon_device pointer
2793  * Halt the RLC ME (MicroEngine) (CIK).
2795 static void cik_rlc_stop(struct radeon_device *rdev)
/* Mask the context busy/empty interrupts while the RLC is down. */
2800 	tmp = RREG32(CP_INT_CNTL_RING0);
2801 	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2802 	WREG32(CP_INT_CNTL_RING0, tmp);
/* Repeated readbacks — presumably a settling delay; TODO confirm intent. */
2804 	RREG32(CB_CGTT_SCLK_CTRL);
2805 	RREG32(CB_CGTT_SCLK_CTRL);
2806 	RREG32(CB_CGTT_SCLK_CTRL);
2807 	RREG32(CB_CGTT_SCLK_CTRL);
/* Clear the low two bits of the coarse/fine clock-gating control. */
2809 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
2810 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
2812 	WREG32(RLC_CNTL, 0);
/* Poll every SE/SH for the serdes CU masters to go idle, then the
 * non-CU masters, each bounded by usec_timeout iterations. */
2814 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
2815 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
2816 			cik_select_se_sh(rdev, i, j);
2817 			for (k = 0; k < rdev->usec_timeout; k++) {
2818 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
2824 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2826 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
2827 	for (k = 0; k < rdev->usec_timeout; k++) {
2828 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2835  * cik_rlc_start - start the RLC ME
2837  * @rdev: radeon_device pointer
2839  * Unhalt the RLC ME (MicroEngine) (CIK).
2841 static void cik_rlc_start(struct radeon_device *rdev)
2845 	WREG32(RLC_CNTL, RLC_ENABLE);
/* Re-enable the context interrupts masked by cik_rlc_stop(). */
2847 	tmp = RREG32(CP_INT_CNTL_RING0);
2848 	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2849 	WREG32(CP_INT_CNTL_RING0, tmp);
2855  * cik_rlc_resume - setup the RLC hw
2857  * @rdev: radeon_device pointer
2859  * Initialize the RLC registers, load the ucode,
2860  * and start the RLC (CIK).
2861  * Returns 0 for success, -EINVAL if the ucode is not available.
2863 static int cik_rlc_resume(struct radeon_device *rdev)
2866 	u32 clear_state_info[3];
2867 	const __be32 *fw_data;
/* Pick the RLC ucode size by asic family. */
2872 	switch (rdev->family) {
2875 		size = BONAIRE_RLC_UCODE_SIZE;
2878 		size = KV_RLC_UCODE_SIZE;
2881 		size = KB_RLC_UCODE_SIZE;
/* Pulse the RLC soft-reset bit; readbacks post the writes. */
2887 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
2888 	RREG32(GRBM_SOFT_RESET);
2890 	WREG32(GRBM_SOFT_RESET, 0);
2891 	RREG32(GRBM_SOFT_RESET);
2894 	WREG32(RLC_LB_CNTR_INIT, 0);
2895 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
2897 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2898 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
2899 	WREG32(RLC_LB_PARAMS, 0x00600408);
2900 	WREG32(RLC_LB_CNTL, 0x80000004);
2902 	WREG32(RLC_MC_CNTL, 0);
2903 	WREG32(RLC_UCODE_CNTL, 0);
/* Load the big-endian ucode words into the GPM ucode area. */
2905 	fw_data = (const __be32 *)rdev->rlc_fw->data;
2906 	WREG32(RLC_GPM_UCODE_ADDR, 0);
2907 	for (i = 0; i < size; i++)
2908 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
2909 	WREG32(RLC_GPM_UCODE_ADDR, 0);
/* Clear-state buffer is not wired up yet — zeros are written in place of
 * the save/restore gpu address and size. */
2912 	clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
2913 	clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
2914 	clear_state_info[2] = 0;//cik_default_size;
2915 	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
2916 	for (i = 0; i < 3; i++)
2917 		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
2918 	WREG32(RLC_DRIVER_DMA_STATUS, 0);
2920 	cik_rlc_start(rdev);
2927 * Starting with r6xx, interrupts are handled via a ring buffer.
2928 * Ring buffers are areas of GPU accessible memory that the GPU
2929 * writes interrupt vectors into and the host reads vectors out of.
2930 * There is a rptr (read pointer) that determines where the
2931 * host is currently reading, and a wptr (write pointer)
2932 * which determines where the GPU has written. When the
2933 * pointers are equal, the ring is idle. When the GPU
2934 * writes vectors to the ring buffer, it increments the
2935 * wptr. When there is an interrupt, the host then starts
2936 * fetching commands and processing them until the pointers are
2937 * equal again at which point it updates the rptr.
2941  * cik_enable_interrupts - Enable the interrupt ring buffer
2943  * @rdev: radeon_device pointer
2945  * Enable the interrupt ring buffer (CIK).
2947 static void cik_enable_interrupts(struct radeon_device *rdev)
2949 	u32 ih_cntl = RREG32(IH_CNTL);
2950 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
/* Set the enable bits and mirror the state in rdev->ih.enabled. */
2952 	ih_cntl |= ENABLE_INTR;
2953 	ih_rb_cntl |= IH_RB_ENABLE;
2954 	WREG32(IH_CNTL, ih_cntl);
2955 	WREG32(IH_RB_CNTL, ih_rb_cntl);
2956 	rdev->ih.enabled = true;
2960  * cik_disable_interrupts - Disable the interrupt ring buffer
2962  * @rdev: radeon_device pointer
2964  * Disable the interrupt ring buffer (CIK).
2966 static void cik_disable_interrupts(struct radeon_device *rdev)
2968 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
2969 	u32 ih_cntl = RREG32(IH_CNTL);
/* Clear the enable bits, reset the ring pointers, and record the state. */
2971 	ih_rb_cntl &= ~IH_RB_ENABLE;
2972 	ih_cntl &= ~ENABLE_INTR;
2973 	WREG32(IH_RB_CNTL, ih_rb_cntl);
2974 	WREG32(IH_CNTL, ih_cntl);
2975 	/* set rptr, wptr to 0 */
2976 	WREG32(IH_RB_RPTR, 0);
2977 	WREG32(IH_RB_WPTR, 0);
2978 	rdev->ih.enabled = false;
2983  * cik_disable_interrupt_state - Disable all interrupt sources
2985  * @rdev: radeon_device pointer
2987  * Clear all interrupt enable bits used by the driver (CIK).
2989 static void cik_disable_interrupt_state(struct radeon_device *rdev)
/* Gfx ring: leave only the context busy/empty bits set. */
2994 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2995 	/* compute queues */
2996 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
2997 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
2998 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
2999 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
3000 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
3001 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
3002 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
3003 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
3005 	WREG32(GRBM_INT_CNTL, 0);
3006 	/* vline/vblank, etc. */
3007 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3008 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3009 	if (rdev->num_crtc >= 4) {
3010 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3011 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3013 	if (rdev->num_crtc >= 6) {
3014 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3015 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3019 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
3021 	/* digital hotplug */
/* Preserve only the HPD polarity bit; everything else (incl. INT_EN) off. */
3022 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3023 	WREG32(DC_HPD1_INT_CONTROL, tmp);
3024 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3025 	WREG32(DC_HPD2_INT_CONTROL, tmp);
3026 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3027 	WREG32(DC_HPD3_INT_CONTROL, tmp);
3028 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3029 	WREG32(DC_HPD4_INT_CONTROL, tmp);
3030 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3031 	WREG32(DC_HPD5_INT_CONTROL, tmp);
3032 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3033 	WREG32(DC_HPD6_INT_CONTROL, tmp);
3038  * cik_irq_init - init and enable the interrupt ring
3040  * @rdev: radeon_device pointer
3042  * Allocate a ring buffer for the interrupt controller,
3043  * enable the RLC, disable interrupts, enable the IH
3044  * ring buffer and enable it (CIK).
3045  * Called at device load and resume.
3046  * Returns 0 for success, errors for failure.
3048 static int cik_irq_init(struct radeon_device *rdev)
3052 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3055 	ret = r600_ih_ring_alloc(rdev);
3060 	cik_disable_interrupts(rdev);
/* The RLC owns interrupt routing — it must be running first. */
3063 	ret = cik_rlc_resume(rdev);
/* On RLC failure the IH ring allocated above is freed before returning. */
3065 		r600_ih_ring_fini(rdev);
3069 	/* setup interrupt control */
3070 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
3071 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3072 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
3073 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3074 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3076 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3077 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3078 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3079 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
3081 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3082 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3084 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3085 		      IH_WPTR_OVERFLOW_CLEAR |
3088 	if (rdev->wb.enabled)
3089 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3091 	/* set the writeback address whether it's enabled or not */
3092 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3093 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
3095 	WREG32(IH_RB_CNTL, ih_rb_cntl);
3097 	/* set rptr, wptr to 0 */
3098 	WREG32(IH_RB_RPTR, 0);
3099 	WREG32(IH_RB_WPTR, 0);
3101 	/* Default settings for IH_CNTL (disabled at first) */
3102 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3103 	/* RPTR_REARM only works if msi's are enabled */
3104 	if (rdev->msi_enabled)
3105 		ih_cntl |= RPTR_REARM;
3106 	WREG32(IH_CNTL, ih_cntl);
3108 	/* force the active interrupt state to all disabled */
3109 	cik_disable_interrupt_state(rdev);
3111 	pci_set_master(rdev->pdev);
3114 	cik_enable_interrupts(rdev);
3120  * cik_irq_set - enable/disable interrupt sources
3122  * @rdev: radeon_device pointer
3124  * Enable interrupt sources on the GPU (vblanks, hpd,
3126  * Returns 0 for success, errors for failure.
3128 int cik_irq_set(struct radeon_device *rdev)
3130 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
3131 		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
3132 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3133 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3134 	u32 grbm_int_cntl = 0;
3136 	if (!rdev->irq.installed) {
3137 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
3140 	/* don't enable anything if the ih is disabled */
3141 	if (!rdev->ih.enabled) {
3142 		cik_disable_interrupts(rdev);
3143 		/* force the active interrupt state to all disabled */
3144 		cik_disable_interrupt_state(rdev);
/* Start from current hw state with the enable bit masked off; each
 * requested source then sets its bit below before the final writes. */
3148 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
3149 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
3150 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
3151 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
3152 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3153 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3155 	/* enable CP interrupts on all rings */
3156 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3157 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
3158 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3160 	/* TODO: compute queues! */
3161 	/* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
3163 	if (rdev->irq.crtc_vblank_int[0] ||
3164 	    atomic_read(&rdev->irq.pflip[0])) {
3165 		DRM_DEBUG("cik_irq_set: vblank 0\n");
3166 		crtc1 |= VBLANK_INTERRUPT_MASK;
3168 	if (rdev->irq.crtc_vblank_int[1] ||
3169 	    atomic_read(&rdev->irq.pflip[1])) {
3170 		DRM_DEBUG("cik_irq_set: vblank 1\n");
3171 		crtc2 |= VBLANK_INTERRUPT_MASK;
3173 	if (rdev->irq.crtc_vblank_int[2] ||
3174 	    atomic_read(&rdev->irq.pflip[2])) {
3175 		DRM_DEBUG("cik_irq_set: vblank 2\n");
3176 		crtc3 |= VBLANK_INTERRUPT_MASK;
3178 	if (rdev->irq.crtc_vblank_int[3] ||
3179 	    atomic_read(&rdev->irq.pflip[3])) {
3180 		DRM_DEBUG("cik_irq_set: vblank 3\n");
3181 		crtc4 |= VBLANK_INTERRUPT_MASK;
3183 	if (rdev->irq.crtc_vblank_int[4] ||
3184 	    atomic_read(&rdev->irq.pflip[4])) {
3185 		DRM_DEBUG("cik_irq_set: vblank 4\n");
3186 		crtc5 |= VBLANK_INTERRUPT_MASK;
3188 	if (rdev->irq.crtc_vblank_int[5] ||
3189 	    atomic_read(&rdev->irq.pflip[5])) {
3190 		DRM_DEBUG("cik_irq_set: vblank 5\n");
3191 		crtc6 |= VBLANK_INTERRUPT_MASK;
3193 	if (rdev->irq.hpd[0]) {
3194 		DRM_DEBUG("cik_irq_set: hpd 1\n");
3195 		hpd1 |= DC_HPDx_INT_EN;
3197 	if (rdev->irq.hpd[1]) {
3198 		DRM_DEBUG("cik_irq_set: hpd 2\n");
3199 		hpd2 |= DC_HPDx_INT_EN;
3201 	if (rdev->irq.hpd[2]) {
3202 		DRM_DEBUG("cik_irq_set: hpd 3\n");
3203 		hpd3 |= DC_HPDx_INT_EN;
3205 	if (rdev->irq.hpd[3]) {
3206 		DRM_DEBUG("cik_irq_set: hpd 4\n");
3207 		hpd4 |= DC_HPDx_INT_EN;
3209 	if (rdev->irq.hpd[4]) {
3210 		DRM_DEBUG("cik_irq_set: hpd 5\n");
3211 		hpd5 |= DC_HPDx_INT_EN;
3213 	if (rdev->irq.hpd[5]) {
3214 		DRM_DEBUG("cik_irq_set: hpd 6\n");
3215 		hpd6 |= DC_HPDx_INT_EN;
/* Commit the accumulated enable masks to the hardware. */
3218 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3220 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3222 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
3223 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
3224 	if (rdev->num_crtc >= 4) {
3225 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
3226 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
3228 	if (rdev->num_crtc >= 6) {
3229 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
3230 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
3233 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
3234 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
3235 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
3236 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
3237 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
3238 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
3244  * cik_irq_ack - ack interrupt sources
3246  * @rdev: radeon_device pointer
3248  * Ack interrupt sources on the GPU (vblanks, hpd,
3249  * etc.) (CIK). Certain interrupt sources are sw
3250  * generated and do not require an explicit ack.
3252 static inline void cik_irq_ack(struct radeon_device *rdev)
/* Latch all display interrupt status registers into stat_regs so
 * cik_irq_process() sees a consistent snapshot. */
3256 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3257 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3258 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3259 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3260 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3261 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3262 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
/* Ack pending vblank/vline interrupts per crtc. */
3264 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
3265 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
3266 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
3267 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
3268 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3269 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
3270 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3271 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
3273 	if (rdev->num_crtc >= 4) {
3274 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3275 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
3276 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3277 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
3278 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3279 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
3280 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3281 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
3284 	if (rdev->num_crtc >= 6) {
3285 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3286 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
3287 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3288 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
3289 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3290 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
3291 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3292 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
/* Ack pending hotplug interrupts: read-modify-write INT_ACK per pad. */
3295 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
3296 		tmp = RREG32(DC_HPD1_INT_CONTROL);
3297 		tmp |= DC_HPDx_INT_ACK;
3298 		WREG32(DC_HPD1_INT_CONTROL, tmp);
3300 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
3301 		tmp = RREG32(DC_HPD2_INT_CONTROL);
3302 		tmp |= DC_HPDx_INT_ACK;
3303 		WREG32(DC_HPD2_INT_CONTROL, tmp);
3305 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3306 		tmp = RREG32(DC_HPD3_INT_CONTROL);
3307 		tmp |= DC_HPDx_INT_ACK;
3308 		WREG32(DC_HPD3_INT_CONTROL, tmp);
3310 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3311 		tmp = RREG32(DC_HPD4_INT_CONTROL);
3312 		tmp |= DC_HPDx_INT_ACK;
3313 		WREG32(DC_HPD4_INT_CONTROL, tmp);
3315 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3316 		tmp = RREG32(DC_HPD5_INT_CONTROL);
3317 		tmp |= DC_HPDx_INT_ACK;
3318 		WREG32(DC_HPD5_INT_CONTROL, tmp);
3320 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
/* Fixed copy-paste bug: the HPD6 ack must read DC_HPD6_INT_CONTROL,
 * not DC_HPD5_INT_CONTROL — otherwise HPD5's control bits (polarity,
 * enable) are written into HPD6's register. */
3321 		tmp = RREG32(DC_HPD6_INT_CONTROL);
3322 		tmp |= DC_HPDx_INT_ACK;
3323 		WREG32(DC_HPD6_INT_CONTROL, tmp);
3328  * cik_irq_disable - disable interrupts
3330  * @rdev: radeon_device pointer
3332  * Disable interrupts on the hw (CIK).
3334 static void cik_irq_disable(struct radeon_device *rdev)
3336 	cik_disable_interrupts(rdev);
3337 	/* Wait and acknowledge irq */
/* Then clear every individual source enable bit. */
3340 	cik_disable_interrupt_state(rdev);
3344  * cik_irq_suspend - disable interrupts for suspend
3346  * @rdev: radeon_device pointer
3348  * Disable interrupts and stop the RLC (CIK).
3351 static void cik_irq_suspend(struct radeon_device *rdev)
3353 	cik_irq_disable(rdev);
3358  * cik_irq_fini - tear down interrupt support
3360  * @rdev: radeon_device pointer
3362  * Disable interrupts on the hw and free the IH ring
3364  * Used for driver unload.
3366 static void cik_irq_fini(struct radeon_device *rdev)
/* Quiesce the hw before releasing the IH ring buffer. */
3368 	cik_irq_suspend(rdev);
3369 	r600_ih_ring_fini(rdev);
3373  * cik_get_ih_wptr - get the IH ring buffer wptr
3375  * @rdev: radeon_device pointer
3377  * Get the IH ring buffer wptr from either the register
3378  * or the writeback memory buffer (CIK). Also check for
3379  * ring buffer overflow and deal with it.
3380  * Used by cik_irq_process().
3381  * Returns the value of the wptr.
3383 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
/* Prefer the cheap writeback copy of the wptr; fall back to MMIO. */
3387 	if (rdev->wb.enabled)
3388 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3390 		wptr = RREG32(IH_RB_WPTR);
3392 	if (wptr & RB_OVERFLOW) {
3393 		/* When a ring buffer overflow happen start parsing interrupt
3394 		 * from the last not overwritten vector (wptr + 16). Hopefully
3395 		 * this should allow us to catchup.
/* Fixed: the message must mask the new rptr with ptr_mask ('&'), matching
 * the actual assignment below — the original used '+', logging a value
 * larger than the ring size. */
3397 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
3398 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
3399 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
/* Clear the overflow flag so the next overflow is detectable. */
3400 		tmp = RREG32(IH_RB_CNTL);
3401 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
3402 		WREG32(IH_RB_CNTL, tmp);
3404 	return (wptr & rdev->ih.ptr_mask);
3408 * Each IV ring entry is 128 bits:
3409 * [7:0] - interrupt source id
3411 * [59:32] - interrupt source data
3412 * [63:60] - reserved
3413 * [71:64] - RINGID: ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
3414 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
3415 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
3416 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
3417 * PIPE_ID - ME0 0=3D
3418 * - ME1&2 compute dispatcher (4 pipes each)
3421 * [127:96] - reserved
3424 * cik_irq_process - interrupt handler
3426 * @rdev: radeon_device pointer
3428 * Interrupt handler (CIK). Walk the IH ring,
3429 * ack interrupts and schedule work to handle
3431 * Returns irq process return code.
3433 int cik_irq_process(struct radeon_device *rdev)
3437 u32 src_id, src_data, ring_id;
3438 u8 me_id, pipe_id, queue_id;
3440 bool queue_hotplug = false;
3441 bool queue_reset = false;
3443 if (!rdev->ih.enabled || rdev->shutdown)
3446 wptr = cik_get_ih_wptr(rdev);
3449 /* is somebody else already processing irqs? */
3450 if (atomic_xchg(&rdev->ih.lock, 1))
3453 rptr = rdev->ih.rptr;
3454 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3456 /* Order reading of wptr vs. reading of IH ring data */
3459 /* display interrupts */
3462 while (rptr != wptr) {
3463 /* wptr/rptr are in bytes! */
3464 ring_index = rptr / 4;
3465 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3466 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3467 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3468 /* XXX check the bitfield order! */
3469 me_id = (ring_id & 0x60) >> 5;
3470 pipe_id = (ring_id & 0x18) >> 3;
3471 queue_id = (ring_id & 0x7) >> 0;
3474 case 1: /* D1 vblank/vline */
3476 case 0: /* D1 vblank */
3477 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
3478 if (rdev->irq.crtc_vblank_int[0]) {
3479 drm_handle_vblank(rdev->ddev, 0);
3480 rdev->pm.vblank_sync = true;
3481 wake_up(&rdev->irq.vblank_queue);
3483 if (atomic_read(&rdev->irq.pflip[0]))
3484 radeon_crtc_handle_flip(rdev, 0);
3485 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
3486 DRM_DEBUG("IH: D1 vblank\n");
3489 case 1: /* D1 vline */
3490 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
3491 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
3492 DRM_DEBUG("IH: D1 vline\n");
3496 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3500 case 2: /* D2 vblank/vline */
3502 case 0: /* D2 vblank */
3503 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
3504 if (rdev->irq.crtc_vblank_int[1]) {
3505 drm_handle_vblank(rdev->ddev, 1);
3506 rdev->pm.vblank_sync = true;
3507 wake_up(&rdev->irq.vblank_queue);
3509 if (atomic_read(&rdev->irq.pflip[1]))
3510 radeon_crtc_handle_flip(rdev, 1);
3511 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
3512 DRM_DEBUG("IH: D2 vblank\n");
3515 case 1: /* D2 vline */
3516 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
3517 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
3518 DRM_DEBUG("IH: D2 vline\n");
3522 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3526 case 3: /* D3 vblank/vline */
3528 case 0: /* D3 vblank */
3529 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
3530 if (rdev->irq.crtc_vblank_int[2]) {
3531 drm_handle_vblank(rdev->ddev, 2);
3532 rdev->pm.vblank_sync = true;
3533 wake_up(&rdev->irq.vblank_queue);
3535 if (atomic_read(&rdev->irq.pflip[2]))
3536 radeon_crtc_handle_flip(rdev, 2);
3537 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
3538 DRM_DEBUG("IH: D3 vblank\n");
3541 case 1: /* D3 vline */
3542 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
3543 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
3544 DRM_DEBUG("IH: D3 vline\n");
3548 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3552 case 4: /* D4 vblank/vline */
3554 case 0: /* D4 vblank */
3555 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
3556 if (rdev->irq.crtc_vblank_int[3]) {
3557 drm_handle_vblank(rdev->ddev, 3);
3558 rdev->pm.vblank_sync = true;
3559 wake_up(&rdev->irq.vblank_queue);
3561 if (atomic_read(&rdev->irq.pflip[3]))
3562 radeon_crtc_handle_flip(rdev, 3);
3563 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
3564 DRM_DEBUG("IH: D4 vblank\n");
3567 case 1: /* D4 vline */
3568 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
3569 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
3570 DRM_DEBUG("IH: D4 vline\n");
3574 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3578 case 5: /* D5 vblank/vline */
3580 case 0: /* D5 vblank */
3581 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
3582 if (rdev->irq.crtc_vblank_int[4]) {
3583 drm_handle_vblank(rdev->ddev, 4);
3584 rdev->pm.vblank_sync = true;
3585 wake_up(&rdev->irq.vblank_queue);
3587 if (atomic_read(&rdev->irq.pflip[4]))
3588 radeon_crtc_handle_flip(rdev, 4);
3589 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
3590 DRM_DEBUG("IH: D5 vblank\n");
3593 case 1: /* D5 vline */
3594 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
3595 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
3596 DRM_DEBUG("IH: D5 vline\n");
3600 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3604 case 6: /* D6 vblank/vline */
3606 case 0: /* D6 vblank */
3607 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
3608 if (rdev->irq.crtc_vblank_int[5]) {
3609 drm_handle_vblank(rdev->ddev, 5);
3610 rdev->pm.vblank_sync = true;
3611 wake_up(&rdev->irq.vblank_queue);
3613 if (atomic_read(&rdev->irq.pflip[5]))
3614 radeon_crtc_handle_flip(rdev, 5);
3615 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
3616 DRM_DEBUG("IH: D6 vblank\n");
3619 case 1: /* D6 vline */
3620 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
3621 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
3622 DRM_DEBUG("IH: D6 vline\n");
3626 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3630 case 42: /* HPD hotplug */
3633 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
3634 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
3635 queue_hotplug = true;
3636 DRM_DEBUG("IH: HPD1\n");
3640 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
3641 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
3642 queue_hotplug = true;
3643 DRM_DEBUG("IH: HPD2\n");
3647 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3648 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
3649 queue_hotplug = true;
3650 DRM_DEBUG("IH: HPD3\n");
3654 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3655 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
3656 queue_hotplug = true;
3657 DRM_DEBUG("IH: HPD4\n");
3661 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3662 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
3663 queue_hotplug = true;
3664 DRM_DEBUG("IH: HPD5\n");
3668 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3669 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
3670 queue_hotplug = true;
3671 DRM_DEBUG("IH: HPD6\n");
3675 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3681 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
3682 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3683 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3684 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3685 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3686 /* reset addr and status */
3687 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
3689 case 176: /* GFX RB CP_INT */
3690 case 177: /* GFX IB CP_INT */
3691 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3693 case 181: /* CP EOP event */
3694 DRM_DEBUG("IH: CP EOP\n");
3697 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3707 case 184: /* CP Privileged reg access */
3708 DRM_ERROR("Illegal register access in command stream\n");
3709 /* XXX check the bitfield order! */
3710 me_id = (ring_id & 0x60) >> 5;
3711 pipe_id = (ring_id & 0x18) >> 3;
3712 queue_id = (ring_id & 0x7) >> 0;
3715 /* This results in a full GPU reset, but all we need to do is soft
3716 * reset the CP for gfx
3728 case 185: /* CP Privileged inst */
3729 DRM_ERROR("Illegal instruction in command stream\n");
3732 /* This results in a full GPU reset, but all we need to do is soft
3733 * reset the CP for gfx
3745 case 233: /* GUI IDLE */
3746 DRM_DEBUG("IH: GUI idle\n");
3749 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3753 /* wptr/rptr are in bytes! */
3755 rptr &= rdev->ih.ptr_mask;
3758 schedule_work(&rdev->hotplug_work);
3760 schedule_work(&rdev->reset_work);
3761 rdev->ih.rptr = rptr;
3762 WREG32(IH_RB_RPTR, rdev->ih.rptr);
3763 atomic_set(&rdev->ih.lock, 0);
3765 /* make sure wptr hasn't changed while processing */
3766 wptr = cik_get_ih_wptr(rdev);