2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
30 #include "radeon_asic.h"
35 #define CIK_PFP_UCODE_SIZE 2144
36 #define CIK_ME_UCODE_SIZE 2144
37 #define CIK_CE_UCODE_SIZE 2144
39 #define CIK_MEC_UCODE_SIZE 4192
41 #define BONAIRE_RLC_UCODE_SIZE 2048
42 #define KB_RLC_UCODE_SIZE 2560
43 #define KV_RLC_UCODE_SIZE 2560
45 #define CIK_MC_UCODE_SIZE 7866
47 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
64 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
65 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
66 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
69 * cik_init_microcode - load ucode images from disk
71 * @rdev: radeon_device pointer
73 * Use the firmware interface to load the ucode images into
74 * the driver (not loaded into hw).
75 * Returns 0 on success, error on failure.
/*
 * cik_init_microcode - fetch the CP (pfp/me/ce/mec), RLC and MC firmware
 * images from userspace via request_firmware() and validate each image's
 * size against the expected per-family length.  MC ucode is skipped on
 * APUs (RADEON_IS_IGP).  On any failure all fetched images are released.
 *
 * NOTE(review): this extraction is missing structural lines (braces,
 * case labels, goto labels, returns) — compare against the full
 * drivers/gpu/drm/radeon/cik.c before trusting the control flow here.
 */
77 static int cik_init_microcode(struct radeon_device *rdev)
79 struct platform_device *pdev;
80 const char *chip_name;
/* Expected byte sizes per firmware image, selected by ASIC family. */
81 size_t pfp_req_size, me_req_size, ce_req_size,
82 mec_req_size, rlc_req_size, mc_req_size;
/* Dummy platform device so request_firmware() has a struct device. */
88 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
91 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
/* Pick the firmware filename prefix and expected sizes for this chip. */
95 switch (rdev->family) {
97 chip_name = "BONAIRE";
/* The *_UCODE_SIZE constants are dword counts; sizes below are bytes. */
98 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
99 me_req_size = CIK_ME_UCODE_SIZE * 4;
100 ce_req_size = CIK_CE_UCODE_SIZE * 4;
101 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
102 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
/* Bonaire is discrete, so it also needs MC (memory controller) ucode. */
103 mc_req_size = CIK_MC_UCODE_SIZE * 4;
106 chip_name = "KAVERI";
107 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
108 me_req_size = CIK_ME_UCODE_SIZE * 4;
109 ce_req_size = CIK_CE_UCODE_SIZE * 4;
110 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
111 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
114 chip_name = "KABINI";
115 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
116 me_req_size = CIK_ME_UCODE_SIZE * 4;
117 ce_req_size = CIK_CE_UCODE_SIZE * 4;
118 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
119 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
124 DRM_INFO("Loading %s Microcode\n", chip_name);
/* Pre-fetch parser (PFP) microcode. */
126 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
127 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
/* Reject images whose size doesn't match the expected length exactly. */
130 if (rdev->pfp_fw->size != pfp_req_size) {
132 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
133 rdev->pfp_fw->size, fw_name);
/* Micro engine (ME) microcode. */
138 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
139 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
142 if (rdev->me_fw->size != me_req_size) {
144 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
145 rdev->me_fw->size, fw_name);
/* Constant engine (CE) microcode. */
149 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
150 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
153 if (rdev->ce_fw->size != ce_req_size) {
155 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
156 rdev->ce_fw->size, fw_name);
/* Micro engine compute (MEC) microcode. */
160 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
161 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
164 if (rdev->mec_fw->size != mec_req_size) {
166 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
167 rdev->mec_fw->size, fw_name);
/* RLC (graphics power/clock controller) microcode. */
171 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
172 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
175 if (rdev->rlc_fw->size != rlc_req_size) {
177 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
178 rdev->rlc_fw->size, fw_name);
182 /* No MC ucode on APUs */
183 if (!(rdev->flags & RADEON_IS_IGP)) {
184 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
185 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
188 if (rdev->mc_fw->size != mc_req_size) {
190 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
191 rdev->mc_fw->size, fw_name);
/* The dummy device is only needed while fetching; drop it either way. */
197 platform_device_unregister(pdev);
/* Error path: release everything fetched so far (release_firmware(NULL)
 * is a no-op, so unconditional release of each slot is safe). */
202 "cik_cp: Failed to load firmware \"%s\"\n",
204 release_firmware(rdev->pfp_fw);
206 release_firmware(rdev->me_fw);
208 release_firmware(rdev->ce_fw);
210 release_firmware(rdev->rlc_fw);
212 release_firmware(rdev->mc_fw);
222 * cik_tiling_mode_table_init - init the hw tiling table
224 * @rdev: radeon_device pointer
226 * Starting with SI, the tiling setup is done globally in a
227 * set of 32 tiling modes. Rather than selecting each set of
228 * parameters per surface as on older asics, we just select
229 * which index in the tiling table we want to use, and the
230 * surface uses those parameters (CIK).
/*
 * cik_tiling_mode_table_init - program the global tiling mode tables.
 *
 * Fills the 32 GB_TILE_MODE* and 16 GB_MACROTILE_MODE* registers with
 * per-index tiling parameters; surfaces later select an index instead of
 * carrying their own tiling parameters.  The table contents depend on the
 * number of pipes (8/4/2) and, apparently, the render backend count.
 *
 * NOTE(review): all switch `case N:` labels and `break` statements were
 * lost in this extraction, so which gb_tile_moden value belongs to which
 * reg_offset cannot be determined from this view — confirm against the
 * full drivers/gpu/drm/radeon/cik.c.
 */
232 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
234 const u32 num_tile_mode_states = 32;
235 const u32 num_secondary_tile_mode_states = 16;
236 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
237 u32 num_pipe_configs;
238 u32 num_rbs = rdev->config.cik.max_backends_per_se *
239 rdev->config.cik.max_shader_engines;
/* Map the DRAM row size to the matching TILE_SPLIT encoding. */
241 switch (rdev->config.cik.mem_row_size_in_kb) {
243 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
247 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
250 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
254 num_pipe_configs = rdev->config.cik.max_tile_pipes;
255 if (num_pipe_configs > 8)
256 num_pipe_configs = 8; /* ??? */
/* --- 8-pipe configuration: P8_32x32 pipe configs --- */
258 if (num_pipe_configs == 8) {
259 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
260 switch (reg_offset) {
/* Depth/stencil 2D-tiled entries with increasing tile split sizes. */
262 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
264 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
265 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
268 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
269 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
270 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
274 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
275 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
276 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
280 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
281 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
282 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
283 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
286 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
287 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
288 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
289 TILE_SPLIT(split_equal_to_row_size));
292 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
293 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
/* PRT (partially-resident texture) depth variants. */
296 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
298 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
299 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
302 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
303 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
304 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
305 TILE_SPLIT(split_equal_to_row_size));
/* Linear-aligned entry for non-tiled surfaces. */
308 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
309 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
/* Display (scanout) micro tiling entries. */
312 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
313 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
316 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
317 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
318 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
322 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
323 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
324 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
328 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
329 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
330 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
/* Thin (general texture) micro tiling entries. */
334 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
335 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
338 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
340 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
344 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
346 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
350 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
351 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
352 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
/* Rotated micro tiling entries. */
356 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
357 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
360 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
361 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
362 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
366 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
367 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
368 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
372 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
373 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
374 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
/* Commit this index's tiling mode to the hardware table. */
381 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* Secondary (macrotile) table: bank width/height/aspect/count per index. */
383 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
384 switch (reg_offset) {
386 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
389 NUM_BANKS(ADDR_SURF_16_BANK));
392 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
395 NUM_BANKS(ADDR_SURF_16_BANK));
398 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
401 NUM_BANKS(ADDR_SURF_16_BANK));
404 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
407 NUM_BANKS(ADDR_SURF_16_BANK));
410 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
413 NUM_BANKS(ADDR_SURF_8_BANK));
416 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
419 NUM_BANKS(ADDR_SURF_4_BANK));
422 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
425 NUM_BANKS(ADDR_SURF_2_BANK));
428 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
431 NUM_BANKS(ADDR_SURF_16_BANK));
434 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
437 NUM_BANKS(ADDR_SURF_16_BANK));
440 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
443 NUM_BANKS(ADDR_SURF_16_BANK));
446 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
449 NUM_BANKS(ADDR_SURF_16_BANK));
452 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
455 NUM_BANKS(ADDR_SURF_8_BANK));
458 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
461 NUM_BANKS(ADDR_SURF_4_BANK));
464 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
467 NUM_BANKS(ADDR_SURF_2_BANK));
473 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* --- 4-pipe configuration: P4_16x16 / P4_8x16 pipe configs --- */
475 } else if (num_pipe_configs == 4) {
477 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
478 switch (reg_offset) {
480 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
481 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
482 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
483 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
486 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
487 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
488 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
489 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
492 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
493 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
494 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
495 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
498 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
499 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
500 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
501 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
504 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
505 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
506 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
507 TILE_SPLIT(split_equal_to_row_size));
510 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
514 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
516 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
517 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
520 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
521 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
522 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
523 TILE_SPLIT(split_equal_to_row_size));
526 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
527 PIPE_CONFIG(ADDR_SURF_P4_16x16));
530 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
531 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
534 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
535 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
536 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
537 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
540 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
541 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
542 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
546 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
547 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
548 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
549 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
552 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
553 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
556 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
557 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
558 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
562 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
564 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
565 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
568 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
569 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
570 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
571 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
574 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
575 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
578 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
579 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
580 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
584 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
585 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
586 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
587 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
590 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
591 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
592 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
593 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
599 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* Fewer than 4 RBs: P4_8x16 pipe config throughout.
 * NOTE(review): in the full source this branch nests inside the 4-pipe
 * path; the flattened extraction here obscures that — verify. */
601 } else if (num_rbs < 4) {
602 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
603 switch (reg_offset) {
605 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
606 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
607 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
608 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
611 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
612 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
613 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
614 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
617 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
618 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
619 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
620 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
623 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
624 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
625 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
626 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
629 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
630 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
631 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
632 TILE_SPLIT(split_equal_to_row_size));
635 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
636 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
639 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
640 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
641 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
642 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
645 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
646 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
647 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
648 TILE_SPLIT(split_equal_to_row_size));
651 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
652 PIPE_CONFIG(ADDR_SURF_P4_8x16));
655 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
656 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
659 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
660 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
661 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
662 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
665 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
666 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
667 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
671 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
672 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
673 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
674 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
677 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
678 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
681 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
682 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
683 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
687 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
688 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
689 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
690 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
693 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
694 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
695 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
699 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
700 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
703 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
704 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
705 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
709 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
710 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
711 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
715 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
716 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
717 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
724 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* Macrotile table for the 4-pipe / low-RB configurations. */
727 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
728 switch (reg_offset) {
730 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
731 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
732 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
733 NUM_BANKS(ADDR_SURF_16_BANK));
736 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
737 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
738 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
739 NUM_BANKS(ADDR_SURF_16_BANK));
742 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
745 NUM_BANKS(ADDR_SURF_16_BANK));
748 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
749 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
750 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
751 NUM_BANKS(ADDR_SURF_16_BANK));
754 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
755 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
756 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
757 NUM_BANKS(ADDR_SURF_16_BANK));
760 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
763 NUM_BANKS(ADDR_SURF_8_BANK));
766 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
769 NUM_BANKS(ADDR_SURF_4_BANK));
772 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
773 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
774 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
775 NUM_BANKS(ADDR_SURF_16_BANK));
778 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
781 NUM_BANKS(ADDR_SURF_16_BANK));
784 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
785 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
786 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
787 NUM_BANKS(ADDR_SURF_16_BANK));
790 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
793 NUM_BANKS(ADDR_SURF_16_BANK));
796 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
797 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
798 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
799 NUM_BANKS(ADDR_SURF_16_BANK));
802 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
803 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
804 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
805 NUM_BANKS(ADDR_SURF_8_BANK));
808 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
809 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
810 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
811 NUM_BANKS(ADDR_SURF_4_BANK));
817 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* --- 2-pipe configuration: P2 pipe config (e.g. small APUs) --- */
819 } else if (num_pipe_configs == 2) {
820 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
821 switch (reg_offset) {
823 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
825 PIPE_CONFIG(ADDR_SURF_P2) |
826 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
829 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
830 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
831 PIPE_CONFIG(ADDR_SURF_P2) |
832 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
835 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
836 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
837 PIPE_CONFIG(ADDR_SURF_P2) |
838 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
841 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
842 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
843 PIPE_CONFIG(ADDR_SURF_P2) |
844 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
847 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
848 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
849 PIPE_CONFIG(ADDR_SURF_P2) |
850 TILE_SPLIT(split_equal_to_row_size));
853 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
854 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
857 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
858 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
859 PIPE_CONFIG(ADDR_SURF_P2) |
860 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
863 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
864 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
865 PIPE_CONFIG(ADDR_SURF_P2) |
866 TILE_SPLIT(split_equal_to_row_size));
/* P2 linear entry carries no PIPE_CONFIG, unlike the 4/8-pipe cases. */
869 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
872 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
873 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
876 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
877 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
878 PIPE_CONFIG(ADDR_SURF_P2) |
879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
882 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
883 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
884 PIPE_CONFIG(ADDR_SURF_P2) |
885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
888 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
889 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
890 PIPE_CONFIG(ADDR_SURF_P2) |
891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
894 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
895 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
898 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
899 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
900 PIPE_CONFIG(ADDR_SURF_P2) |
901 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
904 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
905 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
906 PIPE_CONFIG(ADDR_SURF_P2) |
907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
910 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
911 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
912 PIPE_CONFIG(ADDR_SURF_P2) |
913 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
916 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
917 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
920 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
921 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
922 PIPE_CONFIG(ADDR_SURF_P2) |
923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
926 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
927 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
928 PIPE_CONFIG(ADDR_SURF_P2) |
929 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
932 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
933 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
934 PIPE_CONFIG(ADDR_SURF_P2) |
935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
941 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* Macrotile table for the 2-pipe configuration. */
943 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
944 switch (reg_offset) {
946 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
949 NUM_BANKS(ADDR_SURF_16_BANK));
952 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
955 NUM_BANKS(ADDR_SURF_16_BANK));
958 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
961 NUM_BANKS(ADDR_SURF_16_BANK));
964 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
967 NUM_BANKS(ADDR_SURF_16_BANK));
970 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
971 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
972 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
973 NUM_BANKS(ADDR_SURF_16_BANK));
976 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
977 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
978 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
979 NUM_BANKS(ADDR_SURF_16_BANK));
982 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
983 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
984 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
985 NUM_BANKS(ADDR_SURF_8_BANK));
988 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
989 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
990 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
991 NUM_BANKS(ADDR_SURF_16_BANK));
994 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
995 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
996 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
997 NUM_BANKS(ADDR_SURF_16_BANK));
1000 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1001 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1002 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1003 NUM_BANKS(ADDR_SURF_16_BANK));
1006 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1007 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1008 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1009 NUM_BANKS(ADDR_SURF_16_BANK));
1012 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1013 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1014 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1015 NUM_BANKS(ADDR_SURF_16_BANK));
1018 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1019 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1020 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1021 NUM_BANKS(ADDR_SURF_16_BANK));
1024 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1025 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1026 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1027 NUM_BANKS(ADDR_SURF_8_BANK));
1033 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
/* Unexpected pipe count: log and program nothing. */
1036 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1040 * cik_select_se_sh - select which SE, SH to address
1042 * @rdev: radeon_device pointer
1043 * @se_num: shader engine to address
1044 * @sh_num: sh block to address
1046 * Select which SE, SH combinations to address. Certain
1047 * registers are instanced per SE or SH. 0xffffffff means
1048 * broadcast to all SEs or SHs (CIK).
/*
 * cik_select_se_sh - steer subsequent instanced register accesses.
 *
 * Writes GRBM_GFX_INDEX so later reads/writes of per-SE/per-SH registers
 * target the given shader engine and SH block; 0xffffffff in either
 * argument selects broadcast to all instances of that dimension.
 *
 * NOTE(review): the all-broadcast path uses plain `=`, discarding the
 * INSTANCE_BROADCAST_WRITES bit set at initialization — confirm this is
 * intended (later kernels use `|=` here).  The final `data |=` line has
 * lost its `else` in this extraction.
 */
1050 static void cik_select_se_sh(struct radeon_device *rdev,
1051 u32 se_num, u32 sh_num)
1053 u32 data = INSTANCE_BROADCAST_WRITES;
1055 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1056 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1057 else if (se_num == 0xffffffff)
1058 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1059 else if (sh_num == 0xffffffff)
1060 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1062 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1063 WREG32(GRBM_GFX_INDEX, data);
1067 * cik_create_bitmask - create a bitmask
1069 * @bit_width: length of the mask
1071 * create a variable length bit mask (CIK).
1072 * Returns the bitmask.
/*
 * cik_create_bitmask - build a mask with the low bit_width bits set.
 *
 * NOTE(review): the loop body and return statement were lost in this
 * extraction; only the signature and loop header survive.
 */
1074 static u32 cik_create_bitmask(u32 bit_width)
1078 for (i = 0; i < bit_width; i++) {
1086 * cik_get_rb_disabled - compute the disabled render backend mask
1088 * @rdev: radeon_device pointer
1089 * @max_rb_num: max RBs (render backends) for the asic
1090 * @se_num: number of SEs (shader engines) for the asic
1091 * @sh_per_se: number of SH blocks per SE for the asic
1093 * Calculates the bitmask of disabled RBs (CIK).
1094 * Returns the disabled RB bitmask.
1096 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1097 u32 max_rb_num, u32 se_num,
/* start with the RBs disabled in hardware (fuses) */
1102 data = RREG32(CC_RB_BACKEND_DISABLE);
1104 data &= BACKEND_DISABLE_MASK;
/* merge in the RBs additionally disabled by the driver/user */
1107 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
/* shift the combined disable bits down to bit 0 */
1109 data >>= BACKEND_DISABLE_SHIFT;
/* mask off bits beyond the RBs owned by a single SH:
 * max_rb_num / se_num / sh_per_se = RBs per SH
 */
1111 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1117 * cik_setup_rb - setup the RBs on the asic
1119 * @rdev: radeon_device pointer
1120 * @se_num: number of SEs (shader engines) for the asic
1121 * @sh_per_se: number of SH blocks per SE for the asic
1122 * @max_rb_num: max RBs (render backends) for the asic
1124 * Configures per-SE/SH RB registers (CIK).
1126 static void cik_setup_rb(struct radeon_device *rdev,
1127 u32 se_num, u32 sh_per_se,
1132 u32 disabled_rbs = 0;
1133 u32 enabled_rbs = 0;
/* pass 1: gather each SE/SH pair's disabled-RB bits into one word,
 * CIK_RB_BITMAP_WIDTH_PER_SH bits per SH
 */
1135 for (i = 0; i < se_num; i++) {
1136 for (j = 0; j < sh_per_se; j++) {
/* point GRBM_GFX_INDEX at this specific SE/SH instance */
1137 cik_select_se_sh(rdev, i, j);
1138 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1139 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
/* restore broadcast addressing so later writes reach all instances */
1142 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
/* pass 2: invert the disabled mask into a per-RB enabled mask */
1145 for (i = 0; i < max_rb_num; i++) {
1146 if (!(disabled_rbs & mask))
1147 enabled_rbs |= mask;
/* pass 3: program the raster config per SE, consuming two bits of
 * enabled_rbs per SH to choose an RB mapping
 */
1151 for (i = 0; i < se_num; i++) {
1152 cik_select_se_sh(rdev, i, 0xffffffff);
1154 for (j = 0; j < sh_per_se; j++) {
1155 switch (enabled_rbs & 3) {
1157 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1160 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1164 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1169 WREG32(PA_SC_RASTER_CONFIG, data);
/* leave the GRBM index in broadcast mode */
1171 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1175 * cik_gpu_init - setup the 3D engine
1177 * @rdev: radeon_device pointer
1179 * Configures the 3D engine and tiling configuration
1180 * registers so that the 3D engine is usable.
1182 static void cik_gpu_init(struct radeon_device *rdev)
1184 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1185 u32 mc_shared_chmap, mc_arb_ramcfg;
1186 u32 hdp_host_path_cntl;
/* per-family shader/cache topology limits */
1190 switch (rdev->family) {
/* larger discrete-class config: 2 SEs, 4 tile pipes, 7 CUs per SH */
1192 rdev->config.cik.max_shader_engines = 2;
1193 rdev->config.cik.max_tile_pipes = 4;
1194 rdev->config.cik.max_cu_per_sh = 7;
1195 rdev->config.cik.max_sh_per_se = 1;
1196 rdev->config.cik.max_backends_per_se = 2;
1197 rdev->config.cik.max_texture_channel_caches = 4;
1198 rdev->config.cik.max_gprs = 256;
1199 rdev->config.cik.max_gs_threads = 32;
1200 rdev->config.cik.max_hw_contexts = 8;
1202 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1203 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1204 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1205 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1206 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
/* smaller APU-class config: 1 SE, 2 tile pipes, 2 CUs per SH */
1213 rdev->config.cik.max_shader_engines = 1;
1214 rdev->config.cik.max_tile_pipes = 2;
1215 rdev->config.cik.max_cu_per_sh = 2;
1216 rdev->config.cik.max_sh_per_se = 1;
1217 rdev->config.cik.max_backends_per_se = 1;
1218 rdev->config.cik.max_texture_channel_caches = 2;
1219 rdev->config.cik.max_gprs = 256;
1220 rdev->config.cik.max_gs_threads = 16;
1221 rdev->config.cik.max_hw_contexts = 8;
1223 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1224 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1225 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1226 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
/* NOTE(review): this branch also uses the BONAIRE golden value -
 * confirm that is intended for the APU case
 */
1227 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1231 /* Initialize HDP */
1232 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1233 WREG32((0x2c14 + j), 0x00000000);
1234 WREG32((0x2c18 + j), 0x00000000);
1235 WREG32((0x2c1c + j), 0x00000000);
1236 WREG32((0x2c20 + j), 0x00000000);
1237 WREG32((0x2c24 + j), 0x00000000);
1240 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
/* enable frame buffer reads/writes through the BIF */
1242 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1244 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1245 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1247 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1248 rdev->config.cik.mem_max_burst_length_bytes = 256;
/* derive DRAM row size (in KB) from the number of columns, capped at 4 */
1249 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1250 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1251 if (rdev->config.cik.mem_row_size_in_kb > 4)
1252 rdev->config.cik.mem_row_size_in_kb = 4;
1253 /* XXX use MC settings? */
1254 rdev->config.cik.shader_engine_tile_size = 32;
1255 rdev->config.cik.num_gpus = 1;
1256 rdev->config.cik.multi_gpu_tile_size = 64;
1258 /* fix up row size */
1259 gb_addr_config &= ~ROW_SIZE_MASK;
1260 switch (rdev->config.cik.mem_row_size_in_kb) {
1263 gb_addr_config |= ROW_SIZE(0);
1266 gb_addr_config |= ROW_SIZE(1);
1269 gb_addr_config |= ROW_SIZE(2);
1273 /* setup tiling info dword. gb_addr_config is not adequate since it does
1274 * not have bank info, so create a custom tiling dword.
1275 * bits 3:0 num_pipes
1276 * bits 7:4 num_banks
1277 * bits 11:8 group_size
1278 * bits 15:12 row_size
1280 rdev->config.cik.tile_config = 0;
1281 switch (rdev->config.cik.num_tile_pipes) {
1283 rdev->config.cik.tile_config |= (0 << 0);
1286 rdev->config.cik.tile_config |= (1 << 0);
1289 rdev->config.cik.tile_config |= (2 << 0);
1293 /* XXX what about 12? */
1294 rdev->config.cik.tile_config |= (3 << 0);
/* bank count flag from the MC arbiter config */
1297 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1298 rdev->config.cik.tile_config |= 1 << 4;
1300 rdev->config.cik.tile_config |= 0 << 4;
1301 rdev->config.cik.tile_config |=
1302 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1303 rdev->config.cik.tile_config |=
1304 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
/* mirror the address config into the HDP and display blocks */
1306 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1307 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1308 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1310 cik_tiling_mode_table_init(rdev);
1312 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1313 rdev->config.cik.max_sh_per_se,
1314 rdev->config.cik.max_backends_per_se);
1316 /* set HW defaults for 3D engine */
1317 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1319 WREG32(SX_DEBUG_1, 0x20);
1321 WREG32(TA_CNTL_AUX, 0x00010000);
1323 tmp = RREG32(SPI_CONFIG_CNTL);
1325 WREG32(SPI_CONFIG_CNTL, tmp);
/* NOTE(review): SQ_CONFIG is written to 1 here and to 0 further
 * down - verify the double write is intentional
 */
1327 WREG32(SQ_CONFIG, 1);
1329 WREG32(DB_DEBUG, 0);
1331 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1333 WREG32(DB_DEBUG2, tmp);
1335 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1337 WREG32(DB_DEBUG3, tmp);
1339 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1341 WREG32(CB_HW_CONTROL, tmp);
1343 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
/* program the scan converter FIFO sizes chosen per family above */
1345 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1346 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1347 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1348 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1350 WREG32(VGT_NUM_INSTANCES, 1);
1352 WREG32(CP_PERFMON_CNTL, 0);
1354 WREG32(SQ_CONFIG, 0);
1356 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1357 FORCE_EOV_MAX_REZ_CNT(255)));
1359 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1360 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1362 WREG32(VGT_GS_VERTEX_REUSE, 16);
1363 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
/* make HDP flushes also invalidate its cache */
1365 tmp = RREG32(HDP_MISC_CNTL);
1366 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1367 WREG32(HDP_MISC_CNTL, tmp);
/* read-modify-write with no modification: latches the current value */
1369 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1370 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1372 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1373 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1379 * cik_gpu_is_lockup - check if the 3D engine is locked up
1381 * @rdev: radeon_device pointer
1382 * @ring: radeon_ring structure holding ring information
1384 * Check if the 3D engine is locked up (CIK).
1385 * Returns true if the engine is locked, false if not.
1387 bool cik_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1389 u32 srbm_status, srbm_status2;
1390 u32 grbm_status, grbm_status2;
1391 u32 grbm_status_se0, grbm_status_se1, grbm_status_se2, grbm_status_se3;
/* snapshot the engine status registers */
1393 srbm_status = RREG32(SRBM_STATUS);
1394 srbm_status2 = RREG32(SRBM_STATUS2);
1395 grbm_status = RREG32(GRBM_STATUS);
1396 grbm_status2 = RREG32(GRBM_STATUS2);
1397 grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
1398 grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
1399 grbm_status_se2 = RREG32(GRBM_STATUS_SE2);
1400 grbm_status_se3 = RREG32(GRBM_STATUS_SE3);
/* NOTE(review): only grbm_status is examined below; the other status
 * reads appear unused - confirm whether they can be dropped
 */
1401 if (!(grbm_status & GUI_ACTIVE)) {
/* GUI idle: not locked up; refresh the ring's lockup tracking state */
1402 radeon_ring_lockup_update(ring);
1405 /* force CP activities */
1406 radeon_ring_force_activity(rdev, ring);
1407 return radeon_ring_test_lockup(rdev, ring);
1411 * cik_gfx_gpu_soft_reset - soft reset the 3D engine and CPG
1413 * @rdev: radeon_device pointer
1415 * Soft reset the GFX engine and CPG blocks (CIK).
1416 * XXX: deal with resetting RLC and CPF
1417 * Returns 0 for success.
1419 static int cik_gfx_gpu_soft_reset(struct radeon_device *rdev)
1421 struct evergreen_mc_save save;
/* nothing to do if the GUI (gfx engine) is already idle */
1424 if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
/* dump the engine status before resetting, for debugging */
1427 dev_info(rdev->dev, "GPU GFX softreset \n");
1428 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1429 RREG32(GRBM_STATUS));
1430 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1431 RREG32(GRBM_STATUS2));
1432 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1433 RREG32(GRBM_STATUS_SE0));
1434 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1435 RREG32(GRBM_STATUS_SE1));
1436 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
1437 RREG32(GRBM_STATUS_SE2));
1438 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
1439 RREG32(GRBM_STATUS_SE3));
1440 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1441 RREG32(SRBM_STATUS));
1442 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
1443 RREG32(SRBM_STATUS2));
/* stop the MC and save its state so VRAM survives the reset */
1444 evergreen_mc_stop(rdev, &save);
1445 if (radeon_mc_wait_for_idle(rdev)) {
1446 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1448 /* Disable CP parsing/prefetching */
1449 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
1451 /* reset all the gfx block and all CPG blocks */
1452 grbm_reset = SOFT_RESET_CPG | SOFT_RESET_GFX;
1454 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
/* assert the reset; the read-back flushes the posted write */
1455 WREG32(GRBM_SOFT_RESET, grbm_reset);
1456 (void)RREG32(GRBM_SOFT_RESET);
/* de-assert the reset */
1458 WREG32(GRBM_SOFT_RESET, 0);
1459 (void)RREG32(GRBM_SOFT_RESET);
1460 /* Wait a little for things to settle down */
/* dump the status again to see the post-reset state */
1462 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1463 RREG32(GRBM_STATUS));
1464 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1465 RREG32(GRBM_STATUS2));
1466 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1467 RREG32(GRBM_STATUS_SE0));
1468 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1469 RREG32(GRBM_STATUS_SE1));
1470 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
1471 RREG32(GRBM_STATUS_SE2));
1472 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
1473 RREG32(GRBM_STATUS_SE3));
1474 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1475 RREG32(SRBM_STATUS));
1476 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
1477 RREG32(SRBM_STATUS2));
/* restart the MC with the state saved above */
1478 evergreen_mc_resume(rdev, &save);
1483 * cik_compute_gpu_soft_reset - soft reset CPC
1485 * @rdev: radeon_device pointer
1487 * Soft reset the CPC blocks (CIK).
1488 * XXX: deal with reseting RLC and CPF
1489 * Returns 0 for success.
1491 static int cik_compute_gpu_soft_reset(struct radeon_device *rdev)
1493 struct evergreen_mc_save save;
1496 dev_info(rdev->dev, "GPU compute softreset \n");
1497 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1498 RREG32(GRBM_STATUS));
1499 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1500 RREG32(GRBM_STATUS2));
1501 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1502 RREG32(GRBM_STATUS_SE0));
1503 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1504 RREG32(GRBM_STATUS_SE1));
1505 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
1506 RREG32(GRBM_STATUS_SE2));
1507 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
1508 RREG32(GRBM_STATUS_SE3));
1509 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1510 RREG32(SRBM_STATUS));
1511 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
1512 RREG32(SRBM_STATUS2));
1513 evergreen_mc_stop(rdev, &save);
1514 if (radeon_mc_wait_for_idle(rdev)) {
1515 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1517 /* Disable CP parsing/prefetching */
1518 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
1520 /* reset all the CPC blocks */
1521 grbm_reset = SOFT_RESET_CPG;
1523 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
1524 WREG32(GRBM_SOFT_RESET, grbm_reset);
1525 (void)RREG32(GRBM_SOFT_RESET);
1527 WREG32(GRBM_SOFT_RESET, 0);
1528 (void)RREG32(GRBM_SOFT_RESET);
1529 /* Wait a little for things to settle down */
1531 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1532 RREG32(GRBM_STATUS));
1533 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1534 RREG32(GRBM_STATUS2));
1535 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1536 RREG32(GRBM_STATUS_SE0));
1537 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1538 RREG32(GRBM_STATUS_SE1));
1539 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
1540 RREG32(GRBM_STATUS_SE2));
1541 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
1542 RREG32(GRBM_STATUS_SE3));
1543 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1544 RREG32(SRBM_STATUS));
1545 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
1546 RREG32(SRBM_STATUS2));
1547 evergreen_mc_resume(rdev, &save);
1552 * cik_asic_reset - soft reset compute and gfx
1554 * @rdev: radeon_device pointer
1556 * Soft reset the CPC blocks (CIK).
1557 * XXX: make this more fine grained and only reset
1558 * what is necessary.
1559 * Returns 0 for success.
1561 int cik_asic_reset(struct radeon_device *rdev)
1565 r = cik_compute_gpu_soft_reset(rdev);
1567 dev_info(rdev->dev, "Compute reset failed!\n");
1569 return cik_gfx_gpu_soft_reset(rdev);
1574 * cik_mc_program - program the GPU memory controller
1576 * @rdev: radeon_device pointer
1578 * Set the location of vram, gart, and AGP in the GPU's
1579 * physical address space (CIK).
1581 static void cik_mc_program(struct radeon_device *rdev)
1583 struct evergreen_mc_save save;
1587 /* Initialize HDP */
1588 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1589 WREG32((0x2c14 + j), 0x00000000);
1590 WREG32((0x2c18 + j), 0x00000000);
1591 WREG32((0x2c1c + j), 0x00000000);
1592 WREG32((0x2c20 + j), 0x00000000);
1593 WREG32((0x2c24 + j), 0x00000000);
1595 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
/* stop the MC and save its state while we reprogram the apertures */
1597 evergreen_mc_stop(rdev, &save);
1598 if (radeon_mc_wait_for_idle(rdev)) {
1599 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1601 /* Lockout access through VGA aperture*/
1602 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
1603 /* Update configuration */
/* system aperture covers all of VRAM; addresses are in 4K units */
1604 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
1605 rdev->mc.vram_start >> 12);
1606 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
1607 rdev->mc.vram_end >> 12);
/* out-of-range accesses land on the scratch page */
1608 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
1609 rdev->vram_scratch.gpu_addr >> 12);
/* FB location packs start (low 16 bits) and end (high 16 bits),
 * both in 16MB units
 */
1610 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
1611 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
1612 WREG32(MC_VM_FB_LOCATION, tmp);
1613 /* XXX double check these! */
1614 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
1615 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
1616 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* no AGP on CIK: base 0 with top below bottom disables the aperture */
1617 WREG32(MC_VM_AGP_BASE, 0);
1618 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
1619 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
1620 if (radeon_mc_wait_for_idle(rdev)) {
1621 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1623 evergreen_mc_resume(rdev, &save);
1624 /* we need to own VRAM, so turn off the VGA renderer here
1625 * to stop it overwriting our objects */
1626 rv515_vga_render_disable(rdev);
1630 * cik_mc_init - initialize the memory controller driver params
1632 * @rdev: radeon_device pointer
1634 * Look up the amount of vram, vram width, and decide how to place
1635 * vram and gart within the GPU's physical address space (CIK).
1636 * Returns 0 for success.
1638 static int cik_mc_init(struct radeon_device *rdev)
1641 int chansize, numchan;
1643 /* Get VRAM informations */
1644 rdev->mc.vram_is_ddr = true;
/* channel size (bits) comes from the MC arbiter config */
1645 tmp = RREG32(MC_ARB_RAMCFG);
1646 if (tmp & CHANSIZE_MASK) {
/* channel count comes from the shared channel map */
1651 tmp = RREG32(MC_SHARED_CHMAP);
1652 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* total memory bus width = channels * channel size */
1682 rdev->mc.vram_width = numchan * chansize;
1683 /* Could aper size report 0 ? */
1684 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
1685 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
1686 /* size in MB on si */
1687 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
1688 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
1689 rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* CIK reuses the SI vram/gtt placement logic */
1690 si_vram_gtt_location(rdev, &rdev->mc);
1691 radeon_update_bandwidth_info(rdev);
1698 * VMID 0 is the physical GPU addresses as used by the kernel.
1699 * VMIDs 1-15 are used for userspace clients and are handled
1700 * by the radeon vm/hsa code.
1703 * cik_pcie_gart_tlb_flush - gart tlb flush callback
1705 * @rdev: radeon_device pointer
1707 * Flush the TLB for the VMID 0 page table (CIK).
1709 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
1711 /* flush hdp cache */
1712 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
1714 /* bits 0-15 are the VM contexts0-15 */
/* only bit 0 is set: invalidate VMID 0 (the kernel's GART mapping) */
1715 WREG32(VM_INVALIDATE_REQUEST, 0x1);
1719 * cik_pcie_gart_enable - gart enable
1721 * @rdev: radeon_device pointer
1723 * This sets up the TLBs, programs the page tables for VMID0,
1724 * sets up the hw for VMIDs 1-15 which are allocated on
1725 * demand, and sets up the global locations for the LDS, GDS,
1726 * and GPUVM for FSA64 clients (CIK).
1727 * Returns 0 for success, errors for failure.
1729 static int cik_pcie_gart_enable(struct radeon_device *rdev)
/* the GART page table must already live in a VRAM BO */
1733 if (rdev->gart.robj == NULL) {
1734 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
1737 r = radeon_gart_table_vram_pin(rdev);
/* re-write all existing GART entries into the freshly pinned table */
1740 radeon_gart_restore(rdev);
1741 /* Setup TLB control */
1742 WREG32(MC_VM_MX_L1_TLB_CNTL,
1745 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
1746 ENABLE_ADVANCED_DRIVER_MODEL |
1747 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1748 /* Setup L2 cache */
1749 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
1750 ENABLE_L2_FRAGMENT_PROCESSING |
1751 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1752 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1753 EFFECTIVE_L2_QUEUE_SIZE(7) |
1754 CONTEXT1_IDENTITY_ACCESS_MODE(1));
1755 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
1756 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1757 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
1758 /* setup context0 */
/* context0 (VMID 0) maps the GTT range through the kernel's table */
1759 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
1760 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
1761 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
/* faulting accesses are redirected to the dummy page */
1762 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
1763 (u32)(rdev->dummy_page.addr >> 12));
1764 WREG32(VM_CONTEXT0_CNTL2, 0);
1765 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
1766 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
1772 /* empty context1-15 */
1773 /* FIXME start with 4G, once using 2 level pt switch to full
1776 /* set vm size, must be a multiple of 4 */
1777 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
1778 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
/* contexts 1-7 and 8-15 use two separate register banks */
1779 for (i = 1; i < 16; i++) {
1781 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
1782 rdev->gart.table_addr >> 12);
1784 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
1785 rdev->gart.table_addr >> 12);
1788 /* enable context1-15 */
1789 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
1790 (u32)(rdev->dummy_page.addr >> 12));
1791 WREG32(VM_CONTEXT1_CNTL2, 4);
/* enable all fault interrupts and dummy-page redirection for
 * userspace contexts
 */
1792 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
1793 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
1794 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
1795 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
1796 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
1797 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
1798 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
1799 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
1800 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
1801 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
1802 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
1803 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
1804 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
1806 /* TC cache setup ??? */
1807 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
1808 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
1809 WREG32(TC_CFG_L1_STORE_POLICY, 0);
1811 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
1812 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
1813 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
1814 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
1815 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
1817 WREG32(TC_CFG_L1_VOLATILE, 0);
1818 WREG32(TC_CFG_L2_VOLATILE, 0);
/* Kaveri-only hub tweak */
1820 if (rdev->family == CHIP_KAVERI) {
1821 u32 tmp = RREG32(CHUB_CONTROL);
1823 WREG32(CHUB_CONTROL, tmp);
1826 /* XXX SH_MEM regs */
1827 /* where to put LDS, scratch, GPUVM in FSA64 space */
/* program the shader memory apertures identically for every VMID */
1828 for (i = 0; i < 16; i++) {
1829 WREG32(SRBM_GFX_CNTL, VMID(i));
1830 WREG32(SH_MEM_CONFIG, 0);
1831 WREG32(SH_MEM_APE1_BASE, 1);
1832 WREG32(SH_MEM_APE1_LIMIT, 0);
1833 WREG32(SH_MEM_BASES, 0);
/* restore VMID 0 selection */
1835 WREG32(SRBM_GFX_CNTL, 0);
1837 cik_pcie_gart_tlb_flush(rdev);
1838 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
1839 (unsigned)(rdev->mc.gtt_size >> 20),
1840 (unsigned long long)rdev->gart.table_addr);
1841 rdev->gart.ready = true;
1846 * cik_pcie_gart_disable - gart disable
1848 * @rdev: radeon_device pointer
1850 * This disables all VM page table (CIK).
1852 static void cik_pcie_gart_disable(struct radeon_device *rdev)
1854 /* Disable all tables */
1855 WREG32(VM_CONTEXT0_CNTL, 0);
1856 WREG32(VM_CONTEXT1_CNTL, 0);
1857 /* Setup TLB control */
/* no ENABLE_L1_TLB: the TLB is left disabled */
1858 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
1859 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1860 /* Setup L2 cache */
/* no ENABLE_L2_CACHE: the L2 VM cache is left disabled */
1862 ENABLE_L2_FRAGMENT_PROCESSING |
1863 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1864 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1865 EFFECTIVE_L2_QUEUE_SIZE(7) |
1866 CONTEXT1_IDENTITY_ACCESS_MODE(1));
1867 WREG32(VM_L2_CNTL2, 0);
1868 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1869 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
/* unpin the page table BO now that the hw no longer references it */
1870 radeon_gart_table_vram_unpin(rdev);
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* shut the hardware page tables down before releasing the
	 * table memory and the common gart state
	 */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software: the hardware checks IBs */
	return 0;
}
1903 * VMID 0 is the physical GPU addresses as used by the kernel.
1904 * VMIDs 1-15 are used for userspace clients and are handled
1905 * by the radeon vm/hsa code.
1908 * cik_vm_init - cik vm init callback
1910 * @rdev: radeon_device pointer
1912 * Inits cik specific vm parameters (number of VMs, base of vram for
1913 * VMIDs 1-15) (CIK).
1914 * Returns 0 for success.
1916 int cik_vm_init(struct radeon_device *rdev)
/* VMID 0 + 15 userspace VMs */
1919 rdev->vm_manager.nvm = 16;
1920 /* base offset of vram pages */
1921 if (rdev->flags & RADEON_IS_IGP) {
/* APU: the vram carveout base comes from the MC.
 * NOTE(review): confirm how the raw register value is converted to
 * a byte address before being stored below.
 */
1922 u64 tmp = RREG32(MC_VM_FB_OFFSET);
1924 rdev->vm_manager.vram_base_offset = tmp;
/* dGPU: vram starts at offset 0 */
1926 rdev->vm_manager.vram_base_offset = 0;
1932 * cik_vm_fini - cik vm fini callback
1934 * @rdev: radeon_device pointer
1936 * Tear down any asic specific VM setup (CIK).
1938 void cik_vm_fini(struct radeon_device *rdev)