2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
30 #include "radeon_asic.h"
33 #include "cik_blit_shaders.h"
/* Firmware image sizes, in dwords (multiplied by 4 when validating file sizes) */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
#define CIK_MEC_UCODE_SIZE 4192
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
#define CIK_MC_UCODE_SIZE 7866
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64
51 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
57 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
62 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
64 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
65 MODULE_FIRMWARE("radeon/KABINI_me.bin");
66 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
67 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
68 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
69 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
71 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72 extern void r600_ih_ring_fini(struct radeon_device *rdev);
73 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
75 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
76 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
77 extern void si_rlc_fini(struct radeon_device *rdev);
78 extern int si_rlc_init(struct radeon_device *rdev);
79 static void cik_rlc_stop(struct radeon_device *rdev);
82 * Indirect registers accessor
84 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
88 WREG32(PCIE_INDEX, reg);
89 (void)RREG32(PCIE_INDEX);
90 r = RREG32(PCIE_DATA);
94 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
96 WREG32(PCIE_INDEX, reg);
97 (void)RREG32(PCIE_INDEX);
99 (void)RREG32(PCIE_DATA);
102 static const u32 bonaire_golden_spm_registers[] =
104 0x30800, 0xe0ffffff, 0xe0000000
107 static const u32 bonaire_golden_common_registers[] =
109 0xc770, 0xffffffff, 0x00000800,
110 0xc774, 0xffffffff, 0x00000800,
111 0xc798, 0xffffffff, 0x00007fbf,
112 0xc79c, 0xffffffff, 0x00007faf
115 static const u32 bonaire_golden_registers[] =
117 0x3354, 0x00000333, 0x00000333,
118 0x3350, 0x000c0fc0, 0x00040200,
119 0x9a10, 0x00010000, 0x00058208,
120 0x3c000, 0xffff1fff, 0x00140000,
121 0x3c200, 0xfdfc0fff, 0x00000100,
122 0x3c234, 0x40000000, 0x40000200,
123 0x9830, 0xffffffff, 0x00000000,
124 0x9834, 0xf00fffff, 0x00000400,
125 0x9838, 0x0002021c, 0x00020200,
126 0xc78, 0x00000080, 0x00000000,
127 0x5bb0, 0x000000f0, 0x00000070,
128 0x5bc0, 0xf0311fff, 0x80300000,
129 0x98f8, 0x73773777, 0x12010001,
130 0x350c, 0x00810000, 0x408af000,
131 0x7030, 0x31000111, 0x00000011,
132 0x2f48, 0x73773777, 0x12010001,
133 0x220c, 0x00007fb6, 0x0021a1b1,
134 0x2210, 0x00007fb6, 0x002021b1,
135 0x2180, 0x00007fb6, 0x00002191,
136 0x2218, 0x00007fb6, 0x002121b1,
137 0x221c, 0x00007fb6, 0x002021b1,
138 0x21dc, 0x00007fb6, 0x00002191,
139 0x21e0, 0x00007fb6, 0x00002191,
140 0x3628, 0x0000003f, 0x0000000a,
141 0x362c, 0x0000003f, 0x0000000a,
142 0x2ae4, 0x00073ffe, 0x000022a2,
143 0x240c, 0x000007ff, 0x00000000,
144 0x8a14, 0xf000003f, 0x00000007,
145 0x8bf0, 0x00002001, 0x00000001,
146 0x8b24, 0xffffffff, 0x00ffffff,
147 0x30a04, 0x0000ff0f, 0x00000000,
148 0x28a4c, 0x07ffffff, 0x06000000,
149 0x4d8, 0x00000fff, 0x00000100,
150 0x3e78, 0x00000001, 0x00000002,
151 0x9100, 0x03000000, 0x0362c688,
152 0x8c00, 0x000000ff, 0x00000001,
153 0xe40, 0x00001fff, 0x00001fff,
154 0x9060, 0x0000007f, 0x00000020,
155 0x9508, 0x00010000, 0x00010000,
156 0xac14, 0x000003ff, 0x000000f3,
157 0xac0c, 0xffffffff, 0x00001032
160 static const u32 bonaire_mgcg_cgcg_init[] =
162 0xc420, 0xffffffff, 0xfffffffc,
163 0x30800, 0xffffffff, 0xe0000000,
164 0x3c2a0, 0xffffffff, 0x00000100,
165 0x3c208, 0xffffffff, 0x00000100,
166 0x3c2c0, 0xffffffff, 0xc0000100,
167 0x3c2c8, 0xffffffff, 0xc0000100,
168 0x3c2c4, 0xffffffff, 0xc0000100,
169 0x55e4, 0xffffffff, 0x00600100,
170 0x3c280, 0xffffffff, 0x00000100,
171 0x3c214, 0xffffffff, 0x06000100,
172 0x3c220, 0xffffffff, 0x00000100,
173 0x3c218, 0xffffffff, 0x06000100,
174 0x3c204, 0xffffffff, 0x00000100,
175 0x3c2e0, 0xffffffff, 0x00000100,
176 0x3c224, 0xffffffff, 0x00000100,
177 0x3c200, 0xffffffff, 0x00000100,
178 0x3c230, 0xffffffff, 0x00000100,
179 0x3c234, 0xffffffff, 0x00000100,
180 0x3c250, 0xffffffff, 0x00000100,
181 0x3c254, 0xffffffff, 0x00000100,
182 0x3c258, 0xffffffff, 0x00000100,
183 0x3c25c, 0xffffffff, 0x00000100,
184 0x3c260, 0xffffffff, 0x00000100,
185 0x3c27c, 0xffffffff, 0x00000100,
186 0x3c278, 0xffffffff, 0x00000100,
187 0x3c210, 0xffffffff, 0x06000100,
188 0x3c290, 0xffffffff, 0x00000100,
189 0x3c274, 0xffffffff, 0x00000100,
190 0x3c2b4, 0xffffffff, 0x00000100,
191 0x3c2b0, 0xffffffff, 0x00000100,
192 0x3c270, 0xffffffff, 0x00000100,
193 0x30800, 0xffffffff, 0xe0000000,
194 0x3c020, 0xffffffff, 0x00010000,
195 0x3c024, 0xffffffff, 0x00030002,
196 0x3c028, 0xffffffff, 0x00040007,
197 0x3c02c, 0xffffffff, 0x00060005,
198 0x3c030, 0xffffffff, 0x00090008,
199 0x3c034, 0xffffffff, 0x00010000,
200 0x3c038, 0xffffffff, 0x00030002,
201 0x3c03c, 0xffffffff, 0x00040007,
202 0x3c040, 0xffffffff, 0x00060005,
203 0x3c044, 0xffffffff, 0x00090008,
204 0x3c048, 0xffffffff, 0x00010000,
205 0x3c04c, 0xffffffff, 0x00030002,
206 0x3c050, 0xffffffff, 0x00040007,
207 0x3c054, 0xffffffff, 0x00060005,
208 0x3c058, 0xffffffff, 0x00090008,
209 0x3c05c, 0xffffffff, 0x00010000,
210 0x3c060, 0xffffffff, 0x00030002,
211 0x3c064, 0xffffffff, 0x00040007,
212 0x3c068, 0xffffffff, 0x00060005,
213 0x3c06c, 0xffffffff, 0x00090008,
214 0x3c070, 0xffffffff, 0x00010000,
215 0x3c074, 0xffffffff, 0x00030002,
216 0x3c078, 0xffffffff, 0x00040007,
217 0x3c07c, 0xffffffff, 0x00060005,
218 0x3c080, 0xffffffff, 0x00090008,
219 0x3c084, 0xffffffff, 0x00010000,
220 0x3c088, 0xffffffff, 0x00030002,
221 0x3c08c, 0xffffffff, 0x00040007,
222 0x3c090, 0xffffffff, 0x00060005,
223 0x3c094, 0xffffffff, 0x00090008,
224 0x3c098, 0xffffffff, 0x00010000,
225 0x3c09c, 0xffffffff, 0x00030002,
226 0x3c0a0, 0xffffffff, 0x00040007,
227 0x3c0a4, 0xffffffff, 0x00060005,
228 0x3c0a8, 0xffffffff, 0x00090008,
229 0x3c000, 0xffffffff, 0x96e00200,
230 0x8708, 0xffffffff, 0x00900100,
231 0xc424, 0xffffffff, 0x0020003f,
232 0x38, 0xffffffff, 0x0140001c,
233 0x3c, 0x000f0000, 0x000f0000,
234 0x220, 0xffffffff, 0xC060000C,
235 0x224, 0xc0000fff, 0x00000100,
236 0xf90, 0xffffffff, 0x00000100,
237 0xf98, 0x00000101, 0x00000000,
238 0x20a8, 0xffffffff, 0x00000104,
239 0x55e4, 0xff000fff, 0x00000100,
240 0x30cc, 0xc0000fff, 0x00000104,
241 0xc1e4, 0x00000001, 0x00000001,
242 0xd00c, 0xff000ff0, 0x00000100,
243 0xd80c, 0xff000ff0, 0x00000100
246 static const u32 spectre_golden_spm_registers[] =
248 0x30800, 0xe0ffffff, 0xe0000000
251 static const u32 spectre_golden_common_registers[] =
253 0xc770, 0xffffffff, 0x00000800,
254 0xc774, 0xffffffff, 0x00000800,
255 0xc798, 0xffffffff, 0x00007fbf,
256 0xc79c, 0xffffffff, 0x00007faf
259 static const u32 spectre_golden_registers[] =
261 0x3c000, 0xffff1fff, 0x96940200,
262 0x3c00c, 0xffff0001, 0xff000000,
263 0x3c200, 0xfffc0fff, 0x00000100,
264 0x6ed8, 0x00010101, 0x00010000,
265 0x9834, 0xf00fffff, 0x00000400,
266 0x9838, 0xfffffffc, 0x00020200,
267 0x5bb0, 0x000000f0, 0x00000070,
268 0x5bc0, 0xf0311fff, 0x80300000,
269 0x98f8, 0x73773777, 0x12010001,
270 0x9b7c, 0x00ff0000, 0x00fc0000,
271 0x2f48, 0x73773777, 0x12010001,
272 0x8a14, 0xf000003f, 0x00000007,
273 0x8b24, 0xffffffff, 0x00ffffff,
274 0x28350, 0x3f3f3fff, 0x00000082,
275 0x28355, 0x0000003f, 0x00000000,
276 0x3e78, 0x00000001, 0x00000002,
277 0x913c, 0xffff03df, 0x00000004,
278 0xc768, 0x00000008, 0x00000008,
279 0x8c00, 0x000008ff, 0x00000800,
280 0x9508, 0x00010000, 0x00010000,
281 0xac0c, 0xffffffff, 0x54763210,
282 0x214f8, 0x01ff01ff, 0x00000002,
283 0x21498, 0x007ff800, 0x00200000,
284 0x2015c, 0xffffffff, 0x00000f40,
285 0x30934, 0xffffffff, 0x00000001
288 static const u32 spectre_mgcg_cgcg_init[] =
290 0xc420, 0xffffffff, 0xfffffffc,
291 0x30800, 0xffffffff, 0xe0000000,
292 0x3c2a0, 0xffffffff, 0x00000100,
293 0x3c208, 0xffffffff, 0x00000100,
294 0x3c2c0, 0xffffffff, 0x00000100,
295 0x3c2c8, 0xffffffff, 0x00000100,
296 0x3c2c4, 0xffffffff, 0x00000100,
297 0x55e4, 0xffffffff, 0x00600100,
298 0x3c280, 0xffffffff, 0x00000100,
299 0x3c214, 0xffffffff, 0x06000100,
300 0x3c220, 0xffffffff, 0x00000100,
301 0x3c218, 0xffffffff, 0x06000100,
302 0x3c204, 0xffffffff, 0x00000100,
303 0x3c2e0, 0xffffffff, 0x00000100,
304 0x3c224, 0xffffffff, 0x00000100,
305 0x3c200, 0xffffffff, 0x00000100,
306 0x3c230, 0xffffffff, 0x00000100,
307 0x3c234, 0xffffffff, 0x00000100,
308 0x3c250, 0xffffffff, 0x00000100,
309 0x3c254, 0xffffffff, 0x00000100,
310 0x3c258, 0xffffffff, 0x00000100,
311 0x3c25c, 0xffffffff, 0x00000100,
312 0x3c260, 0xffffffff, 0x00000100,
313 0x3c27c, 0xffffffff, 0x00000100,
314 0x3c278, 0xffffffff, 0x00000100,
315 0x3c210, 0xffffffff, 0x06000100,
316 0x3c290, 0xffffffff, 0x00000100,
317 0x3c274, 0xffffffff, 0x00000100,
318 0x3c2b4, 0xffffffff, 0x00000100,
319 0x3c2b0, 0xffffffff, 0x00000100,
320 0x3c270, 0xffffffff, 0x00000100,
321 0x30800, 0xffffffff, 0xe0000000,
322 0x3c020, 0xffffffff, 0x00010000,
323 0x3c024, 0xffffffff, 0x00030002,
324 0x3c028, 0xffffffff, 0x00040007,
325 0x3c02c, 0xffffffff, 0x00060005,
326 0x3c030, 0xffffffff, 0x00090008,
327 0x3c034, 0xffffffff, 0x00010000,
328 0x3c038, 0xffffffff, 0x00030002,
329 0x3c03c, 0xffffffff, 0x00040007,
330 0x3c040, 0xffffffff, 0x00060005,
331 0x3c044, 0xffffffff, 0x00090008,
332 0x3c048, 0xffffffff, 0x00010000,
333 0x3c04c, 0xffffffff, 0x00030002,
334 0x3c050, 0xffffffff, 0x00040007,
335 0x3c054, 0xffffffff, 0x00060005,
336 0x3c058, 0xffffffff, 0x00090008,
337 0x3c05c, 0xffffffff, 0x00010000,
338 0x3c060, 0xffffffff, 0x00030002,
339 0x3c064, 0xffffffff, 0x00040007,
340 0x3c068, 0xffffffff, 0x00060005,
341 0x3c06c, 0xffffffff, 0x00090008,
342 0x3c070, 0xffffffff, 0x00010000,
343 0x3c074, 0xffffffff, 0x00030002,
344 0x3c078, 0xffffffff, 0x00040007,
345 0x3c07c, 0xffffffff, 0x00060005,
346 0x3c080, 0xffffffff, 0x00090008,
347 0x3c084, 0xffffffff, 0x00010000,
348 0x3c088, 0xffffffff, 0x00030002,
349 0x3c08c, 0xffffffff, 0x00040007,
350 0x3c090, 0xffffffff, 0x00060005,
351 0x3c094, 0xffffffff, 0x00090008,
352 0x3c098, 0xffffffff, 0x00010000,
353 0x3c09c, 0xffffffff, 0x00030002,
354 0x3c0a0, 0xffffffff, 0x00040007,
355 0x3c0a4, 0xffffffff, 0x00060005,
356 0x3c0a8, 0xffffffff, 0x00090008,
357 0x3c0ac, 0xffffffff, 0x00010000,
358 0x3c0b0, 0xffffffff, 0x00030002,
359 0x3c0b4, 0xffffffff, 0x00040007,
360 0x3c0b8, 0xffffffff, 0x00060005,
361 0x3c0bc, 0xffffffff, 0x00090008,
362 0x3c000, 0xffffffff, 0x96e00200,
363 0x8708, 0xffffffff, 0x00900100,
364 0xc424, 0xffffffff, 0x0020003f,
365 0x38, 0xffffffff, 0x0140001c,
366 0x3c, 0x000f0000, 0x000f0000,
367 0x220, 0xffffffff, 0xC060000C,
368 0x224, 0xc0000fff, 0x00000100,
369 0xf90, 0xffffffff, 0x00000100,
370 0xf98, 0x00000101, 0x00000000,
371 0x20a8, 0xffffffff, 0x00000104,
372 0x55e4, 0xff000fff, 0x00000100,
373 0x30cc, 0xc0000fff, 0x00000104,
374 0xc1e4, 0x00000001, 0x00000001,
375 0xd00c, 0xff000ff0, 0x00000100,
376 0xd80c, 0xff000ff0, 0x00000100
379 static const u32 kalindi_golden_spm_registers[] =
381 0x30800, 0xe0ffffff, 0xe0000000
384 static const u32 kalindi_golden_common_registers[] =
386 0xc770, 0xffffffff, 0x00000800,
387 0xc774, 0xffffffff, 0x00000800,
388 0xc798, 0xffffffff, 0x00007fbf,
389 0xc79c, 0xffffffff, 0x00007faf
392 static const u32 kalindi_golden_registers[] =
394 0x3c000, 0xffffdfff, 0x6e944040,
395 0x55e4, 0xff607fff, 0xfc000100,
396 0x3c220, 0xff000fff, 0x00000100,
397 0x3c224, 0xff000fff, 0x00000100,
398 0x3c200, 0xfffc0fff, 0x00000100,
399 0x6ed8, 0x00010101, 0x00010000,
400 0x9830, 0xffffffff, 0x00000000,
401 0x9834, 0xf00fffff, 0x00000400,
402 0x5bb0, 0x000000f0, 0x00000070,
403 0x5bc0, 0xf0311fff, 0x80300000,
404 0x98f8, 0x73773777, 0x12010001,
405 0x98fc, 0xffffffff, 0x00000010,
406 0x9b7c, 0x00ff0000, 0x00fc0000,
407 0x8030, 0x00001f0f, 0x0000100a,
408 0x2f48, 0x73773777, 0x12010001,
409 0x2408, 0x000fffff, 0x000c007f,
410 0x8a14, 0xf000003f, 0x00000007,
411 0x8b24, 0x3fff3fff, 0x00ffcfff,
412 0x30a04, 0x0000ff0f, 0x00000000,
413 0x28a4c, 0x07ffffff, 0x06000000,
414 0x4d8, 0x00000fff, 0x00000100,
415 0x3e78, 0x00000001, 0x00000002,
416 0xc768, 0x00000008, 0x00000008,
417 0x8c00, 0x000000ff, 0x00000003,
418 0x214f8, 0x01ff01ff, 0x00000002,
419 0x21498, 0x007ff800, 0x00200000,
420 0x2015c, 0xffffffff, 0x00000f40,
421 0x88c4, 0x001f3ae3, 0x00000082,
422 0x88d4, 0x0000001f, 0x00000010,
423 0x30934, 0xffffffff, 0x00000000
426 static const u32 kalindi_mgcg_cgcg_init[] =
428 0xc420, 0xffffffff, 0xfffffffc,
429 0x30800, 0xffffffff, 0xe0000000,
430 0x3c2a0, 0xffffffff, 0x00000100,
431 0x3c208, 0xffffffff, 0x00000100,
432 0x3c2c0, 0xffffffff, 0x00000100,
433 0x3c2c8, 0xffffffff, 0x00000100,
434 0x3c2c4, 0xffffffff, 0x00000100,
435 0x55e4, 0xffffffff, 0x00600100,
436 0x3c280, 0xffffffff, 0x00000100,
437 0x3c214, 0xffffffff, 0x06000100,
438 0x3c220, 0xffffffff, 0x00000100,
439 0x3c218, 0xffffffff, 0x06000100,
440 0x3c204, 0xffffffff, 0x00000100,
441 0x3c2e0, 0xffffffff, 0x00000100,
442 0x3c224, 0xffffffff, 0x00000100,
443 0x3c200, 0xffffffff, 0x00000100,
444 0x3c230, 0xffffffff, 0x00000100,
445 0x3c234, 0xffffffff, 0x00000100,
446 0x3c250, 0xffffffff, 0x00000100,
447 0x3c254, 0xffffffff, 0x00000100,
448 0x3c258, 0xffffffff, 0x00000100,
449 0x3c25c, 0xffffffff, 0x00000100,
450 0x3c260, 0xffffffff, 0x00000100,
451 0x3c27c, 0xffffffff, 0x00000100,
452 0x3c278, 0xffffffff, 0x00000100,
453 0x3c210, 0xffffffff, 0x06000100,
454 0x3c290, 0xffffffff, 0x00000100,
455 0x3c274, 0xffffffff, 0x00000100,
456 0x3c2b4, 0xffffffff, 0x00000100,
457 0x3c2b0, 0xffffffff, 0x00000100,
458 0x3c270, 0xffffffff, 0x00000100,
459 0x30800, 0xffffffff, 0xe0000000,
460 0x3c020, 0xffffffff, 0x00010000,
461 0x3c024, 0xffffffff, 0x00030002,
462 0x3c028, 0xffffffff, 0x00040007,
463 0x3c02c, 0xffffffff, 0x00060005,
464 0x3c030, 0xffffffff, 0x00090008,
465 0x3c034, 0xffffffff, 0x00010000,
466 0x3c038, 0xffffffff, 0x00030002,
467 0x3c03c, 0xffffffff, 0x00040007,
468 0x3c040, 0xffffffff, 0x00060005,
469 0x3c044, 0xffffffff, 0x00090008,
470 0x3c000, 0xffffffff, 0x96e00200,
471 0x8708, 0xffffffff, 0x00900100,
472 0xc424, 0xffffffff, 0x0020003f,
473 0x38, 0xffffffff, 0x0140001c,
474 0x3c, 0x000f0000, 0x000f0000,
475 0x220, 0xffffffff, 0xC060000C,
476 0x224, 0xc0000fff, 0x00000100,
477 0x20a8, 0xffffffff, 0x00000104,
478 0x55e4, 0xff000fff, 0x00000100,
479 0x30cc, 0xc0000fff, 0x00000104,
480 0xc1e4, 0x00000001, 0x00000001,
481 0xd00c, 0xff000ff0, 0x00000100,
482 0xd80c, 0xff000ff0, 0x00000100
485 static void cik_init_golden_registers(struct radeon_device *rdev)
487 switch (rdev->family) {
489 radeon_program_register_sequence(rdev,
490 bonaire_mgcg_cgcg_init,
491 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
492 radeon_program_register_sequence(rdev,
493 bonaire_golden_registers,
494 (const u32)ARRAY_SIZE(bonaire_golden_registers));
495 radeon_program_register_sequence(rdev,
496 bonaire_golden_common_registers,
497 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
498 radeon_program_register_sequence(rdev,
499 bonaire_golden_spm_registers,
500 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
503 radeon_program_register_sequence(rdev,
504 kalindi_mgcg_cgcg_init,
505 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
506 radeon_program_register_sequence(rdev,
507 kalindi_golden_registers,
508 (const u32)ARRAY_SIZE(kalindi_golden_registers));
509 radeon_program_register_sequence(rdev,
510 kalindi_golden_common_registers,
511 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
512 radeon_program_register_sequence(rdev,
513 kalindi_golden_spm_registers,
514 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
517 radeon_program_register_sequence(rdev,
518 spectre_mgcg_cgcg_init,
519 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
520 radeon_program_register_sequence(rdev,
521 spectre_golden_registers,
522 (const u32)ARRAY_SIZE(spectre_golden_registers));
523 radeon_program_register_sequence(rdev,
524 spectre_golden_common_registers,
525 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
526 radeon_program_register_sequence(rdev,
527 spectre_golden_spm_registers,
528 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
536 * cik_get_xclk - get the xclk
538 * @rdev: radeon_device pointer
540 * Returns the reference clock used by the gfx engine
543 u32 cik_get_xclk(struct radeon_device *rdev)
545 u32 reference_clock = rdev->clock.spll.reference_freq;
547 if (rdev->flags & RADEON_IS_IGP) {
548 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
549 return reference_clock / 2;
551 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
552 return reference_clock / 4;
554 return reference_clock;
558 * cik_mm_rdoorbell - read a doorbell dword
560 * @rdev: radeon_device pointer
561 * @offset: byte offset into the aperture
563 * Returns the value in the doorbell aperture at the
564 * requested offset (CIK).
566 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
568 if (offset < rdev->doorbell.size) {
569 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
571 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
577 * cik_mm_wdoorbell - write a doorbell dword
579 * @rdev: radeon_device pointer
580 * @offset: byte offset into the aperture
583 * Writes @v to the doorbell aperture at the
584 * requested offset (CIK).
586 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
588 if (offset < rdev->doorbell.size) {
589 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
591 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
595 #define BONAIRE_IO_MC_REGS_SIZE 36
597 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
599 {0x00000070, 0x04400000},
600 {0x00000071, 0x80c01803},
601 {0x00000072, 0x00004004},
602 {0x00000073, 0x00000100},
603 {0x00000074, 0x00ff0000},
604 {0x00000075, 0x34000000},
605 {0x00000076, 0x08000014},
606 {0x00000077, 0x00cc08ec},
607 {0x00000078, 0x00000400},
608 {0x00000079, 0x00000000},
609 {0x0000007a, 0x04090000},
610 {0x0000007c, 0x00000000},
611 {0x0000007e, 0x4408a8e8},
612 {0x0000007f, 0x00000304},
613 {0x00000080, 0x00000000},
614 {0x00000082, 0x00000001},
615 {0x00000083, 0x00000002},
616 {0x00000084, 0xf3e4f400},
617 {0x00000085, 0x052024e3},
618 {0x00000087, 0x00000000},
619 {0x00000088, 0x01000000},
620 {0x0000008a, 0x1c0a0000},
621 {0x0000008b, 0xff010000},
622 {0x0000008d, 0xffffefff},
623 {0x0000008e, 0xfff3efff},
624 {0x0000008f, 0xfff3efbf},
625 {0x00000092, 0xf7ffffff},
626 {0x00000093, 0xffffff7f},
627 {0x00000095, 0x00101101},
628 {0x00000096, 0x00000fff},
629 {0x00000097, 0x00116fff},
630 {0x00000098, 0x60010000},
631 {0x00000099, 0x10010000},
632 {0x0000009a, 0x00006000},
633 {0x0000009b, 0x00001000},
634 {0x0000009f, 0x00b48000}
638 * cik_srbm_select - select specific register instances
640 * @rdev: radeon_device pointer
641 * @me: selected ME (micro engine)
646 * Switches the currently active registers instances. Some
647 * registers are instanced per VMID, others are instanced per
648 * me/pipe/queue combination.
650 static void cik_srbm_select(struct radeon_device *rdev,
651 u32 me, u32 pipe, u32 queue, u32 vmid)
653 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
656 QUEUEID(queue & 0x7));
657 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
662 * ci_mc_load_microcode - load MC ucode into the hw
664 * @rdev: radeon_device pointer
666 * Load the GDDR MC ucode into the hw (CIK).
667 * Returns 0 on success, error on failure.
669 static int ci_mc_load_microcode(struct radeon_device *rdev)
671 const __be32 *fw_data;
672 u32 running, blackout = 0;
674 int i, ucode_size, regs_size;
679 switch (rdev->family) {
682 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
683 ucode_size = CIK_MC_UCODE_SIZE;
684 regs_size = BONAIRE_IO_MC_REGS_SIZE;
688 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
692 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
693 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
696 /* reset the engine and set to writable */
697 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
698 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
700 /* load mc io regs */
701 for (i = 0; i < regs_size; i++) {
702 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
703 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
705 /* load the MC ucode */
706 fw_data = (const __be32 *)rdev->mc_fw->data;
707 for (i = 0; i < ucode_size; i++)
708 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
710 /* put the engine back into the active state */
711 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
712 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
713 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
715 /* wait for training to complete */
716 for (i = 0; i < rdev->usec_timeout; i++) {
717 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
721 for (i = 0; i < rdev->usec_timeout; i++) {
722 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
728 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
735 * cik_init_microcode - load ucode images from disk
737 * @rdev: radeon_device pointer
739 * Use the firmware interface to load the ucode images into
740 * the driver (not loaded into hw).
741 * Returns 0 on success, error on failure.
743 static int cik_init_microcode(struct radeon_device *rdev)
745 struct platform_device *pdev;
746 const char *chip_name;
747 size_t pfp_req_size, me_req_size, ce_req_size,
748 mec_req_size, rlc_req_size, mc_req_size,
755 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
758 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
762 switch (rdev->family) {
764 chip_name = "BONAIRE";
765 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
766 me_req_size = CIK_ME_UCODE_SIZE * 4;
767 ce_req_size = CIK_CE_UCODE_SIZE * 4;
768 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
769 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
770 mc_req_size = CIK_MC_UCODE_SIZE * 4;
771 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
774 chip_name = "KAVERI";
775 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
776 me_req_size = CIK_ME_UCODE_SIZE * 4;
777 ce_req_size = CIK_CE_UCODE_SIZE * 4;
778 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
779 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
780 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
783 chip_name = "KABINI";
784 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
785 me_req_size = CIK_ME_UCODE_SIZE * 4;
786 ce_req_size = CIK_CE_UCODE_SIZE * 4;
787 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
788 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
789 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
794 DRM_INFO("Loading %s Microcode\n", chip_name);
796 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
797 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
800 if (rdev->pfp_fw->size != pfp_req_size) {
802 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
803 rdev->pfp_fw->size, fw_name);
808 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
809 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
812 if (rdev->me_fw->size != me_req_size) {
814 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
815 rdev->me_fw->size, fw_name);
819 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
820 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
823 if (rdev->ce_fw->size != ce_req_size) {
825 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
826 rdev->ce_fw->size, fw_name);
830 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
831 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
834 if (rdev->mec_fw->size != mec_req_size) {
836 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
837 rdev->mec_fw->size, fw_name);
841 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
842 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
845 if (rdev->rlc_fw->size != rlc_req_size) {
847 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
848 rdev->rlc_fw->size, fw_name);
852 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
853 err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
856 if (rdev->sdma_fw->size != sdma_req_size) {
858 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
859 rdev->sdma_fw->size, fw_name);
863 /* No MC ucode on APUs */
864 if (!(rdev->flags & RADEON_IS_IGP)) {
865 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
866 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
869 if (rdev->mc_fw->size != mc_req_size) {
871 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
872 rdev->mc_fw->size, fw_name);
878 platform_device_unregister(pdev);
883 "cik_cp: Failed to load firmware \"%s\"\n",
885 release_firmware(rdev->pfp_fw);
887 release_firmware(rdev->me_fw);
889 release_firmware(rdev->ce_fw);
891 release_firmware(rdev->rlc_fw);
893 release_firmware(rdev->mc_fw);
903 * cik_tiling_mode_table_init - init the hw tiling table
905 * @rdev: radeon_device pointer
907 * Starting with SI, the tiling setup is done globally in a
908 * set of 32 tiling modes. Rather than selecting each set of
909 * parameters per surface as on older asics, we just select
910 * which index in the tiling table we want to use, and the
911 * surface uses those parameters (CIK).
913 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
915 const u32 num_tile_mode_states = 32;
916 const u32 num_secondary_tile_mode_states = 16;
917 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
918 u32 num_pipe_configs;
919 u32 num_rbs = rdev->config.cik.max_backends_per_se *
920 rdev->config.cik.max_shader_engines;
922 switch (rdev->config.cik.mem_row_size_in_kb) {
924 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
928 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
931 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
935 num_pipe_configs = rdev->config.cik.max_tile_pipes;
936 if (num_pipe_configs > 8)
937 num_pipe_configs = 8; /* ??? */
939 if (num_pipe_configs == 8) {
940 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
941 switch (reg_offset) {
943 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
944 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
945 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
946 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
949 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
950 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
951 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
952 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
955 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
956 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
957 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
958 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
961 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
963 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
967 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
968 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
970 TILE_SPLIT(split_equal_to_row_size));
973 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
974 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
977 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
983 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
984 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
985 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
986 TILE_SPLIT(split_equal_to_row_size));
989 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
990 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
993 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
994 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
998 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1003 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1004 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1005 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1009 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1010 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1015 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1016 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1019 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1020 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1022 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1026 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1027 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1032 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1033 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1037 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1038 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1041 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1042 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1043 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1044 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1047 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1048 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1049 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1053 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1054 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1055 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1062 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1063 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1065 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1066 switch (reg_offset) {
1068 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1071 NUM_BANKS(ADDR_SURF_16_BANK));
1074 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1075 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1076 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1077 NUM_BANKS(ADDR_SURF_16_BANK));
1080 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1081 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1082 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1083 NUM_BANKS(ADDR_SURF_16_BANK));
1086 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1087 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1088 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1089 NUM_BANKS(ADDR_SURF_16_BANK));
1092 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1095 NUM_BANKS(ADDR_SURF_8_BANK));
1098 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1099 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1100 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1101 NUM_BANKS(ADDR_SURF_4_BANK));
1104 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1105 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1106 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1107 NUM_BANKS(ADDR_SURF_2_BANK));
1110 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1111 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1112 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1113 NUM_BANKS(ADDR_SURF_16_BANK));
1116 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1117 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1118 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1119 NUM_BANKS(ADDR_SURF_16_BANK));
1122 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1125 NUM_BANKS(ADDR_SURF_16_BANK));
1128 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1131 NUM_BANKS(ADDR_SURF_16_BANK));
1134 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1137 NUM_BANKS(ADDR_SURF_8_BANK));
1140 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1143 NUM_BANKS(ADDR_SURF_4_BANK));
1146 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1149 NUM_BANKS(ADDR_SURF_2_BANK));
1155 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1157 } else if (num_pipe_configs == 4) {
1159 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1160 switch (reg_offset) {
1162 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1163 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1164 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1165 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1168 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1169 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1170 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1174 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1175 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1176 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1177 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1180 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1181 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1182 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1186 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1188 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1189 TILE_SPLIT(split_equal_to_row_size));
1192 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1193 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1196 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1197 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1198 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1199 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1202 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1203 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1204 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1205 TILE_SPLIT(split_equal_to_row_size));
1208 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1209 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1212 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1213 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1216 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1217 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1218 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1219 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1222 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1223 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1224 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1228 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1229 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1230 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1234 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1235 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1238 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1239 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1240 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1244 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1245 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1246 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1250 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1251 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1252 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1256 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1257 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1260 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1261 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1262 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1266 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1267 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1268 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1272 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1273 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1274 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1281 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1282 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1284 } else if (num_rbs < 4) {
1285 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1286 switch (reg_offset) {
1288 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1289 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1290 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1291 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1294 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1295 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1296 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1300 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1302 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1303 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1306 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1307 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1308 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1312 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1314 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1315 TILE_SPLIT(split_equal_to_row_size));
1318 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1319 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1322 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1323 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1324 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1328 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1330 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1331 TILE_SPLIT(split_equal_to_row_size));
1334 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1335 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1338 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1339 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1343 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1344 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1348 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1349 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1350 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1354 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1355 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1356 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1360 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1364 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1365 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1366 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1371 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1372 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1376 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1377 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1378 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1382 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1386 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1387 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1388 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1392 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1393 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1394 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1398 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1399 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1400 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1407 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1408 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1411 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1412 switch (reg_offset) {
1414 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1417 NUM_BANKS(ADDR_SURF_16_BANK));
1420 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1423 NUM_BANKS(ADDR_SURF_16_BANK));
1426 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1429 NUM_BANKS(ADDR_SURF_16_BANK));
1432 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1435 NUM_BANKS(ADDR_SURF_16_BANK));
1438 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1441 NUM_BANKS(ADDR_SURF_16_BANK));
1444 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1447 NUM_BANKS(ADDR_SURF_8_BANK));
1450 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1453 NUM_BANKS(ADDR_SURF_4_BANK));
1456 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1459 NUM_BANKS(ADDR_SURF_16_BANK));
1462 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1465 NUM_BANKS(ADDR_SURF_16_BANK));
1468 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1471 NUM_BANKS(ADDR_SURF_16_BANK));
1474 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1477 NUM_BANKS(ADDR_SURF_16_BANK));
1480 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1483 NUM_BANKS(ADDR_SURF_16_BANK));
1486 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1489 NUM_BANKS(ADDR_SURF_8_BANK));
1492 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1495 NUM_BANKS(ADDR_SURF_4_BANK));
1501 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1503 } else if (num_pipe_configs == 2) {
1504 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1505 switch (reg_offset) {
1507 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1508 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1509 PIPE_CONFIG(ADDR_SURF_P2) |
1510 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1513 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1514 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1515 PIPE_CONFIG(ADDR_SURF_P2) |
1516 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1519 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1520 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1521 PIPE_CONFIG(ADDR_SURF_P2) |
1522 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1525 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1527 PIPE_CONFIG(ADDR_SURF_P2) |
1528 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1531 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1532 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1533 PIPE_CONFIG(ADDR_SURF_P2) |
1534 TILE_SPLIT(split_equal_to_row_size));
1537 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1538 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1541 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1542 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1543 PIPE_CONFIG(ADDR_SURF_P2) |
1544 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1547 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1548 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1549 PIPE_CONFIG(ADDR_SURF_P2) |
1550 TILE_SPLIT(split_equal_to_row_size));
1553 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1556 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1557 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1560 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1561 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1562 PIPE_CONFIG(ADDR_SURF_P2) |
1563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1566 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1567 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1568 PIPE_CONFIG(ADDR_SURF_P2) |
1569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1572 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1573 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1574 PIPE_CONFIG(ADDR_SURF_P2) |
1575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1578 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1579 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1582 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1583 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1584 PIPE_CONFIG(ADDR_SURF_P2) |
1585 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1588 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1589 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1590 PIPE_CONFIG(ADDR_SURF_P2) |
1591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1594 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1595 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1596 PIPE_CONFIG(ADDR_SURF_P2) |
1597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1600 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1601 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1604 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1605 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1606 PIPE_CONFIG(ADDR_SURF_P2) |
1607 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1610 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1611 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1612 PIPE_CONFIG(ADDR_SURF_P2) |
1613 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1616 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1617 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1618 PIPE_CONFIG(ADDR_SURF_P2) |
1619 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1625 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1626 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1628 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1629 switch (reg_offset) {
1631 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1632 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1633 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1634 NUM_BANKS(ADDR_SURF_16_BANK));
1637 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1638 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1639 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1640 NUM_BANKS(ADDR_SURF_16_BANK));
1643 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1644 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1645 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1646 NUM_BANKS(ADDR_SURF_16_BANK));
1649 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1650 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1651 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1652 NUM_BANKS(ADDR_SURF_16_BANK));
1655 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1656 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1657 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1658 NUM_BANKS(ADDR_SURF_16_BANK));
1661 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1662 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1663 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1664 NUM_BANKS(ADDR_SURF_16_BANK));
1667 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1668 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1669 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1670 NUM_BANKS(ADDR_SURF_8_BANK));
1673 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1674 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1675 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1676 NUM_BANKS(ADDR_SURF_16_BANK));
1679 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1680 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1681 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1682 NUM_BANKS(ADDR_SURF_16_BANK));
1685 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1686 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1687 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1688 NUM_BANKS(ADDR_SURF_16_BANK));
1691 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1692 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1693 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1694 NUM_BANKS(ADDR_SURF_16_BANK));
1697 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1698 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1699 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1700 NUM_BANKS(ADDR_SURF_16_BANK));
1703 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1704 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1705 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1706 NUM_BANKS(ADDR_SURF_16_BANK));
1709 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1710 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1711 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1712 NUM_BANKS(ADDR_SURF_8_BANK));
1718 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1721 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1725 * cik_select_se_sh - select which SE, SH to address
1727 * @rdev: radeon_device pointer
1728 * @se_num: shader engine to address
1729 * @sh_num: sh block to address
1731 * Select which SE, SH combinations to address. Certain
1732 * registers are instanced per SE or SH. 0xffffffff means
1733 * broadcast to all SEs or SHs (CIK).
1735 static void cik_select_se_sh(struct radeon_device *rdev,
1736 u32 se_num, u32 sh_num)
1738 u32 data = INSTANCE_BROADCAST_WRITES;
1740 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1741 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1742 else if (se_num == 0xffffffff)
1743 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1744 else if (sh_num == 0xffffffff)
1745 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1747 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1748 WREG32(GRBM_GFX_INDEX, data);
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask, i.e. the low @bit_width bits set.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	/* shift in one set bit per requested mask bit */
	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}
1771 * cik_select_se_sh - select which SE, SH to address
1773 * @rdev: radeon_device pointer
1774 * @max_rb_num: max RBs (render backends) for the asic
1775 * @se_num: number of SEs (shader engines) for the asic
1776 * @sh_per_se: number of SH blocks per SE for the asic
1778 * Calculates the bitmask of disabled RBs (CIK).
1779 * Returns the disabled RB bitmask.
1781 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1782 u32 max_rb_num, u32 se_num,
1787 data = RREG32(CC_RB_BACKEND_DISABLE);
1789 data &= BACKEND_DISABLE_MASK;
1792 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1794 data >>= BACKEND_DISABLE_SHIFT;
1796 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1802 * cik_setup_rb - setup the RBs on the asic
1804 * @rdev: radeon_device pointer
1805 * @se_num: number of SEs (shader engines) for the asic
1806 * @sh_per_se: number of SH blocks per SE for the asic
1807 * @max_rb_num: max RBs (render backends) for the asic
1809 * Configures per-SE/SH RB registers (CIK).
1811 static void cik_setup_rb(struct radeon_device *rdev,
1812 u32 se_num, u32 sh_per_se,
1817 u32 disabled_rbs = 0;
1818 u32 enabled_rbs = 0;
1820 for (i = 0; i < se_num; i++) {
1821 for (j = 0; j < sh_per_se; j++) {
1822 cik_select_se_sh(rdev, i, j);
1823 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1824 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1827 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1830 for (i = 0; i < max_rb_num; i++) {
1831 if (!(disabled_rbs & mask))
1832 enabled_rbs |= mask;
1836 for (i = 0; i < se_num; i++) {
1837 cik_select_se_sh(rdev, i, 0xffffffff);
1839 for (j = 0; j < sh_per_se; j++) {
1840 switch (enabled_rbs & 3) {
1842 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1845 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1849 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1854 WREG32(PA_SC_RASTER_CONFIG, data);
1856 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1860 * cik_gpu_init - setup the 3D engine
1862 * @rdev: radeon_device pointer
1864 * Configures the 3D engine and tiling configuration
1865 * registers so that the 3D engine is usable.
1867 static void cik_gpu_init(struct radeon_device *rdev)
1869 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1870 u32 mc_shared_chmap, mc_arb_ramcfg;
1871 u32 hdp_host_path_cntl;
1875 switch (rdev->family) {
1877 rdev->config.cik.max_shader_engines = 2;
1878 rdev->config.cik.max_tile_pipes = 4;
1879 rdev->config.cik.max_cu_per_sh = 7;
1880 rdev->config.cik.max_sh_per_se = 1;
1881 rdev->config.cik.max_backends_per_se = 2;
1882 rdev->config.cik.max_texture_channel_caches = 4;
1883 rdev->config.cik.max_gprs = 256;
1884 rdev->config.cik.max_gs_threads = 32;
1885 rdev->config.cik.max_hw_contexts = 8;
1887 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1888 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1889 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1890 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1891 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1898 rdev->config.cik.max_shader_engines = 1;
1899 rdev->config.cik.max_tile_pipes = 2;
1900 rdev->config.cik.max_cu_per_sh = 2;
1901 rdev->config.cik.max_sh_per_se = 1;
1902 rdev->config.cik.max_backends_per_se = 1;
1903 rdev->config.cik.max_texture_channel_caches = 2;
1904 rdev->config.cik.max_gprs = 256;
1905 rdev->config.cik.max_gs_threads = 16;
1906 rdev->config.cik.max_hw_contexts = 8;
1908 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1909 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1910 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1911 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1912 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1916 /* Initialize HDP */
1917 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1918 WREG32((0x2c14 + j), 0x00000000);
1919 WREG32((0x2c18 + j), 0x00000000);
1920 WREG32((0x2c1c + j), 0x00000000);
1921 WREG32((0x2c20 + j), 0x00000000);
1922 WREG32((0x2c24 + j), 0x00000000);
1925 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1927 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1929 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1930 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1932 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1933 rdev->config.cik.mem_max_burst_length_bytes = 256;
1934 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1935 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1936 if (rdev->config.cik.mem_row_size_in_kb > 4)
1937 rdev->config.cik.mem_row_size_in_kb = 4;
1938 /* XXX use MC settings? */
1939 rdev->config.cik.shader_engine_tile_size = 32;
1940 rdev->config.cik.num_gpus = 1;
1941 rdev->config.cik.multi_gpu_tile_size = 64;
1943 /* fix up row size */
1944 gb_addr_config &= ~ROW_SIZE_MASK;
1945 switch (rdev->config.cik.mem_row_size_in_kb) {
1948 gb_addr_config |= ROW_SIZE(0);
1951 gb_addr_config |= ROW_SIZE(1);
1954 gb_addr_config |= ROW_SIZE(2);
1958 /* setup tiling info dword. gb_addr_config is not adequate since it does
1959 * not have bank info, so create a custom tiling dword.
1960 * bits 3:0 num_pipes
1961 * bits 7:4 num_banks
1962 * bits 11:8 group_size
1963 * bits 15:12 row_size
1965 rdev->config.cik.tile_config = 0;
1966 switch (rdev->config.cik.num_tile_pipes) {
1968 rdev->config.cik.tile_config |= (0 << 0);
1971 rdev->config.cik.tile_config |= (1 << 0);
1974 rdev->config.cik.tile_config |= (2 << 0);
1978 /* XXX what about 12? */
1979 rdev->config.cik.tile_config |= (3 << 0);
1982 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1983 rdev->config.cik.tile_config |= 1 << 4;
1985 rdev->config.cik.tile_config |= 0 << 4;
1986 rdev->config.cik.tile_config |=
1987 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1988 rdev->config.cik.tile_config |=
1989 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1991 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1992 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1993 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1994 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1995 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1996 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1997 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1998 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2000 cik_tiling_mode_table_init(rdev);
2002 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2003 rdev->config.cik.max_sh_per_se,
2004 rdev->config.cik.max_backends_per_se);
2006 /* set HW defaults for 3D engine */
2007 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2009 WREG32(SX_DEBUG_1, 0x20);
2011 WREG32(TA_CNTL_AUX, 0x00010000);
2013 tmp = RREG32(SPI_CONFIG_CNTL);
2015 WREG32(SPI_CONFIG_CNTL, tmp);
2017 WREG32(SQ_CONFIG, 1);
2019 WREG32(DB_DEBUG, 0);
2021 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2023 WREG32(DB_DEBUG2, tmp);
2025 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2027 WREG32(DB_DEBUG3, tmp);
2029 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2031 WREG32(CB_HW_CONTROL, tmp);
2033 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2035 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2036 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2037 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2038 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2040 WREG32(VGT_NUM_INSTANCES, 1);
2042 WREG32(CP_PERFMON_CNTL, 0);
2044 WREG32(SQ_CONFIG, 0);
2046 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2047 FORCE_EOV_MAX_REZ_CNT(255)));
2049 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2050 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2052 WREG32(VGT_GS_VERTEX_REUSE, 16);
2053 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2055 tmp = RREG32(HDP_MISC_CNTL);
2056 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2057 WREG32(HDP_MISC_CNTL, tmp);
2059 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2060 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2062 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2063 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2069 * GPU scratch registers helpers function.
2072 * cik_scratch_init - setup driver info for CP scratch regs
2074 * @rdev: radeon_device pointer
2076 * Set up the number and offset of the CP scratch registers.
2077 * NOTE: use of CP scratch registers is a legacy inferface and
2078 * is not used by default on newer asics (r6xx+). On newer asics,
2079 * memory buffers are used for fences rather than scratch regs.
2081 static void cik_scratch_init(struct radeon_device *rdev)
2085 rdev->scratch.num_reg = 7;
2086 rdev->scratch.reg_base = SCRATCH_REG0;
2087 for (i = 0; i < rdev->scratch.num_reg; i++) {
2088 rdev->scratch.free[i] = true;
2089 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2094 * cik_ring_test - basic gfx ring test
2096 * @rdev: radeon_device pointer
2097 * @ring: radeon_ring structure holding ring information
2099 * Allocate a scratch register and write to it using the gfx ring (CIK).
2100 * Provides a basic gfx ring test to verify that the ring is working.
2101 * Used by cik_cp_gfx_resume();
2102 * Returns 0 on success, error on failure.
2104 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2111 r = radeon_scratch_get(rdev, &scratch);
2113 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2116 WREG32(scratch, 0xCAFEDEAD);
2117 r = radeon_ring_lock(rdev, ring, 3);
2119 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2120 radeon_scratch_free(rdev, scratch);
2123 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2124 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2125 radeon_ring_write(ring, 0xDEADBEEF);
2126 radeon_ring_unlock_commit(rdev, ring);
2128 for (i = 0; i < rdev->usec_timeout; i++) {
2129 tmp = RREG32(scratch);
2130 if (tmp == 0xDEADBEEF)
2134 if (i < rdev->usec_timeout) {
2135 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2137 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2138 ring->idx, scratch, tmp);
2141 radeon_scratch_free(rdev, scratch);
2146 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2148 * @rdev: radeon_device pointer
2149 * @fence: radeon fence object
2151 * Emits a fence sequence number on the gfx ring and flushes
2154 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2155 struct radeon_fence *fence)
2157 struct radeon_ring *ring = &rdev->ring[fence->ring];
2158 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2160 /* EVENT_WRITE_EOP - flush caches, send int */
2161 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2162 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2164 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2166 radeon_ring_write(ring, addr & 0xfffffffc);
/* DATA_SEL(1): write a 32-bit value (fence seq); INT_SEL(2): raise an
 * interrupt when the write completes. */
2167 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2168 radeon_ring_write(ring, fence->seq);
2169 radeon_ring_write(ring, 0);
2171 /* We should be using the new WAIT_REG_MEM special op packet here
2172 * but it causes the CP to hang
/* HDP flush via plain WRITE_DATA to HDP_MEM_COHERENCY_FLUSH_CNTL instead */
2174 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2175 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2176 WRITE_DATA_DST_SEL(0)));
2177 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2178 radeon_ring_write(ring, 0);
2179 radeon_ring_write(ring, 0);
2183 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2185 * @rdev: radeon_device pointer
2186 * @fence: radeon fence object
2188 * Emits a fence sequence number on the compute ring and flushes
2191 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2192 struct radeon_fence *fence)
2194 struct radeon_ring *ring = &rdev->ring[fence->ring];
2195 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2197 /* RELEASE_MEM - flush caches, send int */
2198 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2199 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2201 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* Unlike EVENT_WRITE_EOP on the gfx ring, RELEASE_MEM carries the
 * data/int select dword before the address pair. */
2203 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2204 radeon_ring_write(ring, addr & 0xfffffffc);
2205 radeon_ring_write(ring, upper_32_bits(addr));
2206 radeon_ring_write(ring, fence->seq);
2207 radeon_ring_write(ring, 0);
2209 /* We should be using the new WAIT_REG_MEM special op packet here
2210 * but it causes the CP to hang
/* HDP flush via plain WRITE_DATA to HDP_MEM_COHERENCY_FLUSH_CNTL instead */
2212 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2213 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2214 WRITE_DATA_DST_SEL(0)));
2215 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2216 radeon_ring_write(ring, 0);
2217 radeon_ring_write(ring, 0);
/* cik_semaphore_ring_emit - emit a MEM_SEMAPHORE packet that either
 * signals or waits on @semaphore, depending on emit_wait. */
2220 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2221 struct radeon_ring *ring,
2222 struct radeon_semaphore *semaphore,
2225 uint64_t addr = semaphore->gpu_addr;
2226 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2228 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2229 radeon_ring_write(ring, addr & 0xffffffff);
/* high address bits share the dword with the wait/signal select */
2230 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2237 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2239 * @rdev: radeon_device pointer
2240 * @ib: radeon indirect buffer object
2242 * Emits a DE (drawing engine) or CE (constant engine) IB
2243 * on the gfx ring. IBs are usually generated by userspace
2244 * acceleration drivers and submitted to the kernel for
2245 * scheduling on the ring. This function schedules the IB
2246 * on the gfx ring for execution by the GPU.
2248 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2250 struct radeon_ring *ring = &rdev->ring[ib->ring];
2251 u32 header, control = INDIRECT_BUFFER_VALID;
2253 if (ib->is_const_ib) {
2254 /* set switch buffer packet before const IB */
2255 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2256 radeon_ring_write(ring, 0);
2258 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
/* Track the expected rptr after this submission so hangs can be
 * diagnosed: via the rptr-save register if present, else writeback. */
2261 if (ring->rptr_save_reg) {
2262 next_rptr = ring->wptr + 3 + 4;
2263 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2264 radeon_ring_write(ring, ((ring->rptr_save_reg -
2265 PACKET3_SET_UCONFIG_REG_START) >> 2));
2266 radeon_ring_write(ring, next_rptr);
2267 } else if (rdev->wb.enabled) {
2268 next_rptr = ring->wptr + 5 + 4;
2269 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2270 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2271 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2272 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2273 radeon_ring_write(ring, next_rptr);
2276 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
/* control dword: IB length in dwords plus the VMID (0 for kernel IBs) */
2279 control |= ib->length_dw |
2280 (ib->vm ? (ib->vm->id << 24) : 0);
2282 radeon_ring_write(ring, header);
2283 radeon_ring_write(ring,
2287 (ib->gpu_addr & 0xFFFFFFFC));
2288 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2289 radeon_ring_write(ring, control);
2293 * cik_ib_test - basic gfx ring IB test
2295 * @rdev: radeon_device pointer
2296 * @ring: radeon_ring structure holding ring information
2298 * Allocate an IB and execute it on the gfx ring (CIK).
2299 * Provides a basic gfx ring test to verify that IBs are working.
2300 * Returns 0 on success, error on failure.
2302 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2304 struct radeon_ib ib;
2310 r = radeon_scratch_get(rdev, &scratch);
2312 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
/* Seed a sentinel so the IB's write is distinguishable from stale data. */
2315 WREG32(scratch, 0xCAFEDEAD);
2316 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2318 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
/* IB body: one SET_UCONFIG_REG packet writing 0xDEADBEEF to scratch. */
2321 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2322 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2323 ib.ptr[2] = 0xDEADBEEF;
2325 r = radeon_ib_schedule(rdev, &ib, NULL);
2327 radeon_scratch_free(rdev, scratch);
2328 radeon_ib_free(rdev, &ib);
2329 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
/* Wait for the IB's fence, then poll the scratch reg for the result. */
2332 r = radeon_fence_wait(ib.fence, false);
2334 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2337 for (i = 0; i < rdev->usec_timeout; i++) {
2338 tmp = RREG32(scratch);
2339 if (tmp == 0xDEADBEEF)
2343 if (i < rdev->usec_timeout) {
2344 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2346 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2350 radeon_scratch_free(rdev, scratch);
2351 radeon_ib_free(rdev, &ib);
2357 * On CIK, gfx and compute now have independent command processors.
2360 * Gfx consists of a single ring and can process both gfx jobs and
2361 * compute jobs. The gfx CP consists of three microengines (ME):
2362 * PFP - Pre-Fetch Parser
2364 * CE - Constant Engine
2365 * The PFP and ME make up what is considered the Drawing Engine (DE).
2366 * The CE is an asynchronous engine used for updating buffer descriptors
2367 * used by the DE so that they can be loaded into cache in parallel
2368 * while the DE is processing state update packets.
2371 * The compute CP consists of two microengines (ME):
2372 * MEC1 - Compute MicroEngine 1
2373 * MEC2 - Compute MicroEngine 2
2374 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2375 * The queues are exposed to userspace and are programmed directly
2376 * by the compute runtime.
2379 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2381 * @rdev: radeon_device pointer
2382 * @enable: enable or disable the MEs
2384 * Halts or unhalts the gfx MEs.
2386 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
/* Clearing CP_ME_CNTL releases all three gfx micro-engines; setting the
 * three HALT bits stops ME, PFP and CE and marks the gfx ring not ready. */
2389 WREG32(CP_ME_CNTL, 0);
2391 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2392 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2398 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2400 * @rdev: radeon_device pointer
2402 * Loads the gfx PFP, ME, and CE ucode.
2403 * Returns 0 for success, -EINVAL if the ucode is not available.
2405 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2407 const __be32 *fw_data;
2410 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
/* Engines must be halted before their ucode RAM is rewritten. */
2413 cik_cp_gfx_enable(rdev, false);
/* PFP: reset write addr, stream the big-endian image, reset addr again */
2416 fw_data = (const __be32 *)rdev->pfp_fw->data;
2417 WREG32(CP_PFP_UCODE_ADDR, 0);
2418 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2419 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2420 WREG32(CP_PFP_UCODE_ADDR, 0);
/* CE */
2423 fw_data = (const __be32 *)rdev->ce_fw->data;
2424 WREG32(CP_CE_UCODE_ADDR, 0);
2425 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2426 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2427 WREG32(CP_CE_UCODE_ADDR, 0);
/* ME */
2430 fw_data = (const __be32 *)rdev->me_fw->data;
2431 WREG32(CP_ME_RAM_WADDR, 0);
2432 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2433 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2434 WREG32(CP_ME_RAM_WADDR, 0);
/* leave all ucode address registers zeroed */
2436 WREG32(CP_PFP_UCODE_ADDR, 0);
2437 WREG32(CP_CE_UCODE_ADDR, 0);
2438 WREG32(CP_ME_RAM_WADDR, 0);
2439 WREG32(CP_ME_RAM_RADDR, 0);
2444 * cik_cp_gfx_start - start the gfx ring
2446 * @rdev: radeon_device pointer
2448 * Enables the ring and loads the clear state context and other
2449 * packets required to init the ring.
2450 * Returns 0 for success, error for failure.
2452 static int cik_cp_gfx_start(struct radeon_device *rdev)
2454 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2458 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2459 WREG32(CP_ENDIAN_SWAP, 0);
2460 WREG32(CP_DEVICE_ID, 1);
2462 cik_cp_gfx_enable(rdev, true);
/* reserve space for the default state plus 17 dwords of init packets */
2464 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2466 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2470 /* init the CE partitions.  CE only used for gfx on CIK */
2471 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2472 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2473 radeon_ring_write(ring, 0xc000);
2474 radeon_ring_write(ring, 0xc000);
2476 /* setup clear context state */
2477 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2478 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2480 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2481 radeon_ring_write(ring, 0x80000000);
2482 radeon_ring_write(ring, 0x80000000);
/* stream the golden register state from cik_blit_shaders.c */
2484 for (i = 0; i < cik_default_size; i++)
2485 radeon_ring_write(ring, cik_default_state[i]);
2487 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2488 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2490 /* set clear context state */
2491 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2492 radeon_ring_write(ring, 0);
2494 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2495 radeon_ring_write(ring, 0x00000316);
2496 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2497 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2499 radeon_ring_unlock_commit(rdev, ring);
2505 * cik_cp_gfx_fini - stop the gfx ring
2507 * @rdev: radeon_device pointer
2509 * Stop the gfx ring and tear down the driver ring
2512 static void cik_cp_gfx_fini(struct radeon_device *rdev)
/* halt the gfx MEs, then release the ring's driver-side resources */
2514 cik_cp_gfx_enable(rdev, false);
2515 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2519 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2521 * @rdev: radeon_device pointer
2523 * Program the location and size of the gfx ring buffer
2524 * and test it to make sure it's working.
2525 * Returns 0 for success, error for failure.
2527 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2529 struct radeon_ring *ring;
2535 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2536 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2538 /* Set the write pointer delay */
2539 WREG32(CP_RB_WPTR_DELAY, 0);
2541 /* set the RB to use vmid 0 */
2542 WREG32(CP_RB_VMID, 0);
2544 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2546 /* ring 0 - compute and gfx */
2547 /* Set ring buffer size */
2548 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2549 rb_bufsz = drm_order(ring->ring_size / 8);
2550 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2552 tmp |= BUF_SWAP_32BIT;
2554 WREG32(CP_RB0_CNTL, tmp);
2556 /* Initialize the ring buffer's read and write pointers */
2557 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2559 WREG32(CP_RB0_WPTR, ring->wptr);
2561 /* set the wb address whether it's enabled or not */
2562 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2563 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2565 /* scratch register shadowing is no longer supported */
2566 WREG32(SCRATCH_UMSK, 0);
2568 if (!rdev->wb.enabled)
2569 tmp |= RB_NO_UPDATE;
/* commit the final CNTL value (drops RB_RPTR_WR_ENA set above) */
2572 WREG32(CP_RB0_CNTL, tmp);
2574 rb_addr = ring->gpu_addr >> 8;
2575 WREG32(CP_RB0_BASE, rb_addr);
2576 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2578 ring->rptr = RREG32(CP_RB0_RPTR);
2580 /* start the ring */
2581 cik_cp_gfx_start(rdev);
/* mark ready before the self-test; clear it again if the test fails */
2582 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2583 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2585 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
/* cik_compute_ring_get_rptr - fetch a compute ring's read pointer,
 * preferring the writeback copy; falls back to reading CP_HQD_PQ_RPTR
 * under the queue's SRBM selection (restored to 0,0,0,0 afterwards). */
2591 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2592 struct radeon_ring *ring)
2598 if (rdev->wb.enabled) {
2599 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2601 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2602 rptr = RREG32(CP_HQD_PQ_RPTR);
2603 cik_srbm_select(rdev, 0, 0, 0, 0);
2605 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
/* cik_compute_ring_get_wptr - fetch a compute ring's write pointer,
 * preferring the writeback copy; falls back to reading CP_HQD_PQ_WPTR
 * under the queue's SRBM selection (restored to 0,0,0,0 afterwards). */
2610 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2611 struct radeon_ring *ring)
2615 if (rdev->wb.enabled) {
2616 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2618 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2619 wptr = RREG32(CP_HQD_PQ_WPTR);
2620 cik_srbm_select(rdev, 0, 0, 0, 0);
2622 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
/* cik_compute_ring_set_wptr - publish a compute ring's write pointer to
 * the writeback slot and ring the queue's doorbell to kick the CP. */
2627 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2628 struct radeon_ring *ring)
2630 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2632 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2633 WDOORBELL32(ring->doorbell_offset, wptr);
2637 * cik_cp_compute_enable - enable/disable the compute CP MEs
2639 * @rdev: radeon_device pointer
2640 * @enable: enable or disable the MEs
2642 * Halts or unhalts the compute MEs.
2644 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
/* clear CP_MEC_CNTL to run, or set both MEC halt bits to stop */
2647 WREG32(CP_MEC_CNTL, 0);
2649 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2654 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2656 * @rdev: radeon_device pointer
2658 * Loads the compute MEC1&2 ucode.
2659 * Returns 0 for success, -EINVAL if the ucode is not available.
2661 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2663 const __be32 *fw_data;
/* halt the MECs before rewriting their ucode RAM */
2669 cik_cp_compute_enable(rdev, false);
/* MEC1 */
2672 fw_data = (const __be32 *)rdev->mec_fw->data;
2673 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2674 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2675 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2676 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
/* KV has a second MEC; note the same mec_fw image is streamed into
 * MEC2 here — presumably the two MEs share one ucode image on KV
 * (NOTE(review): confirm no separate mec2 firmware is expected). */
2678 if (rdev->family == CHIP_KAVERI) {
2680 fw_data = (const __be32 *)rdev->mec_fw->data;
2681 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2682 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2683 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2684 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2691 * cik_cp_compute_start - start the compute queues
2693 * @rdev: radeon_device pointer
2695 * Enable the compute queues.
2696 * Returns 0 for success, error for failure.
2698 static int cik_cp_compute_start(struct radeon_device *rdev)
/* currently just unhalts the MECs; per-queue setup happens in resume */
2700 cik_cp_compute_enable(rdev, true);
2706 * cik_cp_compute_fini - stop the compute queues
2708 * @rdev: radeon_device pointer
2710 * Stop the compute queues and tear down the driver queue
2713 static void cik_cp_compute_fini(struct radeon_device *rdev)
2717 cik_cp_compute_enable(rdev, false);
/* free the MQD buffer object of each of the two driver compute rings */
2719 for (i = 0; i < 2; i++) {
2721 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2723 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2725 if (rdev->ring[idx].mqd_obj) {
2726 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2727 if (unlikely(r != 0))
2728 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2730 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2731 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2733 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2734 rdev->ring[idx].mqd_obj = NULL;
/* cik_mec_fini - unpin and free the HPD EOP buffer object allocated by
 * cik_mec_init. */
2739 static void cik_mec_fini(struct radeon_device *rdev)
2743 if (rdev->mec.hpd_eop_obj) {
2744 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2745 if (unlikely(r != 0))
2746 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2747 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2748 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2750 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2751 rdev->mec.hpd_eop_obj = NULL;
/* per-pipe HPD EOP buffer size in bytes */
2755 #define MEC_HPD_SIZE 2048
/* cik_mec_init - record the MEC/pipe/queue topology for this asic and
 * allocate, pin, and zero the shared HPD EOP buffer (one MEC_HPD_SIZE*2
 * slice per pipe). NOTE(review): warning strings below say "HDP EOP"
 * while the field is hpd_eop_* — looks like a typo in the messages. */
2757 static int cik_mec_init(struct radeon_device *rdev)
2763 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2764 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2766 if (rdev->family == CHIP_KAVERI)
2767 rdev->mec.num_mec = 2;
2769 rdev->mec.num_mec = 1;
2770 rdev->mec.num_pipe = 4;
2771 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2773 if (rdev->mec.hpd_eop_obj == NULL) {
2774 r = radeon_bo_create(rdev,
2775 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2777 RADEON_GEM_DOMAIN_GTT, NULL,
2778 &rdev->mec.hpd_eop_obj);
2780 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
2785 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2786 if (unlikely(r != 0)) {
2790 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2791 &rdev->mec.hpd_eop_gpu_addr);
2793 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
2797 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2799 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
2804 /* clear memory.  Not sure if this is required or not */
2805 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2807 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2808 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/* CPU-side shadow of the CP_HQD_* (hardware queue descriptor) register
 * state that cik_cp_compute_resume programs; embedded in the MQD so the
 * values written to the registers are also recorded in memory. */
2813 struct hqd_registers
2815 u32 cp_mqd_base_addr;
2816 u32 cp_mqd_base_addr_hi;
2819 u32 cp_hqd_persistent_state;
2820 u32 cp_hqd_pipe_priority;
2821 u32 cp_hqd_queue_priority;
2824 u32 cp_hqd_pq_base_hi;
2826 u32 cp_hqd_pq_rptr_report_addr;
2827 u32 cp_hqd_pq_rptr_report_addr_hi;
2828 u32 cp_hqd_pq_wptr_poll_addr;
2829 u32 cp_hqd_pq_wptr_poll_addr_hi;
2830 u32 cp_hqd_pq_doorbell_control;
2832 u32 cp_hqd_pq_control;
2833 u32 cp_hqd_ib_base_addr;
2834 u32 cp_hqd_ib_base_addr_hi;
2836 u32 cp_hqd_ib_control;
2837 u32 cp_hqd_iq_timer;
2839 u32 cp_hqd_dequeue_request;
2840 u32 cp_hqd_dma_offload;
2841 u32 cp_hqd_sema_cmd;
2842 u32 cp_hqd_msg_type;
2843 u32 cp_hqd_atomic0_preop_lo;
2844 u32 cp_hqd_atomic0_preop_hi;
2845 u32 cp_hqd_atomic1_preop_lo;
2846 u32 cp_hqd_atomic1_preop_hi;
2847 u32 cp_hqd_hq_scheduler0;
2848 u32 cp_hqd_hq_scheduler1;
/* remainder of the bonaire_mqd (memory queue descriptor) layout; this
 * in-memory layout is consumed by the CP microcode — presumably field
 * order/offsets must not change (NOTE(review): confirm against the CP
 * MQD spec before touching). */
2855 u32 dispatch_initiator;
2859 u32 pipeline_stat_enable;
2860 u32 perf_counter_enable;
2866 u32 resource_limits;
2867 u32 static_thread_mgmt01[2];
2869 u32 static_thread_mgmt23[2];
2871 u32 thread_trace_enable;
2874 u32 vgtcs_invoke_count[2];
/* snapshot of the HQD register state programmed for this queue */
2875 struct hqd_registers queue_state;
2877 u32 interrupt_queue[64];
2881 * cik_cp_compute_resume - setup the compute queue registers
2883 * @rdev: radeon_device pointer
2885 * Program the compute queues and test them to make sure they
2887 * Returns 0 for success, error for failure.
2889 static int cik_cp_compute_resume(struct radeon_device *rdev)
2893 bool use_doorbell = true;
2899 struct bonaire_mqd *mqd;
2901 r = cik_cp_compute_start(rdev);
2905 /* fix up chicken bits */
2906 tmp = RREG32(CP_CPF_DEBUG);
2908 WREG32(CP_CPF_DEBUG, tmp);
2910 /* init the pipes */
/* i spans all mec*pipe combinations: ME1 pipes 0-3, then ME2 pipes 0-3 */
2911 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2912 int me = (i < 4) ? 1 : 2;
2913 int pipe = (i < 4) ? i : (i - 4);
2915 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2917 cik_srbm_select(rdev, me, pipe, 0, 0);
2919 /* write the EOP addr */
2920 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2921 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2923 /* set the VMID assigned */
2924 WREG32(CP_HPD_EOP_VMID, 0);
2926 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2927 tmp = RREG32(CP_HPD_EOP_CONTROL);
2928 tmp &= ~EOP_SIZE_MASK;
2929 tmp |= drm_order(MEC_HPD_SIZE / 8);
2930 WREG32(CP_HPD_EOP_CONTROL, tmp);
2932 cik_srbm_select(rdev, 0, 0, 0, 0);
2934 /* init the queues.  Just two for now. */
2935 for (i = 0; i < 2; i++) {
2937 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2939 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2941 if (rdev->ring[idx].mqd_obj == NULL) {
2942 r = radeon_bo_create(rdev,
2943 sizeof(struct bonaire_mqd),
2945 RADEON_GEM_DOMAIN_GTT, NULL,
2946 &rdev->ring[idx].mqd_obj);
2948 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2953 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2954 if (unlikely(r != 0)) {
2955 cik_cp_compute_fini(rdev);
2958 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2961 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2962 cik_cp_compute_fini(rdev);
2965 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2967 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2968 cik_cp_compute_fini(rdev);
2972 /* doorbell offset */
2973 rdev->ring[idx].doorbell_offset =
2974 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2976 /* init the mqd struct */
2977 memset(buf, 0, sizeof(struct bonaire_mqd));
2979 mqd = (struct bonaire_mqd *)buf;
2980 mqd->header = 0xC0310800;
2981 mqd->static_thread_mgmt01[0] = 0xffffffff;
2982 mqd->static_thread_mgmt01[1] = 0xffffffff;
2983 mqd->static_thread_mgmt23[0] = 0xffffffff;
2984 mqd->static_thread_mgmt23[1] = 0xffffffff;
/* select this queue's SRBM context for all CP_HQD_* accesses below */
2986 cik_srbm_select(rdev, rdev->ring[idx].me,
2987 rdev->ring[idx].pipe,
2988 rdev->ring[idx].queue, 0);
2990 /* disable wptr polling */
2991 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2992 tmp &= ~WPTR_POLL_EN;
2993 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2995 /* enable doorbell? */
2996 mqd->queue_state.cp_hqd_pq_doorbell_control =
2997 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2999 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3001 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3002 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3003 mqd->queue_state.cp_hqd_pq_doorbell_control);
3005 /* disable the queue if it's active */
3006 mqd->queue_state.cp_hqd_dequeue_request = 0;
3007 mqd->queue_state.cp_hqd_pq_rptr = 0;
3008 mqd->queue_state.cp_hqd_pq_wptr= 0;
3009 if (RREG32(CP_HQD_ACTIVE) & 1) {
3010 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
/* NOTE(review): this timeout loop reuses 'i', which is also the
 * counter of the enclosing two-queue loop above — exiting here with
 * i >= 2 would terminate the outer loop early / corrupt its state.
 * Should use a separate loop counter. */
3011 for (i = 0; i < rdev->usec_timeout; i++) {
3012 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3016 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3017 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3018 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3021 /* set the pointer to the MQD */
3022 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3023 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3024 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3025 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3026 /* set MQD vmid to 0 */
3027 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3028 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3029 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3031 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3032 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3033 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3034 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3035 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3036 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3038 /* set up the HQD, this is similar to CP_RB0_CNTL */
3039 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3040 mqd->queue_state.cp_hqd_pq_control &=
3041 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3043 mqd->queue_state.cp_hqd_pq_control |=
3044 drm_order(rdev->ring[idx].ring_size / 8);
3045 mqd->queue_state.cp_hqd_pq_control |=
3046 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3048 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3050 mqd->queue_state.cp_hqd_pq_control &=
3051 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3052 mqd->queue_state.cp_hqd_pq_control |=
3053 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3054 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3056 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3058 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3060 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3061 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3062 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3063 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3064 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3065 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3067 /* set the wb address whether it's enabled or not */
3069 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3071 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3072 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3073 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3074 upper_32_bits(wb_gpu_addr) & 0xffff;
3075 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3076 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3077 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3078 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3080 /* enable the doorbell if requested */
3082 mqd->queue_state.cp_hqd_pq_doorbell_control =
3083 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3084 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3085 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3086 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3087 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3088 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3089 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3092 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3094 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3095 mqd->queue_state.cp_hqd_pq_doorbell_control);
3097 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3098 rdev->ring[idx].wptr = 0;
3099 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3100 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3101 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3102 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3104 /* set the vmid for the queue */
3105 mqd->queue_state.cp_hqd_vmid = 0;
3106 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3108 /* activate the queue */
3109 mqd->queue_state.cp_hqd_active = 1;
3110 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
/* restore the default SRBM context before leaving the HQD registers */
3112 cik_srbm_select(rdev, 0, 0, 0, 0);
3114 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3115 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
/* mark ready before the self-test; clear it again if the test fails */
3117 rdev->ring[idx].ready = true;
3118 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3120 rdev->ring[idx].ready = false;
/* cik_cp_enable - enable/disable both the gfx and compute CPs together */
3126 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3128 cik_cp_gfx_enable(rdev, enable);
3129 cik_cp_compute_enable(rdev, enable);
/* cik_cp_load_microcode - load gfx then compute CP ucode; returns the
 * first failing loader's error code. */
3132 static int cik_cp_load_microcode(struct radeon_device *rdev)
3136 r = cik_cp_gfx_load_microcode(rdev);
3139 r = cik_cp_compute_load_microcode(rdev);
/* cik_cp_fini - tear down both the gfx and compute CPs */
3146 static void cik_cp_fini(struct radeon_device *rdev)
3148 cik_cp_gfx_fini(rdev);
3149 cik_cp_compute_fini(rdev);
/* cik_cp_resume - soft-reset the CP block, reload ucode, then bring up
 * the gfx ring and the compute queues. */
3152 static int cik_cp_resume(struct radeon_device *rdev)
3156 /* Reset all cp blocks */
3157 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
/* read back to make sure the reset write has posted before releasing */
3158 RREG32(GRBM_SOFT_RESET);
3160 WREG32(GRBM_SOFT_RESET, 0);
3161 RREG32(GRBM_SOFT_RESET);
3163 r = cik_cp_load_microcode(rdev);
3167 r = cik_cp_gfx_resume(rdev);
3170 r = cik_cp_compute_resume(rdev);
3179 * Starting with CIK, the GPU has new asynchronous
3180 * DMA engines. These engines are used for compute
3181 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3182 * and each one supports 1 ring buffer used for gfx
3183 * and 2 queues used for compute.
3185 * The programming model is very similar to the CP
3186 * (ring buffer, IBs, etc.), but sDMA has its own
3187 * packet format that is different from the PM4 format
3188 * used by the CP. sDMA supports copying data, writing
3189 * embedded data, solid fills, and a number of other
3190 * things. It also has support for tiling/detiling of
3194 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3196 * @rdev: radeon_device pointer
3197 * @ib: IB object to schedule
3199 * Schedule an IB in the DMA ring (CIK).
3201 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3202 struct radeon_ib *ib)
3204 struct radeon_ring *ring = &rdev->ring[ib->ring];
/* low 4 bits of the IB packet carry the VMID (0 for kernel IBs) */
3205 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3207 if (rdev->wb.enabled) {
/* record the expected rptr after this submission in the writeback
 * slot; pad so the following IB packet lands on an 8-DW boundary */
3208 u32 next_rptr = ring->wptr + 5;
3209 while ((next_rptr & 7) != 4)
3212 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3213 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3214 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3215 radeon_ring_write(ring, 1); /* number of DWs to follow */
3216 radeon_ring_write(ring, next_rptr);
3219 /* IB packet must end on a 8 DW boundary */
3220 while ((ring->wptr & 7) != 4)
3221 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3222 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3223 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3224 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3225 radeon_ring_write(ring, ib->length_dw);
3230 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3232 * @rdev: radeon_device pointer
3233 * @fence: radeon fence object
3235 * Add a DMA fence packet to the ring to write
3236 * the fence seq number and DMA trap packet to generate
3237 * an interrupt if needed (CIK).
3239 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3240 struct radeon_fence *fence)
3242 struct radeon_ring *ring = &rdev->ring[fence->ring];
3243 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
/* POLL_REG_MEM extra bits: memory-poll op with an equality compare */
3244 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3245 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
/* pick the HDP-flush-done bit matching this SDMA engine */
3248 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3249 ref_and_mask = SDMA0;
3251 ref_and_mask = SDMA1;
3253 /* write the fence */
3254 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3255 radeon_ring_write(ring, addr & 0xffffffff);
3256 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3257 radeon_ring_write(ring, fence->seq);
3258 /* generate an interrupt */
3259 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
/* flush HDP: request the flush, then poll the done register until the
 * engine's bit matches */
3261 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3262 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3263 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3264 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3265 radeon_ring_write(ring, ref_and_mask); /* MASK */
3266 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3270 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3272 * @rdev: radeon_device pointer
3273 * @ring: radeon_ring structure holding ring information
3274 * @semaphore: radeon semaphore object
3275 * @emit_wait: wait or signal semaphore
3277 * Add a DMA semaphore packet to the ring wait on or signal
3278 * other rings (CIK).
3280 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3281 struct radeon_ring *ring,
3282 struct radeon_semaphore *semaphore,
3285 u64 addr = semaphore->gpu_addr;
/* the S (signal) extra bit is set for signal, clear for wait */
3286 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3288 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
/* semaphore address must be 8-byte aligned */
3289 radeon_ring_write(ring, addr & 0xfffffff8);
3290 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3294 * cik_sdma_gfx_stop - stop the gfx async dma engines
3296 * @rdev: radeon_device pointer
3298 * Stop the gfx async dma ring buffers (CIK).
3300 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3302 u32 rb_cntl, reg_offset;
/* DMA is going away: shrink the usable VRAM window back to the
 * CPU-visible portion */
3305 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
/* disable the ring buffer and IB fetch on both SDMA engines */
3307 for (i = 0; i < 2; i++) {
3309 reg_offset = SDMA0_REGISTER_OFFSET;
3311 reg_offset = SDMA1_REGISTER_OFFSET;
3312 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3313 rb_cntl &= ~SDMA_RB_ENABLE;
3314 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3315 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3320 * cik_sdma_rlc_stop - stop the compute async dma engines
3322 * @rdev: radeon_device pointer
3324 * Stop the compute async dma queues (CIK).
/* currently a stub — compute SDMA queues are not yet used here */
3326 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3332 * cik_sdma_enable - stop the async dma engines
3334 * @rdev: radeon_device pointer
3335 * @enable: enable/disable the DMA MEs.
3337 * Halt or unhalt the async dma engines (CIK).
3339 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3341 u32 me_cntl, reg_offset;
/* toggle the HALT bit in each engine's ME_CNTL */
3344 for (i = 0; i < 2; i++) {
3346 reg_offset = SDMA0_REGISTER_OFFSET;
3348 reg_offset = SDMA1_REGISTER_OFFSET;
3349 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3351 me_cntl &= ~SDMA_HALT;
3353 me_cntl |= SDMA_HALT;
3354 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3359 * cik_sdma_gfx_resume - setup and start the async dma engines
3361 * @rdev: radeon_device pointer
3363 * Set up the gfx DMA ring buffers and enable them (CIK).
3364 * Returns 0 for success, error for failure.
3366 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3368 struct radeon_ring *ring;
3369 u32 rb_cntl, ib_cntl;
3371 u32 reg_offset, wb_offset;
/* program both instances: i==0 -> SDMA0/DMA ring, i==1 -> SDMA1/DMA1 ring */
3374 for (i = 0; i < 2; i++) {
3376 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3377 reg_offset = SDMA0_REGISTER_OFFSET;
3378 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3380 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3381 reg_offset = SDMA1_REGISTER_OFFSET;
3382 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
/* disable semaphore wait timers */
3385 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3386 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3388 /* Set ring buffer size in dwords */
3389 rb_bufsz = drm_order(ring->ring_size / 4);
3390 rb_cntl = rb_bufsz << 1;
3392 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3394 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3396 /* Initialize the ring buffer's read and write pointers */
3397 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3398 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3400 /* set the wb address whether it's enabled or not */
3401 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3402 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3403 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3404 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3406 if (rdev->wb.enabled)
3407 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
/* ring base GPU address is split across two registers (>>8 and >>40) */
3409 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3410 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
/* wptr/rptr registers are in bytes; ring pointers are in dwords (<<2 / >>2) */
3413 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3415 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
/* enable the ring only after the registers above are programmed */
3418 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3420 ib_cntl = SDMA_IB_ENABLE;
3422 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3424 /* enable DMA IBs */
3425 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
/* smoke-test the ring; on failure it is marked not ready */
3429 r = radeon_ring_test(rdev, ring->idx, ring);
3431 ring->ready = false;
/* DMA is up again: expose the full (real) VRAM size to TTM */
3436 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3442 * cik_sdma_rlc_resume - setup and start the async dma engines
3444 * @rdev: radeon_device pointer
3446 * Set up the compute DMA queues and enable them (CIK).
3447 * Returns 0 for success, error for failure.
/* NOTE(review): compute-queue bring-up is not implemented in this block. */
3449 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3456 * cik_sdma_load_microcode - load the sDMA ME ucode
3458 * @rdev: radeon_device pointer
3460 * Loads the sDMA0/1 ucode.
3461 * Returns 0 for success, -EINVAL if the ucode is not available.
3463 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3465 const __be32 *fw_data;
3471 /* stop the gfx rings and rlc compute queues */
3472 cik_sdma_gfx_stop(rdev);
3473 cik_sdma_rlc_stop(rdev);
/* halt the MEs before touching the ucode registers */
3476 cik_sdma_enable(rdev, false);
/* sdma0: firmware words are stored big-endian in the blob */
3479 fw_data = (const __be32 *)rdev->sdma_fw->data;
3480 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3481 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3482 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3483 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
/* sdma1: the same image is uploaded to the second instance */
3486 fw_data = (const __be32 *)rdev->sdma_fw->data;
3487 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3488 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3489 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3490 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
/* reset both ucode address pointers when done */
3492 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3493 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3498 * cik_sdma_resume - setup and start the async dma engines
3500 * @rdev: radeon_device pointer
3502 * Set up the DMA engines and enable them (CIK).
3503 * Returns 0 for success, error for failure.
3505 static int cik_sdma_resume(struct radeon_device *rdev)
/* pulse soft reset on both SDMA blocks; the RREG32 readbacks flush the writes */
3510 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3511 RREG32(SRBM_SOFT_RESET);
3513 WREG32(SRBM_SOFT_RESET, 0);
3514 RREG32(SRBM_SOFT_RESET);
/* upload ucode while the engines are halted */
3516 r = cik_sdma_load_microcode(rdev);
3520 /* unhalt the MEs */
3521 cik_sdma_enable(rdev, true);
3523 /* start the gfx rings and rlc compute queues */
3524 r = cik_sdma_gfx_resume(rdev);
3527 r = cik_sdma_rlc_resume(rdev);
3535 * cik_sdma_fini - tear down the async dma engines
3537 * @rdev: radeon_device pointer
3539 * Stop the async dma engines and free the rings (CIK).
3541 static void cik_sdma_fini(struct radeon_device *rdev)
3543 /* stop the gfx rings and rlc compute queues */
3544 cik_sdma_gfx_stop(rdev);
3545 cik_sdma_rlc_stop(rdev);
/* halt the engines before freeing their ring buffers */
3547 cik_sdma_enable(rdev, false);
3548 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3549 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3550 /* XXX - compute dma queue tear down */
3554 * cik_copy_dma - copy pages using the DMA engine
3556 * @rdev: radeon_device pointer
3557 * @src_offset: src GPU address
3558 * @dst_offset: dst GPU address
3559 * @num_gpu_pages: number of GPU pages to xfer
3560 * @fence: radeon fence object
3562 * Copy GPU paging using the DMA engine (CIK).
3563 * Used by the radeon ttm implementation to move pages if
3564 * registered as the asic copy callback.
3566 int cik_copy_dma(struct radeon_device *rdev,
3567 uint64_t src_offset, uint64_t dst_offset,
3568 unsigned num_gpu_pages,
3569 struct radeon_fence **fence)
3571 struct radeon_semaphore *sem = NULL;
3572 int ring_index = rdev->asic->copy.dma_ring_index;
3573 struct radeon_ring *ring = &rdev->ring[ring_index];
3574 u32 size_in_bytes, cur_size_in_bytes;
/* a semaphore is needed in case we must sync to another ring's fence */
3578 r = radeon_semaphore_create(rdev, &sem);
3580 DRM_ERROR("radeon: moving bo (%d).\n", r);
/* each COPY packet moves at most 0x1fffff bytes and takes 7 dwords;
 * the extra 14 dwords cover semaphore sync and the fence emit */
3584 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3585 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3586 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3588 DRM_ERROR("radeon: moving bo (%d).\n", r);
3589 radeon_semaphore_free(rdev, &sem, NULL);
/* wait for the previous fence's ring before starting the copy */
3593 if (radeon_fence_need_sync(*fence, ring->idx)) {
3594 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3596 radeon_fence_note_sync(*fence, ring->idx);
3598 radeon_semaphore_free(rdev, &sem, NULL);
/* emit one linear-copy packet per <=0x1fffff-byte chunk */
3601 for (i = 0; i < num_loops; i++) {
3602 cur_size_in_bytes = size_in_bytes;
3603 if (cur_size_in_bytes > 0x1fffff)
3604 cur_size_in_bytes = 0x1fffff;
3605 size_in_bytes -= cur_size_in_bytes;
3606 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3607 radeon_ring_write(ring, cur_size_in_bytes);
3608 radeon_ring_write(ring, 0); /* src/dst endian swap */
3609 radeon_ring_write(ring, src_offset & 0xffffffff);
3610 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3611 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3612 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3613 src_offset += cur_size_in_bytes;
3614 dst_offset += cur_size_in_bytes;
/* fence the copy so callers can wait for completion */
3617 r = radeon_fence_emit(rdev, fence, ring->idx);
3619 radeon_ring_unlock_undo(rdev, ring);
3623 radeon_ring_unlock_commit(rdev, ring);
3624 radeon_semaphore_free(rdev, &sem, *fence);
3630 * cik_sdma_ring_test - simple async dma engine test
3632 * @rdev: radeon_device pointer
3633 * @ring: radeon_ring structure holding ring information
3635 * Test the DMA engine by writing using it to write an
3636 * value to memory. (CIK).
3637 * Returns 0 for success, error for failure.
3639 int cik_sdma_ring_test(struct radeon_device *rdev,
3640 struct radeon_ring *ring)
3644 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3648 DRM_ERROR("invalid vram scratch pointer\n");
3655 r = radeon_ring_lock(rdev, ring, 4);
3657 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
/* ask the engine to write the magic value to the vram scratch location */
3660 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3661 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3662 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3663 radeon_ring_write(ring, 1); /* number of DWs to follow */
3664 radeon_ring_write(ring, 0xDEADBEEF);
3665 radeon_ring_unlock_commit(rdev, ring);
/* poll the scratch value until it appears or we hit the usec timeout */
3667 for (i = 0; i < rdev->usec_timeout; i++) {
3669 if (tmp == 0xDEADBEEF)
3674 if (i < rdev->usec_timeout) {
3675 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3677 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3685 * cik_sdma_ib_test - test an IB on the DMA engine
3687 * @rdev: radeon_device pointer
3688 * @ring: radeon_ring structure holding ring information
3690 * Test a simple IB in the DMA ring (CIK).
3691 * Returns 0 on success, error on failure.
3693 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3695 struct radeon_ib ib;
3698 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3702 DRM_ERROR("invalid vram scratch pointer\n");
3709 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3711 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
/* IB payload: one linear write of 0xDEADBEEF to the vram scratch address */
3715 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3716 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3717 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3719 ib.ptr[4] = 0xDEADBEEF;
3722 r = radeon_ib_schedule(rdev, &ib, NULL);
3724 radeon_ib_free(rdev, &ib);
3725 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
/* block until the IB's fence signals, then verify the write landed */
3728 r = radeon_fence_wait(ib.fence, false);
3730 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3733 for (i = 0; i < rdev->usec_timeout; i++) {
3735 if (tmp == 0xDEADBEEF)
3739 if (i < rdev->usec_timeout) {
3740 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3742 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3745 radeon_ib_free(rdev, &ib);
/* Dump the GRBM/SRBM, SDMA and CP status registers to the kernel log.
 * Pure debug aid used around soft reset; reads registers only, no writes. */
3750 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3752 dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3753 RREG32(GRBM_STATUS));
3754 dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3755 RREG32(GRBM_STATUS2));
3756 dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3757 RREG32(GRBM_STATUS_SE0));
3758 dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3759 RREG32(GRBM_STATUS_SE1));
3760 dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3761 RREG32(GRBM_STATUS_SE2));
3762 dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3763 RREG32(GRBM_STATUS_SE3));
3764 dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3765 RREG32(SRBM_STATUS));
3766 dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3767 RREG32(SRBM_STATUS2));
3768 dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3769 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3770 dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3771 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3772 dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3773 dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3774 RREG32(CP_STALLED_STAT1));
3775 dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3776 RREG32(CP_STALLED_STAT2));
3777 dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3778 RREG32(CP_STALLED_STAT3));
3779 dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3780 RREG32(CP_CPF_BUSY_STAT));
3781 dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3782 RREG32(CP_CPF_STALLED_STAT1));
3783 dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3784 dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3785 dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3786 RREG32(CP_CPC_STALLED_STAT1));
3787 dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3791 * cik_gpu_check_soft_reset - check which blocks are busy
3793 * @rdev: radeon_device pointer
3795 * Check which blocks are busy and return the relevant reset
3796 * mask to be used by cik_gpu_soft_reset().
3797 * Returns a mask of the blocks to be reset.
3799 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
/* GRBM_STATUS: gfx pipeline busy bits map to GFX/CP reset */
3805 tmp = RREG32(GRBM_STATUS);
3806 if (tmp & (PA_BUSY | SC_BUSY |
3807 BCI_BUSY | SX_BUSY |
3808 TA_BUSY | VGT_BUSY |
3810 GDS_BUSY | SPI_BUSY |
3811 IA_BUSY | IA_BUSY_NO_DMA))
3812 reset_mask |= RADEON_RESET_GFX;
3814 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3815 reset_mask |= RADEON_RESET_CP;
/* GRBM_STATUS2: RLC busy */
3818 tmp = RREG32(GRBM_STATUS2);
3820 reset_mask |= RADEON_RESET_RLC;
3822 /* SDMA0_STATUS_REG */
3823 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3824 if (!(tmp & SDMA_IDLE))
3825 reset_mask |= RADEON_RESET_DMA;
3827 /* SDMA1_STATUS_REG */
3828 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3829 if (!(tmp & SDMA_IDLE))
3830 reset_mask |= RADEON_RESET_DMA1;
/* SRBM_STATUS2: second source for SDMA busy bits */
3833 tmp = RREG32(SRBM_STATUS2);
3834 if (tmp & SDMA_BUSY)
3835 reset_mask |= RADEON_RESET_DMA;
3837 if (tmp & SDMA1_BUSY)
3838 reset_mask |= RADEON_RESET_DMA1;
/* SRBM_STATUS: IH/SEM/GRBM/VMC/MC busy bits */
3841 tmp = RREG32(SRBM_STATUS);
3844 reset_mask |= RADEON_RESET_IH;
3847 reset_mask |= RADEON_RESET_SEM;
3849 if (tmp & GRBM_RQ_PENDING)
3850 reset_mask |= RADEON_RESET_GRBM;
3853 reset_mask |= RADEON_RESET_VMC;
3855 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3856 MCC_BUSY | MCD_BUSY))
3857 reset_mask |= RADEON_RESET_MC;
3859 if (evergreen_is_display_hung(rdev))
3860 reset_mask |= RADEON_RESET_DISPLAY;
3862 /* Skip MC reset as it's mostly likely not hung, just busy */
3863 if (reset_mask & RADEON_RESET_MC) {
3864 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3865 reset_mask &= ~RADEON_RESET_MC;
3872 * cik_gpu_soft_reset - soft reset GPU
3874 * @rdev: radeon_device pointer
3875 * @reset_mask: mask of which blocks to reset
3877 * Soft reset the blocks specified in @reset_mask.
3879 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3881 struct evergreen_mc_save save;
3882 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3885 if (reset_mask == 0)
3888 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
/* log pre-reset state to aid debugging hung-GPU reports */
3890 cik_print_gpu_status_regs(rdev);
3891 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3892 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3893 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3894 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3899 /* Disable GFX parsing/prefetching */
3900 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3902 /* Disable MEC parsing/prefetching */
3903 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
/* halt the SDMA engines that are about to be reset */
3905 if (reset_mask & RADEON_RESET_DMA) {
3907 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3909 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3911 if (reset_mask & RADEON_RESET_DMA1) {
3913 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3915 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
/* quiesce the memory controller before pulsing resets */
3918 evergreen_mc_stop(rdev, &save);
3919 if (evergreen_mc_wait_for_idle(rdev)) {
3920 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
/* translate the block mask into GRBM/SRBM soft-reset bits */
3923 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3924 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3926 if (reset_mask & RADEON_RESET_CP) {
3927 grbm_soft_reset |= SOFT_RESET_CP;
3929 srbm_soft_reset |= SOFT_RESET_GRBM;
3932 if (reset_mask & RADEON_RESET_DMA)
3933 srbm_soft_reset |= SOFT_RESET_SDMA;
3935 if (reset_mask & RADEON_RESET_DMA1)
3936 srbm_soft_reset |= SOFT_RESET_SDMA1;
3938 if (reset_mask & RADEON_RESET_DISPLAY)
3939 srbm_soft_reset |= SOFT_RESET_DC;
3941 if (reset_mask & RADEON_RESET_RLC)
3942 grbm_soft_reset |= SOFT_RESET_RLC;
3944 if (reset_mask & RADEON_RESET_SEM)
3945 srbm_soft_reset |= SOFT_RESET_SEM;
3947 if (reset_mask & RADEON_RESET_IH)
3948 srbm_soft_reset |= SOFT_RESET_IH;
3950 if (reset_mask & RADEON_RESET_GRBM)
3951 srbm_soft_reset |= SOFT_RESET_GRBM;
3953 if (reset_mask & RADEON_RESET_VMC)
3954 srbm_soft_reset |= SOFT_RESET_VMC;
/* MC reset only on discrete parts, never on IGPs */
3956 if (!(rdev->flags & RADEON_IS_IGP)) {
3957 if (reset_mask & RADEON_RESET_MC)
3958 srbm_soft_reset |= SOFT_RESET_MC;
/* pulse GRBM reset bits: set, read back, clear, read back */
3961 if (grbm_soft_reset) {
3962 tmp = RREG32(GRBM_SOFT_RESET);
3963 tmp |= grbm_soft_reset;
3964 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3965 WREG32(GRBM_SOFT_RESET, tmp);
3966 tmp = RREG32(GRBM_SOFT_RESET);
3970 tmp &= ~grbm_soft_reset;
3971 WREG32(GRBM_SOFT_RESET, tmp);
3972 tmp = RREG32(GRBM_SOFT_RESET);
/* same pulse sequence for the SRBM reset bits */
3975 if (srbm_soft_reset) {
3976 tmp = RREG32(SRBM_SOFT_RESET);
3977 tmp |= srbm_soft_reset;
3978 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3979 WREG32(SRBM_SOFT_RESET, tmp);
3980 tmp = RREG32(SRBM_SOFT_RESET);
3984 tmp &= ~srbm_soft_reset;
3985 WREG32(SRBM_SOFT_RESET, tmp);
3986 tmp = RREG32(SRBM_SOFT_RESET);
3989 /* Wait a little for things to settle down */
3992 evergreen_mc_resume(rdev, &save);
/* log post-reset state for comparison */
3995 cik_print_gpu_status_regs(rdev);
3999 * cik_asic_reset - soft reset GPU
4001 * @rdev: radeon_device pointer
4003 * Look up which blocks are hung and attempt
4005 * Returns 0 for success.
4007 int cik_asic_reset(struct radeon_device *rdev)
4011 reset_mask = cik_gpu_check_soft_reset(rdev);
/* flag the engine as hung in the BIOS scratch regs while resetting */
4014 r600_set_bios_scratch_engine_hung(rdev, true);
4016 cik_gpu_soft_reset(rdev, reset_mask);
/* re-check busy state: an empty mask means the reset worked */
4018 reset_mask = cik_gpu_check_soft_reset(rdev);
4021 r600_set_bios_scratch_engine_hung(rdev, false);
4027 * cik_gfx_is_lockup - check if the 3D engine is locked up
4029 * @rdev: radeon_device pointer
4030 * @ring: radeon_ring structure holding ring information
4032 * Check if the 3D engine is locked up (CIK).
4033 * Returns true if the engine is locked, false if not.
4035 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4037 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
/* no gfx/compute/CP block flagged busy: refresh lockup tracking, not hung */
4039 if (!(reset_mask & (RADEON_RESET_GFX |
4040 RADEON_RESET_COMPUTE |
4041 RADEON_RESET_CP))) {
4042 radeon_ring_lockup_update(ring);
4045 /* force CP activities */
4046 radeon_ring_force_activity(rdev, ring);
4047 return radeon_ring_test_lockup(rdev, ring);
4051 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4053 * @rdev: radeon_device pointer
4054 * @ring: radeon_ring structure holding ring information
4056 * Check if the async DMA engine is locked up (CIK).
4057 * Returns true if the engine appears to be locked up, false if not.
4059 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4061 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
/* pick the reset bit matching this ring's SDMA instance */
4064 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4065 mask = RADEON_RESET_DMA;
4067 mask = RADEON_RESET_DMA1;
/* engine not flagged busy: refresh lockup tracking, not hung */
4069 if (!(reset_mask & mask)) {
4070 radeon_ring_lockup_update(ring);
4073 /* force ring activities */
4074 radeon_ring_force_activity(rdev, ring);
4075 return radeon_ring_test_lockup(rdev, ring);
4080 * cik_mc_program - program the GPU memory controller
4082 * @rdev: radeon_device pointer
4084 * Set the location of vram, gart, and AGP in the GPU's
4085 * physical address space (CIK).
4087 static void cik_mc_program(struct radeon_device *rdev)
4089 struct evergreen_mc_save save;
4093 /* Initialize HDP */
4094 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4095 WREG32((0x2c14 + j), 0x00000000);
4096 WREG32((0x2c18 + j), 0x00000000);
4097 WREG32((0x2c1c + j), 0x00000000);
4098 WREG32((0x2c20 + j), 0x00000000);
4099 WREG32((0x2c24 + j), 0x00000000);
4101 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
/* MC must be idle while its aperture registers are reprogrammed */
4103 evergreen_mc_stop(rdev, &save);
4104 if (radeon_mc_wait_for_idle(rdev)) {
4105 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4107 /* Lockout access through VGA aperture*/
4108 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4109 /* Update configuration */
4110 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4111 rdev->mc.vram_start >> 12);
4112 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4113 rdev->mc.vram_end >> 12);
4114 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4115 rdev->vram_scratch.gpu_addr >> 12);
/* FB_LOCATION packs start (low 16) and end (high 16) in 16MB units */
4116 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4117 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4118 WREG32(MC_VM_FB_LOCATION, tmp);
4119 /* XXX double check these! */
4120 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4121 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4122 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* AGP is unused on CIK: base 0, bottom above top disables the aperture */
4123 WREG32(MC_VM_AGP_BASE, 0);
4124 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4125 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4126 if (radeon_mc_wait_for_idle(rdev)) {
4127 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4129 evergreen_mc_resume(rdev, &save);
4130 /* we need to own VRAM, so turn off the VGA renderer here
4131 * to stop it overwriting our objects */
4132 rv515_vga_render_disable(rdev);
4136 * cik_mc_init - initialize the memory controller driver params
4138 * @rdev: radeon_device pointer
4140 * Look up the amount of vram, vram width, and decide how to place
4141 * vram and gart within the GPU's physical address space (CIK).
4142 * Returns 0 for success.
4144 static int cik_mc_init(struct radeon_device *rdev)
4147 int chansize, numchan;
4149 /* Get VRAM informations */
4150 rdev->mc.vram_is_ddr = true;
/* RAMCFG chansize bit selects the per-channel bus width */
4151 tmp = RREG32(MC_ARB_RAMCFG);
4152 if (tmp & CHANSIZE_MASK) {
/* number of memory channels comes from MC_SHARED_CHMAP */
4157 tmp = RREG32(MC_SHARED_CHMAP);
4158 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4188 rdev->mc.vram_width = numchan * chansize;
4189 /* Could aper size report 0 ? */
4190 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4191 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4192 /* size in MB on si */
4193 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4194 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4195 rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* VRAM/GTT placement reuses the SI layout logic */
4196 si_vram_gtt_location(rdev, &rdev->mc);
4197 radeon_update_bandwidth_info(rdev);
4204 * VMID 0 is the physical GPU addresses as used by the kernel.
4205 * VMIDs 1-15 are used for userspace clients and are handled
4206 * by the radeon vm/hsa code.
4209 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4211 * @rdev: radeon_device pointer
4213 * Flush the TLB for the VMID 0 page table (CIK).
4215 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4217 /* flush hdp cache */
4218 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4220 /* bits 0-15 are the VM contexts0-15 */
/* only bit 0 set: invalidate context 0 (the kernel's VMID) */
4221 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4225 * cik_pcie_gart_enable - gart enable
4227 * @rdev: radeon_device pointer
4229 * This sets up the TLBs, programs the page tables for VMID0,
4230 * sets up the hw for VMIDs 1-15 which are allocated on
4231 * demand, and sets up the global locations for the LDS, GDS,
4232 * and GPUVM for FSA64 clients (CIK).
4233 * Returns 0 for success, errors for failure.
4235 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4239 if (rdev->gart.robj == NULL) {
4240 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
/* pin the GART table in VRAM and restore its entries */
4243 r = radeon_gart_table_vram_pin(rdev);
4246 radeon_gart_restore(rdev);
4247 /* Setup TLB control */
4248 WREG32(MC_VM_MX_L1_TLB_CNTL,
4251 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4252 ENABLE_ADVANCED_DRIVER_MODEL |
4253 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4254 /* Setup L2 cache */
4255 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4256 ENABLE_L2_FRAGMENT_PROCESSING |
4257 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4258 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4259 EFFECTIVE_L2_QUEUE_SIZE(7) |
4260 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4261 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4262 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4263 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4264 /* setup context0 */
4265 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4266 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4267 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
/* faults in context0 fall back to the dummy page */
4268 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4269 (u32)(rdev->dummy_page.addr >> 12));
4270 WREG32(VM_CONTEXT0_CNTL2, 0);
4271 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4272 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4278 /* empty context1-15 */
4279 /* FIXME start with 4G, once using 2 level pt switch to full
4282 /* set vm size, must be a multiple of 4 */
4283 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4284 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
/* contexts 1-7 and 8-15 live in two separate register banks */
4285 for (i = 1; i < 16; i++) {
4287 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4288 rdev->gart.table_addr >> 12);
4290 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4291 rdev->gart.table_addr >> 12);
4294 /* enable context1-15 */
4295 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4296 (u32)(rdev->dummy_page.addr >> 12));
4297 WREG32(VM_CONTEXT1_CNTL2, 4);
4298 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4299 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4300 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4301 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4302 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4303 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4304 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4305 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4306 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4307 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4308 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4309 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4310 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4312 /* TC cache setup ??? */
4313 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4314 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4315 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4317 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4318 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4319 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4320 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4321 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4323 WREG32(TC_CFG_L1_VOLATILE, 0);
4324 WREG32(TC_CFG_L2_VOLATILE, 0);
/* KV-specific CHUB tweak */
4326 if (rdev->family == CHIP_KAVERI) {
4327 u32 tmp = RREG32(CHUB_CONTROL);
4329 WREG32(CHUB_CONTROL, tmp);
4332 /* XXX SH_MEM regs */
4333 /* where to put LDS, scratch, GPUVM in FSA64 space */
4334 for (i = 0; i < 16; i++) {
/* select each VMID in turn to program its SH_MEM/SDMA state */
4335 cik_srbm_select(rdev, 0, 0, 0, i);
4336 /* CP and shaders */
4337 WREG32(SH_MEM_CONFIG, 0);
4338 WREG32(SH_MEM_APE1_BASE, 1);
4339 WREG32(SH_MEM_APE1_LIMIT, 0);
4340 WREG32(SH_MEM_BASES, 0);
4342 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4343 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4344 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4345 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4346 /* XXX SDMA RLC - todo */
/* restore the default SRBM selection */
4348 cik_srbm_select(rdev, 0, 0, 0, 0);
4350 cik_pcie_gart_tlb_flush(rdev);
4351 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4352 (unsigned)(rdev->mc.gtt_size >> 20),
4353 (unsigned long long)rdev->gart.table_addr);
4354 rdev->gart.ready = true;
4359 * cik_pcie_gart_disable - gart disable
4361 * @rdev: radeon_device pointer
4363 * This disables all VM page table (CIK).
4365 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4367 /* Disable all tables */
4368 WREG32(VM_CONTEXT0_CNTL, 0);
4369 WREG32(VM_CONTEXT1_CNTL, 0);
4370 /* Setup TLB control */
4371 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4372 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4373 /* Setup L2 cache */
4375 ENABLE_L2_FRAGMENT_PROCESSING |
4376 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4377 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4378 EFFECTIVE_L2_QUEUE_SIZE(7) |
4379 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4380 WREG32(VM_L2_CNTL2, 0);
4381 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4382 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
/* GART table is no longer referenced by hw; safe to unpin it */
4383 radeon_gart_table_vram_unpin(rdev);
4387 * cik_pcie_gart_fini - vm fini callback
4389 * @rdev: radeon_device pointer
4391 * Tears down the driver GART/VM setup (CIK).
4393 static void cik_pcie_gart_fini(struct radeon_device *rdev)
/* disable in hw first, then release the table memory and driver state */
4395 cik_pcie_gart_disable(rdev);
4396 radeon_gart_table_vram_free(rdev);
4397 radeon_gart_fini(rdev);
4402 * cik_ib_parse - vm ib_parse callback
4404 * @rdev: radeon_device pointer
4405 * @ib: indirect buffer pointer
4407 * CIK uses hw IB checking so this is a nop (CIK).
4409 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4416 * VMID 0 is the physical GPU addresses as used by the kernel.
4417 * VMIDs 1-15 are used for userspace clients and are handled
4418 * by the radeon vm/hsa code.
4421 * cik_vm_init - cik vm init callback
4423 * @rdev: radeon_device pointer
4425 * Inits cik specific vm parameters (number of VMs, base of vram for
4426 * VMIDs 1-15) (CIK).
4427 * Returns 0 for success.
4429 int cik_vm_init(struct radeon_device *rdev)
4432 rdev->vm_manager.nvm = 16;
4433 /* base offset of vram pages */
/* IGPs carve VRAM out of system memory, so the base comes from the MC */
4434 if (rdev->flags & RADEON_IS_IGP) {
4435 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4437 rdev->vm_manager.vram_base_offset = tmp;
4439 rdev->vm_manager.vram_base_offset = 0;
4445 * cik_vm_fini - cik vm fini callback
4447 * @rdev: radeon_device pointer
4449 * Tear down any asic specific VM setup (CIK).
/* Intentionally empty: no CIK-specific VM state needs teardown. */
4451 void cik_vm_fini(struct radeon_device *rdev)
4456 * cik_vm_flush - cik vm flush using the CP
4458 * @rdev: radeon_device pointer
4460 * Update the page table base and flush the VM TLB
4461 * using the CP (CIK).
4463 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4465 struct radeon_ring *ring = &rdev->ring[ridx];
/* write the VM's page-directory base into the per-VMID base register
 * (VMIDs 0-7 and 8-15 use separate register banks) */
4470 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4471 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4472 WRITE_DATA_DST_SEL(0)));
4474 radeon_ring_write(ring,
4475 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4477 radeon_ring_write(ring,
4478 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4480 radeon_ring_write(ring, 0);
4481 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4483 /* update SH_MEM_* regs */
/* select this VMID in SRBM_GFX_CNTL before touching its SH_MEM regs */
4484 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4485 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4486 WRITE_DATA_DST_SEL(0)));
4487 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4488 radeon_ring_write(ring, 0);
4489 radeon_ring_write(ring, VMID(vm->id));
4491 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4492 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4493 WRITE_DATA_DST_SEL(0)));
4494 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4495 radeon_ring_write(ring, 0);
4497 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4498 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4499 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4500 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
/* switch SRBM back to VMID 0 */
4502 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4503 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4504 WRITE_DATA_DST_SEL(0)));
4505 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4506 radeon_ring_write(ring, 0);
4507 radeon_ring_write(ring, VMID(0));
4510 /* We should be using the WAIT_REG_MEM packet here like in
4511 * cik_fence_ring_emit(), but it causes the CP to hang in this
/* flush the HDP cache via register write from the ring */
4514 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4515 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4516 WRITE_DATA_DST_SEL(0)));
4517 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4518 radeon_ring_write(ring, 0);
4519 radeon_ring_write(ring, 0);
4521 /* bits 0-15 are the VM contexts0-15 */
4522 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4523 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4524 WRITE_DATA_DST_SEL(0)));
4525 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4526 radeon_ring_write(ring, 0);
4527 radeon_ring_write(ring, 1 << vm->id);
4529 /* compute doesn't have PFP */
4530 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4531 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4532 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4533 radeon_ring_write(ring, 0x0);
4538 * cik_vm_set_page - update the page tables using sDMA
4540 * @rdev: radeon_device pointer
4541 * @ib: indirect buffer to fill with commands
4542 * @pe: addr of the page entry
4543 * @addr: dst addr to write into pe
4544 * @count: number of page entries to update
4545 * @incr: increase next addr by incr bytes
4546 * @flags: access flags
4548 * Update the page tables using CP or sDMA (CIK).
/* Chooses the encoding based on which ring owns the page tables: a CP
 * PACKET3 WRITE_DATA stream on the gfx ring, sDMA packets otherwise. */
4550 void cik_vm_set_page(struct radeon_device *rdev,
4551 struct radeon_ib *ib,
4553 uint64_t addr, unsigned count,
4554 uint32_t incr, uint32_t flags)
/* translate the generic RADEON_VM_PAGE_* flags into hw PTE bits */
4556 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
/* CP path: page tables are maintained on the gfx ring */
4560 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
/* 2 header dwords (dst addr lo/hi) + 2 dwords (lo/hi) per PTE */
4563 ndw = 2 + count * 2;
4567 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4568 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4569 WRITE_DATA_DST_SEL(1));
4570 ib->ptr[ib->length_dw++] = pe;
4571 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4572 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4573 if (flags & RADEON_VM_PAGE_SYSTEM) {
/* system pages: resolve the backing address through the GART, then
 * mask off the low 12 bits to get a page-aligned address */
4574 value = radeon_vm_map_gart(rdev, addr);
4575 value &= 0xFFFFFFFFFFFFF000ULL;
4576 } else if (flags & RADEON_VM_PAGE_VALID) {
4582 value |= r600_flags;
4583 ib->ptr[ib->length_dw++] = value;
4584 ib->ptr[ib->length_dw++] = upper_32_bits(value);
/* sDMA path */
4589 if (flags & RADEON_VM_PAGE_SYSTEM) {
4595 /* for non-physically contiguous pages (system) */
4596 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4597 ib->ptr[ib->length_dw++] = pe;
4598 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4599 ib->ptr[ib->length_dw++] = ndw;
4600 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4601 if (flags & RADEON_VM_PAGE_SYSTEM) {
4602 value = radeon_vm_map_gart(rdev, addr);
4603 value &= 0xFFFFFFFFFFFFF000ULL;
4604 } else if (flags & RADEON_VM_PAGE_VALID) {
4610 value |= r600_flags;
4611 ib->ptr[ib->length_dw++] = value;
4612 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4621 if (flags & RADEON_VM_PAGE_VALID)
4625 /* for physically contiguous pages (vram) */
/* GENERATE_PTE_PDE lets the sDMA engine expand a base value + increment
 * into ndw entries without the CPU writing each one */
4626 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4627 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4628 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4629 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4630 ib->ptr[ib->length_dw++] = 0;
4631 ib->ptr[ib->length_dw++] = value; /* value */
4632 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4633 ib->ptr[ib->length_dw++] = incr; /* increment size */
4634 ib->ptr[ib->length_dw++] = 0;
4635 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
/* sDMA IBs must be padded to a multiple of 8 dwords with NOPs */
4641 while (ib->length_dw & 0x7)
4642 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4647 * cik_dma_vm_flush - cik vm flush using sDMA
4649 * @rdev: radeon_device pointer
4651 * Update the page table base and flush the VM TLB
/* sDMA equivalent of the CP-based VM flush: all register writes are
 * emitted as SRBM_WRITE packets on the sDMA ring. */
4654 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4656 struct radeon_ring *ring = &rdev->ring[ridx];
/* POLL_REG_MEM extra bits: op 1, compare func 3 == "equal" */
4657 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4658 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
/* pick the HDP-flush-done bit belonging to the sDMA engine of this ring */
4664 if (ridx == R600_RING_TYPE_DMA_INDEX)
4665 ref_and_mask = SDMA0;
4667 ref_and_mask = SDMA1;
/* set the page directory base; VM ids 0-7 and 8-15 use different regs */
4669 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4671 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4673 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4675 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4677 /* update SH_MEM_* regs */
/* select the target VMID via SRBM_GFX_CNTL before touching SH_MEM_* */
4678 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4679 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4680 radeon_ring_write(ring, VMID(vm->id));
4682 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4683 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4684 radeon_ring_write(ring, 0);
4686 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4687 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4688 radeon_ring_write(ring, 0);
4690 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4691 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4692 radeon_ring_write(ring, 1);
4694 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4696 radeon_ring_write(ring, 0);
/* restore VMID 0 selection */
4698 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4700 radeon_ring_write(ring, VMID(0));
/* request an HDP flush and poll GPU_HDP_FLUSH_DONE until our bit is set */
4703 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4704 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4705 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4706 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4707 radeon_ring_write(ring, ref_and_mask); /* MASK */
4708 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
/* finally invalidate the TLB entry for this VM id */
4711 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4712 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4713 radeon_ring_write(ring, 1 << vm->id);
4718 * The RLC is a multi-purpose microengine that handles a
4719 * variety of functions, the most important of which is
4720 * the interrupt controller.
4723 * cik_rlc_stop - stop the RLC ME
4725 * @rdev: radeon_device pointer
4727 * Halt the RLC ME (MicroEngine) (CIK).
4729 static void cik_rlc_stop(struct radeon_device *rdev)
/* mask the gfx context busy/empty interrupts while the RLC is down */
4734 tmp = RREG32(CP_INT_CNTL_RING0);
4735 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4736 WREG32(CP_INT_CNTL_RING0, tmp);
/* repeated dummy reads — presumably a posting/settle delay, TODO confirm */
4738 RREG32(CB_CGTT_SCLK_CTRL);
4739 RREG32(CB_CGTT_SCLK_CTRL);
4740 RREG32(CB_CGTT_SCLK_CTRL);
4741 RREG32(CB_CGTT_SCLK_CTRL);
/* clear the low two (clockgating enable) bits of RLC_CGCG_CGLS_CTRL */
4743 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4744 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
/* halt the RLC engine itself */
4746 WREG32(RLC_CNTL, 0);
/* wait for the serdes CU masters in every SE/SH to go idle */
4748 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4749 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4750 cik_select_se_sh(rdev, i, j);
4751 for (k = 0; k < rdev->usec_timeout; k++) {
4752 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
/* restore broadcast (all SE/SH) register access */
4758 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
/* then wait for the non-CU masters (SE/GC/TC) to go idle as well */
4760 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4761 for (k = 0; k < rdev->usec_timeout; k++) {
4762 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4769 * cik_rlc_start - start the RLC ME
4771 * @rdev: radeon_device pointer
4773 * Unhalt the RLC ME (MicroEngine) (CIK).
4775 static void cik_rlc_start(struct radeon_device *rdev)
4779 WREG32(RLC_CNTL, RLC_ENABLE);
/* re-enable the context busy/empty interrupts masked off in cik_rlc_stop() */
4781 tmp = RREG32(CP_INT_CNTL_RING0);
4782 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4783 WREG32(CP_INT_CNTL_RING0, tmp);
4789 * cik_rlc_resume - setup the RLC hw
4791 * @rdev: radeon_device pointer
4793 * Initialize the RLC registers, load the ucode,
4794 * and start the RLC (CIK).
4795 * Returns 0 for success, -EINVAL if the ucode is not available.
4797 static int cik_rlc_resume(struct radeon_device *rdev)
4800 u32 clear_state_info[3];
4801 const __be32 *fw_data;
/* ucode size (in dwords) differs per asic family */
4806 switch (rdev->family) {
4809 size = BONAIRE_RLC_UCODE_SIZE;
4812 size = KV_RLC_UCODE_SIZE;
4815 size = KB_RLC_UCODE_SIZE;
/* pulse a soft reset of the RLC before reprogramming it; the reads
 * post the writes */
4821 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4822 RREG32(GRBM_SOFT_RESET);
4824 WREG32(GRBM_SOFT_RESET, 0);
4825 RREG32(GRBM_SOFT_RESET);
/* load-balancer counter setup */
4828 WREG32(RLC_LB_CNTR_INIT, 0);
4829 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
/* broadcast to all SE/SH, then program the LB parameters */
4831 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4832 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4833 WREG32(RLC_LB_PARAMS, 0x00600408);
4834 WREG32(RLC_LB_CNTL, 0x80000004);
4836 WREG32(RLC_MC_CNTL, 0);
4837 WREG32(RLC_UCODE_CNTL, 0);
/* upload the big-endian ucode image one dword at a time, then reset
 * the ucode address back to 0 */
4839 fw_data = (const __be32 *)rdev->rlc_fw->data;
4840 WREG32(RLC_GPM_UCODE_ADDR, 0);
4841 for (i = 0; i < size; i++)
4842 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4843 WREG32(RLC_GPM_UCODE_ADDR, 0);
/* XXX save/restore list is currently disabled — all three scratch
 * values are forced to 0 (see the commented-out originals) */
4846 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4847 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4848 clear_state_info[2] = 0;//cik_default_size;
4849 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4850 for (i = 0; i < 3; i++)
4851 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4852 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4854 cik_rlc_start(rdev);
4861 * Starting with r6xx, interrupts are handled via a ring buffer.
4862 * Ring buffers are areas of GPU accessible memory that the GPU
4863 * writes interrupt vectors into and the host reads vectors out of.
4864 * There is a rptr (read pointer) that determines where the
4865 * host is currently reading, and a wptr (write pointer)
4866 * which determines where the GPU has written. When the
4867 * pointers are equal, the ring is idle. When the GPU
4868 * writes vectors to the ring buffer, it increments the
4869 * wptr. When there is an interrupt, the host then starts
4870 * fetching commands and processing them until the pointers are
4871 * equal again at which point it updates the rptr.
4875 * cik_enable_interrupts - Enable the interrupt ring buffer
4877 * @rdev: radeon_device pointer
4879 * Enable the interrupt ring buffer (CIK).
4881 static void cik_enable_interrupts(struct radeon_device *rdev)
4883 u32 ih_cntl = RREG32(IH_CNTL);
4884 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
/* turn on both the IH engine and its ring buffer */
4886 ih_cntl |= ENABLE_INTR;
4887 ih_rb_cntl |= IH_RB_ENABLE;
4888 WREG32(IH_CNTL, ih_cntl);
4889 WREG32(IH_RB_CNTL, ih_rb_cntl);
/* software flag consulted by cik_irq_set()/cik_irq_process() */
4890 rdev->ih.enabled = true;
4894 * cik_disable_interrupts - Disable the interrupt ring buffer
4896 * @rdev: radeon_device pointer
4898 * Disable the interrupt ring buffer (CIK).
4900 static void cik_disable_interrupts(struct radeon_device *rdev)
4902 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4903 u32 ih_cntl = RREG32(IH_CNTL);
/* mirror image of cik_enable_interrupts(): clear ring + engine enables */
4905 ih_rb_cntl &= ~IH_RB_ENABLE;
4906 ih_cntl &= ~ENABLE_INTR;
4907 WREG32(IH_RB_CNTL, ih_rb_cntl);
4908 WREG32(IH_CNTL, ih_cntl);
4909 /* set rptr, wptr to 0 */
4910 WREG32(IH_RB_RPTR, 0);
4911 WREG32(IH_RB_WPTR, 0);
4912 rdev->ih.enabled = false;
4917 * cik_disable_interrupt_state - Disable all interrupt sources
4919 * @rdev: radeon_device pointer
4921 * Clear all interrupt enable bits used by the driver (CIK).
4923 static void cik_disable_interrupt_state(struct radeon_device *rdev)
/* gfx ring 0: keep only the context busy/empty enables */
4928 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE)
4930 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4931 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4932 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4933 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4934 /* compute queues */
4935 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4936 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4937 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4938 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4939 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4940 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4941 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4942 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4944 WREG32(GRBM_INT_CNTL, 0);
4945 /* vline/vblank, etc. */
4946 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4947 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4948 if (rdev->num_crtc >= 4) {
4949 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4950 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4952 if (rdev->num_crtc >= 6) {
4953 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4954 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4958 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4960 /* digital hotplug */
/* preserve only the polarity bit on each hpd pad, clearing the enables */
4961 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4962 WREG32(DC_HPD1_INT_CONTROL, tmp);
4963 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4964 WREG32(DC_HPD2_INT_CONTROL, tmp);
4965 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4966 WREG32(DC_HPD3_INT_CONTROL, tmp);
4967 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4968 WREG32(DC_HPD4_INT_CONTROL, tmp);
4969 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4970 WREG32(DC_HPD5_INT_CONTROL, tmp);
4971 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4972 WREG32(DC_HPD6_INT_CONTROL, tmp);
4977 * cik_irq_init - init and enable the interrupt ring
4979 * @rdev: radeon_device pointer
4981 * Allocate a ring buffer for the interrupt controller,
4982 * enable the RLC, disable interrupts, enable the IH
4983 * ring buffer and enable it (CIK).
4984 * Called at device load and resume.
4985 * Returns 0 for success, errors for failure.
4987 static int cik_irq_init(struct radeon_device *rdev)
4991 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
/* allocate the IH ring buffer (shared helper since r600) */
4994 ret = r600_ih_ring_alloc(rdev);
/* disable irqs while we reprogram the controller */
4999 cik_disable_interrupts(rdev);
/* init the RLC; on failure the IH ring is torn down again */
5002 ret = cik_rlc_resume(rdev);
5004 r600_ih_ring_fini(rdev);
5008 /* setup interrupt control */
5009 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5010 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5011 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5012 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5013 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5015 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5016 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5017 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5018 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5020 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
/* ring size is programmed as log2 of the dword count */
5021 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5023 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5024 IH_WPTR_OVERFLOW_CLEAR |
5027 if (rdev->wb.enabled)
5028 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5030 /* set the writeback address whether it's enabled or not */
5031 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5032 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5034 WREG32(IH_RB_CNTL, ih_rb_cntl);
5036 /* set rptr, wptr to 0 */
5037 WREG32(IH_RB_RPTR, 0);
5038 WREG32(IH_RB_WPTR, 0);
5040 /* Default settings for IH_CNTL (disabled at first) */
5041 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5042 /* RPTR_REARM only works if msi's are enabled */
5043 if (rdev->msi_enabled)
5044 ih_cntl |= RPTR_REARM;
5045 WREG32(IH_CNTL, ih_cntl);
5047 /* force the active interrupt state to all disabled */
5048 cik_disable_interrupt_state(rdev);
/* bus mastering must be on before interrupts can be delivered */
5050 pci_set_master(rdev->pdev);
5053 cik_enable_interrupts(rdev);
5059 * cik_irq_set - enable/disable interrupt sources
5061 * @rdev: radeon_device pointer
5063 * Enable interrupt sources on the GPU (vblanks, hpd,
5065 * Returns 0 for success, errors for failure.
5067 int cik_irq_set(struct radeon_device *rdev)
5069 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5070 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5071 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5072 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5073 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5074 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5075 u32 grbm_int_cntl = 0;
5076 u32 dma_cntl, dma_cntl1;
5078 if (!rdev->irq.installed) {
5079 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5082 /* don't enable anything if the ih is disabled */
5083 if (!rdev->ih.enabled) {
5084 cik_disable_interrupts(rdev);
5085 /* force the active interrupt state to all disabled */
5086 cik_disable_interrupt_state(rdev);
5090 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5091 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5092 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5093 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5094 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5095 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5097 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5098 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5100 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5101 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5102 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5103 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5104 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5105 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5106 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5107 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5109 /* enable CP interrupts on all rings */
5110 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5111 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5112 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5114 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5115 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5116 DRM_DEBUG("si_irq_set: sw int cp1\n");
5117 if (ring->me == 1) {
5118 switch (ring->pipe) {
5120 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5123 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5126 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5129 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5132 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5135 } else if (ring->me == 2) {
5136 switch (ring->pipe) {
5138 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5141 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5144 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5147 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5150 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5154 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
5157 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5158 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5159 DRM_DEBUG("si_irq_set: sw int cp2\n");
5160 if (ring->me == 1) {
5161 switch (ring->pipe) {
5163 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5166 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5169 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5172 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5175 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5178 } else if (ring->me == 2) {
5179 switch (ring->pipe) {
5181 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5184 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5187 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5190 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5193 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5197 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
5201 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5202 DRM_DEBUG("cik_irq_set: sw int dma\n");
5203 dma_cntl |= TRAP_ENABLE;
5206 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5207 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5208 dma_cntl1 |= TRAP_ENABLE;
5211 if (rdev->irq.crtc_vblank_int[0] ||
5212 atomic_read(&rdev->irq.pflip[0])) {
5213 DRM_DEBUG("cik_irq_set: vblank 0\n");
5214 crtc1 |= VBLANK_INTERRUPT_MASK;
5216 if (rdev->irq.crtc_vblank_int[1] ||
5217 atomic_read(&rdev->irq.pflip[1])) {
5218 DRM_DEBUG("cik_irq_set: vblank 1\n");
5219 crtc2 |= VBLANK_INTERRUPT_MASK;
5221 if (rdev->irq.crtc_vblank_int[2] ||
5222 atomic_read(&rdev->irq.pflip[2])) {
5223 DRM_DEBUG("cik_irq_set: vblank 2\n");
5224 crtc3 |= VBLANK_INTERRUPT_MASK;
5226 if (rdev->irq.crtc_vblank_int[3] ||
5227 atomic_read(&rdev->irq.pflip[3])) {
5228 DRM_DEBUG("cik_irq_set: vblank 3\n");
5229 crtc4 |= VBLANK_INTERRUPT_MASK;
5231 if (rdev->irq.crtc_vblank_int[4] ||
5232 atomic_read(&rdev->irq.pflip[4])) {
5233 DRM_DEBUG("cik_irq_set: vblank 4\n");
5234 crtc5 |= VBLANK_INTERRUPT_MASK;
5236 if (rdev->irq.crtc_vblank_int[5] ||
5237 atomic_read(&rdev->irq.pflip[5])) {
5238 DRM_DEBUG("cik_irq_set: vblank 5\n");
5239 crtc6 |= VBLANK_INTERRUPT_MASK;
5241 if (rdev->irq.hpd[0]) {
5242 DRM_DEBUG("cik_irq_set: hpd 1\n");
5243 hpd1 |= DC_HPDx_INT_EN;
5245 if (rdev->irq.hpd[1]) {
5246 DRM_DEBUG("cik_irq_set: hpd 2\n");
5247 hpd2 |= DC_HPDx_INT_EN;
5249 if (rdev->irq.hpd[2]) {
5250 DRM_DEBUG("cik_irq_set: hpd 3\n");
5251 hpd3 |= DC_HPDx_INT_EN;
5253 if (rdev->irq.hpd[3]) {
5254 DRM_DEBUG("cik_irq_set: hpd 4\n");
5255 hpd4 |= DC_HPDx_INT_EN;
5257 if (rdev->irq.hpd[4]) {
5258 DRM_DEBUG("cik_irq_set: hpd 5\n");
5259 hpd5 |= DC_HPDx_INT_EN;
5261 if (rdev->irq.hpd[5]) {
5262 DRM_DEBUG("cik_irq_set: hpd 6\n");
5263 hpd6 |= DC_HPDx_INT_EN;
5266 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5268 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5269 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5271 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5272 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5273 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5274 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5275 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5276 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5277 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5278 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5280 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5282 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5283 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5284 if (rdev->num_crtc >= 4) {
5285 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5286 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5288 if (rdev->num_crtc >= 6) {
5289 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5290 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5293 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5294 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5295 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5296 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5297 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5298 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5304 * cik_irq_ack - ack interrupt sources
5306 * @rdev: radeon_device pointer
5308 * Ack interrupt sources on the GPU (vblanks, hpd,
5309 * etc.) (CIK). Certain interrupts sources are sw
5310 * generated and do not require an explicit ack.
5312 static inline void cik_irq_ack(struct radeon_device *rdev)
5316 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5317 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5318 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5319 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5320 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5321 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5322 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5324 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5325 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5326 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5327 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5328 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5329 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5330 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5331 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5333 if (rdev->num_crtc >= 4) {
5334 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5335 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5336 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5337 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5338 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5339 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5340 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5341 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5344 if (rdev->num_crtc >= 6) {
5345 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5346 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5347 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5348 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5349 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5350 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5351 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5352 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5355 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5356 tmp = RREG32(DC_HPD1_INT_CONTROL);
5357 tmp |= DC_HPDx_INT_ACK;
5358 WREG32(DC_HPD1_INT_CONTROL, tmp);
5360 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5361 tmp = RREG32(DC_HPD2_INT_CONTROL);
5362 tmp |= DC_HPDx_INT_ACK;
5363 WREG32(DC_HPD2_INT_CONTROL, tmp);
5365 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5366 tmp = RREG32(DC_HPD3_INT_CONTROL);
5367 tmp |= DC_HPDx_INT_ACK;
5368 WREG32(DC_HPD3_INT_CONTROL, tmp);
5370 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5371 tmp = RREG32(DC_HPD4_INT_CONTROL);
5372 tmp |= DC_HPDx_INT_ACK;
5373 WREG32(DC_HPD4_INT_CONTROL, tmp);
5375 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5376 tmp = RREG32(DC_HPD5_INT_CONTROL);
5377 tmp |= DC_HPDx_INT_ACK;
5378 WREG32(DC_HPD5_INT_CONTROL, tmp);
5380 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5381 tmp = RREG32(DC_HPD5_INT_CONTROL);
5382 tmp |= DC_HPDx_INT_ACK;
5383 WREG32(DC_HPD6_INT_CONTROL, tmp);
5388 * cik_irq_disable - disable interrupts
5390 * @rdev: radeon_device pointer
5392 * Disable interrupts on the hw (CIK).
5394 static void cik_irq_disable(struct radeon_device *rdev)
5396 cik_disable_interrupts(rdev);
5397 /* Wait and acknowledge irq */
/* NOTE(review): upstream has an mdelay(1) + cik_irq_ack() between these
 * two calls; those lines are not visible in this extract — confirm */
5400 cik_disable_interrupt_state(rdev);
5404 * cik_irq_suspend - disable interrupts for suspend
5406 * @rdev: radeon_device pointer
5408 * Disable interrupts and stop the RLC (CIK).
5411 static void cik_irq_suspend(struct radeon_device *rdev)
5413 cik_irq_disable(rdev);
/* the RLC stop (cik_rlc_stop) follows here; not visible in this extract */
5418 * cik_irq_fini - tear down interrupt support
5420 * @rdev: radeon_device pointer
5422 * Disable interrupts on the hw and free the IH ring
5424 * Used for driver unload.
5426 static void cik_irq_fini(struct radeon_device *rdev)
/* suspend first so the hw no longer writes into the ring we free */
5428 cik_irq_suspend(rdev);
5429 r600_ih_ring_fini(rdev);
5433 * cik_get_ih_wptr - get the IH ring buffer wptr
5435 * @rdev: radeon_device pointer
5437 * Get the IH ring buffer wptr from either the register
5438 * or the writeback memory buffer (CIK). Also check for
5439 * ring buffer overflow and deal with it.
5440 * Used by cik_irq_process().
5441 * Returns the value of the wptr.
5443 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
/* prefer the (cheaper) writeback copy when writeback is enabled */
5447 if (rdev->wb.enabled)
5448 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5450 wptr = RREG32(IH_RB_WPTR);
5452 if (wptr & RB_OVERFLOW) {
5453 /* When a ring buffer overflow happen start parsing interrupt
5454 * from the last not overwritten vector (wptr + 16). Hopefully
5455 * this should allow us to catchup.
/* NOTE(review): the warning below prints (wptr + 16) + ptr_mask while
 * the rptr update uses (wptr + 16) & ptr_mask — the '+' looks like a
 * typo affecting the log message only */
5457 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5458 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5459 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
/* acknowledge the overflow in hw so the flag is rearmed */
5460 tmp = RREG32(IH_RB_CNTL);
5461 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5462 WREG32(IH_RB_CNTL, tmp);
/* mask to the ring size; also strips the RB_OVERFLOW bit */
5464 return (wptr & rdev->ih.ptr_mask);
5468 * Each IV ring entry is 128 bits:
5469 * [7:0] - interrupt source id
5471 * [59:32] - interrupt source data
5472 * [63:60] - reserved
5475 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5476 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5477 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5478 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5479 * PIPE_ID - ME0 0=3D
5480 * - ME1&2 compute dispatcher (4 pipes each)
5482 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5483 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5484 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5487 * [127:96] - reserved
5490 * cik_irq_process - interrupt handler
5492 * @rdev: radeon_device pointer
5494 * Interrupt hander (CIK). Walk the IH ring,
5495 * ack interrupts and schedule work to handle
5497 * Returns irq process return code.
5499 int cik_irq_process(struct radeon_device *rdev)
5501 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5502 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5505 u32 src_id, src_data, ring_id;
5506 u8 me_id, pipe_id, queue_id;
5508 bool queue_hotplug = false;
5509 bool queue_reset = false;
5511 if (!rdev->ih.enabled || rdev->shutdown)
5514 wptr = cik_get_ih_wptr(rdev);
5517 /* is somebody else already processing irqs? */
5518 if (atomic_xchg(&rdev->ih.lock, 1))
5521 rptr = rdev->ih.rptr;
5522 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5524 /* Order reading of wptr vs. reading of IH ring data */
5527 /* display interrupts */
5530 while (rptr != wptr) {
5531 /* wptr/rptr are in bytes! */
5532 ring_index = rptr / 4;
5533 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5534 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5535 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5538 case 1: /* D1 vblank/vline */
5540 case 0: /* D1 vblank */
5541 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5542 if (rdev->irq.crtc_vblank_int[0]) {
5543 drm_handle_vblank(rdev->ddev, 0);
5544 rdev->pm.vblank_sync = true;
5545 wake_up(&rdev->irq.vblank_queue);
5547 if (atomic_read(&rdev->irq.pflip[0]))
5548 radeon_crtc_handle_flip(rdev, 0);
5549 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5550 DRM_DEBUG("IH: D1 vblank\n");
5553 case 1: /* D1 vline */
5554 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5555 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5556 DRM_DEBUG("IH: D1 vline\n");
5560 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5564 case 2: /* D2 vblank/vline */
5566 case 0: /* D2 vblank */
5567 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5568 if (rdev->irq.crtc_vblank_int[1]) {
5569 drm_handle_vblank(rdev->ddev, 1);
5570 rdev->pm.vblank_sync = true;
5571 wake_up(&rdev->irq.vblank_queue);
5573 if (atomic_read(&rdev->irq.pflip[1]))
5574 radeon_crtc_handle_flip(rdev, 1);
5575 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5576 DRM_DEBUG("IH: D2 vblank\n");
5579 case 1: /* D2 vline */
5580 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5581 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5582 DRM_DEBUG("IH: D2 vline\n");
5586 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5590 case 3: /* D3 vblank/vline */
5592 case 0: /* D3 vblank */
5593 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5594 if (rdev->irq.crtc_vblank_int[2]) {
5595 drm_handle_vblank(rdev->ddev, 2);
5596 rdev->pm.vblank_sync = true;
5597 wake_up(&rdev->irq.vblank_queue);
5599 if (atomic_read(&rdev->irq.pflip[2]))
5600 radeon_crtc_handle_flip(rdev, 2);
5601 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5602 DRM_DEBUG("IH: D3 vblank\n");
5605 case 1: /* D3 vline */
5606 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5607 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5608 DRM_DEBUG("IH: D3 vline\n");
5612 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5616 case 4: /* D4 vblank/vline */
5618 case 0: /* D4 vblank */
5619 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5620 if (rdev->irq.crtc_vblank_int[3]) {
5621 drm_handle_vblank(rdev->ddev, 3);
5622 rdev->pm.vblank_sync = true;
5623 wake_up(&rdev->irq.vblank_queue);
5625 if (atomic_read(&rdev->irq.pflip[3]))
5626 radeon_crtc_handle_flip(rdev, 3);
5627 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5628 DRM_DEBUG("IH: D4 vblank\n");
5631 case 1: /* D4 vline */
5632 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5633 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5634 DRM_DEBUG("IH: D4 vline\n");
5638 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5642 case 5: /* D5 vblank/vline */
5644 case 0: /* D5 vblank */
5645 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5646 if (rdev->irq.crtc_vblank_int[4]) {
5647 drm_handle_vblank(rdev->ddev, 4);
5648 rdev->pm.vblank_sync = true;
5649 wake_up(&rdev->irq.vblank_queue);
5651 if (atomic_read(&rdev->irq.pflip[4]))
5652 radeon_crtc_handle_flip(rdev, 4);
5653 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5654 DRM_DEBUG("IH: D5 vblank\n");
5657 case 1: /* D5 vline */
5658 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5659 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5660 DRM_DEBUG("IH: D5 vline\n");
5664 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5668 case 6: /* D6 vblank/vline */
5670 case 0: /* D6 vblank */
5671 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5672 if (rdev->irq.crtc_vblank_int[5]) {
5673 drm_handle_vblank(rdev->ddev, 5);
5674 rdev->pm.vblank_sync = true;
5675 wake_up(&rdev->irq.vblank_queue);
5677 if (atomic_read(&rdev->irq.pflip[5]))
5678 radeon_crtc_handle_flip(rdev, 5);
5679 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5680 DRM_DEBUG("IH: D6 vblank\n");
5683 case 1: /* D6 vline */
5684 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5685 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5686 DRM_DEBUG("IH: D6 vline\n");
5690 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5694 case 42: /* HPD hotplug */
5697 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5698 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5699 queue_hotplug = true;
5700 DRM_DEBUG("IH: HPD1\n");
5704 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5705 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5706 queue_hotplug = true;
5707 DRM_DEBUG("IH: HPD2\n");
5711 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5712 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5713 queue_hotplug = true;
5714 DRM_DEBUG("IH: HPD3\n");
5718 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5719 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5720 queue_hotplug = true;
5721 DRM_DEBUG("IH: HPD4\n");
5725 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5726 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5727 queue_hotplug = true;
5728 DRM_DEBUG("IH: HPD5\n");
5732 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5733 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5734 queue_hotplug = true;
5735 DRM_DEBUG("IH: HPD6\n");
5739 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5745 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5746 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5747 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5748 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5749 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5750 /* reset addr and status */
5751 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5753 case 176: /* GFX RB CP_INT */
5754 case 177: /* GFX IB CP_INT */
5755 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5757 case 181: /* CP EOP event */
5758 DRM_DEBUG("IH: CP EOP\n");
5759 /* XXX check the bitfield order! */
5760 me_id = (ring_id & 0x60) >> 5;
5761 pipe_id = (ring_id & 0x18) >> 3;
5762 queue_id = (ring_id & 0x7) >> 0;
5765 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5769 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
5770 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5771 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
5772 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5776 case 184: /* CP Privileged reg access */
5777 DRM_ERROR("Illegal register access in command stream\n");
5778 /* XXX check the bitfield order! */
5779 me_id = (ring_id & 0x60) >> 5;
5780 pipe_id = (ring_id & 0x18) >> 3;
5781 queue_id = (ring_id & 0x7) >> 0;
5784 /* This results in a full GPU reset, but all we need to do is soft
5785 * reset the CP for gfx
5799 case 185: /* CP Privileged inst */
5800 DRM_ERROR("Illegal instruction in command stream\n");
5801 /* XXX check the bitfield order! */
5802 me_id = (ring_id & 0x60) >> 5;
5803 pipe_id = (ring_id & 0x18) >> 3;
5804 queue_id = (ring_id & 0x7) >> 0;
5807 /* This results in a full GPU reset, but all we need to do is soft
5808 * reset the CP for gfx
5822 case 224: /* SDMA trap event */
5823 /* XXX check the bitfield order! */
5824 me_id = (ring_id & 0x3) >> 0;
5825 queue_id = (ring_id & 0xc) >> 2;
5826 DRM_DEBUG("IH: SDMA trap\n");
5831 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5844 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5856 case 241: /* SDMA Privileged inst */
5857 case 247: /* SDMA Privileged inst */
5858 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5859 /* XXX check the bitfield order! */
5860 me_id = (ring_id & 0x3) >> 0;
5861 queue_id = (ring_id & 0xc) >> 2;
5895 case 233: /* GUI IDLE */
5896 DRM_DEBUG("IH: GUI idle\n");
5899 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5903 /* wptr/rptr are in bytes! */
5905 rptr &= rdev->ih.ptr_mask;
5908 schedule_work(&rdev->hotplug_work);
5910 schedule_work(&rdev->reset_work);
5911 rdev->ih.rptr = rptr;
5912 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5913 atomic_set(&rdev->ih.lock, 0);
5915 /* make sure wptr hasn't changed while processing */
5916 wptr = cik_get_ih_wptr(rdev);
5924 * startup/shutdown callbacks
5927 * cik_startup - program the asic to a functional state
5929 * @rdev: radeon_device pointer
5931 * Programs the asic to a functional state (CIK).
5932 * Called by cik_init() and cik_resume().
5933 * Returns 0 for success, error for failure.
5935 static int cik_startup(struct radeon_device *rdev)
5937 struct radeon_ring *ring;
5940 if (rdev->flags & RADEON_IS_IGP) {
5941 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5942 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5943 r = cik_init_microcode(rdev);
5945 DRM_ERROR("Failed to load firmware!\n");
5950 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5951 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5953 r = cik_init_microcode(rdev);
5955 DRM_ERROR("Failed to load firmware!\n");
5960 r = ci_mc_load_microcode(rdev);
5962 DRM_ERROR("Failed to load MC firmware!\n");
5967 r = r600_vram_scratch_init(rdev);
5971 cik_mc_program(rdev);
5972 r = cik_pcie_gart_enable(rdev);
5977 /* allocate rlc buffers */
5978 r = si_rlc_init(rdev);
5980 DRM_ERROR("Failed to init rlc BOs!\n");
5984 /* allocate wb buffer */
5985 r = radeon_wb_init(rdev);
5989 /* allocate mec buffers */
5990 r = cik_mec_init(rdev);
5992 DRM_ERROR("Failed to init MEC BOs!\n");
5996 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5998 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6002 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6004 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6008 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6010 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6014 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6016 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6020 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6022 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6026 r = cik_uvd_resume(rdev);
6028 r = radeon_fence_driver_start_ring(rdev,
6029 R600_RING_TYPE_UVD_INDEX);
6031 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6034 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6037 if (!rdev->irq.installed) {
6038 r = radeon_irq_kms_init(rdev);
6043 r = cik_irq_init(rdev);
6045 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6046 radeon_irq_kms_fini(rdev);
6051 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6052 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6053 CP_RB0_RPTR, CP_RB0_WPTR,
6054 0, 0xfffff, RADEON_CP_PACKET2);
6058 /* set up the compute queues */
6059 /* type-2 packets are deprecated on MEC, use type-3 instead */
6060 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6061 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6062 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6063 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6066 ring->me = 1; /* first MEC */
6067 ring->pipe = 0; /* first pipe */
6068 ring->queue = 0; /* first queue */
6069 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6071 /* type-2 packets are deprecated on MEC, use type-3 instead */
6072 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6073 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6074 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6075 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6078 /* dGPU only have 1 MEC */
6079 ring->me = 1; /* first MEC */
6080 ring->pipe = 0; /* first pipe */
6081 ring->queue = 1; /* second queue */
6082 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6084 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6085 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6086 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6087 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6088 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6092 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6093 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6094 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6095 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6096 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6100 r = cik_cp_resume(rdev);
6104 r = cik_sdma_resume(rdev);
6108 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6109 if (ring->ring_size) {
6110 r = radeon_ring_init(rdev, ring, ring->ring_size,
6111 R600_WB_UVD_RPTR_OFFSET,
6112 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6113 0, 0xfffff, RADEON_CP_PACKET2);
6115 r = r600_uvd_init(rdev);
6117 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6120 r = radeon_ib_pool_init(rdev);
6122 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6126 r = radeon_vm_manager_init(rdev);
6128 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6136 * cik_resume - resume the asic to a functional state
6138 * @rdev: radeon_device pointer
6140 * Programs the asic to a functional state (CIK).
6142 * Returns 0 for success, error for failure.
/*
 * Resume handler: re-post the ASIC via ATOM, restore golden registers,
 * then run the common cik_startup() path. accel_working is set before
 * startup and cleared again if startup fails.
 */
6144 int cik_resume(struct radeon_device *rdev)
/* post card via the ATOM BIOS tables */
6149 atom_asic_init(rdev->mode_info.atom_context);
6151 /* init golden registers */
6152 cik_init_golden_registers(rdev);
6154 rdev->accel_working = true;
6155 r = cik_startup(rdev);
6157 DRM_ERROR("cik startup failed on resume\n");
6158 rdev->accel_working = false;
6167 * cik_suspend - suspend the asic
6169 * @rdev: radeon_device pointer
6171 * Bring the chip into a state suitable for suspend (CIK).
6172 * Called at suspend.
6173 * Returns 0 for success.
/*
 * Suspend handler: tear down in the reverse order of startup —
 * VM manager, CP, SDMA, UVD, interrupts, writeback, then GART.
 */
6175 int cik_suspend(struct radeon_device *rdev)
6177 radeon_vm_manager_fini(rdev);
6178 cik_cp_enable(rdev, false);
6179 cik_sdma_enable(rdev, false);
/* stop the UVD ring buffer controller before suspending UVD */
6180 r600_uvd_rbc_stop(rdev);
6181 radeon_uvd_suspend(rdev);
6182 cik_irq_suspend(rdev);
6183 radeon_wb_disable(rdev);
6184 cik_pcie_gart_disable(rdev);
6188 /* Plan is to move initialization in that function and use
6189 * helper function so that radeon_device_init pretty much
6190 * do nothing more than calling asic specific function. This
6191 * should also allow to remove a bunch of callback function
6195 * cik_init - asic specific driver and hw init
6197 * @rdev: radeon_device pointer
6199 * Setup asic specific driver variables and program the hw
6200 * to a functional state (CIK).
6201 * Called at driver startup.
6202 * Returns 0 for success, errors for failure.
/*
 * Driver-load entry point: fetch and validate the (ATOM) BIOS, post
 * the card if needed, set up software state (scratch/surface regs,
 * clocks, fences, MC, BO manager), size every ring, then call
 * cik_startup(). On startup failure, acceleration is torn down but
 * init still continues (modesetting can work without accel).
 * NOTE(review): numbered lines have gaps; error paths are elided here.
 */
6204 int cik_init(struct radeon_device *rdev)
6206 struct radeon_ring *ring;
6210 if (!radeon_get_bios(rdev)) {
6211 if (ASIC_IS_AVIVO(rdev))
6214 /* Must be an ATOMBIOS */
6215 if (!rdev->is_atom_bios) {
6216 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6219 r = radeon_atombios_init(rdev);
6223 /* Post card if necessary */
6224 if (!radeon_card_posted(rdev)) {
6226 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6229 DRM_INFO("GPU not posted. posting now...\n");
6230 atom_asic_init(rdev->mode_info.atom_context);
6232 /* init golden registers */
6233 cik_init_golden_registers(rdev);
6234 /* Initialize scratch registers */
6235 cik_scratch_init(rdev);
6236 /* Initialize surface registers */
6237 radeon_surface_init(rdev);
6238 /* Initialize clocks */
6239 radeon_get_clock_info(rdev->ddev);
6242 r = radeon_fence_driver_init(rdev);
6246 /* initialize memory controller */
6247 r = cik_mc_init(rdev);
6250 /* Memory manager */
6251 r = radeon_bo_init(rdev);
/* size the GFX ring (1 MB) */
6255 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6256 ring->ring_obj = NULL;
6257 r600_ring_init(rdev, ring, 1024 * 1024);
/* compute rings (1 MB each); each needs a doorbell page */
6259 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6260 ring->ring_obj = NULL;
6261 r600_ring_init(rdev, ring, 1024 * 1024);
6262 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6266 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6267 ring->ring_obj = NULL;
6268 r600_ring_init(rdev, ring, 1024 * 1024);
6269 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
/* SDMA rings (256 KB each) */
6273 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6274 ring->ring_obj = NULL;
6275 r600_ring_init(rdev, ring, 256 * 1024);
6277 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6278 ring->ring_obj = NULL;
6279 r600_ring_init(rdev, ring, 256 * 1024);
/* UVD ring (4 KB) — only if UVD init succeeded */
6281 r = radeon_uvd_init(rdev);
6283 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6284 ring->ring_obj = NULL;
6285 r600_ring_init(rdev, ring, 4096);
/* interrupt handler ring (64 KB) */
6288 rdev->ih.ring_obj = NULL;
6289 r600_ih_ring_init(rdev, 64 * 1024);
6291 r = r600_pcie_gart_init(rdev);
6295 rdev->accel_working = true;
6296 r = cik_startup(rdev);
/* startup failed: disable acceleration but keep the device alive */
6298 dev_err(rdev->dev, "disabling GPU acceleration\n");
6300 cik_sdma_fini(rdev);
6304 radeon_wb_fini(rdev);
6305 radeon_ib_pool_fini(rdev);
6306 radeon_vm_manager_fini(rdev);
6307 radeon_irq_kms_fini(rdev);
6308 cik_pcie_gart_fini(rdev);
6309 rdev->accel_working = false;
6312 /* Don't start up if the MC ucode is missing.
6313 * The default clocks and voltages before the MC ucode
6314 * is loaded are not suffient for advanced operations.
 * (IGP parts have no MC ucode, so they are exempt.)
6316 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6317 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6325 * cik_fini - asic specific driver and hw fini
6327 * @rdev: radeon_device pointer
6329 * Tear down the asic specific driver variables and program the hw
6330 * to an idle state (CIK).
6331 * Called at driver unload.
/*
 * Driver-unload teardown: free every subsystem set up by cik_init()
 * and cik_startup(), roughly in reverse initialization order.
 */
6333 void cik_fini(struct radeon_device *rdev)
6336 cik_sdma_fini(rdev);
6340 radeon_wb_fini(rdev);
6341 radeon_vm_manager_fini(rdev);
6342 radeon_ib_pool_fini(rdev);
6343 radeon_irq_kms_fini(rdev);
6344 radeon_uvd_fini(rdev);
6345 cik_pcie_gart_fini(rdev);
6346 r600_vram_scratch_fini(rdev);
6347 radeon_gem_fini(rdev);
6348 radeon_fence_driver_fini(rdev);
6349 radeon_bo_fini(rdev);
6350 radeon_atombios_fini(rdev);
6355 /* display watermark setup */
6357 * dce8_line_buffer_adjust - Set up the line buffer
6359 * @rdev: radeon_device pointer
6360 * @radeon_crtc: the selected display controller
6361 * @mode: the current display mode on the selected display
6364 * Setup up the line buffer allocation for
6365 * the selected display controller (CIK).
6366 * Returns the line buffer size in pixels.
/*
 * Program LB_MEMORY_CTRL for one CRTC and return the line-buffer size
 * available to it (in pixels). Partition count is chosen from the mode
 * width; a disabled CRTC gets no line buffer.
 */
6368 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6369 struct radeon_crtc *radeon_crtc,
6370 struct drm_display_mode *mode)
6376 * There are 6 line buffers, one for each display controllers.
6377 * There are 3 partitions per LB. Select the number of partitions
6378 * to enable based on the display width. For display widths larger
6379 * than 4096, you need use to use 2 display controllers and combine
6380 * them using the stereo blender.
/* pick partitions by mode width: <1920, <2560, <4096, else too big */
6382 if (radeon_crtc->base.enabled && mode) {
6383 if (mode->crtc_hdisplay < 1920)
6385 else if (mode->crtc_hdisplay < 2560)
6387 else if (mode->crtc_hdisplay < 4096)
6390 DRM_DEBUG_KMS("Mode too big for LB!\n")
6396 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6397 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6399 if (radeon_crtc->base.enabled && mode) {
6411 /* controller not enabled, so no lb used */
6416 * cik_get_number_of_dram_channels - get the number of dram channels
6418 * @rdev: radeon_device pointer
6420 * Look up the number of video ram channels (CIK).
6421 * Used for display watermark bandwidth calculations
6422 * Returns the number of dram channels
/*
 * Decode the NOOFCHAN field of MC_SHARED_CHMAP into the number of
 * DRAM channels; used by the display watermark calculations.
 */
6424 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6426 u32 tmp = RREG32(MC_SHARED_CHMAP);
6428 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* All inputs needed by the DCE8 display watermark calculations for
 * one CRTC: clocks, mode timing, scaling and line-buffer info. */
6451 struct dce8_wm_params {
6452 u32 dram_channels; /* number of dram channels */
6453 u32 yclk; /* bandwidth per dram data pin in kHz */
6454 u32 sclk; /* engine clock in kHz */
6455 u32 disp_clk; /* display clock in kHz */
6456 u32 src_width; /* viewport width */
6457 u32 active_time; /* active display time in ns */
6458 u32 blank_time; /* blank time in ns */
6459 bool interlaced; /* mode is interlaced */
6460 fixed20_12 vsc; /* vertical scale ratio */
6461 u32 num_heads; /* number of active crtcs */
6462 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6463 u32 lb_size; /* line buffer allocated to pipe */
6464 u32 vtaps; /* vertical scaler taps */
6468 * dce8_dram_bandwidth - get the dram bandwidth
6470 * @wm: watermark calculation data
6472 * Calculate the raw dram bandwidth (CIK).
6473 * Used for display watermark bandwidth calculations
6474 * Returns the dram bandwidth in MBytes/s
/*
 * Raw DRAM bandwidth in MB/s, derated by a fixed 0.7 efficiency:
 * yclk(MHz) * channels * 4 bytes * 0.7, computed in 20.12 fixed point.
 */
6476 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6478 /* Calculate raw DRAM Bandwidth */
6479 fixed20_12 dram_efficiency; /* 0.7 */
6480 fixed20_12 yclk, dram_channels, bandwidth;
/* convert yclk from kHz to MHz */
6483 a.full = dfixed_const(1000);
6484 yclk.full = dfixed_const(wm->yclk);
6485 yclk.full = dfixed_div(yclk, a);
/* 4 bytes transferred per channel per yclk */
6486 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6487 a.full = dfixed_const(10);
6488 dram_efficiency.full = dfixed_const(7);
6489 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6490 bandwidth.full = dfixed_mul(dram_channels, yclk);
6491 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6493 return dfixed_trunc(bandwidth);
6497 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6499 * @wm: watermark calculation data
6501 * Calculate the dram bandwidth used for display (CIK).
6502 * Used for display watermark bandwidth calculations
6503 * Returns the dram bandwidth for display in MBytes/s
/*
 * DRAM bandwidth available to the display in MB/s. Same computation
 * as dce8_dram_bandwidth() but with the worst-case display allocation
 * factor of 0.3 instead of the 0.7 efficiency.
 */
6505 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6507 /* Calculate DRAM Bandwidth and the part allocated to display. */
6508 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6509 fixed20_12 yclk, dram_channels, bandwidth;
/* convert yclk from kHz to MHz */
6512 a.full = dfixed_const(1000);
6513 yclk.full = dfixed_const(wm->yclk);
6514 yclk.full = dfixed_div(yclk, a);
6515 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6516 a.full = dfixed_const(10);
6517 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
6518 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6519 bandwidth.full = dfixed_mul(dram_channels, yclk);
6520 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6522 return dfixed_trunc(bandwidth);
6526 * dce8_data_return_bandwidth - get the data return bandwidth
6528 * @wm: watermark calculation data
6530 * Calculate the data return bandwidth used for display (CIK).
6531 * Used for display watermark bandwidth calculations
6532 * Returns the data return bandwidth in MBytes/s
/*
 * Display data-return bandwidth in MB/s:
 * 32 bytes per sclk * sclk(MHz) * 0.8 efficiency, in 20.12 fixed point.
 */
6534 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6536 /* Calculate the display Data return Bandwidth */
6537 fixed20_12 return_efficiency; /* 0.8 */
6538 fixed20_12 sclk, bandwidth;
/* convert sclk from kHz to MHz */
6541 a.full = dfixed_const(1000);
6542 sclk.full = dfixed_const(wm->sclk);
6543 sclk.full = dfixed_div(sclk, a);
6544 a.full = dfixed_const(10);
6545 return_efficiency.full = dfixed_const(8);
6546 return_efficiency.full = dfixed_div(return_efficiency, a);
6547 a.full = dfixed_const(32);
6548 bandwidth.full = dfixed_mul(a, sclk);
6549 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6551 return dfixed_trunc(bandwidth);
6555 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6557 * @wm: watermark calculation data
6559 * Calculate the dmif bandwidth used for display (CIK).
6560 * Used for display watermark bandwidth calculations
6561 * Returns the dmif bandwidth in MBytes/s
/*
 * DMIF request bandwidth in MB/s:
 * 32 bytes per disp_clk * disp_clk(MHz) * 0.8 request efficiency.
 */
6563 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6565 /* Calculate the DMIF Request Bandwidth */
6566 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6567 fixed20_12 disp_clk, bandwidth;
/* convert disp_clk from kHz to MHz */
6570 a.full = dfixed_const(1000);
6571 disp_clk.full = dfixed_const(wm->disp_clk);
6572 disp_clk.full = dfixed_div(disp_clk, a);
6573 a.full = dfixed_const(32);
6574 b.full = dfixed_mul(a, disp_clk);
6576 a.full = dfixed_const(10);
6577 disp_clk_request_efficiency.full = dfixed_const(8);
6578 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6580 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6582 return dfixed_trunc(bandwidth);
6586 * dce8_available_bandwidth - get the min available bandwidth
6588 * @wm: watermark calculation data
6590 * Calculate the min available bandwidth used for display (CIK).
6591 * Used for display watermark bandwidth calculations
6592 * Returns the min available bandwidth in MBytes/s
/*
 * Minimum of the three bandwidth limits (DRAM, data return, DMIF) —
 * the bandwidth the display can burst to, in MB/s.
 */
6594 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6596 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
6597 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6598 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6599 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6601 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6605 * dce8_average_bandwidth - get the average available bandwidth
6607 * @wm: watermark calculation data
6609 * Calculate the average available bandwidth used for display (CIK).
6610 * Used for display watermark bandwidth calculations
6611 * Returns the average available bandwidth in MBytes/s
/*
 * Average bandwidth the current mode needs, in MB/s:
 * src_width * bytes_per_pixel * vsc / line_time(us).
 */
6613 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6615 /* Calculate the display mode Average Bandwidth
6616 * DisplayMode should contain the source and destination dimensions,
6620 fixed20_12 line_time;
6621 fixed20_12 src_width;
6622 fixed20_12 bandwidth;
/* line time in microseconds (active + blank are in ns) */
6625 a.full = dfixed_const(1000);
6626 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6627 line_time.full = dfixed_div(line_time, a);
6628 bpp.full = dfixed_const(wm->bytes_per_pixel);
6629 src_width.full = dfixed_const(wm->src_width);
6630 bandwidth.full = dfixed_mul(src_width, bpp);
/* scale by the vertical scaling ratio */
6631 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6632 bandwidth.full = dfixed_div(bandwidth, line_time);
6634 return dfixed_trunc(bandwidth);
6638 * dce8_latency_watermark - get the latency watermark
6640 * @wm: watermark calculation data
6642 * Calculate the latency watermark (CIK).
6643 * Used for display watermark bandwidth calculations
6644 * Returns the latency watermark in ns
/*
 * Compute the latency watermark in ns: MC latency plus the time other
 * heads' chunk/cursor returns take plus DC pipe latency, extended by
 * any shortfall of the line-buffer fill time vs. the active time.
 */
6646 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6648 /* First calculate the latency in ns */
6649 u32 mc_latency = 2000; /* 2000 ns. */
6650 u32 available_bandwidth = dce8_available_bandwidth(wm);
/* time to return a worst-case 512-byte chunk (x8), in ns */
6651 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6652 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6653 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6654 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6655 (wm->num_heads * cursor_line_pair_return_time);
6656 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6657 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6658 u32 tmp, dmif_size = 12288;
/* no active heads: nothing to wait for */
6661 if (wm->num_heads == 0)
/* downscaling (vsc > 2, or vsc > 1 with >= 3 taps, or interlaced)
 * needs up to 4 source lines per destination line, otherwise 2 */
6664 a.full = dfixed_const(2);
6665 b.full = dfixed_const(1);
6666 if ((wm->vsc.full > a.full) ||
6667 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6669 ((wm->vsc.full >= a.full) && wm->interlaced))
6670 max_src_lines_per_dst_line = 4;
6672 max_src_lines_per_dst_line = 2;
/* bandwidth share for this head */
6674 a.full = dfixed_const(available_bandwidth);
6675 b.full = dfixed_const(wm->num_heads);
6676 a.full = dfixed_div(a, b);
6678 b.full = dfixed_const(mc_latency + 512);
6679 c.full = dfixed_const(wm->disp_clk);
6680 b.full = dfixed_div(b, c);
6682 c.full = dfixed_const(dmif_size);
6683 b.full = dfixed_div(c, b);
6685 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
/* cap by what the display clock can push into the line buffer */
6687 b.full = dfixed_const(1000);
6688 c.full = dfixed_const(wm->disp_clk);
6689 b.full = dfixed_div(c, b);
6690 c.full = dfixed_const(wm->bytes_per_pixel);
6691 b.full = dfixed_mul(b, c);
6693 lb_fill_bw = min(tmp, dfixed_trunc(b));
/* time (ns) to fill one destination line's worth of source data */
6695 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6696 b.full = dfixed_const(1000);
6697 c.full = dfixed_const(lb_fill_bw);
6698 b.full = dfixed_div(c, b);
6699 a.full = dfixed_div(a, b);
6700 line_fill_time = dfixed_trunc(a);
6702 if (line_fill_time < wm->active_time)
/* fill is slower than scanout: extend the watermark by the deficit */
6705 return latency + (line_fill_time - wm->active_time);
6710 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6711 * average and available dram bandwidth
6713 * @wm: watermark calculation data
6715 * Check if the display average bandwidth fits in the display
6716 * dram bandwidth (CIK).
6717 * Used for display watermark bandwidth calculations
6718 * Returns true if the display fits, false if not.
/* True if this head's average bandwidth fits within its per-head
 * share of the display DRAM bandwidth. */
6720 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6722 if (dce8_average_bandwidth(wm) <=
6723 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6730 * dce8_average_bandwidth_vs_available_bandwidth - check
6731 * average and available bandwidth
6733 * @wm: watermark calculation data
6735 * Check if the display average bandwidth fits in the display
6736 * available bandwidth (CIK).
6737 * Used for display watermark bandwidth calculations
6738 * Returns true if the display fits, false if not.
/* True if this head's average bandwidth fits within its per-head
 * share of the minimum available bandwidth. */
6740 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6742 if (dce8_average_bandwidth(wm) <=
6743 (dce8_available_bandwidth(wm) / wm->num_heads))
6750 * dce8_check_latency_hiding - check latency hiding
6752 * @wm: watermark calculation data
6754 * Check latency hiding (CIK).
6755 * Used for display watermark bandwidth calculations
6756 * Returns true if the display fits, false if not.
/*
 * True if the line buffer can hide the computed latency watermark:
 * compares the latency against the time covered by the number of
 * lines the LB can tolerate (1 or 2 depending on scaling/taps).
 */
6758 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6760 u32 lb_partitions = wm->lb_size / wm->src_width;
6761 u32 line_time = wm->active_time + wm->blank_time;
6762 u32 latency_tolerant_lines;
/* downscaling or too few LB partitions: only 1 line of slack */
6766 a.full = dfixed_const(1);
6767 if (wm->vsc.full > a.full)
6768 latency_tolerant_lines = 1;
6770 if (lb_partitions <= (wm->vtaps + 1))
6771 latency_tolerant_lines = 1;
6773 latency_tolerant_lines = 2;
6776 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6778 if (dce8_latency_watermark(wm) <= latency_hiding)
6785 * dce8_program_watermarks - program display watermarks
6787 * @rdev: radeon_device pointer
6788 * @radeon_crtc: the selected display controller
6789 * @lb_size: line buffer size
6790 * @num_heads: number of display controllers in use
6792 * Calculate and program the display watermarks for the
6793 * selected display controller (CIK).
/*
 * Fill a dce8_wm_params from the CRTC's current mode and clocks,
 * compute the high- and low-clock latency watermarks, and program
 * them into the DPG watermark registers (selecting each watermark
 * set via DPG_WATERMARK_MASK_CONTROL, then restoring the original
 * selection).
 */
6795 static void dce8_program_watermarks(struct radeon_device *rdev,
6796 struct radeon_crtc *radeon_crtc,
6797 u32 lb_size, u32 num_heads)
6799 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6800 struct dce8_wm_params wm;
6803 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6806 if (radeon_crtc->base.enabled && num_heads && mode) {
/* pixel period in ns; line time clamped to the 16-bit register max */
6807 pixel_period = 1000000 / (u32)mode->clock;
6808 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
/* current memory/engine clocks in kHz (pm values are in 10 kHz) */
6810 wm.yclk = rdev->pm.current_mclk * 10;
6811 wm.sclk = rdev->pm.current_sclk * 10;
6812 wm.disp_clk = mode->clock;
6813 wm.src_width = mode->crtc_hdisplay;
6814 wm.active_time = mode->crtc_hdisplay * pixel_period;
6815 wm.blank_time = line_time - wm.active_time;
6816 wm.interlaced = false;
6817 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6818 wm.interlaced = true;
6819 wm.vsc = radeon_crtc->vsc;
6821 if (radeon_crtc->rmx_type != RMX_OFF)
6823 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6824 wm.lb_size = lb_size;
6825 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6826 wm.num_heads = num_heads;
6828 /* set for high clocks */
6829 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6830 /* set for low clocks */
6831 /* wm.yclk = low clk; wm.sclk = low clk */
6832 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6834 /* possibly force display priority to high */
6835 /* should really do this at mode validation time... */
6836 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6837 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6838 !dce8_check_latency_hiding(&wm) ||
6839 (rdev->disp_priority == 2)) {
6840 DRM_DEBUG_KMS("force priority to high\n");
/* select and program watermark set A */
6845 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6847 tmp &= ~LATENCY_WATERMARK_MASK(3);
6848 tmp |= LATENCY_WATERMARK_MASK(1);
6849 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6850 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6851 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6852 LATENCY_HIGH_WATERMARK(line_time)));
/* select and program watermark set B */
6854 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6855 tmp &= ~LATENCY_WATERMARK_MASK(3);
6856 tmp |= LATENCY_WATERMARK_MASK(2);
6857 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6858 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6859 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6860 LATENCY_HIGH_WATERMARK(line_time)));
6861 /* restore original selection */
6862 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6866 * dce8_bandwidth_update - program display watermarks
6868 * @rdev: radeon_device pointer
6870 * Calculate and program the display watermarks and line
6871 * buffer allocation (CIK).
/*
 * Entry point called by the core on mode changes: count the active
 * CRTCs, then set up the line buffer and program watermarks for each.
 */
6873 void dce8_bandwidth_update(struct radeon_device *rdev)
6875 struct drm_display_mode *mode = NULL;
6876 u32 num_heads = 0, lb_size;
6879 radeon_update_display_priority(rdev);
/* first pass: count enabled CRTCs */
6881 for (i = 0; i < rdev->num_crtc; i++) {
6882 if (rdev->mode_info.crtcs[i]->base.enabled)
/* second pass: program LB allocation and watermarks per CRTC */
6885 for (i = 0; i < rdev->num_crtc; i++) {
6886 mode = &rdev->mode_info.crtcs[i]->base.mode;
6887 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6888 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6893 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6895 * @rdev: radeon_device pointer
6897 * Fetches a GPU clock counter snapshot (SI).
6898 * Returns the 64 bit clock counter snapshot.
/*
 * Snapshot the 64-bit RLC GPU clock counter. Writing the capture
 * register latches the count; the mutex serializes against other
 * readers of the LSB/MSB register pair.
 */
6900 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6904 mutex_lock(&rdev->gpu_clock_mutex);
6905 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6906 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6907 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6908 mutex_unlock(&rdev->gpu_clock_mutex);
/*
 * Program one UVD clock (VCLK or DCLK) to the requested frequency:
 * look up the ATOM clock dividers for 'clock', write the post divider
 * into the SMC control register, then poll (up to 100 iterations) for
 * the status register to report the clock as stable.
 * Returns 0 on success, negative error code on failure.
 * NOTE(review): numbered lines have gaps; error paths are elided here.
 */
6912 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6913 u32 cntl_reg, u32 status_reg)
6916 struct atom_clock_dividers dividers;
/* FIX: "÷rs" was mojibake for "&dividers" (HTML &divide; entity
 * decoded into the source) — restore the address-of argument */
6919 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6920 clock, false, &dividers);
/* replace the divider field, keeping other control bits intact */
6924 tmp = RREG32_SMC(cntl_reg);
6925 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6926 tmp |= dividers.post_divider;
6927 WREG32_SMC(cntl_reg, tmp);
/* wait for the clock to lock */
6929 for (i = 0; i < 100; i++) {
6930 if (RREG32_SMC(status_reg) & DCLK_STATUS)
/* Set both UVD clocks: VCLK first, then DCLK, via cik_set_uvd_clock().
 * Returns 0 on success, negative error code on failure. */
6940 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6944 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6948 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6952 int cik_uvd_resume(struct radeon_device *rdev)
6958 r = radeon_uvd_resume(rdev);
6962 /* programm the VCPU memory controller bits 0-27 */
6963 addr = rdev->uvd.gpu_addr >> 3;
6964 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6965 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6966 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6969 size = RADEON_UVD_STACK_SIZE >> 3;
6970 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6971 WREG32(UVD_VCPU_CACHE_SIZE1, size);
6974 size = RADEON_UVD_HEAP_SIZE >> 3;
6975 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6976 WREG32(UVD_VCPU_CACHE_SIZE2, size);
6979 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6980 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6983 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
6984 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));