/*
 * drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
 *
 * GK20A PMU (aka. gPMU outside gk20a context)
 *
 * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <linux/delay.h>        /* for mdelay */
#include <linux/firmware.h>
#include <linux/clk.h>
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>

#include "gk20a.h"
#include "gr_gk20a.h"
#include "hw_mc_gk20a.h"
#include "hw_pwr_gk20a.h"
#include "hw_top_gk20a.h"

#define GK20A_PMU_UCODE_IMAGE   "gpmu_ucode.bin"

#define gk20a_dbg_pmu(fmt, arg...) \
        gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)

static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
                u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
static void pmu_setup_hw(struct work_struct *work);
static void ap_callback_init_and_enable_ctrl(
                struct gk20a *g, struct pmu_msg *msg,
                void *param, u32 seq_desc, u32 status);
static int gk20a_pmu_ap_send_command(struct gk20a *g,
                        union pmu_ap_cmd *p_ap_cmd, bool b_block);

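/*
 * The *_v0/*_v1 helpers below abstract the layout differences between the
 * two supported PMU ucode interface versions.  gk20a_init_pmu() selects the
 * matching set based on the app_version reported in the ucode descriptor.
 */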
static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
{
        return sizeof(struct pmu_cmdline_args_v0);
}

static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
{
        return sizeof(struct pmu_cmdline_args_v1);
}

static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
{
        pmu->args_v1.cpu_freq_hz = freq;
}

static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
{
        pmu->args_v0.cpu_freq_hz = freq;
}

static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
{
        return (void *)(&pmu->args_v1);
}

static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
{
        return (void *)(&pmu->args_v0);
}

static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
{
        return sizeof(struct pmu_allocation_v1);
}

static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
{
        return sizeof(struct pmu_allocation_v0);
}

static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
        void **pmu_alloc_ptr, void *assign_ptr)
{
        struct pmu_allocation_v1 **pmu_a_ptr =
                (struct pmu_allocation_v1 **)pmu_alloc_ptr;
        *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
}

static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
        void **pmu_alloc_ptr, void *assign_ptr)
{
        struct pmu_allocation_v0 **pmu_a_ptr =
                (struct pmu_allocation_v0 **)pmu_alloc_ptr;
        *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
}

static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr, u16 size)
{
        struct pmu_allocation_v1 *pmu_a_ptr =
                (struct pmu_allocation_v1 *)pmu_alloc_ptr;
        pmu_a_ptr->alloc.dmem.size = size;
}

static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr, u16 size)
{
        struct pmu_allocation_v0 *pmu_a_ptr =
                (struct pmu_allocation_v0 *)pmu_alloc_ptr;
        pmu_a_ptr->alloc.dmem.size = size;
}

static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr)
{
        struct pmu_allocation_v1 *pmu_a_ptr =
                (struct pmu_allocation_v1 *)pmu_alloc_ptr;
        return pmu_a_ptr->alloc.dmem.size;
}

static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr)
{
        struct pmu_allocation_v0 *pmu_a_ptr =
                (struct pmu_allocation_v0 *)pmu_alloc_ptr;
        return pmu_a_ptr->alloc.dmem.size;
}

static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr)
{
        struct pmu_allocation_v1 *pmu_a_ptr =
                (struct pmu_allocation_v1 *)pmu_alloc_ptr;
        return pmu_a_ptr->alloc.dmem.offset;
}

static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr)
{
        struct pmu_allocation_v0 *pmu_a_ptr =
                (struct pmu_allocation_v0 *)pmu_alloc_ptr;
        return pmu_a_ptr->alloc.dmem.offset;
}

static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr)
{
        struct pmu_allocation_v1 *pmu_a_ptr =
                (struct pmu_allocation_v1 *)pmu_alloc_ptr;
        return &pmu_a_ptr->alloc.dmem.offset;
}

static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr)
{
        struct pmu_allocation_v0 *pmu_a_ptr =
                (struct pmu_allocation_v0 *)pmu_alloc_ptr;
        return &pmu_a_ptr->alloc.dmem.offset;
}

static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr, u32 offset)
{
        struct pmu_allocation_v1 *pmu_a_ptr =
                (struct pmu_allocation_v1 *)pmu_alloc_ptr;
        pmu_a_ptr->alloc.dmem.offset = offset;
}

static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
        void *pmu_alloc_ptr, u32 offset)
{
        struct pmu_allocation_v0 *pmu_a_ptr =
                (struct pmu_allocation_v0 *)pmu_alloc_ptr;
        pmu_a_ptr->alloc.dmem.offset = offset;
}

static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
{
        return (void *)(&(init->pmu_init_v1));
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
{
        struct pmu_init_msg_pmu_v1 *init =
                (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
        return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
{
        struct pmu_init_msg_pmu_v1 *init =
                (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
        return init->sw_managed_area_size;
}

static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
{
        return (void *)(&(init->pmu_init_v0));
}

static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
{
        struct pmu_init_msg_pmu_v0 *init =
                (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
        return init->sw_managed_area_offset;
}

static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
{
        struct pmu_init_msg_pmu_v0 *init =
                (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
        return init->sw_managed_area_size;
}

static u32 get_pmu_perfmon_cmd_start_size_v1(void)
{
        return sizeof(struct pmu_perfmon_cmd_start_v1);
}

static u32 get_pmu_perfmon_cmd_start_size_v0(void)
{
        return sizeof(struct pmu_perfmon_cmd_start_v0);
}

static int get_perfmon_cmd_start_offsetofvar_v1(
        enum pmu_perfmon_cmd_start_fields field)
{
        switch (field) {
        case COUNTER_ALLOC:
                return offsetof(struct pmu_perfmon_cmd_start_v1,
                counter_alloc);
        default:
                return -EINVAL;
                break;
        }
        return 0;
}

static int get_perfmon_cmd_start_offsetofvar_v0(
        enum pmu_perfmon_cmd_start_fields field)
{
        switch (field) {
        case COUNTER_ALLOC:
                return offsetof(struct pmu_perfmon_cmd_start_v0,
                counter_alloc);
        default:
                return -EINVAL;
                break;
        }
        return 0;
}

static u32 get_pmu_perfmon_cmd_init_size_v1(void)
{
        return sizeof(struct pmu_perfmon_cmd_init_v1);
}

static u32 get_pmu_perfmon_cmd_init_size_v0(void)
{
        return sizeof(struct pmu_perfmon_cmd_init_v0);
}

static int get_perfmon_cmd_init_offsetofvar_v1(
        enum pmu_perfmon_cmd_start_fields field)
{
        switch (field) {
        case COUNTER_ALLOC:
                return offsetof(struct pmu_perfmon_cmd_init_v1,
                counter_alloc);
        default:
                return -EINVAL;
                break;
        }
        return 0;
}

static int get_perfmon_cmd_init_offsetofvar_v0(
        enum pmu_perfmon_cmd_start_fields field)
{
        switch (field) {
        case COUNTER_ALLOC:
                return offsetof(struct pmu_perfmon_cmd_init_v0,
                counter_alloc);
        default:
                return -EINVAL;
                break;
        }
        return 0;
}

static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
        struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
        start->cmd_type = value;
}

static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
        struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
        start->cmd_type = value;
}

static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
        struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
        start->group_id = value;
}

static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
        struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
        start->group_id = value;
}

static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
        struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
        start->state_id = value;
}

static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
        struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
        start->state_id = value;
}

static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
{
        struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
        start->flags = value;
}

static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
{
        struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
        start->flags = value;
}

static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
{
        struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
        return start->flags;
}

static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
{
        struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
        return start->flags;
}

static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
        u16 value)
{
        struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
        init->sample_buffer = value;
}

static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
        u16 value)
{
        struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
        init->sample_buffer = value;
}

static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
        u8 value)
{
        struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
        init->to_decrease_count = value;
}

static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
        u8 value)
{
        struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
        init->to_decrease_count = value;
}

static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
        u8 value)
{
        struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
        init->base_counter_id = value;
}

static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
        u8 value)
{
        struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
        init->base_counter_id = value;
}

static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
        u32 value)
{
        struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
        init->sample_period_us = value;
}

static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
        u32 value)
{
        struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
        init->sample_period_us = value;
}

static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
        u8 value)
{
        struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
        init->num_counters = value;
}

static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
        u8 value)
{
        struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
        init->num_counters = value;
}

static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
        u8 value)
{
        struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
        init->samples_in_moving_avg = value;
}

static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
        u8 value)
{
        struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
        init->samples_in_moving_avg = value;
}

static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
        u32 id, void *pmu_init_msg)
{
        struct pmu_init_msg_pmu_v0 *init =
                (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
        queue->index    = init->queue_info[id].index;
        queue->offset   = init->queue_info[id].offset;
        queue->size = init->queue_info[id].size;
}

static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
        u32 id, void *pmu_init_msg)
{
        struct pmu_init_msg_pmu_v1 *init =
                (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
        queue->index    = init->queue_info[id].index;
        queue->offset   = init->queue_info[id].offset;
        queue->size = init->queue_info[id].size;
}

static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
{
        return (void *)(&seq->in_v1);
}

static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
{
        return (void *)(&seq->in_v0);
}

static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
{
        return (void *)(&seq->out_v1);
}

static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
{
        return (void *)(&seq->out_v0);
}

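/*
 * Populate g->ops.pmu_ver with the version-specific helpers above, according
 * to the app version found in the ucode descriptor.
 */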
static int gk20a_init_pmu(struct pmu_gk20a *pmu)
{
        struct gk20a *g = pmu->g;
        switch (pmu->desc->app_version) {
        case APP_VERSION_1:
                g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
                g->ops.pmu_ver.get_pmu_cmdline_args_size =
                        pmu_cmdline_size_v1;
                g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
                        set_pmu_cmdline_args_cpufreq_v1;
                g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
                        get_pmu_cmdline_args_ptr_v1;
                g->ops.pmu_ver.get_pmu_allocation_struct_size =
                        get_pmu_allocation_size_v1;
                g->ops.pmu_ver.set_pmu_allocation_ptr =
                        set_pmu_allocation_ptr_v1;
                g->ops.pmu_ver.pmu_allocation_set_dmem_size =
                        pmu_allocation_set_dmem_size_v1;
                g->ops.pmu_ver.pmu_allocation_get_dmem_size =
                        pmu_allocation_get_dmem_size_v1;
                g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
                        pmu_allocation_get_dmem_offset_v1;
                g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
                        pmu_allocation_get_dmem_offset_addr_v1;
                g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
                        pmu_allocation_set_dmem_offset_v1;
                g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
                        get_pmu_init_msg_pmu_queue_params_v1;
                g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
                        get_pmu_msg_pmu_init_msg_ptr_v1;
                g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
                        get_pmu_init_msg_pmu_sw_mg_off_v1;
                g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
                        get_pmu_init_msg_pmu_sw_mg_size_v1;
                g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
                        get_pmu_perfmon_cmd_start_size_v1;
                g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
                        get_perfmon_cmd_start_offsetofvar_v1;
                g->ops.pmu_ver.perfmon_start_set_cmd_type =
                        perfmon_start_set_cmd_type_v1;
                g->ops.pmu_ver.perfmon_start_set_group_id =
                        perfmon_start_set_group_id_v1;
                g->ops.pmu_ver.perfmon_start_set_state_id =
                        perfmon_start_set_state_id_v1;
                g->ops.pmu_ver.perfmon_start_set_flags =
                        perfmon_start_set_flags_v1;
                g->ops.pmu_ver.perfmon_start_get_flags =
                        perfmon_start_get_flags_v1;
                g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
                        get_pmu_perfmon_cmd_init_size_v1;
                g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
                        get_perfmon_cmd_init_offsetofvar_v1;
                g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
                        perfmon_cmd_init_set_sample_buffer_v1;
                g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
                        perfmon_cmd_init_set_dec_cnt_v1;
                g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
                        perfmon_cmd_init_set_base_cnt_id_v1;
                g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
                        perfmon_cmd_init_set_samp_period_us_v1;
                g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
                        perfmon_cmd_init_set_num_cnt_v1;
                g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
                        perfmon_cmd_init_set_mov_avg_v1;
                g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
                        get_pmu_sequence_in_alloc_ptr_v1;
                g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
                        get_pmu_sequence_out_alloc_ptr_v1;
                break;
        case APP_VERSION_0:
                g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
                g->ops.pmu_ver.get_pmu_cmdline_args_size =
                        pmu_cmdline_size_v0;
                g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
                        set_pmu_cmdline_args_cpufreq_v0;
                g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
                        get_pmu_cmdline_args_ptr_v0;
                g->ops.pmu_ver.get_pmu_allocation_struct_size =
                        get_pmu_allocation_size_v0;
                g->ops.pmu_ver.set_pmu_allocation_ptr =
                        set_pmu_allocation_ptr_v0;
                g->ops.pmu_ver.pmu_allocation_set_dmem_size =
                        pmu_allocation_set_dmem_size_v0;
                g->ops.pmu_ver.pmu_allocation_get_dmem_size =
                        pmu_allocation_get_dmem_size_v0;
                g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
                        pmu_allocation_get_dmem_offset_v0;
                g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
                        pmu_allocation_get_dmem_offset_addr_v0;
                g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
                        pmu_allocation_set_dmem_offset_v0;
                g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
                        get_pmu_init_msg_pmu_queue_params_v0;
                g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
                        get_pmu_msg_pmu_init_msg_ptr_v0;
                g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
                        get_pmu_init_msg_pmu_sw_mg_off_v0;
                g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
                        get_pmu_init_msg_pmu_sw_mg_size_v0;
                g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
                        get_pmu_perfmon_cmd_start_size_v0;
                g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
                        get_perfmon_cmd_start_offsetofvar_v0;
                g->ops.pmu_ver.perfmon_start_set_cmd_type =
                        perfmon_start_set_cmd_type_v0;
                g->ops.pmu_ver.perfmon_start_set_group_id =
                        perfmon_start_set_group_id_v0;
                g->ops.pmu_ver.perfmon_start_set_state_id =
                        perfmon_start_set_state_id_v0;
                g->ops.pmu_ver.perfmon_start_set_flags =
                        perfmon_start_set_flags_v0;
                g->ops.pmu_ver.perfmon_start_get_flags =
                        perfmon_start_get_flags_v0;
                g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
                        get_pmu_perfmon_cmd_init_size_v0;
                g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
                        get_perfmon_cmd_init_offsetofvar_v0;
                g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
                        perfmon_cmd_init_set_sample_buffer_v0;
                g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
                        perfmon_cmd_init_set_dec_cnt_v0;
                g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
                        perfmon_cmd_init_set_base_cnt_id_v0;
                g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
                        perfmon_cmd_init_set_samp_period_us_v0;
                g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
                        perfmon_cmd_init_set_num_cnt_v0;
                g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
                        perfmon_cmd_init_set_mov_avg_v0;
                g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
                        get_pmu_sequence_in_alloc_ptr_v0;
                g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
                        get_pmu_sequence_out_alloc_ptr_v0;
                break;
        default:
                gk20a_err(dev_from_gk20a(pmu->g),
                "PMU code version not supported\n");
                return -EINVAL;
                break;
        }
        return 0;
}

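/*
 * DMEM copy helpers: move data between the CPU and falcon DMEM through the
 * auto-incrementing DMEMC/DMEMD port registers, serialized by pmu_copy_lock.
 */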
static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
                u32 src, u8 *dst, u32 size, u8 port)
{
        struct gk20a *g = pmu->g;
        u32 i, words, bytes;
        u32 data, addr_mask;
        u32 *dst_u32 = (u32*)dst;

        if (size == 0) {
                gk20a_err(dev_from_gk20a(g),
                        "size is zero");
                return;
        }

        if (src & 0x3) {
                gk20a_err(dev_from_gk20a(g),
                        "src (0x%08x) not 4-byte aligned", src);
                return;
        }

        mutex_lock(&pmu->pmu_copy_lock);

        words = size >> 2;
        bytes = size & 0x3;

        addr_mask = pwr_falcon_dmemc_offs_m() |
                    pwr_falcon_dmemc_blk_m();

        src &= addr_mask;

        gk20a_writel(g, pwr_falcon_dmemc_r(port),
                src | pwr_falcon_dmemc_aincr_f(1));

        for (i = 0; i < words; i++)
                dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));

        if (bytes > 0) {
                data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
                for (i = 0; i < bytes; i++) {
                        dst[(words << 2) + i] = ((u8 *)&data)[i];
                }
        }
        mutex_unlock(&pmu->pmu_copy_lock);
        return;
}

static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
                u32 dst, u8 *src, u32 size, u8 port)
{
        struct gk20a *g = pmu->g;
        u32 i, words, bytes;
        u32 data, addr_mask;
        u32 *src_u32 = (u32*)src;

        if (size == 0) {
                gk20a_err(dev_from_gk20a(g),
                        "size is zero");
                return;
        }

        if (dst & 0x3) {
                gk20a_err(dev_from_gk20a(g),
                        "dst (0x%08x) not 4-byte aligned", dst);
                return;
        }

        mutex_lock(&pmu->pmu_copy_lock);

        words = size >> 2;
        bytes = size & 0x3;

        addr_mask = pwr_falcon_dmemc_offs_m() |
                    pwr_falcon_dmemc_blk_m();

        dst &= addr_mask;

        gk20a_writel(g, pwr_falcon_dmemc_r(port),
                dst | pwr_falcon_dmemc_aincw_f(1));

        for (i = 0; i < words; i++)
                gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);

        if (bytes > 0) {
                data = 0;
                for (i = 0; i < bytes; i++)
                        ((u8 *)&data)[i] = src[(words << 2) + i];
                gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
        }

        data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
        size = ALIGN(size, 4);
        if (data != dst + size) {
                gk20a_err(dev_from_gk20a(g),
                        "copy failed. bytes written %d, expected %d",
                        data - dst, size);
        }
        mutex_unlock(&pmu->pmu_copy_lock);
        return;
}

static int pmu_idle(struct pmu_gk20a *pmu)
{
        struct gk20a *g = pmu->g;
        unsigned long end_jiffies = jiffies +
                msecs_to_jiffies(2000);
        u32 idle_stat;

        /* wait for pmu idle */
        do {
                idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());

                if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
                    pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
                        break;
                }

                if (time_after_eq(jiffies, end_jiffies)) {
                        gk20a_err(dev_from_gk20a(g),
                                "timeout waiting pmu idle : 0x%08x",
                                idle_stat);
                        return -EBUSY;
                }
                usleep_range(100, 200);
        } while (1);

        gk20a_dbg_fn("done");
        return 0;
}

static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
{
        struct gk20a *g = pmu->g;

        gk20a_dbg_fn("");

        gk20a_writel(g, mc_intr_mask_0_r(),
                gk20a_readl(g, mc_intr_mask_0_r()) &
                ~mc_intr_mask_0_pmu_enabled_f());
        gk20a_writel(g, mc_intr_mask_1_r(),
                gk20a_readl(g, mc_intr_mask_1_r()) &
                ~mc_intr_mask_1_pmu_enabled_f());

        gk20a_writel(g, pwr_falcon_irqmclr_r(),
                pwr_falcon_irqmclr_gptmr_f(1)  |
                pwr_falcon_irqmclr_wdtmr_f(1)  |
                pwr_falcon_irqmclr_mthd_f(1)   |
                pwr_falcon_irqmclr_ctxsw_f(1)  |
                pwr_falcon_irqmclr_halt_f(1)   |
                pwr_falcon_irqmclr_exterr_f(1) |
                pwr_falcon_irqmclr_swgen0_f(1) |
                pwr_falcon_irqmclr_swgen1_f(1) |
                pwr_falcon_irqmclr_ext_f(0xff));

        if (enable) {
                /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
                gk20a_writel(g, pwr_falcon_irqdest_r(),
                        pwr_falcon_irqdest_host_gptmr_f(0)    |
                        pwr_falcon_irqdest_host_wdtmr_f(1)    |
                        pwr_falcon_irqdest_host_mthd_f(0)     |
                        pwr_falcon_irqdest_host_ctxsw_f(0)    |
                        pwr_falcon_irqdest_host_halt_f(1)     |
                        pwr_falcon_irqdest_host_exterr_f(0)   |
                        pwr_falcon_irqdest_host_swgen0_f(1)   |
                        pwr_falcon_irqdest_host_swgen1_f(0)   |
                        pwr_falcon_irqdest_host_ext_f(0xff)   |
                        pwr_falcon_irqdest_target_gptmr_f(1)  |
                        pwr_falcon_irqdest_target_wdtmr_f(0)  |
                        pwr_falcon_irqdest_target_mthd_f(0)   |
                        pwr_falcon_irqdest_target_ctxsw_f(0)  |
                        pwr_falcon_irqdest_target_halt_f(0)   |
                        pwr_falcon_irqdest_target_exterr_f(0) |
                        pwr_falcon_irqdest_target_swgen0_f(0) |
                        pwr_falcon_irqdest_target_swgen1_f(0) |
                        pwr_falcon_irqdest_target_ext_f(0xff));

                /* 0=disable, 1=enable */
                gk20a_writel(g, pwr_falcon_irqmset_r(),
                        pwr_falcon_irqmset_gptmr_f(1)  |
                        pwr_falcon_irqmset_wdtmr_f(1)  |
                        pwr_falcon_irqmset_mthd_f(0)   |
                        pwr_falcon_irqmset_ctxsw_f(0)  |
                        pwr_falcon_irqmset_halt_f(1)   |
                        pwr_falcon_irqmset_exterr_f(1) |
                        pwr_falcon_irqmset_swgen0_f(1) |
                        pwr_falcon_irqmset_swgen1_f(1));

                gk20a_writel(g, mc_intr_mask_0_r(),
                        gk20a_readl(g, mc_intr_mask_0_r()) |
                        mc_intr_mask_0_pmu_enabled_f());
        }

        gk20a_dbg_fn("done");
}

static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
{
        struct gk20a *g = pmu->g;

        gk20a_dbg_fn("");

        if (enable) {
                int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
                gk20a_enable(g, mc_enable_pwr_enabled_f());

                do {
                        u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
                                (pwr_falcon_dmactl_dmem_scrubbing_m() |
                                 pwr_falcon_dmactl_imem_scrubbing_m());

                        if (!w) {
                                gk20a_dbg_fn("done");
                                return 0;
                        }
                        udelay(GR_IDLE_CHECK_DEFAULT);
                } while (--retries || !tegra_platform_is_silicon());

                gk20a_disable(g, mc_enable_pwr_enabled_f());
                gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");

                return -ETIMEDOUT;
        } else {
                gk20a_disable(g, mc_enable_pwr_enabled_f());
                return 0;
        }
}

static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
{
        struct gk20a *g = pmu->g;
        u32 pmc_enable;
        int err;

        gk20a_dbg_fn("");

        if (!enable) {
                pmc_enable = gk20a_readl(g, mc_enable_r());
                if (mc_enable_pwr_v(pmc_enable) !=
                    mc_enable_pwr_disabled_v()) {

                        pmu_enable_irq(pmu, false);
                        pmu_enable_hw(pmu, false);
                }
        } else {
                err = pmu_enable_hw(pmu, true);
                if (err)
                        return err;

                /* TBD: post reset */

                err = pmu_idle(pmu);
                if (err)
                        return err;

                pmu_enable_irq(pmu, true);
        }

        gk20a_dbg_fn("done");
        return 0;
}

static int pmu_reset(struct pmu_gk20a *pmu)
{
        int err;

        err = pmu_idle(pmu);
        if (err)
                return err;

        /* TBD: release pmu hw mutex */

        err = pmu_enable(pmu, false);
        if (err)
                return err;

        /* TBD: cancel all sequences */
        /* TBD: init all sequences and state tables */
        /* TBD: restore pre-init message handler */

        err = pmu_enable(pmu, true);
        if (err)
                return err;

        return 0;
}

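/*
 * Bootstrap the PMU falcon: point it at the PMU instance block, copy the
 * command line arguments to the top of DMEM, write the bootloader argument
 * block through DMEMD, DMA the bootloader into IMEM and start the CPU.
 */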
static int pmu_bootstrap(struct pmu_gk20a *pmu)
{
        struct gk20a *g = pmu->g;
        struct gk20a_platform *platform = platform_get_drvdata(g->dev);
        struct mm_gk20a *mm = &g->mm;
        struct pmu_ucode_desc *desc = pmu->desc;
        u64 addr_code, addr_data, addr_load;
        u32 i, blocks, addr_args;

        gk20a_dbg_fn("");

        gk20a_writel(g, pwr_falcon_itfen_r(),
                gk20a_readl(g, pwr_falcon_itfen_r()) |
                pwr_falcon_itfen_ctxen_enable_f());
        gk20a_writel(g, pwr_pmu_new_instblk_r(),
                pwr_pmu_new_instblk_ptr_f(
                        mm->pmu.inst_block.cpu_pa >> 12) |
                pwr_pmu_new_instblk_valid_f(1) |
                pwr_pmu_new_instblk_target_sys_coh_f());

        /* TBD: load all other surfaces */

        g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
                clk_get_rate(platform->clk[1]));

        addr_args = (pwr_falcon_hwcfg_dmem_size_v(
                gk20a_readl(g, pwr_falcon_hwcfg_r()))
                        << GK20A_PMU_DMEM_BLKSIZE2) -
                g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);

        pmu_copy_to_dmem(pmu, addr_args,
                        (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
                        g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);

        gk20a_writel(g, pwr_falcon_dmemc_r(0),
                pwr_falcon_dmemc_offs_f(0) |
                pwr_falcon_dmemc_blk_f(0)  |
                pwr_falcon_dmemc_aincw_f(1));

        addr_code = u64_lo32((pmu->ucode.pmu_va +
                        desc->app_start_offset +
                        desc->app_resident_code_offset) >> 8);
        addr_data = u64_lo32((pmu->ucode.pmu_va +
                        desc->app_start_offset +
                        desc->app_resident_data_offset) >> 8);
        addr_load = u64_lo32((pmu->ucode.pmu_va +
                        desc->bootloader_start_offset) >> 8);

        gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
        gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
        gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
        gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
        gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
        gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
        gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
        gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
        gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
        gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);

        gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
                addr_load - (desc->bootloader_imem_offset >> 8));

        blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;

        for (i = 0; i < blocks; i++) {
                gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
                        desc->bootloader_imem_offset + (i << 8));
                gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
                        desc->bootloader_imem_offset + (i << 8));
                gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
                        pwr_falcon_dmatrfcmd_imem_f(1)  |
                        pwr_falcon_dmatrfcmd_write_f(0) |
                        pwr_falcon_dmatrfcmd_size_f(6)  |
                        pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
        }

        gk20a_writel(g, pwr_falcon_bootvec_r(),
                pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));

        gk20a_writel(g, pwr_falcon_cpuctl_r(),
                pwr_falcon_cpuctl_startcpu_f(1));

        gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);

        return 0;
}

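/* Reset all command/message sequence tracking state. */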
static void pmu_seq_init(struct pmu_gk20a *pmu)
{
        u32 i;

        memset(pmu->seq, 0,
                sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
        memset(pmu->pmu_seq_tbl, 0,
                sizeof(pmu->pmu_seq_tbl));

        for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
                pmu->seq[i].id = i;
}

static int pmu_seq_acquire(struct pmu_gk20a *pmu,
                        struct pmu_sequence **pseq)
{
        struct gk20a *g = pmu->g;
        struct pmu_sequence *seq;
        u32 index;

        mutex_lock(&pmu->pmu_seq_lock);
        index = find_first_zero_bit(pmu->pmu_seq_tbl,
                                sizeof(pmu->pmu_seq_tbl));
        if (index >= sizeof(pmu->pmu_seq_tbl)) {
                gk20a_err(dev_from_gk20a(g),
                        "no free sequence available");
                mutex_unlock(&pmu->pmu_seq_lock);
                return -EAGAIN;
        }
        set_bit(index, pmu->pmu_seq_tbl);
        mutex_unlock(&pmu->pmu_seq_lock);

        seq = &pmu->seq[index];
        seq->state = PMU_SEQ_STATE_PENDING;

        *pseq = seq;
        return 0;
}

static void pmu_seq_release(struct pmu_gk20a *pmu,
                        struct pmu_sequence *seq)
{
        struct gk20a *g = pmu->g;
        seq->state      = PMU_SEQ_STATE_FREE;
        seq->desc       = PMU_INVALID_SEQ_DESC;
        seq->callback   = NULL;
        seq->cb_params  = NULL;
        seq->msg        = NULL;
        seq->out_payload = NULL;
        g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
                g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
        g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
                g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);

        clear_bit(seq->id, pmu->pmu_seq_tbl);
}

static int pmu_queue_init(struct pmu_gk20a *pmu,
                u32 id, union pmu_init_msg_pmu *init)
{
        struct gk20a *g = pmu->g;
        struct pmu_queue *queue = &pmu->queue[id];
        queue->id       = id;
        g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);

        queue->mutex_id = id;
        mutex_init(&queue->mutex);

        gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
                id, queue->index, queue->offset, queue->size);

        return 0;
}

static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
                        u32 *head, bool set)
{
        struct gk20a *g = pmu->g;

        BUG_ON(!head);

        if (PMU_IS_COMMAND_QUEUE(queue->id)) {

                if (queue->index >= pwr_pmu_queue_head__size_1_v())
                        return -EINVAL;

                if (!set)
                        *head = pwr_pmu_queue_head_address_v(
                                gk20a_readl(g,
                                        pwr_pmu_queue_head_r(queue->index)));
                else
                        gk20a_writel(g,
                                pwr_pmu_queue_head_r(queue->index),
                                pwr_pmu_queue_head_address_f(*head));
        } else {
                if (!set)
                        *head = pwr_pmu_msgq_head_val_v(
                                gk20a_readl(g, pwr_pmu_msgq_head_r()));
                else
                        gk20a_writel(g,
                                pwr_pmu_msgq_head_r(),
                                pwr_pmu_msgq_head_val_f(*head));
        }

        return 0;
}

static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
                        u32 *tail, bool set)
{
        struct gk20a *g = pmu->g;

        BUG_ON(!tail);

        if (PMU_IS_COMMAND_QUEUE(queue->id)) {

                if (queue->index >= pwr_pmu_queue_tail__size_1_v())
                        return -EINVAL;

                if (!set)
                        *tail = pwr_pmu_queue_tail_address_v(
                                gk20a_readl(g,
                                        pwr_pmu_queue_tail_r(queue->index)));
                else
                        gk20a_writel(g,
                                pwr_pmu_queue_tail_r(queue->index),
                                pwr_pmu_queue_tail_address_f(*tail));
        } else {
                if (!set)
                        *tail = pwr_pmu_msgq_tail_val_v(
                                gk20a_readl(g, pwr_pmu_msgq_tail_r()));
                else
                        gk20a_writel(g,
                                pwr_pmu_msgq_tail_r(),
                                pwr_pmu_msgq_tail_val_f(*tail));
        }

        return 0;
}

static inline void pmu_queue_read(struct pmu_gk20a *pmu,
                        u32 offset, u8 *dst, u32 size)
{
        pmu_copy_from_dmem(pmu, offset, dst, size, 0);
}

static inline void pmu_queue_write(struct pmu_gk20a *pmu,
                        u32 offset, u8 *src, u32 size)
{
        pmu_copy_to_dmem(pmu, offset, src, size, 0);
}

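/*
 * Acquire a PMU hardware mutex: obtain a token from the MUTEX_ID register,
 * write it into the mutex register and confirm the write won the race.
 * Re-acquisition with the owning token only bumps the reference count.
 */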
int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
{
        struct gk20a *g = pmu->g;
        struct pmu_mutex *mutex;
        u32 data, owner, max_retry;

        if (!pmu->initialized)
                return -EINVAL;

        BUG_ON(!token);
        BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
        BUG_ON(id > pmu->mutex_cnt);

        mutex = &pmu->mutex[id];

        owner = pwr_pmu_mutex_value_v(
                gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));

        if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
                BUG_ON(mutex->ref_cnt == 0);
                gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
                mutex->ref_cnt++;
                return 0;
        }

        max_retry = 40;
        do {
                data = pwr_pmu_mutex_id_value_v(
                        gk20a_readl(g, pwr_pmu_mutex_id_r()));
                if (data == pwr_pmu_mutex_id_value_init_v() ||
                    data == pwr_pmu_mutex_id_value_not_avail_v()) {
                        gk20a_warn(dev_from_gk20a(g),
                                "fail to generate mutex token: val 0x%08x",
                                owner);
                        usleep_range(20, 40);
                        continue;
                }

                owner = data;
                gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
                        pwr_pmu_mutex_value_f(owner));

                data = pwr_pmu_mutex_value_v(
                        gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));

                if (owner == data) {
                        mutex->ref_cnt = 1;
                        gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
                                mutex->index, *token);
                        *token = owner;
                        return 0;
                } else {
                        gk20a_dbg_info("fail to acquire mutex idx=0x%08x",
                                mutex->index);

                        data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
                        data = set_field(data,
                                pwr_pmu_mutex_id_release_value_m(),
                                pwr_pmu_mutex_id_release_value_f(owner));
                        gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);

                        usleep_range(20, 40);
                        continue;
                }
        } while (max_retry-- > 0);

        return -EBUSY;
}

int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
{
        struct gk20a *g = pmu->g;
        struct pmu_mutex *mutex;
        u32 owner, data;

        if (!pmu->initialized)
                return -EINVAL;

        BUG_ON(!token);
        BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
        BUG_ON(id > pmu->mutex_cnt);

        mutex = &pmu->mutex[id];

        owner = pwr_pmu_mutex_value_v(
                gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));

        if (*token != owner) {
                gk20a_err(dev_from_gk20a(g),
                        "requester 0x%08x does NOT match owner 0x%08x",
                        *token, owner);
                return -EINVAL;
        }

        if (--mutex->ref_cnt == 0) {
                gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
                        pwr_pmu_mutex_value_initial_lock_f());

                data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
                data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
                        pwr_pmu_mutex_id_release_value_f(owner));
                gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);

                gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
                        mutex->index, *token);
        }

        return 0;
}

static int pmu_queue_lock(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue)
{
        int err;

        if (PMU_IS_MESSAGE_QUEUE(queue->id))
                return 0;

        if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
                mutex_lock(&queue->mutex);
                return 0;
        }

        err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock);
        return err;
}

static int pmu_queue_unlock(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue)
{
        int err;

        if (PMU_IS_MESSAGE_QUEUE(queue->id))
                return 0;

        if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
                mutex_unlock(&queue->mutex);
                return 0;
        }

        err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock);
        return err;
}

/* called by pmu_read_message, no lock */
static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue)
{
        u32 head, tail;

        pmu_queue_head(pmu, queue, &head, QUEUE_GET);
        if (queue->opened && queue->oflag == OFLAG_READ)
                tail = queue->position;
        else
                pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);

        return head == tail;
}

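/*
 * Check whether the ring buffer has room for a 'size' byte write, reporting
 * through *need_rewind when the write pointer must wrap back to the start.
 */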
static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue, u32 size, bool *need_rewind)
{
        u32 head, tail, free;
        bool rewind = false;

        size = ALIGN(size, QUEUE_ALIGNMENT);

        pmu_queue_head(pmu, queue, &head, QUEUE_GET);
        pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);

        if (head >= tail) {
                free = queue->offset + queue->size - head;
                free -= PMU_CMD_HDR_SIZE;

                if (size > free) {
                        rewind = true;
                        head = queue->offset;
                }
        }

        if (head < tail)
                free = tail - head - 1;

        if (need_rewind)
                *need_rewind = rewind;

        return size <= free;
}

static int pmu_queue_push(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue, void *data, u32 size)
{
        gk20a_dbg_fn("");

        if (!queue->opened || queue->oflag != OFLAG_WRITE) {
                gk20a_err(dev_from_gk20a(pmu->g),
                        "queue not opened for write");
                return -EINVAL;
        }

        pmu_queue_write(pmu, queue->position, data, size);
        queue->position += ALIGN(size, QUEUE_ALIGNMENT);
        return 0;
}

static int pmu_queue_pop(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue, void *data, u32 size,
                        u32 *bytes_read)
{
        u32 head, tail, used;

        *bytes_read = 0;

        if (!queue->opened || queue->oflag != OFLAG_READ) {
                gk20a_err(dev_from_gk20a(pmu->g),
                        "queue not opened for read");
                return -EINVAL;
        }

        pmu_queue_head(pmu, queue, &head, QUEUE_GET);
        tail = queue->position;

        if (head == tail)
                return 0;

        if (head > tail)
                used = head - tail;
        else
                used = queue->offset + queue->size - tail;

        if (size > used) {
                gk20a_warn(dev_from_gk20a(pmu->g),
                        "queue size smaller than request read");
                size = used;
        }

        pmu_queue_read(pmu, tail, data, size);
        queue->position += ALIGN(size, QUEUE_ALIGNMENT);
        *bytes_read = size;
        return 0;
}

static void pmu_queue_rewind(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue)
{
        struct pmu_cmd cmd;

        gk20a_dbg_fn("");

        if (!queue->opened) {
                gk20a_err(dev_from_gk20a(pmu->g),
                        "queue not opened");
                return;
        }

        if (queue->oflag == OFLAG_WRITE) {
                cmd.hdr.unit_id = PMU_UNIT_REWIND;
                cmd.hdr.size = PMU_CMD_HDR_SIZE;
                pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
                gk20a_dbg_pmu("queue %d rewound", queue->id);
        }

        queue->position = queue->offset;
        return;
}

/* open for read and lock the queue */
static int pmu_queue_open_read(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue)
{
        int err;

        err = pmu_queue_lock(pmu, queue);
        if (err)
                return err;

        if (queue->opened)
                BUG();

        pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
        queue->oflag = OFLAG_READ;
        queue->opened = true;

        return 0;
}

/* open for write and lock the queue
   make sure there's enough free space for the write */
static int pmu_queue_open_write(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue, u32 size)
{
        bool rewind = false;
        int err;

        err = pmu_queue_lock(pmu, queue);
        if (err)
                return err;

        if (queue->opened)
                BUG();

        if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
                gk20a_err(dev_from_gk20a(pmu->g), "queue full");
                pmu_queue_unlock(pmu, queue);
                return -EAGAIN;
        }

        pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
        queue->oflag = OFLAG_WRITE;
        queue->opened = true;

        if (rewind)
                pmu_queue_rewind(pmu, queue);

        return 0;
}

/* close and unlock the queue */
static int pmu_queue_close(struct pmu_gk20a *pmu,
                        struct pmu_queue *queue, bool commit)
{
        if (!queue->opened)
                return 0;

        if (commit) {
                if (queue->oflag == OFLAG_READ) {
                        pmu_queue_tail(pmu, queue,
                                &queue->position, QUEUE_SET);
                }
                else {
                        pmu_queue_head(pmu, queue,
                                &queue->position, QUEUE_SET);
                }
        }

        queue->opened = false;

        pmu_queue_unlock(pmu, queue);

        return 0;
}

void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
{
        gk20a_dbg_fn("");

        gk20a_allocator_destroy(&pmu->dmem);
}

int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
{
        struct pmu_gk20a *pmu = &g->pmu;

        gk20a_dbg_fn("");

        pmu_enable_hw(pmu, true);

        return 0;
}

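/*
 * One-time software setup: allocate mutex and sequence tracking, load the
 * PMU ucode image, and allocate the ucode and sequence buffers that get
 * mapped into the PMU's virtual address space.
 */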
int gk20a_init_pmu_setup_sw(struct gk20a *g)
{
        struct pmu_gk20a *pmu = &g->pmu;
        struct mm_gk20a *mm = &g->mm;
        struct vm_gk20a *vm = &mm->pmu.vm;
        struct device *d = dev_from_gk20a(g);
        int i, err = 0;
        u8 *ptr;
        void *ucode_ptr;
        struct sg_table *sgt_pmu_ucode;
        struct sg_table *sgt_seq_buf;
        DEFINE_DMA_ATTRS(attrs);
        dma_addr_t iova;

        gk20a_dbg_fn("");

        /* start with elpg disabled until first enable call */
        mutex_init(&pmu->elpg_mutex);
        pmu->elpg_refcnt = 0;

        if (pmu->sw_ready) {
                for (i = 0; i < pmu->mutex_cnt; i++) {
                        pmu->mutex[i].id    = i;
                        pmu->mutex[i].index = i;
                }
                pmu_seq_init(pmu);

                gk20a_dbg_fn("skip init");
                goto skip_init;
        }

        /* no infoRom script from vbios? */

        /* TBD: sysmon subtask */

        pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
        pmu->mutex = kzalloc(pmu->mutex_cnt *
                sizeof(struct pmu_mutex), GFP_KERNEL);
        if (!pmu->mutex) {
                err = -ENOMEM;
                goto err;
        }

        for (i = 0; i < pmu->mutex_cnt; i++) {
                pmu->mutex[i].id    = i;
                pmu->mutex[i].index = i;
        }

        pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
                sizeof(struct pmu_sequence), GFP_KERNEL);
        if (!pmu->seq) {
                err = -ENOMEM;
                goto err_free_mutex;
        }

        pmu_seq_init(pmu);

        if (!g->pmu_fw) {
                g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
                if (!g->pmu_fw) {
                        gk20a_err(d, "failed to load pmu ucode!!");
                        err = -ENOENT;
                        goto err_free_seq;
                }
        }

        gk20a_dbg_fn("firmware loaded");

        pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
        pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
                        pmu->desc->descriptor_size);

        INIT_WORK(&pmu->pg_init, pmu_setup_hw);

        gk20a_init_pmu_vm(mm);

        dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
        pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
                                        &iova,
                                        GFP_KERNEL,
                                        &attrs);
        if (!pmu->ucode.cpuva) {
                gk20a_err(d, "failed to allocate memory\n");
                err = -ENOMEM;
                goto err_release_fw;
        }

        pmu->ucode.iova = iova;
        pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
                                        &iova,
                                        GFP_KERNEL);
        if (!pmu->seq_buf.cpuva) {
                gk20a_err(d, "failed to allocate memory\n");
                err = -ENOMEM;
                goto err_free_pmu_ucode;
        }

        pmu->seq_buf.iova = iova;

        err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
                                pmu->ucode.cpuva,
                                pmu->ucode.iova,
                                GK20A_PMU_UCODE_SIZE_MAX);
        if (err) {
1614                 gk20a_err(d, "failed to allocate sg table\n");
1615                 goto err_free_seq_buf;
1616         }
1617
1618         pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1619                                         GK20A_PMU_UCODE_SIZE_MAX,
1620                                         0, /* flags */
1621                                         gk20a_mem_flag_read_only);
1622         if (!pmu->ucode.pmu_va) {
1623                 gk20a_err(d, "failed to map pmu ucode memory!!");
                     err = -ENOMEM;
1624                 goto err_free_ucode_sgt;
1625         }
1626
1627         err = gk20a_get_sgtable(d, &sgt_seq_buf,
1628                                 pmu->seq_buf.cpuva,
1629                                 pmu->seq_buf.iova,
1630                                 GK20A_PMU_SEQ_BUF_SIZE);
1631         if (err) {
1632                 gk20a_err(d, "failed to allocate sg table\n");
1633                 goto err_unmap_ucode;
1634         }
1635
1636         pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
1637                                         GK20A_PMU_SEQ_BUF_SIZE,
1638                                         0, /* flags */
1639                                         gk20a_mem_flag_none);
1640         if (!pmu->seq_buf.pmu_va) {
1641                 gk20a_err(d, "failed to map pmu seq buffer!!");
                     err = -ENOMEM;
1642                 goto err_free_seq_buf_sgt;
1643         }
1644
1645         ptr = (u8 *)pmu->seq_buf.cpuva;
1646         if (!ptr) {
1647                 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
                     err = -ENOMEM;
1648                 goto err_unmap_seq_buf;
1649         }
1650
1651         /* TBD: remove this if ZBC save/restore is handled by PMU
1652          * send an empty ZBC sequence for now */
1653         ptr[0] = 0x16; /* opcode EXIT */
1654         ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
1655         ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
1656
1657         pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
1658
1659         ucode_ptr = pmu->ucode.cpuva;
1660
1661         for (i = 0; i < (pmu->desc->app_start_offset +
1662                         pmu->desc->app_size) >> 2; i++)
1663                 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
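             /*
              * The loop above copies the bootloader plus application image
              * (app_start_offset + app_size bytes, as whole 32-bit words)
              * from the firmware blob into the DMA buffer that was mapped
              * read-only for the PMU earlier in this function.
              */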
1664
1665         gk20a_free_sgtable(&sgt_pmu_ucode);
1666         gk20a_free_sgtable(&sgt_seq_buf);
1667
1668         pmu->sw_ready = true;
1669
1670 skip_init:
1671         mutex_init(&pmu->isr_mutex);
1672         mutex_init(&pmu->isr_enable_lock);
1673         mutex_init(&pmu->pmu_copy_lock);
1674         mutex_init(&pmu->pmu_seq_lock);
1675
1676         pmu->perfmon_counter.index = 3; /* GR & CE2 */
1677         pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
1678
1679         pmu->remove_support = gk20a_remove_pmu_support;
1680         err = gk20a_init_pmu(pmu);
1681         if (err) {
1682                 gk20a_err(d, "failed to set function pointers\n");
1683                 return err;
1684         }
1685
1686         gk20a_dbg_fn("done");
1687         return 0;
1688
1689  err_unmap_seq_buf:
1690         gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
1691                 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
1692  err_free_seq_buf_sgt:
1693         gk20a_free_sgtable(&sgt_seq_buf);
1694  err_unmap_ucode:
1695         gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1696                 GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
1697  err_free_ucode_sgt:
1698         gk20a_free_sgtable(&sgt_pmu_ucode);
1699  err_free_seq_buf:
1700         dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1701                 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
1702         pmu->seq_buf.cpuva = NULL;
1703         pmu->seq_buf.iova = 0;
1704  err_free_pmu_ucode:
1705         dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1706                 pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
1707         pmu->ucode.cpuva = NULL;
1708         pmu->ucode.iova = 0;
1709  err_release_fw:
1710         release_firmware(g->pmu_fw);
1711  err_free_seq:
1712         kfree(pmu->seq);
1713  err_free_mutex:
1714         kfree(pmu->mutex);
1715  err:
1716         gk20a_dbg_fn("fail");
1717         return err;
1718 }
1719
1720 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1721                         void *param, u32 handle, u32 status);
1722
1723 static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
1724                         void *param, u32 handle, u32 status)
1725 {
1726         struct pmu_gk20a *pmu = param;
1727         struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;
1728
1729         gk20a_dbg_fn("");
1730
1731         gk20a_dbg_pmu("reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1732         if (status != 0) {
1733                 gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
1734                 /* TBD: disable ELPG */
1735                 return;
1736         }
1737
1738         if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) {
1739                 gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
1740         }
1741
1742         pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
1743         schedule_work(&pmu->pg_init);
1744 }
1745
1746 int gk20a_init_pmu_setup_hw1(struct gk20a *g)
1747 {
1748         struct pmu_gk20a *pmu = &g->pmu;
1749         int err;
1750
1751         gk20a_dbg_fn("");
1752
1753         mutex_lock(&pmu->isr_enable_lock);
1754         pmu_reset(pmu);
1755         pmu->isr_enabled = true;
1756         mutex_unlock(&pmu->isr_enable_lock);
1757
1758         /* setup apertures - virtual */
1759         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1760                 pwr_fbif_transcfg_mem_type_virtual_f());
1761         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1762                 pwr_fbif_transcfg_mem_type_virtual_f());
1763         /* setup apertures - physical */
1764         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1765                 pwr_fbif_transcfg_mem_type_physical_f() |
1766                 pwr_fbif_transcfg_target_local_fb_f());
1767         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1768                 pwr_fbif_transcfg_mem_type_physical_f() |
1769                 pwr_fbif_transcfg_target_coherent_sysmem_f());
1770         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1771                 pwr_fbif_transcfg_mem_type_physical_f() |
1772                 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
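             /*
              * Each pwr_fbif_transcfg_r(idx) entry programmed above tells the
              * PMU falcon's DMA engine how to treat transfers issued with that
              * DMA index: the UCODE and VIRT indices go through the GMMU,
              * while the PHYS_* indices bypass it and target video memory,
              * coherent sysmem or non-coherent sysmem directly.
              */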
1773
1774         /* TBD: load pmu ucode */
1775         err = pmu_bootstrap(pmu);
1776         if (err)
1777                 return err;
1778
1779         return 0;
1780
1781 }
1782
1783 static int gk20a_aelpg_init(struct gk20a *g);
1784 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
1785
1786 static void pmu_setup_hw_load_zbc(struct gk20a *g);
1787 static void pmu_setup_hw_enable_elpg(struct gk20a *g);
1788
1789 static void pmu_setup_hw(struct work_struct *work)
1790 {
1791         struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
1792         struct gk20a *g = pmu->g;
1793
1794         switch (pmu->pmu_state) {
1795         case PMU_STATE_ELPG_BOOTED:
1796                 gk20a_dbg_pmu("elpg booted");
1797                 gk20a_init_pmu_bind_fecs(g);
1798                 break;
1799         case PMU_STATE_LOADING_PG_BUF:
1800                 gk20a_dbg_pmu("loaded pg buf");
1801                 pmu_setup_hw_load_zbc(g);
1802                 break;
1803         case PMU_STATE_LOADING_ZBC:
1804                 gk20a_dbg_pmu("loaded zbc");
1805                 pmu_setup_hw_enable_elpg(g);
1806                 break;
1807         case PMU_STATE_STARTED:
1808                 gk20a_dbg_pmu("PMU booted");
1809                 break;
1810         default:
1811                 gk20a_dbg_pmu("invalid state");
1812                 break;
1813         }
1814 }
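     /*
      * pmu_setup_hw() is the work item that drives PMU bring-up once the
      * initial ELPG handshake completes: ELPG_BOOTED -> bind the FECS PG
      * buffer -> LOADING_PG_BUF -> load the ZBC seq buffer -> LOADING_ZBC ->
      * enable ELPG/AELPG and mark the PMU STARTED. Each transition is
      * re-queued from the ENG_BUF_LOAD / ELPG message callbacks.
      */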
1815
1816 int gk20a_init_pmu_bind_fecs(struct gk20a *g)
1817 {
1818         struct pmu_gk20a *pmu = &g->pmu;
1819         struct mm_gk20a *mm = &g->mm;
1820         struct vm_gk20a *vm = &mm->pmu.vm;
1821         struct device *d = dev_from_gk20a(g);
1822         struct pmu_cmd cmd;
1823         u32 desc;
1824         int err;
1825         u32 size;
1826         struct sg_table *sgt_pg_buf;
1827         dma_addr_t iova;
1828
1829         gk20a_dbg_fn("");
1830
1831         size = 0;
1832         gk20a_gr_wait_initialized(g);
1833         err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
1834         if (err) {
1835                 gk20a_err(dev_from_gk20a(g),
1836                         "fail to query fecs pg buffer size");
1837                 return err;
1838         }
1839
1840         if (!pmu->pg_buf.cpuva) {
1841                 pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
1842                                                 &iova,
1843                                                 GFP_KERNEL);
1844                 if (!pmu->pg_buf.cpuva) {
1845                         gk20a_err(d, "failed to allocate memory\n");
1846                         return -ENOMEM;
1847                 }
1848
1849                 pmu->pg_buf.iova = iova;
1850                 pmu->pg_buf.size = size;
1851
1852                 err = gk20a_get_sgtable(d, &sgt_pg_buf,
1853                                         pmu->pg_buf.cpuva,
1854                                         pmu->pg_buf.iova,
1855                                         size);
1856                 if (err) {
1857                         gk20a_err(d, "failed to create sg table\n");
1858                         goto err_free_pg_buf;
1859                 }
1860
1861                 pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
1862                                         &sgt_pg_buf,
1863                                         size,
1864                                         0, /* flags */
1865                                         gk20a_mem_flag_none);
1866                 if (!pmu->pg_buf.pmu_va) {
1867                         gk20a_err(d, "failed to map fecs pg buffer");
1868                         err = -ENOMEM;
1869                         goto err_free_sgtable;
1870                 }
1871
1872                 gk20a_free_sgtable(&sgt_pg_buf);
1873         }
1874
1875         err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
1876         if (err) {
1877                 gk20a_err(dev_from_gk20a(g),
1878                         "fail to bind pmu inst to gr");
1879                 return err;
1880         }
1881
1882         err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va);
1883         if (err) {
1884                 gk20a_err(dev_from_gk20a(g),
1885                         "fail to set pg buffer pmu va");
1886                 return err;
1887         }
1888
1889         memset(&cmd, 0, sizeof(struct pmu_cmd));
1890         cmd.hdr.unit_id = PMU_UNIT_PG;
1891         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1892         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1893         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1894         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
1895         cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
1896         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
1897         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
1898         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
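             /*
              * dma_base and dma_offset split the PMU virtual address of the
              * buffer: dma_base holds VA bits [39:8] (the address in 256-byte
              * units) and dma_offset the low byte. For example, a pg_buf
              * pmu_va of 0x12345678 gives dma_base = 0x00123456 and
              * dma_offset = 0x78.
              */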
1899
1900         pmu->buf_loaded = false;
1901         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1902         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1903                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1904         pmu->pmu_state = PMU_STATE_LOADING_PG_BUF;
1905         return err;
1906
1907 err_free_sgtable:
1908         gk20a_free_sgtable(&sgt_pg_buf);
1909 err_free_pg_buf:
1910         dma_free_coherent(d, size,
1911                 pmu->pg_buf.cpuva, pmu->pg_buf.iova);
1912         pmu->pg_buf.cpuva = NULL;
1913         pmu->pg_buf.iova = 0;
1914         return err;
1915 }
1916
1917 static void pmu_setup_hw_load_zbc(struct gk20a *g)
1918 {
1919         struct pmu_gk20a *pmu = &g->pmu;
1920         struct pmu_cmd cmd;
1921         u32 desc;
1922
1923         memset(&cmd, 0, sizeof(struct pmu_cmd));
1924         cmd.hdr.unit_id = PMU_UNIT_PG;
1925         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1926         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1927         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1928         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
1929         cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
1930         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
1931         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
1932         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1933
1934         pmu->buf_loaded = false;
1935         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC");
1936         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1937                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1938         pmu->pmu_state = PMU_STATE_LOADING_ZBC;
1939 }
1940
1941 static void pmu_setup_hw_enable_elpg(struct gk20a *g)
1942 {
1943         struct pmu_gk20a *pmu = &g->pmu;
1944
1945         /*
1946          * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
1947          * 7. This prevents PMU stalling on Host register accesses. Once the
1948          * cause for this hang is discovered and fixed, this WAR should be
1949          * removed.
1950          */
1951         gk20a_writel(g, 0x10a164, 0x109ff);
1952
1953         pmu->initialized = true;
1954         pmu->pmu_state = PMU_STATE_STARTED;
1955
1956         pmu->zbc_ready = true;
1957         /* Save zbc table after PMU is initialized. */
1958         gr_gk20a_pmu_save_zbc(g, 0xf);
1959
1960         if (g->elpg_enabled)
1961                 gk20a_pmu_enable_elpg(g);
1962
1963         udelay(50);
1964
1965         /* Enable AELPG */
1966         if (g->aelpg_enabled) {
1967                 gk20a_aelpg_init(g);
1968                 gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
1969         }
1970
1971         wake_up(&g->pmu.boot_wq);
1972 }
1973
1974 int gk20a_init_pmu_support(struct gk20a *g)
1975 {
1976         struct pmu_gk20a *pmu = &g->pmu;
1977         int err;
1978
1979         gk20a_dbg_fn("");
1980
1981         if (pmu->initialized)
1982                 return 0;
1983
1984         pmu->g = g;
1985
1986         err = gk20a_init_pmu_reset_enable_hw(g);
1987         if (err)
1988                 return err;
1989
1990         if (support_gk20a_pmu()) {
1991                 err = gk20a_init_pmu_setup_sw(g);
1992                 if (err)
1993                         return err;
1994
1995                 err = gk20a_init_pmu_setup_hw1(g);
1996                 if (err)
1997                         return err;
1998         }
1999
2000         return err;
2001 }
2002
2003 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
2004                         void *param, u32 handle, u32 status)
2005 {
2006         struct pmu_gk20a *pmu = param;
2007         struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
2008
2009         gk20a_dbg_fn("");
2010
2011         if (status != 0) {
2012                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2013                 /* TBD: disable ELPG */
2014                 return;
2015         }
2016
2017         switch (elpg_msg->msg) {
2018         case PMU_PG_ELPG_MSG_INIT_ACK:
2019                 gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
2020                 break;
2021         case PMU_PG_ELPG_MSG_ALLOW_ACK:
2022                 gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
2023                 pmu->elpg_stat = PMU_ELPG_STAT_ON;
2024                 break;
2025         case PMU_PG_ELPG_MSG_DISALLOW_ACK:
2026                 gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
2027                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
2028                 if (pmu->pmu_state == PMU_STATE_STARTING)
2029                         pmu->pmu_state = PMU_STATE_ELPG_BOOTED;
2030                 schedule_work(&pmu->pg_init);
2031                 break;
2032         default:
2033                 gk20a_err(dev_from_gk20a(g),
2034                         "unsupported ELPG message : 0x%04x", elpg_msg->msg);
2035         }
2036
2037         return;
2038 }
2039
2040 static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
2041                         void *param, u32 handle, u32 status)
2042 {
2043         struct pmu_gk20a *pmu = param;
2044
2045         gk20a_dbg_fn("");
2046
2047         if (status != 0) {
2048                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2049                 /* TBD: disable ELPG */
2050                 return;
2051         }
2052
2053         switch (msg->msg.pg.stat.sub_msg_id) {
2054         case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
2055                 gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
2056                 pmu->stat_dmem_offset = msg->msg.pg.stat.data;
2057                 break;
2058         default:
2059                 break;
2060         }
2061 }
2062
2063 static int pmu_init_powergating(struct pmu_gk20a *pmu)
2064 {
2065         struct gk20a *g = pmu->g;
2066         struct pmu_cmd cmd;
2067         u32 seq;
2068
2069         gk20a_dbg_fn("");
2070
2071         if (tegra_cpu_is_asim()) {
2072                 /* TBD: calculate threshold for silicon */
2073                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2074                                 PMU_PG_IDLE_THRESHOLD_SIM);
2075                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2076                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
2077         } else {
2078                 /* TBD: calculate threshold for silicon */
2079                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2080                                 PMU_PG_IDLE_THRESHOLD);
2081                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2082                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
2083         }
2084
2085         /* init ELPG */
2086         memset(&cmd, 0, sizeof(struct pmu_cmd));
2087         cmd.hdr.unit_id = PMU_UNIT_PG;
2088         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2089         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2090         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2091         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
2092
2093         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT");
2094         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2095                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2096
2097         /* alloc dmem for powergating state log */
2098         pmu->stat_dmem_offset = 0;
2099         memset(&cmd, 0, sizeof(struct pmu_cmd));
2100         cmd.hdr.unit_id = PMU_UNIT_PG;
2101         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
2102         cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
2103         cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
2104         cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
2105         cmd.cmd.pg.stat.data = 0;
2106
2107         gk20a_dbg_pmu("cmd post PMU_PG_STAT_CMD_ALLOC_DMEM");
2108         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2109                         pmu_handle_pg_stat_msg, pmu, &seq, ~0);
2110
2111         /* disallow ELPG initially
2112            PMU ucode requires a disallow cmd before allow cmd */
2113         pmu->elpg_stat = PMU_ELPG_STAT_OFF; /* set for wait_event PMU_ELPG_STAT_OFF */
2114         memset(&cmd, 0, sizeof(struct pmu_cmd));
2115         cmd.hdr.unit_id = PMU_UNIT_PG;
2116         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2117         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2118         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2119         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
2120
2121         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
2122         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2123                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2124
2125         pmu->pmu_state = PMU_STATE_STARTING;
2126
2127         return 0;
2128 }
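     /*
      * pmu_init_powergating() posts three commands: ELPG INIT, a PG stat
      * ALLOC_DMEM request, and an initial ELPG DISALLOW (the ucode requires
      * a disallow before the first allow). The DISALLOW_ACK handled in
      * pmu_handle_pg_elpg_msg() moves the state machine from
      * PMU_STATE_STARTING to PMU_STATE_ELPG_BOOTED and schedules
      * pmu->pg_init, which continues the bring-up sequence in pmu_setup_hw().
      */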
2129
2130 static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2131 {
2132         struct gk20a *g = pmu->g;
2133         struct pmu_v *pv = &g->ops.pmu_ver;
2134         struct pmu_cmd cmd;
2135         struct pmu_payload payload;
2136         u32 seq;
2137         u32 data;
2138         int err = 0;
2139
2140         gk20a_dbg_fn("");
2141
2142         pmu->perfmon_ready = 0;
2143
2144         /* use counter #3 for GR && CE2 busy cycles */
2145         gk20a_writel(g, pwr_pmu_idle_mask_r(3),
2146                 pwr_pmu_idle_mask_gr_enabled_f() |
2147                 pwr_pmu_idle_mask_ce_2_enabled_f());
2148
2149         /* disable idle filtering for counters 3 and 6 */
2150         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
2151         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2152                         pwr_pmu_idle_ctrl_filter_m(),
2153                         pwr_pmu_idle_ctrl_value_busy_f() |
2154                         pwr_pmu_idle_ctrl_filter_disabled_f());
2155         gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
2156
2157         /* use counter #6 for total cycles */
2158         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
2159         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2160                         pwr_pmu_idle_ctrl_filter_m(),
2161                         pwr_pmu_idle_ctrl_value_always_f() |
2162                         pwr_pmu_idle_ctrl_filter_disabled_f());
2163         gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
2164
2165         /*
2166          * We don't want to disturb counters #3 and #6, which are used by
2167          * perfmon, so we also wire up counters #1 and #2 to expose
2168          * raw counter readings.
2169          */
2170         gk20a_writel(g, pwr_pmu_idle_mask_r(1),
2171                 pwr_pmu_idle_mask_gr_enabled_f() |
2172                 pwr_pmu_idle_mask_ce_2_enabled_f());
2173
2174         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
2175         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2176                         pwr_pmu_idle_ctrl_filter_m(),
2177                         pwr_pmu_idle_ctrl_value_busy_f() |
2178                         pwr_pmu_idle_ctrl_filter_disabled_f());
2179         gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
2180
2181         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
2182         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2183                         pwr_pmu_idle_ctrl_filter_m(),
2184                         pwr_pmu_idle_ctrl_value_always_f() |
2185                         pwr_pmu_idle_ctrl_filter_disabled_f());
2186         gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
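             /*
              * Counters configured as "busy" (#3 and #1) only advance while
              * the engines selected in the matching idle mask are busy,
              * whereas the "always" counters (#6 and #2) tick every cycle;
              * the perfmon task presumably derives load from the busy/total
              * ratio over each sample period.
              */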
2187
2188         if (!pmu->sample_buffer)
2189                 err = pmu->dmem.alloc(&pmu->dmem,
2190                                       &pmu->sample_buffer, 2 * sizeof(u16));
2191         if (err) {
2192                 gk20a_err(dev_from_gk20a(g),
2193                         "failed to allocate perfmon sample buffer");
2194                 return -ENOMEM;
2195         }
2196
2197         /* init PERFMON */
2198         memset(&cmd, 0, sizeof(struct pmu_cmd));
2199         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2200         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
2201         cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
2202         /* buffer to save counter values for pmu perfmon */
2203         pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
2204         (u16)pmu->sample_buffer);
2205         /* number of sample periods below lower threshold
2206            before pmu triggers perfmon decrease event
2207            TBD: = 15 */
2208         pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
2209         /* index of base counter, aka. always ticking counter */
2210         pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
2211         /* microseconds interval between pmu polls perf counters */
2212         pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
2213         /* number of perfmon counters
2214            counter #3 (GR and CE2) for gk20a */
2215         pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
2216         /* moving average window for sample periods
2217            TBD: = 3000000 / sample_period_us = 17 */
2218         pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
2219
2220         memset(&payload, 0, sizeof(struct pmu_payload));
2221         payload.in.buf = &pmu->perfmon_counter;
2222         payload.in.size = sizeof(struct pmu_perfmon_counter);
2223         payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
2224
2225         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT");
2226         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2227                         NULL, NULL, &seq, ~0);
2228
2229         return 0;
2230 }
2231
2232 static int pmu_process_init_msg(struct pmu_gk20a *pmu,
2233                         struct pmu_msg *msg)
2234 {
2235         struct gk20a *g = pmu->g;
2236         struct pmu_v *pv = &g->ops.pmu_ver;
2237         union pmu_init_msg_pmu *init;
2238         struct pmu_sha1_gid_data gid_data;
2239         u32 i, tail = 0;
2240
2241         tail = pwr_pmu_msgq_tail_val_v(
2242                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
2243
2244         pmu_copy_from_dmem(pmu, tail,
2245                 (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
2246
2247         if (msg->hdr.unit_id != PMU_UNIT_INIT) {
2248                 gk20a_err(dev_from_gk20a(g),
2249                         "expecting init msg");
2250                 return -EINVAL;
2251         }
2252
2253         pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
2254                 (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
2255
2256         if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
2257                 gk20a_err(dev_from_gk20a(g),
2258                         "expecting init msg");
2259                 return -EINVAL;
2260         }
2261
2262         tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
2263         gk20a_writel(g, pwr_pmu_msgq_tail_r(),
2264                 pwr_pmu_msgq_tail_val_f(tail));
2265
2266         init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
2267         if (!pmu->gid_info.valid) {
2268
2269                 pmu_copy_from_dmem(pmu,
2270                         pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2271                         (u8 *)&gid_data,
2272                         sizeof(struct pmu_sha1_gid_data), 0);
2273
2274                 pmu->gid_info.valid =
2275                         (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
2276
2277                 if (pmu->gid_info.valid) {
2278
2279                         BUG_ON(sizeof(pmu->gid_info.gid) !=
2280                                 sizeof(gid_data.gid));
2281
2282                         memcpy(pmu->gid_info.gid, gid_data.gid,
2283                                 sizeof(pmu->gid_info.gid));
2284                 }
2285         }
2286
2287         for (i = 0; i < PMU_QUEUE_COUNT; i++)
2288                 pmu_queue_init(pmu, i, init);
2289
2290         if (!pmu->dmem.alloc)
2291                 gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
2292                                 pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2293                                 pv->get_pmu_init_msg_pmu_sw_mg_size(init),
2294                                 PMU_DMEM_ALLOC_ALIGNMENT);
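             /*
              * The init message reports where each command/message queue lives
              * in PMU DMEM and which DMEM region the ucode leaves for software
              * management; that region is handed to the gk20a allocator so
              * that command payloads posted later can be placed in DMEM.
              */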
2295
2296         pmu->pmu_ready = true;
2297
2298         return 0;
2299 }
2300
2301 static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
2302                         struct pmu_msg *msg, int *status)
2303 {
2304         struct gk20a *g = pmu->g;
2305         u32 read_size, bytes_read;
2306         int err;
2307
2308         *status = 0;
2309
2310         if (pmu_queue_is_empty(pmu, queue))
2311                 return false;
2312
2313         err = pmu_queue_open_read(pmu, queue);
2314         if (err) {
2315                 gk20a_err(dev_from_gk20a(g),
2316                         "fail to open queue %d for read", queue->id);
2317                 *status = err;
2318                 return false;
2319         }
2320
2321         err = pmu_queue_pop(pmu, queue, &msg->hdr,
2322                         PMU_MSG_HDR_SIZE, &bytes_read);
2323         if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2324                 gk20a_err(dev_from_gk20a(g),
2325                         "fail to read msg from queue %d", queue->id);
2326                 *status = err | -EINVAL;
2327                 goto clean_up;
2328         }
2329
2330         if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
2331                 pmu_queue_rewind(pmu, queue);
2332                 /* read again after rewind */
2333                 err = pmu_queue_pop(pmu, queue, &msg->hdr,
2334                                 PMU_MSG_HDR_SIZE, &bytes_read);
2335                 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2336                         gk20a_err(dev_from_gk20a(g),
2337                                 "fail to read msg from queue %d", queue->id);
2338                         *status = err | -EINVAL;
2339                         goto clean_up;
2340                 }
2341         }
2342
2343         if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
2344                 gk20a_err(dev_from_gk20a(g),
2345                         "read invalid unit_id %d from queue %d",
2346                         msg->hdr.unit_id, queue->id);
2347                 *status = -EINVAL;
2348                 goto clean_up;
2349         }
2350
2351         if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
2352                 read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
2353                 err = pmu_queue_pop(pmu, queue, &msg->msg,
2354                         read_size, &bytes_read);
2355                 if (err || bytes_read != read_size) {
2356                         gk20a_err(dev_from_gk20a(g),
2357                                 "fail to read msg from queue %d", queue->id);
2358                         *status = err;
2359                         goto clean_up;
2360                 }
2361         }
2362
2363         err = pmu_queue_close(pmu, queue, true);
2364         if (err) {
2365                 gk20a_err(dev_from_gk20a(g),
2366                         "fail to close queue %d", queue->id);
2367                 *status = err;
2368                 return false;
2369         }
2370
2371         return true;
2372
2373 clean_up:
2374         err = pmu_queue_close(pmu, queue, false);
2375         if (err)
2376                 gk20a_err(dev_from_gk20a(g),
2377                         "fail to close queue %d", queue->id);
2378         return false;
2379 }
2380
2381 static int pmu_response_handle(struct pmu_gk20a *pmu,
2382                         struct pmu_msg *msg)
2383 {
2384         struct gk20a *g = pmu->g;
2385         struct pmu_sequence *seq;
2386         struct pmu_v *pv = &g->ops.pmu_ver;
2387         int ret = 0;
2388
2389         gk20a_dbg_fn("");
2390
2391         seq = &pmu->seq[msg->hdr.seq_id];
2392         if (seq->state != PMU_SEQ_STATE_USED &&
2393             seq->state != PMU_SEQ_STATE_CANCELLED) {
2394                 gk20a_err(dev_from_gk20a(g),
2395                         "msg for an unknown sequence %d", seq->id);
2396                 return -EINVAL;
2397         }
2398
2399         if (msg->hdr.unit_id == PMU_UNIT_RC &&
2400             msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
2401                 gk20a_err(dev_from_gk20a(g),
2402                         "unhandled cmd: seq %d", seq->id);
2403         }
2404         else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
2405                 if (seq->msg) {
2406                         if (seq->msg->hdr.size >= msg->hdr.size) {
2407                                 memcpy(seq->msg, msg, msg->hdr.size);
2408                                 if (pv->pmu_allocation_get_dmem_size(pmu,
2409                                 pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
2410                                         pmu_copy_from_dmem(pmu,
2411                                         pv->pmu_allocation_get_dmem_offset(pmu,
2412                                         pv->get_pmu_seq_out_a_ptr(seq)),
2413                                         seq->out_payload,
2414                                         pv->pmu_allocation_get_dmem_size(pmu,
2415                                         pv->get_pmu_seq_out_a_ptr(seq)), 0);
2416                                 }
2417                         } else {
2418                                 gk20a_err(dev_from_gk20a(g),
2419                                         "sequence %d msg buffer too small",
2420                                         seq->id);
2421                         }
2422                 }
2423         } else
2424                 seq->callback = NULL;
2425         if (pv->pmu_allocation_get_dmem_size(pmu,
2426                         pv->get_pmu_seq_in_a_ptr(seq)) != 0)
2427                 pmu->dmem.free(&pmu->dmem,
2428                 pv->pmu_allocation_get_dmem_offset(pmu,
2429                 pv->get_pmu_seq_in_a_ptr(seq)),
2430                 pv->pmu_allocation_get_dmem_size(pmu,
2431                 pv->get_pmu_seq_in_a_ptr(seq)));
2432         if (pv->pmu_allocation_get_dmem_size(pmu,
2433                         pv->get_pmu_seq_out_a_ptr(seq)) != 0)
2434                 pmu->dmem.free(&pmu->dmem,
2435                 pv->pmu_allocation_get_dmem_offset(pmu,
2436                 pv->get_pmu_seq_out_a_ptr(seq)),
2437                 pv->pmu_allocation_get_dmem_size(pmu,
2438                 pv->get_pmu_seq_out_a_ptr(seq)));
2439
2440         if (seq->callback)
2441                 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
2442
2443         pmu_seq_release(pmu, seq);
2444
2445         /* TBD: notify client waiting for available dmem */
2446
2447         gk20a_dbg_fn("done");
2448
2449         return 0;
2450 }
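     /*
      * pmu_response_handle() matches a reply to its pmu_sequence via
      * hdr.seq_id, copies any DMEM out-payload back into the caller's
      * buffer, frees the DMEM allocations made for the command's in/out
      * payloads, invokes the caller's callback and finally releases the
      * sequence slot.
      */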
2451
2452 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2453                                  u32 *var, u32 val);
2454
2455 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
2456                         void *param, u32 handle, u32 status)
2457 {
2458         struct pmu_gk20a *pmu = param;
2459         gk20a_dbg_pmu("reply ZBC_TABLE_UPDATE");
2460         pmu->zbc_save_done = 1;
2461 }
2462
2463 void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
2464 {
2465         struct pmu_gk20a *pmu = &g->pmu;
2466         struct pmu_cmd cmd;
2467         u32 seq;
2468
2469         if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
2470                 return;
2471
2472         memset(&cmd, 0, sizeof(struct pmu_cmd));
2473         cmd.hdr.unit_id = PMU_UNIT_PG;
2474         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
2475         cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
2476         cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
2477
2478         pmu->zbc_save_done = 0;
2479
2480         gk20a_dbg_pmu("cmd post ZBC_TABLE_UPDATE");
2481         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2482                            pmu_handle_zbc_msg, pmu, &seq, ~0);
2483         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
2484                               &pmu->zbc_save_done, 1);
2485         if (!pmu->zbc_save_done)
2486                 gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
2487 }
2488
2489 static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2490 {
2491         struct gk20a *g = pmu->g;
2492         struct pmu_v *pv = &g->ops.pmu_ver;
2493         struct pmu_cmd cmd;
2494         struct pmu_payload payload;
2495         u32 current_rate = 0;
2496         u32 seq;
2497
2498         /* PERFMON Start */
2499         memset(&cmd, 0, sizeof(struct pmu_cmd));
2500         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2501         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
2502         pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
2503                 PMU_PERFMON_CMD_ID_START);
2504         pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
2505                 PMU_DOMAIN_GROUP_PSTATE);
2506         pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
2507                 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
2508
2509         current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
2510         if (current_rate >= gpc_pll_params.max_freq)
2511                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2512                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2513         else if (current_rate <= gpc_pll_params.min_freq)
2514                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2515                 PMU_PERFMON_FLAG_ENABLE_INCREASE);
2516         else
2517                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2518                 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2519                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2520
2521         pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2522                 pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
2523                 PMU_PERFMON_FLAG_CLEAR_PREV);
2524
2525         memset(&payload, 0, sizeof(struct pmu_payload));
2526
2527         /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
2528         pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
2529         /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
2530         pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
2531         pmu->perfmon_counter.valid = true;
2532
2533         payload.in.buf = &pmu->perfmon_counter;
2534         payload.in.size = sizeof(pmu->perfmon_counter);
2535         payload.in.offset =
2536                 pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
2537
2538         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
2539         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2540                         NULL, NULL, &seq, ~0);
2541
2542         return 0;
2543 }
2544
2545 static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
2546 {
2547         struct gk20a *g = pmu->g;
2548         struct pmu_cmd cmd;
2549         u32 seq;
2550
2551         /* PERFMON Stop */
2552         memset(&cmd, 0, sizeof(struct pmu_cmd));
2553         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2554         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
2555         cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
2556
2557         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
2558         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2559                         NULL, NULL, &seq, ~0);
2560         return 0;
2561 }
2562
2563 static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2564                         struct pmu_perfmon_msg *msg)
2565 {
2566         struct gk20a *g = pmu->g;
2567         u32 rate;
2568
2569         gk20a_dbg_fn("");
2570
2571         switch (msg->msg_type) {
2572         case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
2573                 gk20a_dbg_pmu("perfmon increase event: "
2574                         "state_id %d, group_id %d, pct %d",
2575                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2576                 /* increase gk20a clock freq by 20% */
2577                 rate = gk20a_clk_get_rate(g);
2578                 gk20a_clk_set_rate(g, rate * 6 / 5);
2579                 break;
2580         case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
2581                 gk20a_dbg_pmu("perfmon decrease event: "
2582                         "state_id %d, group_id %d, pct %d",
2583                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2584                 /* decrease gk20a clock freq to 70% of current rate */
2585                 rate = gk20a_clk_get_rate(g);
2586                 gk20a_clk_set_rate(g, (rate / 10) * 7);
2587                 break;
2588         case PMU_PERFMON_MSG_ID_INIT_EVENT:
2589                 pmu->perfmon_ready = 1;
2590                 gk20a_dbg_pmu("perfmon init event");
2591                 break;
2592         default:
2593                 break;
2594         }
2595
2596         /* restart sampling */
2597         if (IS_ENABLED(CONFIG_GK20A_PERFMON))
2598                 return pmu_perfmon_start_sampling(pmu);
2599         return 0;
2600 }
2601
2602
2603 static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
2604 {
2605         int err = 0;
2606
2607         gk20a_dbg_fn("");
2608
2609         switch (msg->hdr.unit_id) {
2610         case PMU_UNIT_PERFMON:
2611                 err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
2612                 break;
2613         default:
2614                 break;
2615         }
2616
2617         return err;
2618 }
2619
2620 static int pmu_process_message(struct pmu_gk20a *pmu)
2621 {
2622         struct pmu_msg msg;
2623         int status;
2624
2625         if (unlikely(!pmu->pmu_ready)) {
2626                 pmu_process_init_msg(pmu, &msg);
2627                 pmu_init_powergating(pmu);
2628                 pmu_init_perfmon(pmu);
2629                 return 0;
2630         }
2631
2632         while (pmu_read_message(pmu,
2633                 &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
2634
2635                 gk20a_dbg_pmu("read msg hdr: "
2636                                 "unit_id = 0x%08x, size = 0x%08x, "
2637                                 "ctrl_flags = 0x%08x, seq_id = 0x%08x",
2638                                 msg.hdr.unit_id, msg.hdr.size,
2639                                 msg.hdr.ctrl_flags, msg.hdr.seq_id);
2640
2641                 msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
2642
2643                 if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
2644                         pmu_handle_event(pmu, &msg);
2645                 } else {
2646                         pmu_response_handle(pmu, &msg);
2647                 }
2648         }
2649
2650         return 0;
2651 }
2652
2653 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2654                                  u32 *var, u32 val)
2655 {
2656         struct gk20a *g = pmu->g;
2657         unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
2658         unsigned long delay = GR_IDLE_CHECK_DEFAULT;
2659
2660         do {
2661                 if (*var == val)
2662                         return 0;
2663
2664                 if (gk20a_readl(g, pwr_falcon_irqstat_r()))
2665                         gk20a_pmu_isr(g);
2666
2667                 usleep_range(delay, delay * 2);
2668                 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2669         } while (time_before(jiffies, end_jiffies) ||
2670                         !tegra_platform_is_silicon());
2671
2672         return -ETIMEDOUT;
2673 }
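     /*
      * pmu_wait_message_cond() polls *var (a flag normally set from a message
      * callback, e.g. pmu->zbc_save_done in gk20a_pmu_save_zbc()) with
      * exponential backoff between GR_IDLE_CHECK_DEFAULT and
      * GR_IDLE_CHECK_MAX. It also calls gk20a_pmu_isr() by hand whenever an
      * interrupt is pending, so it makes progress even if the ISR has not
      * run yet; on pre-silicon platforms the timeout never expires.
      */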
2674
2675 static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
2676 {
2677         struct gk20a *g = pmu->g;
2678         struct pmu_pg_stats stats;
2679
2680         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
2681                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
2682
2683         gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
2684                 stats.pg_entry_start_timestamp);
2685         gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
2686                 stats.pg_exit_start_timestamp);
2687         gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
2688                 stats.pg_ingating_start_timestamp);
2689         gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
2690                 stats.pg_ungating_start_timestamp);
2691         gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
2692                 stats.pg_avg_entry_time_us);
2693         gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
2694                 stats.pg_avg_exit_time_us);
2695         gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
2696                 stats.pg_ingating_cnt);
2697         gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
2698                 stats.pg_ingating_time_us);
2699         gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
2700                 stats.pg_ungating_count);
2701         gk20a_dbg_pmu("pg_ungating_time_us : 0x%08x",
2702                 stats.pg_ungating_time_us);
2703         gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
2704                 stats.pg_gating_cnt);
2705         gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
2706                 stats.pg_gating_deny_cnt);
2707
2708         /*
2709            Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
2710            in .nm file, e.g. 0x1000066c. use 0x66c.
2711         u32 i, val[20];
2712         pmu_copy_from_dmem(pmu, 0x66c,
2713                 (u8 *)val, sizeof(val), 0);
2714         gk20a_dbg_pmu("elpg log begin");
2715         for (i = 0; i < 20; i++)
2716                 gk20a_dbg_pmu("0x%08x", val[i]);
2717         gk20a_dbg_pmu("elpg log end");
2718         */
2719
2720         gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
2721                 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
2722         gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
2723                 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
2724         gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
2725                 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
2726         gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
2727                 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
2728         gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
2729                 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
2730
2731         gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
2732                 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
2733         gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
2734                 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
2735         gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
2736                 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
2737
2738         /*
2739          TBD: script can't generate those registers correctly
2740         gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
2741                 gk20a_readl(g, pwr_pmu_idle_status_r()));
2742         gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
2743                 gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
2744         */
2745 }
2746
2747 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
2748 {
2749         struct gk20a *g = pmu->g;
2750         int i;
2751
2752         gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
2753                 gk20a_readl(g, pwr_falcon_os_r()));
2754         gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
2755                 gk20a_readl(g, pwr_falcon_cpuctl_r()));
2756         gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
2757                 gk20a_readl(g, pwr_falcon_idlestate_r()));
2758         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
2759                 gk20a_readl(g, pwr_falcon_mailbox0_r()));
2760         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
2761                 gk20a_readl(g, pwr_falcon_mailbox1_r()));
2762         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
2763                 gk20a_readl(g, pwr_falcon_irqstat_r()));
2764         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
2765                 gk20a_readl(g, pwr_falcon_irqmode_r()));
2766         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
2767                 gk20a_readl(g, pwr_falcon_irqmask_r()));
2768         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
2769                 gk20a_readl(g, pwr_falcon_irqdest_r()));
2770
2771         for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
2772                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
2773                         i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
2774
2775         for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
2776                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
2777                         i, gk20a_readl(g, pwr_pmu_debug_r(i)));
2778
2779         for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
2780                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2781                         pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
2782                         pwr_pmu_falcon_icd_cmd_idx_f(i));
2783                 gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
2784                         i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2785         }
2786
2787         i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
2788         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
2789         if (i != 0) {
2790                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
2791                         gk20a_readl(g, pwr_pmu_bar0_addr_r()));
2792                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
2793                         gk20a_readl(g, pwr_pmu_bar0_data_r()));
2794                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
2795                         gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
2796                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
2797                         gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
2798         }
2799
2800         i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
2801         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
2802
2803         i = gk20a_readl(g, pwr_falcon_exterrstat_r());
2804         gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
2805         if (pwr_falcon_exterrstat_valid_v(i) ==
2806                         pwr_falcon_exterrstat_valid_true_v()) {
2807                 gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
2808                         gk20a_readl(g, pwr_falcon_exterraddr_r()));
2809                 gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
2810                         gk20a_readl(g, mc_enable_r()));
2811         }
2812
2813         gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
2814                 gk20a_readl(g, pwr_falcon_engctl_r()));
2815         gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
2816                 gk20a_readl(g, pwr_falcon_curctx_r()));
2817         gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
2818                 gk20a_readl(g, pwr_falcon_nxtctx_r()));
2819
2820         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2821                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2822                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
2823         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
2824                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2825
2826         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2827                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2828                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
2829         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
2830                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2831
2832         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2833                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2834                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
2835         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
2836                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2837
2838         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2839                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2840                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
2841         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
2842                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2843
2844         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2845                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2846                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
2847         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
2848                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2849
2850         for (i = 0; i < 4; i++) {
2851                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2852                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2853                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
2854                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
2855                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2856
2857                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2858                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2859                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
2860                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
2861                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2862         }
2863         gk20a_err(dev_from_gk20a(g), "elpg stat: %d\n",
2864                         pmu->elpg_stat);
2865
2866         /* PMU may crash due to FECS crash. Dump FECS status */
2867         gk20a_fecs_dump_falcon_stats(g);
2868 }
2869
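/*
 * PMU interrupt service routine.
 *
 * Reads the pending falcon interrupts (masked by irqmask/irqdest), dumps
 * falcon state on halt/exterr, and processes incoming messages on swgen0.
 * After clearing the handled bits, swgen0 is re-raised if the message queue
 * still has entries, so no message is left unprocessed.
 */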
2870 void gk20a_pmu_isr(struct gk20a *g)
2871 {
2872         struct pmu_gk20a *pmu = &g->pmu;
2873         struct pmu_queue *queue;
2874         u32 intr, mask;
2875         bool recheck = false;
2876
2877         gk20a_dbg_fn("");
2878
2879         mutex_lock(&pmu->isr_enable_lock);
2880         if (!pmu->isr_enabled) {
2881                 mutex_unlock(&pmu->isr_enable_lock);
2882                 return;
2883         }
2884
2885         mutex_lock(&pmu->isr_mutex);
2886
2887         mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
2888                 gk20a_readl(g, pwr_falcon_irqdest_r());
2889
2890         intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
2891
2892         gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);
2893
2894         if (!intr) {
2895                 mutex_unlock(&pmu->isr_mutex);
2896                 mutex_unlock(&pmu->isr_enable_lock);
2897                 return;
2898         }
2899
2900         if (intr & pwr_falcon_irqstat_halt_true_f()) {
2901                 gk20a_err(dev_from_gk20a(g),
2902                         "pmu halt intr not implemented");
2903                 pmu_dump_falcon_stats(pmu);
2904         }
2905         if (intr & pwr_falcon_irqstat_exterr_true_f()) {
2906                 gk20a_err(dev_from_gk20a(g),
2907                         "pmu exterr intr not implemented. Clearing interrupt.");
2908                 pmu_dump_falcon_stats(pmu);
2909
2910                 gk20a_writel(g, pwr_falcon_exterrstat_r(),
2911                         gk20a_readl(g, pwr_falcon_exterrstat_r()) &
2912                                 ~pwr_falcon_exterrstat_valid_m());
2913         }
2914         if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
2915                 pmu_process_message(pmu);
2916                 recheck = true;
2917         }
2918
2919         gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2920
2921         if (recheck) {
2922                 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
2923                 if (!pmu_queue_is_empty(pmu, queue))
2924                         gk20a_writel(g, pwr_falcon_irqsset_r(),
2925                                 pwr_falcon_irqsset_swgen0_set_f());
2926         }
2927
2928         mutex_unlock(&pmu->isr_mutex);
2929         mutex_unlock(&pmu->isr_enable_lock);
2930 }
2931
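/*
 * Sanity-check a command before it is posted: the target must be a SW
 * command queue, header sizes must be within bounds (at most half the
 * queue), the unit id must be valid, and any payload descriptors must be
 * self-consistent.  Returns true if the command may be submitted.
 */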
2932 static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
2933                         struct pmu_msg *msg, struct pmu_payload *payload,
2934                         u32 queue_id)
2935 {
2936         struct gk20a *g = pmu->g;
2937         struct pmu_queue *queue;
2938         u32 in_size, out_size;
2939
2940         if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
2941                 goto invalid_cmd;
2942
2943         queue = &pmu->queue[queue_id];
2944         if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
2945                 goto invalid_cmd;
2946
2947         if (cmd->hdr.size > (queue->size >> 1))
2948                 goto invalid_cmd;
2949
2950         if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
2951                 goto invalid_cmd;
2952
2953         if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
2954                 goto invalid_cmd;
2955
2956         if (payload == NULL)
2957                 return true;
2958
2959         if (payload->in.buf == NULL && payload->out.buf == NULL)
2960                 goto invalid_cmd;
2961
2962         if ((payload->in.buf != NULL && payload->in.size == 0) ||
2963             (payload->out.buf != NULL && payload->out.size == 0))
2964                 goto invalid_cmd;
2965
2966         in_size = PMU_CMD_HDR_SIZE;
2967         if (payload->in.buf) {
2968                 in_size += payload->in.offset;
2969                 in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
2970         }
2971
2972         out_size = PMU_CMD_HDR_SIZE;
2973         if (payload->out.buf) {
2974                 out_size += payload->out.offset;
2975                 out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
2976         }
2977
2978         if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
2979                 goto invalid_cmd;
2980
2982         if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
2983             (payload->out.offset != 0 && payload->out.buf == NULL))
2984                 goto invalid_cmd;
2985
2986         return true;
2987
2988 invalid_cmd:
2989         gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
2990                 "queue_id=%d,\n"
2991                 "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
2992                 "payload in=%p, in_size=%d, in_offset=%d,\n"
2993                 "payload out=%p, out_size=%d, out_offset=%d",
2994                 queue_id, cmd->hdr.size, cmd->hdr.unit_id,
2995                 msg, msg ? msg->hdr.size : ~0,
2996                 payload ? &payload->in : NULL, payload ? payload->in.size : 0, payload ? payload->in.offset : 0,
2997                 payload ? &payload->out : NULL, payload ? payload->out.size : 0, payload ? payload->out.offset : 0);
2998
2999         return false;
3000 }
3001
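/*
 * Copy a validated command into the target queue.  If the queue is full,
 * retry (sleeping 1-2 ms between attempts) until space frees up or the
 * caller-supplied timeout expires.
 */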
3002 static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
3003                         u32 queue_id, unsigned long timeout)
3004 {
3005         struct gk20a *g = pmu->g;
3006         struct pmu_queue *queue;
3007         unsigned long end_jiffies = jiffies +
3008                 msecs_to_jiffies(timeout);
3009         int err;
3010
3011         gk20a_dbg_fn("");
3012
3013         queue = &pmu->queue[queue_id];
3014
3015         do {
3016                 err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
3017                 if (err == -EAGAIN && time_before(jiffies, end_jiffies))
3018                         usleep_range(1000, 2000);
3019                 else
3020                         break;
3021         } while (1);
3022
3023         if (err)
3024                 goto clean_up;
3025
3026         pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
3027
3028         err = pmu_queue_close(pmu, queue, true);
3029
3030 clean_up:
3031         if (err)
3032                 gk20a_err(dev_from_gk20a(g),
3033                         "fail to write cmd to queue %d", queue_id);
3034         else
3035                 gk20a_dbg_fn("done");
3036
3037         return err;
3038 }
3039
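/*
 * Post a command to one of the PMU command queues.
 *
 * The command is validated, a sequence slot is acquired, any in/out payload
 * is staged in PMU DMEM, and the command is written to the queue.  The
 * callback (if provided) runs once the PMU answers with a message for this
 * sequence; *seq_desc identifies the sequence to the caller.
 *
 * Typical payload-less usage, as in the ELPG helpers below:
 *
 *	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
 *			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
 */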
3040 int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3041                 struct pmu_msg *msg, struct pmu_payload *payload,
3042                 u32 queue_id, pmu_callback callback, void* cb_param,
3043                 u32 *seq_desc, unsigned long timeout)
3044 {
3045         struct pmu_gk20a *pmu = &g->pmu;
3046         struct pmu_v *pv = &g->ops.pmu_ver;
3047         struct pmu_sequence *seq;
3048         void *in = NULL, *out = NULL;
3049         int err;
3050
3051         gk20a_dbg_fn("");
3052
3053         BUG_ON(!cmd);
3054         BUG_ON(!seq_desc);
3055         BUG_ON(!pmu->pmu_ready);
3056
3057         if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
3058                 return -EINVAL;
3059
3060         err = pmu_seq_acquire(pmu, &seq);
3061         if (err)
3062                 return err;
3063
3064         cmd->hdr.seq_id = seq->id;
3065
3066         cmd->hdr.ctrl_flags = 0;
3067         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
3068         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
3069
3070         seq->callback = callback;
3071         seq->cb_params = cb_param;
3072         seq->msg = msg;
3073         seq->out_payload = NULL;
3074         seq->desc = pmu->next_seq_desc++;
3075
3076         if (payload)
3077                 seq->out_payload = payload->out.buf;
3078
3079         *seq_desc = seq->desc;
3080
3081         if (payload && payload->in.offset != 0) {
3082                 pv->set_pmu_allocation_ptr(pmu, &in,
3083                         ((u8 *)&cmd->cmd + payload->in.offset));
3084
3085                 if (payload->in.buf != payload->out.buf)
3086                         pv->pmu_allocation_set_dmem_size(pmu, in,
3087                                 (u16)payload->in.size);
3088                 else
3089                         pv->pmu_allocation_set_dmem_size(pmu, in,
3090                                 (u16)max(payload->in.size, payload->out.size));
3091
3092                 err = pmu->dmem.alloc(&pmu->dmem,
3093                         pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
3094                         pv->pmu_allocation_get_dmem_size(pmu, in));
3095                 if (err)
3096                         goto clean_up;
3097
3098                 pmu_copy_to_dmem(pmu,
3099                         pv->pmu_allocation_get_dmem_offset(pmu, in),
3100                         payload->in.buf, payload->in.size, 0);
3101                 pv->pmu_allocation_set_dmem_size(pmu,
3102                         pv->get_pmu_seq_in_a_ptr(seq),
3103                         pv->pmu_allocation_get_dmem_size(pmu, in));
3104                 pv->pmu_allocation_set_dmem_offset(pmu,
3105                         pv->get_pmu_seq_in_a_ptr(seq),
3106                         pv->pmu_allocation_get_dmem_offset(pmu, in));
3107         }
3108
3109         if (payload && payload->out.offset != 0) {
3110                 pv->set_pmu_allocation_ptr(pmu, &out,
3111                         ((u8 *)&cmd->cmd + payload->out.offset));
3112                 pv->pmu_allocation_set_dmem_size(pmu, out,
3113                         (u16)payload->out.size);
3114
3115                 if (payload->out.buf != payload->in.buf) {
3116                         err = pmu->dmem.alloc(&pmu->dmem,
3117                                 pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
3118                                 pv->pmu_allocation_get_dmem_size(pmu, out));
3119                         if (err)
3120                                 goto clean_up;
3121                 } else {
3122                         BUG_ON(in == NULL);
3123                         pv->pmu_allocation_set_dmem_offset(pmu, out,
3124                                 pv->pmu_allocation_get_dmem_offset(pmu, in));
3125                 }
3126
3127                 pv->pmu_allocation_set_dmem_size(pmu,
3128                         pv->get_pmu_seq_out_a_ptr(seq),
3129                         pv->pmu_allocation_get_dmem_size(pmu, out));
3130                 pv->pmu_allocation_set_dmem_offset(pmu,
3131                         pv->get_pmu_seq_out_a_ptr(seq),
3132                         pv->pmu_allocation_get_dmem_offset(pmu, out));
3133         }
3134
3135         seq->state = PMU_SEQ_STATE_USED;
3136         err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
3137         if (err)
3138                 seq->state = PMU_SEQ_STATE_PENDING;
3139
3140         gk20a_dbg_fn("done");
3141
3142         return 0;
3143
3144 clean_up:
3145         gk20a_dbg_fn("fail");
3146         if (in)
3147                 pmu->dmem.free(&pmu->dmem,
3148                         pv->pmu_allocation_get_dmem_offset(pmu, in),
3149                         pv->pmu_allocation_get_dmem_size(pmu, in));
3150         if (out)
3151                 pmu->dmem.free(&pmu->dmem,
3152                         pv->pmu_allocation_get_dmem_offset(pmu, out),
3153                         pv->pmu_allocation_get_dmem_size(pmu, out));
3154
3155         pmu_seq_release(pmu, seq);
3156         return err;
3157 }
3158
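/*
 * Post PMU_PG_ELPG_CMD_ALLOW on the high-priority queue.  The caller must
 * hold elpg_mutex; the ack is handled asynchronously by
 * pmu_handle_pg_elpg_msg.
 */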
3159 static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
3160 {
3161         struct pmu_gk20a *pmu = &g->pmu;
3162         struct pmu_cmd cmd;
3163         u32 seq, status;
3164
3165         gk20a_dbg_fn("");
3166
3167         memset(&cmd, 0, sizeof(struct pmu_cmd));
3168         cmd.hdr.unit_id = PMU_UNIT_PG;
3169         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3170         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3171         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3172         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
3173
3174         /* No need to wait for an ack on ELPG enable, but mark the state
3175            as pending so a follow-up ELPG disable can synchronize with it. */
3176         pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
3177
3178         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_ALLOW");
3179         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3180                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3181
3182         BUG_ON(status != 0);
3183
3184         gk20a_dbg_fn("done");
3185         return 0;
3186 }
3187
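/*
 * Reference-counted ELPG enable.  The ALLOW command is only sent once the
 * refcount becomes positive, the golden context exists and ELPG is
 * currently off; otherwise the call just records the reference.
 */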
3188 int gk20a_pmu_enable_elpg(struct gk20a *g)
3189 {
3190         struct pmu_gk20a *pmu = &g->pmu;
3191         struct gr_gk20a *gr = &g->gr;
3192
3193         int ret = 0;
3194
3195         gk20a_dbg_fn("");
3196
3197         mutex_lock(&pmu->elpg_mutex);
3198
3199         pmu->elpg_refcnt++;
3200         if (pmu->elpg_refcnt <= 0)
3201                 goto exit_unlock;
3202
3203         /* something is not right if we end up in the following code path */
3204         if (unlikely(pmu->elpg_refcnt > 1)) {
3205                 gk20a_warn(dev_from_gk20a(g),
3206                         "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3207                         __func__, pmu->elpg_refcnt);
3208                 WARN_ON(1);
3209         }
3210
3211         /* do NOT enable ELPG until the golden context is created,
3212            since that is the context that ELPG saves and restores. */
3213         if (unlikely(!gr->ctx_vars.golden_image_initialized))
3214                 goto exit_unlock;
3215
3216         /* return if ELPG is already on or on_pending or off_on_pending */
3217         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
3218                 goto exit_unlock;
3219
3220         ret = gk20a_pmu_enable_elpg_locked(g);
3221
3222 exit_unlock:
3223         mutex_unlock(&pmu->elpg_mutex);
3224         gk20a_dbg_fn("done");
3225         return ret;
3226 }
3227
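/*
 * Reference-counted ELPG disable.  Drops the refcount and returns early if
 * ELPG is still in use.  If an enable is pending, waits for its ack first,
 * then posts PMU_PG_ELPG_CMD_DISALLOW and waits (up to the gr idle timeout)
 * for the PMU to confirm that ELPG is off.
 */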
3228 int gk20a_pmu_disable_elpg(struct gk20a *g)
3229 {
3230         struct pmu_gk20a *pmu = &g->pmu;
3231         struct pmu_cmd cmd;
3232         u32 seq;
3233         int ret = 0;
3234
3235         gk20a_dbg_fn("");
3236
3237         mutex_lock(&pmu->elpg_mutex);
3238
3239         pmu->elpg_refcnt--;
3240         if (pmu->elpg_refcnt > 0) {
3241                 gk20a_warn(dev_from_gk20a(g),
3242                         "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3243                         __func__, pmu->elpg_refcnt);
3244                 WARN_ON(1);
3245                 ret = 0;
3246                 goto exit_unlock;
3247         }
3248
3249         /* cancel off_on_pending and return */
3250         if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3251                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3252                 ret = 0;
3253                 goto exit_reschedule;
3254         }
3255         /* wait if on_pending */
3256         else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
3257
3258                 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3259                                       &pmu->elpg_stat, PMU_ELPG_STAT_ON);
3260
3261                 if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3262                         gk20a_err(dev_from_gk20a(g),
3263                                 "ELPG_ALLOW_ACK failed, elpg_stat=%d",
3264                                 pmu->elpg_stat);
3265                         pmu_dump_elpg_stats(pmu);
3266                         pmu_dump_falcon_stats(pmu);
3267                         ret = -EBUSY;
3268                         goto exit_unlock;
3269                 }
3270         }
3271         /* return if ELPG is already off */
3272         else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3273                 ret = 0;
3274                 goto exit_reschedule;
3275         }
3276
3277         memset(&cmd, 0, sizeof(struct pmu_cmd));
3278         cmd.hdr.unit_id = PMU_UNIT_PG;
3279         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3280         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3281         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3282         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
3283
3284         pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
3285
3286         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
3287         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3288                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3289
3290         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3291                               &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
3292         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
3293                 gk20a_err(dev_from_gk20a(g),
3294                         "ELPG_DISALLOW_ACK failed");
3295                 pmu_dump_elpg_stats(pmu);
3296                 pmu_dump_falcon_stats(pmu);
3297                 ret = -EBUSY;
3298                 goto exit_unlock;
3299         }
3300
3301 exit_reschedule:
3302 exit_unlock:
3303         mutex_unlock(&pmu->elpg_mutex);
3304         gk20a_dbg_fn("done");
3305         return ret;
3306 }
3307
3308 int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
3309 {
3310         struct pmu_gk20a *pmu = &g->pmu;
3311         int err;
3312
3313         gk20a_dbg_fn("");
3314
3315         if (enable)
3316                 err = pmu_perfmon_start_sampling(pmu);
3317         else
3318                 err = pmu_perfmon_stop_sampling(pmu);
3319
3320         return err;
3321 }
3322
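/*
 * PMU teardown: flush pending pg_init work, fold the current ELPG residency
 * counters into the per-device totals, disable ELPG, then disable the PMU
 * falcon and mark all PMU state as uninitialized.
 */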
3323 int gk20a_pmu_destroy(struct gk20a *g)
3324 {
3325         struct pmu_gk20a *pmu = &g->pmu;
3326         u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;
3327
3328         gk20a_dbg_fn("");
3329
3330         if (!support_gk20a_pmu())
3331                 return 0;
3332
3333         /* make sure the pending operations are finished before we continue */
3334         cancel_work_sync(&pmu->pg_init);
3335
3336         gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
3337                 &elpg_ungating_time, &gating_cnt);
3338
3339         gk20a_pmu_disable_elpg(g);
3340         pmu->initialized = false;
3341
3342         /* update the s/w ELPG residency counters */
3343         g->pg_ingating_time_us += (u64)elpg_ingating_time;
3344         g->pg_ungating_time_us += (u64)elpg_ungating_time;
3345         g->pg_gating_cnt += gating_cnt;
3346
3347         mutex_lock(&pmu->isr_enable_lock);
3348         pmu_enable(pmu, false);
3349         pmu->isr_enabled = false;
3350         mutex_unlock(&pmu->isr_enable_lock);
3351
3352         pmu->pmu_state = PMU_STATE_OFF;
3353         pmu->pmu_ready = false;
3354         pmu->perfmon_ready = false;
3355         pmu->zbc_ready = false;
3356
3357         gk20a_dbg_fn("done");
3358         return 0;
3359 }
3360
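/*
 * Report the perfmon load sample.  The PMU writes a 16-bit value into the
 * sample buffer in DMEM; it appears to be scaled by 10 relative to the value
 * returned here (hence the division).  Reports 0 while perfmon is not ready.
 */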
3361 int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
3362 {
3363         struct pmu_gk20a *pmu = &g->pmu;
3364         u16 _load = 0;
3365
3366         if (!pmu->perfmon_ready) {
3367                 *load = 0;
3368                 return 0;
3369         }
3370
3371         pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3372         *load = _load / 10;
3373
3374         return 0;
3375 }
3376
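/*
 * Snapshot the PMU idle counters: counter 1 is used as the busy-cycle count
 * and counter 2 as the total-cycle count, with a read barrier in between so
 * the two reads are not reordered.  Both report 0 while the GPU is powered
 * off.
 */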
3377 void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
3378                                  u32 *total_cycles)
3379 {
3380         if (!g->power_on) {
3381                 *busy_cycles = 0;
3382                 *total_cycles = 0;
3383                 return;
3384         }
3385
3386         gk20a_busy(g->dev);
3387         *busy_cycles = pwr_pmu_idle_count_value_v(
3388                 gk20a_readl(g, pwr_pmu_idle_count_r(1)));
3389         rmb();
3390         *total_cycles = pwr_pmu_idle_count_value_v(
3391                 gk20a_readl(g, pwr_pmu_idle_count_r(2)));
3392         gk20a_idle(g->dev);
3393 }
3394
3395 void gk20a_pmu_reset_load_counters(struct gk20a *g)
3396 {
3397         u32 reg_val = pwr_pmu_idle_count_reset_f(1);
3398
3399         if (!g->power_on)
3400                 return;
3401
3402         gk20a_busy(g->dev);
3403         gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
3404         wmb();
3405         gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
3406         gk20a_idle(g->dev);
3407 }
3408
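/*
 * Read the power-gating statistics block from PMU DMEM and return the ELPG
 * in-gating time, un-gating time and gating count.  All three are reported
 * as 0 until the PMU is initialized.
 */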
3409 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
3410                         u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
3411 {
3412         struct pmu_gk20a *pmu = &g->pmu;
3413         struct pmu_pg_stats stats;
3414
3415         if (!pmu->initialized) {
3416                 *ingating_time = 0;
3417                 *ungating_time = 0;
3418                 *gating_cnt = 0;
3419                 return 0;
3420         }
3421
3422         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
3423                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
3424
3425         *ingating_time = stats.pg_ingating_time_us;
3426         *ungating_time = stats.pg_ungating_time_us;
3427         *gating_cnt = stats.pg_gating_cnt;
3428
3429         return 0;
3430 }
3431
3432 /* Send an Adaptive Power (AP) related command to PMU */
3433 static int gk20a_pmu_ap_send_command(struct gk20a *g,
3434                         union pmu_ap_cmd *p_ap_cmd, bool b_block)
3435 {
3436         struct pmu_gk20a *pmu = &g->pmu;
3437         /* FIXME: where is the PG structure defined?? */
3438         u32 status = 0;
3439         struct pmu_cmd cmd;
3440         u32 seq;
3441         pmu_callback p_callback = NULL;
3442
3443         memset(&cmd, 0, sizeof(struct pmu_cmd));
3444
3445         /* Copy common members */
3446         cmd.hdr.unit_id = PMU_UNIT_PG;
3447         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
3448
3449         cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
3450         cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
3451
3452         /* Copy other members of command */
3453         switch (p_ap_cmd->cmn.cmd_id) {
3454         case PMU_AP_CMD_ID_INIT:
3455                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT");
3456                 cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
3457                         p_ap_cmd->init.pg_sampling_period_us;
3458                 p_callback = ap_callback_init_and_enable_ctrl;
3459                 break;
3460
3461         case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
3462                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL");
3463                 cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
3464                         p_ap_cmd->init_and_enable_ctrl.ctrl_id;
3465                 memcpy(
3466                         (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
3467                         (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
3468                         sizeof(struct pmu_ap_ctrl_init_params));
3469
3470                 p_callback = ap_callback_init_and_enable_ctrl;
3471                 break;
3472
3473         case PMU_AP_CMD_ID_ENABLE_CTRL:
3474                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_ENABLE_CTRL");
3475                 cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
3476                         p_ap_cmd->enable_ctrl.ctrl_id;
3477                 break;
3478
3479         case PMU_AP_CMD_ID_DISABLE_CTRL:
3480                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_DISABLE_CTRL");
3481                 cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
3482                         p_ap_cmd->disable_ctrl.ctrl_id;
3483                 break;
3484
3485         case PMU_AP_CMD_ID_KICK_CTRL:
3486                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_KICK_CTRL");
3487                 cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
3488                         p_ap_cmd->kick_ctrl.ctrl_id;
3489                 cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
3490                         p_ap_cmd->kick_ctrl.skip_count;
3491                 break;
3492
3493         default:
3494                 gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
3495                         __func__, p_ap_cmd->cmn.cmd_id);
3496                 return 0x2f;
3497         }
3498
3499         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3500                         p_callback, pmu, &seq, ~0);
3501
3502         if (status) {
3503                 gk20a_dbg_pmu(
3504                         "%s: Unable to submit Adaptive Power Command %d\n",
3505                         __func__, p_ap_cmd->cmn.cmd_id);
3506                 goto err_return;
3507         }
3508
3509         /* TODO: Implement blocking calls (b_block) */
3510
3511 err_return:
3512         return status;
3513 }
3514
3515 static void ap_callback_init_and_enable_ctrl(
3516                 struct gk20a *g, struct pmu_msg *msg,
3517                 void *param, u32 seq_desc, u32 status)
3518 {
3519         /* Handle the PMU's response to an AP init/enable command */
3520         WARN_ON(!msg);
3521
3522         if (!status) {
3523                 switch (msg->msg.pg.ap_msg.cmn.msg_id) {
3524                 case PMU_AP_MSG_ID_INIT_ACK:
3525                         gk20a_dbg_pmu("reply PMU_AP_CMD_ID_INIT");
3526                         break;
3527
3528                 default:
3529                         gk20a_dbg_pmu(
3530                         "%s: Invalid Adaptive Power Message: %x\n",
3531                         __func__, msg->msg.pg.ap_msg.cmn.msg_id);
3532                         break;
3533                 }
3534         }
3535 }
3536
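/*
 * Initialize adaptive ELPG in the PMU with the default sampling period; a
 * controller still has to be enabled separately (see
 * gk20a_aelpg_init_and_enable below).
 */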
3537 static int gk20a_aelpg_init(struct gk20a *g)
3538 {
3539         int status = 0;
3540
3541         /* Remove reliance on app_ctrl field. */
3542         union pmu_ap_cmd ap_cmd;
3543
3544         /* TODO: Check for elpg being ready? */
3545         ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
3546         ap_cmd.init.pg_sampling_period_us =
3547                 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
3548
3549         status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
3550         return status;
3551 }
3552
3553 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
3554 {
3555         int status = 0;
3556         union pmu_ap_cmd ap_cmd;
3557
3558         /* TODO: Probably check if ELPG is ready? */
3559
3560         ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
3561         ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
3562         ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
3563                 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
3564         ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
3565                 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
3566         ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
3567                 APCTRL_POWER_BREAKEVEN_DEFAULT_US;
3568         ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
3569                 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
3570
3571         switch (ctrl_id) {
3572         case PMU_AP_CTRL_ID_GRAPHICS:
3573                 break;
3574         default:
3575                 break;
3576         }
3577
3578         status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
3579         return status;
3580 }
3581
3582 #ifdef CONFIG_DEBUG_FS
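/*
 * debugfs: "elpg_residency" reports accumulated time in/out of ELPG and a
 * residency ratio scaled by 1000; "elpg_transitions" reports the total
 * gating count.  Both add the PMU's live counters (when powered on) to the
 * software totals kept in struct gk20a.
 */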
3583 static int elpg_residency_show(struct seq_file *s, void *data)
3584 {
3585         struct gk20a *g = s->private;
3586         u32 ingating_time = 0;
3587         u32 ungating_time = 0;
3588         u32 gating_cnt;
3589         u64 total_ingating, total_ungating, residency, divisor, dividend;
3590
3591         /* Don't unnecessarily power on the device */
3592         if (g->power_on) {
3593                 gk20a_busy(g->dev);
3594                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3595                         &ungating_time, &gating_cnt);
3596                 gk20a_idle(g->dev);
3597         }
3598         total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
3599         total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
3600         divisor = total_ingating + total_ungating;
3601
3602         /* We compute the residency on a scale of 1000 */
3603         dividend = total_ingating * 1000;
3604
3605         if (divisor)
3606                 residency = div64_u64(dividend, divisor);
3607         else
3608                 residency = 0;
3609
3610         seq_printf(s, "Time in ELPG: %llu us\n"
3611                         "Time out of ELPG: %llu us\n"
3612                         "ELPG residency ratio: %llu\n",
3613                         total_ingating, total_ungating, residency);
3614         return 0;
3615
3616 }
3617
3618 static int elpg_residency_open(struct inode *inode, struct file *file)
3619 {
3620         return single_open(file, elpg_residency_show, inode->i_private);
3621 }
3622
3623 static const struct file_operations elpg_residency_fops = {
3624         .open           = elpg_residency_open,
3625         .read           = seq_read,
3626         .llseek         = seq_lseek,
3627         .release        = single_release,
3628 };
3629
3630 static int elpg_transitions_show(struct seq_file *s, void *data)
3631 {
3632         struct gk20a *g = s->private;
3633         u32 ingating_time, ungating_time, total_gating_cnt;
3634         u32 gating_cnt = 0;
3635
3636         if (g->power_on) {
3637                 gk20a_busy(g->dev);
3638                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3639                         &ungating_time, &gating_cnt);
3640                 gk20a_idle(g->dev);
3641         }
3642         total_gating_cnt = g->pg_gating_cnt + gating_cnt;
3643
3644         seq_printf(s, "%u\n", total_gating_cnt);
3645         return 0;
3646
3647 }
3648
3649 static int elpg_transitions_open(struct inode *inode, struct file *file)
3650 {
3651         return single_open(file, elpg_transitions_show, inode->i_private);
3652 }
3653
3654 static const struct file_operations elpg_transitions_fops = {
3655         .open           = elpg_transitions_open,
3656         .read           = seq_read,
3657         .llseek         = seq_lseek,
3658         .release        = single_release,
3659 };
3660
3661 int gk20a_pmu_debugfs_init(struct platform_device *dev)
3662 {
3663         struct dentry *d;
3664         struct gk20a_platform *platform = platform_get_drvdata(dev);
3665         struct gk20a *g = get_gk20a(dev);
3666
3667         d = debugfs_create_file(
3668                 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
3669                                                 &elpg_residency_fops);
3670         if (!d)
3671                 goto err_out;
3672
3673         d = debugfs_create_file(
3674                 "elpg_transitions", S_IRUGO, platform->debugfs, g,
3675                                                 &elpg_transitions_fops);
3676         if (!d)
3677                 goto err_out;
3678
3679         return 0;
3680
3681 err_out:
3682         pr_err("%s: Failed to make debugfs node\n", __func__);
3683         debugfs_remove_recursive(platform->debugfs);
3684         return -ENOMEM;
3685 }
3686 #endif