drivers/gpu/nvgpu/gk20a/pmu_gk20a.c (sojka/nv-tegra/linux-3.10.git, at commit "gpu: nvgpu: Add PMU state ELPG booting")
1 /*
2  * drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
3  *
4  * GK20A PMU (aka. gPMU outside gk20a context)
5  *
6  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>        /* for mdelay */
23 #include <linux/firmware.h>
24 #include <linux/clk.h>
25 #include <linux/module.h>
26 #include <linux/debugfs.h>
27 #include <linux/dma-mapping.h>
28
29 #include "gk20a.h"
30 #include "gr_gk20a.h"
31 #include "hw_mc_gk20a.h"
32 #include "hw_pwr_gk20a.h"
33 #include "hw_top_gk20a.h"
34
35 #define GK20A_PMU_UCODE_IMAGE   "gpmu_ucode.bin"
36
37 #define gk20a_dbg_pmu(fmt, arg...) \
38         gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
39
40 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
41 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
42                 u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
43 static void pmu_setup_hw(struct work_struct *work);
44 static void ap_callback_init_and_enable_ctrl(
45                 struct gk20a *g, struct pmu_msg *msg,
46                 void *param, u32 seq_desc, u32 status);
47 static int gk20a_pmu_ap_send_command(struct gk20a *g,
48                         union pmu_ap_cmd *p_ap_cmd, bool b_block);
49
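/*
 * The PMU firmware ABI exists in two layouts (v0 and v1).  The helpers
 * below wrap the layout-specific structures: command line arguments,
 * DMEM allocations, the INIT message, perfmon commands and the per-
 * sequence in/out allocations.  gk20a_init_pmu() installs one set of
 * these into g->ops.pmu_ver based on the ucode descriptor app_version.
 */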
50 static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
51 {
52         return sizeof(struct pmu_cmdline_args_v0);
53 }
54
55 static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
56 {
57         return sizeof(struct pmu_cmdline_args_v1);
58 }
59
60 static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
61 {
62         pmu->args_v1.cpu_freq_hz = freq;
63 }
64
65 static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
66 {
67         pmu->args_v0.cpu_freq_hz = freq;
68 }
69
70 static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
71 {
72         return (void *)(&pmu->args_v1);
73 }
74
75 static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
76 {
77         return (void *)(&pmu->args_v0);
78 }
79
80 static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
81 {
82         return sizeof(struct pmu_allocation_v1);
83 }
84
85 static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
86 {
87         return sizeof(struct pmu_allocation_v0);
88 }
89
90 static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
91         void **pmu_alloc_ptr, void *assign_ptr)
92 {
93         struct pmu_allocation_v1 **pmu_a_ptr =
94                 (struct pmu_allocation_v1 **)pmu_alloc_ptr;
95         *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
96 }
97
98 static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
99         void **pmu_alloc_ptr, void *assign_ptr)
100 {
101         struct pmu_allocation_v0 **pmu_a_ptr =
102                 (struct pmu_allocation_v0 **)pmu_alloc_ptr;
103         *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
104 }
105
106 static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
107         void *pmu_alloc_ptr, u16 size)
108 {
109         struct pmu_allocation_v1 *pmu_a_ptr =
110                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
111         pmu_a_ptr->alloc.dmem.size = size;
112 }
113
114 static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
115         void *pmu_alloc_ptr, u16 size)
116 {
117         struct pmu_allocation_v0 *pmu_a_ptr =
118                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
119         pmu_a_ptr->alloc.dmem.size = size;
120 }
121
122 static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
123         void *pmu_alloc_ptr)
124 {
125         struct pmu_allocation_v1 *pmu_a_ptr =
126                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
127         return pmu_a_ptr->alloc.dmem.size;
128 }
129
130 static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
131         void *pmu_alloc_ptr)
132 {
133         struct pmu_allocation_v0 *pmu_a_ptr =
134                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
135         return pmu_a_ptr->alloc.dmem.size;
136 }
137
138 static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
139         void *pmu_alloc_ptr)
140 {
141         struct pmu_allocation_v1 *pmu_a_ptr =
142                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
143         return pmu_a_ptr->alloc.dmem.offset;
144 }
145
146 static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
147         void *pmu_alloc_ptr)
148 {
149         struct pmu_allocation_v0 *pmu_a_ptr =
150                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
151         return pmu_a_ptr->alloc.dmem.offset;
152 }
153
154 static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
155         void *pmu_alloc_ptr)
156 {
157         struct pmu_allocation_v1 *pmu_a_ptr =
158                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
159         return &pmu_a_ptr->alloc.dmem.offset;
160 }
161
162 static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
163         void *pmu_alloc_ptr)
164 {
165         struct pmu_allocation_v0 *pmu_a_ptr =
166                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
167         return &pmu_a_ptr->alloc.dmem.offset;
168 }
169
170 static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
171         void *pmu_alloc_ptr, u32 offset)
172 {
173         struct pmu_allocation_v1 *pmu_a_ptr =
174                 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
175         pmu_a_ptr->alloc.dmem.offset = offset;
176 }
177
178 static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
179         void *pmu_alloc_ptr, u32 offset)
180 {
181         struct pmu_allocation_v0 *pmu_a_ptr =
182                 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
183         pmu_a_ptr->alloc.dmem.offset = offset;
184 }
185
186 static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
187 {
188         return (void *)(&(init->pmu_init_v1));
189 }
190
191 static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
192 {
193         struct pmu_init_msg_pmu_v1 *init =
194                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
195         return init->sw_managed_area_offset;
196 }
197
198 static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
199 {
200         struct pmu_init_msg_pmu_v1 *init =
201                 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
202         return init->sw_managed_area_size;
203 }
204
205 static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
206 {
207         return (void *)(&(init->pmu_init_v0));
208 }
209
210 static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
211 {
212         struct pmu_init_msg_pmu_v0 *init =
213                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
214         return init->sw_managed_area_offset;
215 }
216
217 static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
218 {
219         struct pmu_init_msg_pmu_v0 *init =
220                 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
221         return init->sw_managed_area_size;
222 }
223
224 static u32 get_pmu_perfmon_cmd_start_size_v1(void)
225 {
226         return sizeof(struct pmu_perfmon_cmd_start_v1);
227 }
228
229 static u32 get_pmu_perfmon_cmd_start_size_v0(void)
230 {
231         return sizeof(struct pmu_perfmon_cmd_start_v0);
232 }
233
234 static int get_perfmon_cmd_start_offsetofvar_v1(
235         enum pmu_perfmon_cmd_start_fields field)
236 {
237         switch (field) {
238         case COUNTER_ALLOC:
239                 return offsetof(struct pmu_perfmon_cmd_start_v1,
240                 counter_alloc);
241         default:
242                 return -EINVAL;
243                 break;
244         }
245         return 0;
246 }
247
248 static int get_perfmon_cmd_start_offsetofvar_v0(
249         enum pmu_perfmon_cmd_start_fields field)
250 {
251         switch (field) {
252         case COUNTER_ALLOC:
253                 return offsetof(struct pmu_perfmon_cmd_start_v0,
254                 counter_alloc);
255         default:
256                 return -EINVAL;
257                 break;
258         }
259         return 0;
260 }
261
262 static u32 get_pmu_perfmon_cmd_init_size_v1(void)
263 {
264         return sizeof(struct pmu_perfmon_cmd_init_v1);
265 }
266
267 static u32 get_pmu_perfmon_cmd_init_size_v0(void)
268 {
269         return sizeof(struct pmu_perfmon_cmd_init_v0);
270 }
271
272 static int get_perfmon_cmd_init_offsetofvar_v1(
273         enum pmu_perfmon_cmd_start_fields field)
274 {
275         switch (field) {
276         case COUNTER_ALLOC:
277                 return offsetof(struct pmu_perfmon_cmd_init_v1,
278                 counter_alloc);
279         default:
280                 return -EINVAL;
281                 break;
282         }
283         return 0;
284 }
285
286 static int get_perfmon_cmd_init_offsetofvar_v0(
287         enum pmu_perfmon_cmd_start_fields field)
288 {
289         switch (field) {
290         case COUNTER_ALLOC:
291                 return offsetof(struct pmu_perfmon_cmd_init_v0,
292                 counter_alloc);
293         default:
294                 return -EINVAL;
295                 break;
296         }
297         return 0;
298 }
299
300 static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
301 {
302         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
303         start->cmd_type = value;
304 }
305
306 static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
307 {
308         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
309         start->cmd_type = value;
310 }
311
312 static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
313 {
314         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
315         start->group_id = value;
316 }
317
318 static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
319 {
320         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
321         start->group_id = value;
322 }
323
324 static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
325 {
326         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
327         start->state_id = value;
328 }
329
330 static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
331 {
332         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
333         start->state_id = value;
334 }
335
336 static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
337 {
338         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
339         start->flags = value;
340 }
341
342 static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
343 {
344         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
345         start->flags = value;
346 }
347
348 static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
349 {
350         struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
351         return start->flags;
352 }
353
354 static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
355 {
356         struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
357         return start->flags;
358 }
359
360 static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
361         u16 value)
362 {
363         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
364         init->sample_buffer = value;
365 }
366
367 static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
368         u16 value)
369 {
370         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
371         init->sample_buffer = value;
372 }
373
374 static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
375         u8 value)
376 {
377         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
378         init->to_decrease_count = value;
379 }
380
381 static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
382         u8 value)
383 {
384         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
385         init->to_decrease_count = value;
386 }
387
388 static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
389         u8 value)
390 {
391         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
392         init->base_counter_id = value;
393 }
394
395 static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
396         u8 value)
397 {
398         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
399         init->base_counter_id = value;
400 }
401
402 static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
403         u32 value)
404 {
405         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
406         init->sample_period_us = value;
407 }
408
409 static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
410         u32 value)
411 {
412         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
413         init->sample_period_us = value;
414 }
415
416 static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
417         u8 value)
418 {
419         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
420         init->num_counters = value;
421 }
422
423 static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
424         u8 value)
425 {
426         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
427         init->num_counters = value;
428 }
429
430 static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
431         u8 value)
432 {
433         struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
434         init->samples_in_moving_avg = value;
435 }
436
437 static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
438         u8 value)
439 {
440         struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
441         init->samples_in_moving_avg = value;
442 }
443
444 static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
445         u32 id, void *pmu_init_msg)
446 {
447         struct pmu_init_msg_pmu_v0 *init =
448                 (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
449         queue->index    = init->queue_info[id].index;
450         queue->offset   = init->queue_info[id].offset;
451         queue->size = init->queue_info[id].size;
452 }
453
454 static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
455         u32 id, void *pmu_init_msg)
456 {
457         struct pmu_init_msg_pmu_v1 *init =
458                 (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
459         queue->index    = init->queue_info[id].index;
460         queue->offset   = init->queue_info[id].offset;
461         queue->size = init->queue_info[id].size;
462 }
463
464 static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
465 {
466         return (void *)(&seq->in_v1);
467 }
468
469 static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
470 {
471         return (void *)(&seq->in_v0);
472 }
473
474 static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
475 {
476         return (void *)(&seq->out_v1);
477 }
478
479 static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
480 {
481         return (void *)(&seq->out_v0);
482 }
483
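/*
 * Select the interface version: inspect the app_version reported by the
 * PMU ucode descriptor and bind the matching v0/v1 accessors (plus the
 * version-specific ZBC table update command id) into g->ops.pmu_ver.
 * Unknown versions are rejected so the driver never talks to firmware
 * with a layout it does not understand.
 */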
484 static int gk20a_init_pmu(struct pmu_gk20a *pmu)
485 {
486         struct gk20a *g = pmu->g;
487         switch (pmu->desc->app_version) {
488         case APP_VERSION_1:
489                 g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
490                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
491                         pmu_cmdline_size_v1;
492                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
493                         set_pmu_cmdline_args_cpufreq_v1;
494                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
495                         get_pmu_cmdline_args_ptr_v1;
496                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
497                         get_pmu_allocation_size_v1;
498                 g->ops.pmu_ver.set_pmu_allocation_ptr =
499                         set_pmu_allocation_ptr_v1;
500                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
501                         pmu_allocation_set_dmem_size_v1;
502                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
503                         pmu_allocation_get_dmem_size_v1;
504                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
505                         pmu_allocation_get_dmem_offset_v1;
506                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
507                         pmu_allocation_get_dmem_offset_addr_v1;
508                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
509                         pmu_allocation_set_dmem_offset_v1;
510                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
511                         get_pmu_init_msg_pmu_queue_params_v1;
512                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
513                         get_pmu_msg_pmu_init_msg_ptr_v1;
514                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
515                         get_pmu_init_msg_pmu_sw_mg_off_v1;
516                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
517                         get_pmu_init_msg_pmu_sw_mg_size_v1;
518                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
519                         get_pmu_perfmon_cmd_start_size_v1;
520                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
521                         get_perfmon_cmd_start_offsetofvar_v1;
522                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
523                         perfmon_start_set_cmd_type_v1;
524                 g->ops.pmu_ver.perfmon_start_set_group_id =
525                         perfmon_start_set_group_id_v1;
526                 g->ops.pmu_ver.perfmon_start_set_state_id =
527                         perfmon_start_set_state_id_v1;
528                 g->ops.pmu_ver.perfmon_start_set_flags =
529                         perfmon_start_set_flags_v1;
530                 g->ops.pmu_ver.perfmon_start_get_flags =
531                         perfmon_start_get_flags_v1;
532                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
533                         get_pmu_perfmon_cmd_init_size_v1;
534                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
535                         get_perfmon_cmd_init_offsetofvar_v1;
536                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
537                         perfmon_cmd_init_set_sample_buffer_v1;
538                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
539                         perfmon_cmd_init_set_dec_cnt_v1;
540                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
541                         perfmon_cmd_init_set_base_cnt_id_v1;
542                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
543                         perfmon_cmd_init_set_samp_period_us_v1;
544                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
545                         perfmon_cmd_init_set_num_cnt_v1;
546                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
547                         perfmon_cmd_init_set_mov_avg_v1;
548                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
549                         get_pmu_sequence_in_alloc_ptr_v1;
550                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
551                         get_pmu_sequence_out_alloc_ptr_v1;
552                 break;
553         case APP_VERSION_0:
554                 g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
555                 g->ops.pmu_ver.get_pmu_cmdline_args_size =
556                         pmu_cmdline_size_v0;
557                 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
558                         set_pmu_cmdline_args_cpufreq_v0;
559                 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
560                         get_pmu_cmdline_args_ptr_v0;
561                 g->ops.pmu_ver.get_pmu_allocation_struct_size =
562                         get_pmu_allocation_size_v0;
563                 g->ops.pmu_ver.set_pmu_allocation_ptr =
564                         set_pmu_allocation_ptr_v0;
565                 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
566                         pmu_allocation_set_dmem_size_v0;
567                 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
568                         pmu_allocation_get_dmem_size_v0;
569                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
570                         pmu_allocation_get_dmem_offset_v0;
571                 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
572                         pmu_allocation_get_dmem_offset_addr_v0;
573                 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
574                         pmu_allocation_set_dmem_offset_v0;
575                 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
576                         get_pmu_init_msg_pmu_queue_params_v0;
577                 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
578                         get_pmu_msg_pmu_init_msg_ptr_v0;
579                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
580                         get_pmu_init_msg_pmu_sw_mg_off_v0;
581                 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
582                         get_pmu_init_msg_pmu_sw_mg_size_v0;
583                 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
584                         get_pmu_perfmon_cmd_start_size_v0;
585                 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
586                         get_perfmon_cmd_start_offsetofvar_v0;
587                 g->ops.pmu_ver.perfmon_start_set_cmd_type =
588                         perfmon_start_set_cmd_type_v0;
589                 g->ops.pmu_ver.perfmon_start_set_group_id =
590                         perfmon_start_set_group_id_v0;
591                 g->ops.pmu_ver.perfmon_start_set_state_id =
592                         perfmon_start_set_state_id_v0;
593                 g->ops.pmu_ver.perfmon_start_set_flags =
594                         perfmon_start_set_flags_v0;
595                 g->ops.pmu_ver.perfmon_start_get_flags =
596                         perfmon_start_get_flags_v0;
597                 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
598                         get_pmu_perfmon_cmd_init_size_v0;
599                 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
600                         get_perfmon_cmd_init_offsetofvar_v0;
601                 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
602                         perfmon_cmd_init_set_sample_buffer_v0;
603                 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
604                         perfmon_cmd_init_set_dec_cnt_v0;
605                 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
606                         perfmon_cmd_init_set_base_cnt_id_v0;
607                 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
608                         perfmon_cmd_init_set_samp_period_us_v0;
609                 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
610                         perfmon_cmd_init_set_num_cnt_v0;
611                 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
612                         perfmon_cmd_init_set_mov_avg_v0;
613                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
614                         get_pmu_sequence_in_alloc_ptr_v0;
615                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
616                         get_pmu_sequence_out_alloc_ptr_v0;
617                 break;
618         default:
619                 gk20a_err(dev_from_gk20a(pmu->g),
620                 "PMU code version not supported\n");
621                 return -EINVAL;
622                 break;
623         }
624         return 0;
625 }
626
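/*
 * PMU DMEM is accessed through an indexed window: pwr_falcon_dmemc_r()
 * selects a 4-byte aligned offset (with auto-increment on access) and
 * data is streamed through pwr_falcon_dmemd_r().  The two copy helpers
 * below serialize on pmu_copy_lock, transfer whole words and then the
 * unaligned tail bytes; the copy-to path additionally checks that the
 * window offset advanced by the word-aligned size.
 */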
627 static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
628                 u32 src, u8 *dst, u32 size, u8 port)
629 {
630         struct gk20a *g = pmu->g;
631         u32 i, words, bytes;
632         u32 data, addr_mask;
633         u32 *dst_u32 = (u32*)dst;
634
635         if (size == 0) {
636                 gk20a_err(dev_from_gk20a(g),
637                         "size is zero");
638                 return;
639         }
640
641         if (src & 0x3) {
642                 gk20a_err(dev_from_gk20a(g),
643                         "src (0x%08x) not 4-byte aligned", src);
644                 return;
645         }
646
647         mutex_lock(&pmu->pmu_copy_lock);
648
649         words = size >> 2;
650         bytes = size & 0x3;
651
652         addr_mask = pwr_falcon_dmemc_offs_m() |
653                     pwr_falcon_dmemc_blk_m();
654
655         src &= addr_mask;
656
657         gk20a_writel(g, pwr_falcon_dmemc_r(port),
658                 src | pwr_falcon_dmemc_aincr_f(1));
659
660         for (i = 0; i < words; i++)
661                 dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));
662
663         if (bytes > 0) {
664                 data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
665                 for (i = 0; i < bytes; i++) {
666                         dst[(words << 2) + i] = ((u8 *)&data)[i];
667                 }
668         }
669         mutex_unlock(&pmu->pmu_copy_lock);
670         return;
671 }
672
673 static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
674                 u32 dst, u8 *src, u32 size, u8 port)
675 {
676         struct gk20a *g = pmu->g;
677         u32 i, words, bytes;
678         u32 data, addr_mask;
679         u32 *src_u32 = (u32*)src;
680
681         if (size == 0) {
682                 gk20a_err(dev_from_gk20a(g),
683                         "size is zero");
684                 return;
685         }
686
687         if (dst & 0x3) {
688                 gk20a_err(dev_from_gk20a(g),
689                         "dst (0x%08x) not 4-byte aligned", dst);
690                 return;
691         }
692
693         mutex_lock(&pmu->pmu_copy_lock);
694
695         words = size >> 2;
696         bytes = size & 0x3;
697
698         addr_mask = pwr_falcon_dmemc_offs_m() |
699                     pwr_falcon_dmemc_blk_m();
700
701         dst &= addr_mask;
702
703         gk20a_writel(g, pwr_falcon_dmemc_r(port),
704                 dst | pwr_falcon_dmemc_aincw_f(1));
705
706         for (i = 0; i < words; i++)
707                 gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);
708
709         if (bytes > 0) {
710                 data = 0;
711                 for (i = 0; i < bytes; i++)
712                         ((u8 *)&data)[i] = src[(words << 2) + i];
713                 gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
714         }
715
716         data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
717         size = ALIGN(size, 4);
718         if (data != dst + size) {
719                 gk20a_err(dev_from_gk20a(g),
720                         "copy failed. bytes written %d, expected %d",
721                         data - dst, size);
722         }
723         mutex_unlock(&pmu->pmu_copy_lock);
724         return;
725 }
726
727 static int pmu_idle(struct pmu_gk20a *pmu)
728 {
729         struct gk20a *g = pmu->g;
730         unsigned long end_jiffies = jiffies +
731                 msecs_to_jiffies(2000);
732         u32 idle_stat;
733
734         /* wait for pmu idle */
735         do {
736                 idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());
737
738                 if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
739                     pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
740                         break;
741                 }
742
743                 if (time_after_eq(jiffies, end_jiffies)) {
744                         gk20a_err(dev_from_gk20a(g),
745                                 "timeout waiting pmu idle : 0x%08x",
746                                 idle_stat);
747                         return -EBUSY;
748                 }
749                 usleep_range(100, 200);
750         } while (1);
751
752         gk20a_dbg_fn("done");
753         return 0;
754 }
755
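/*
 * Interrupt plumbing: always mask the PMU line in the master controller
 * and clear every falcon interrupt mask bit first.  When enabling, route
 * the sources between falcon and host via irqdest, unmask the sources of
 * interest (gptmr, wdtmr, halt, exterr, swgen0/1) in irqmset and finally
 * re-enable the PMU bit in mc_intr_mask_0.
 */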
756 static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
757 {
758         struct gk20a *g = pmu->g;
759
760         gk20a_dbg_fn("");
761
762         gk20a_writel(g, mc_intr_mask_0_r(),
763                 gk20a_readl(g, mc_intr_mask_0_r()) &
764                 ~mc_intr_mask_0_pmu_enabled_f());
765         gk20a_writel(g, mc_intr_mask_1_r(),
766                 gk20a_readl(g, mc_intr_mask_1_r()) &
767                 ~mc_intr_mask_1_pmu_enabled_f());
768
769         gk20a_writel(g, pwr_falcon_irqmclr_r(),
770                 pwr_falcon_irqmclr_gptmr_f(1)  |
771                 pwr_falcon_irqmclr_wdtmr_f(1)  |
772                 pwr_falcon_irqmclr_mthd_f(1)   |
773                 pwr_falcon_irqmclr_ctxsw_f(1)  |
774                 pwr_falcon_irqmclr_halt_f(1)   |
775                 pwr_falcon_irqmclr_exterr_f(1) |
776                 pwr_falcon_irqmclr_swgen0_f(1) |
777                 pwr_falcon_irqmclr_swgen1_f(1) |
778                 pwr_falcon_irqmclr_ext_f(0xff));
779
780         if (enable) {
781                 /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
782                 gk20a_writel(g, pwr_falcon_irqdest_r(),
783                         pwr_falcon_irqdest_host_gptmr_f(0)    |
784                         pwr_falcon_irqdest_host_wdtmr_f(1)    |
785                         pwr_falcon_irqdest_host_mthd_f(0)     |
786                         pwr_falcon_irqdest_host_ctxsw_f(0)    |
787                         pwr_falcon_irqdest_host_halt_f(1)     |
788                         pwr_falcon_irqdest_host_exterr_f(0)   |
789                         pwr_falcon_irqdest_host_swgen0_f(1)   |
790                         pwr_falcon_irqdest_host_swgen1_f(0)   |
791                         pwr_falcon_irqdest_host_ext_f(0xff)   |
792                         pwr_falcon_irqdest_target_gptmr_f(1)  |
793                         pwr_falcon_irqdest_target_wdtmr_f(0)  |
794                         pwr_falcon_irqdest_target_mthd_f(0)   |
795                         pwr_falcon_irqdest_target_ctxsw_f(0)  |
796                         pwr_falcon_irqdest_target_halt_f(0)   |
797                         pwr_falcon_irqdest_target_exterr_f(0) |
798                         pwr_falcon_irqdest_target_swgen0_f(0) |
799                         pwr_falcon_irqdest_target_swgen1_f(0) |
800                         pwr_falcon_irqdest_target_ext_f(0xff));
801
802                 /* 0=disable, 1=enable */
803                 gk20a_writel(g, pwr_falcon_irqmset_r(),
804                         pwr_falcon_irqmset_gptmr_f(1)  |
805                         pwr_falcon_irqmset_wdtmr_f(1)  |
806                         pwr_falcon_irqmset_mthd_f(0)   |
807                         pwr_falcon_irqmset_ctxsw_f(0)  |
808                         pwr_falcon_irqmset_halt_f(1)   |
809                         pwr_falcon_irqmset_exterr_f(1) |
810                         pwr_falcon_irqmset_swgen0_f(1) |
811                         pwr_falcon_irqmset_swgen1_f(1));
812
813                 gk20a_writel(g, mc_intr_mask_0_r(),
814                         gk20a_readl(g, mc_intr_mask_0_r()) |
815                         mc_intr_mask_0_pmu_enabled_f());
816         }
817
818         gk20a_dbg_fn("done");
819 }
820
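/*
 * Powering up the PWR block in PMC kicks off IMEM/DMEM scrubbing inside
 * the falcon.  Poll pwr_falcon_dmactl_r() until both scrubbing bits clear
 * before reporting the engine ready; on timeout the block is powered back
 * down.  Non-silicon platforms are allowed to poll indefinitely.
 */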
821 static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
822 {
823         struct gk20a *g = pmu->g;
824
825         gk20a_dbg_fn("");
826
827         if (enable) {
828                 int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
829                 gk20a_enable(g, mc_enable_pwr_enabled_f());
830
831                 do {
832                         u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
833                                 (pwr_falcon_dmactl_dmem_scrubbing_m() |
834                                  pwr_falcon_dmactl_imem_scrubbing_m());
835
836                         if (!w) {
837                                 gk20a_dbg_fn("done");
838                                 return 0;
839                         }
840                         udelay(GR_IDLE_CHECK_DEFAULT);
841                 } while (--retries || !tegra_platform_is_silicon());
842
843                 gk20a_disable(g, mc_enable_pwr_enabled_f());
844                 gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
845
846                 return -ETIMEDOUT;
847         } else {
848                 gk20a_disable(g, mc_enable_pwr_enabled_f());
849                 return 0;
850         }
851 }
852
853 static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
854 {
855         struct gk20a *g = pmu->g;
856         u32 pmc_enable;
857         int err;
858
859         gk20a_dbg_fn("");
860
861         if (!enable) {
862                 pmc_enable = gk20a_readl(g, mc_enable_r());
863                 if (mc_enable_pwr_v(pmc_enable) !=
864                     mc_enable_pwr_disabled_v()) {
865
866                         pmu_enable_irq(pmu, false);
867                         pmu_enable_hw(pmu, false);
868                 }
869         } else {
870                 err = pmu_enable_hw(pmu, true);
871                 if (err)
872                         return err;
873
874                 /* TBD: post reset */
875
876                 err = pmu_idle(pmu);
877                 if (err)
878                         return err;
879
880                 pmu_enable_irq(pmu, true);
881         }
882
883         gk20a_dbg_fn("done");
884         return 0;
885 }
886
887 static int pmu_reset(struct pmu_gk20a *pmu)
888 {
889         int err;
890
891         err = pmu_idle(pmu);
892         if (err)
893                 return err;
894
895         /* TBD: release pmu hw mutex */
896
897         err = pmu_enable(pmu, false);
898         if (err)
899                 return err;
900
901         /* TBD: cancel all sequences */
902         /* TBD: init all sequences and state tables */
903         /* TBD: restore pre-init message handler */
904
905         err = pmu_enable(pmu, true);
906         if (err)
907                 return err;
908
909         return 0;
910 }
911
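/*
 * Non-secure bootstrap: point the falcon at the PMU instance block, copy
 * the command line arguments to the top of DMEM, push the bootloader
 * argument block (DMA index, code/data offsets and sizes, IMEM entry,
 * args offset) through the DMEMD auto-increment port, DMA the bootloader
 * into IMEM block by block and finally start the falcon CPU at the
 * bootloader entry point.
 */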
912 static int pmu_bootstrap(struct pmu_gk20a *pmu)
913 {
914         struct gk20a *g = pmu->g;
915         struct gk20a_platform *platform = platform_get_drvdata(g->dev);
916         struct mm_gk20a *mm = &g->mm;
917         struct pmu_ucode_desc *desc = pmu->desc;
918         u64 addr_code, addr_data, addr_load;
919         u32 i, blocks, addr_args;
920
921         gk20a_dbg_fn("");
922
923         gk20a_writel(g, pwr_falcon_itfen_r(),
924                 gk20a_readl(g, pwr_falcon_itfen_r()) |
925                 pwr_falcon_itfen_ctxen_enable_f());
926         gk20a_writel(g, pwr_pmu_new_instblk_r(),
927                 pwr_pmu_new_instblk_ptr_f(
928                         mm->pmu.inst_block.cpu_pa >> 12) |
929                 pwr_pmu_new_instblk_valid_f(1) |
930                 pwr_pmu_new_instblk_target_sys_coh_f());
931
932         /* TBD: load all other surfaces */
933
934         g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
935                 clk_get_rate(platform->clk[1]));
936
937         addr_args = (pwr_falcon_hwcfg_dmem_size_v(
938                 gk20a_readl(g, pwr_falcon_hwcfg_r()))
939                         << GK20A_PMU_DMEM_BLKSIZE2) -
940                 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
941
942         pmu_copy_to_dmem(pmu, addr_args,
943                         (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
944                         g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
945
946         gk20a_writel(g, pwr_falcon_dmemc_r(0),
947                 pwr_falcon_dmemc_offs_f(0) |
948                 pwr_falcon_dmemc_blk_f(0)  |
949                 pwr_falcon_dmemc_aincw_f(1));
950
951         addr_code = u64_lo32((pmu->ucode.pmu_va +
952                         desc->app_start_offset +
953                         desc->app_resident_code_offset) >> 8) ;
954         addr_data = u64_lo32((pmu->ucode.pmu_va +
955                         desc->app_start_offset +
956                         desc->app_resident_data_offset) >> 8);
957         addr_load = u64_lo32((pmu->ucode.pmu_va +
958                         desc->bootloader_start_offset) >> 8);
959
960         gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
961         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
962         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
963         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
964         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
965         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
966         gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
967         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
968         gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
969         gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
970
971         gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
972                 addr_load - (desc->bootloader_imem_offset >> 8));
973
974         blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
975
976         for (i = 0; i < blocks; i++) {
977                 gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
978                         desc->bootloader_imem_offset + (i << 8));
979                 gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
980                         desc->bootloader_imem_offset + (i << 8));
981                 gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
982                         pwr_falcon_dmatrfcmd_imem_f(1)  |
983                         pwr_falcon_dmatrfcmd_write_f(0) |
984                         pwr_falcon_dmatrfcmd_size_f(6)  |
985                         pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
986         }
987
988         gk20a_writel(g, pwr_falcon_bootvec_r(),
989                 pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));
990
991         gk20a_writel(g, pwr_falcon_cpuctl_r(),
992                 pwr_falcon_cpuctl_startcpu_f(1));
993
994         gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
995
996         return 0;
997 }
998
999 static void pmu_seq_init(struct pmu_gk20a *pmu)
1000 {
1001         u32 i;
1002
1003         memset(pmu->seq, 0,
1004                 sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
1005         memset(pmu->pmu_seq_tbl, 0,
1006                 sizeof(pmu->pmu_seq_tbl));
1007
1008         for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
1009                 pmu->seq[i].id = i;
1010 }
1011
1012 static int pmu_seq_acquire(struct pmu_gk20a *pmu,
1013                         struct pmu_sequence **pseq)
1014 {
1015         struct gk20a *g = pmu->g;
1016         struct pmu_sequence *seq;
1017         u32 index;
1018
1019         mutex_lock(&pmu->pmu_seq_lock);
1020         index = find_first_zero_bit(pmu->pmu_seq_tbl,
1021                                 sizeof(pmu->pmu_seq_tbl));
1022         if (index >= sizeof(pmu->pmu_seq_tbl)) {
1023                 gk20a_err(dev_from_gk20a(g),
1024                         "no free sequence available");
1025                 mutex_unlock(&pmu->pmu_seq_lock);
1026                 return -EAGAIN;
1027         }
1028         set_bit(index, pmu->pmu_seq_tbl);
1029         mutex_unlock(&pmu->pmu_seq_lock);
1030
1031         seq = &pmu->seq[index];
1032         seq->state = PMU_SEQ_STATE_PENDING;
1033
1034         *pseq = seq;
1035         return 0;
1036 }
1037
1038 static void pmu_seq_release(struct pmu_gk20a *pmu,
1039                         struct pmu_sequence *seq)
1040 {
1041         struct gk20a *g = pmu->g;
1042         seq->state      = PMU_SEQ_STATE_FREE;
1043         seq->desc       = PMU_INVALID_SEQ_DESC;
1044         seq->callback   = NULL;
1045         seq->cb_params  = NULL;
1046         seq->msg        = NULL;
1047         seq->out_payload = NULL;
1048         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1049                 g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
1050         g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
1051                 g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);
1052
1053         clear_bit(seq->id, pmu->pmu_seq_tbl);
1054 }
1055
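/*
 * Queue geometry (index, DMEM offset, size) is reported by the PMU in its
 * INIT message; the layout of that message is firmware-version specific,
 * hence the indirection through get_pmu_init_msg_pmu_queue_params.
 */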
1056 static int pmu_queue_init(struct pmu_gk20a *pmu,
1057                 u32 id, union pmu_init_msg_pmu *init)
1058 {
1059         struct gk20a *g = pmu->g;
1060         struct pmu_queue *queue = &pmu->queue[id];
1061         queue->id       = id;
1062         g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
1063
1064         queue->mutex_id = id;
1065         mutex_init(&queue->mutex);
1066
1067         gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
1068                 id, queue->index, queue->offset, queue->size);
1069
1070         return 0;
1071 }
1072
1073 static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1074                         u32 *head, bool set)
1075 {
1076         struct gk20a *g = pmu->g;
1077
1078         BUG_ON(!head);
1079
1080         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1081
1082                 if (queue->index >= pwr_pmu_queue_head__size_1_v())
1083                         return -EINVAL;
1084
1085                 if (!set)
1086                         *head = pwr_pmu_queue_head_address_v(
1087                                 gk20a_readl(g,
1088                                         pwr_pmu_queue_head_r(queue->index)));
1089                 else
1090                         gk20a_writel(g,
1091                                 pwr_pmu_queue_head_r(queue->index),
1092                                 pwr_pmu_queue_head_address_f(*head));
1093         } else {
1094                 if (!set)
1095                         *head = pwr_pmu_msgq_head_val_v(
1096                                 gk20a_readl(g, pwr_pmu_msgq_head_r()));
1097                 else
1098                         gk20a_writel(g,
1099                                 pwr_pmu_msgq_head_r(),
1100                                 pwr_pmu_msgq_head_val_f(*head));
1101         }
1102
1103         return 0;
1104 }
1105
1106 static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
1107                         u32 *tail, bool set)
1108 {
1109         struct gk20a *g = pmu->g;
1110
1111         BUG_ON(!tail);
1112
1113         if (PMU_IS_COMMAND_QUEUE(queue->id)) {
1114
1115                 if (queue->index >= pwr_pmu_queue_tail__size_1_v())
1116                         return -EINVAL;
1117
1118                 if (!set)
1119                         *tail = pwr_pmu_queue_tail_address_v(
1120                                 gk20a_readl(g,
1121                                         pwr_pmu_queue_tail_r(queue->index)));
1122                 else
1123                         gk20a_writel(g,
1124                                 pwr_pmu_queue_tail_r(queue->index),
1125                                 pwr_pmu_queue_tail_address_f(*tail));
1126         } else {
1127                 if (!set)
1128                         *tail = pwr_pmu_msgq_tail_val_v(
1129                                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
1130                 else
1131                         gk20a_writel(g,
1132                                 pwr_pmu_msgq_tail_r(),
1133                                 pwr_pmu_msgq_tail_val_f(*tail));
1134         }
1135
1136         return 0;
1137 }
1138
1139 static inline void pmu_queue_read(struct pmu_gk20a *pmu,
1140                         u32 offset, u8 *dst, u32 size)
1141 {
1142         pmu_copy_from_dmem(pmu, offset, dst, size, 0);
1143 }
1144
1145 static inline void pmu_queue_write(struct pmu_gk20a *pmu,
1146                         u32 offset, u8 *src, u32 size)
1147 {
1148         pmu_copy_to_dmem(pmu, offset, src, size, 0);
1149 }
1150
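/*
 * PMU HW mutex protocol: fetch a token from pwr_pmu_mutex_id_r(), write
 * it into the mutex register and read it back.  A matching read-back
 * means we own the mutex; otherwise the token is returned through
 * pwr_pmu_mutex_id_release_r() and the acquire is retried.  Re-acquiring
 * with the token we already hold only bumps the reference count.
 */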
1151 int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
1152 {
1153         struct gk20a *g = pmu->g;
1154         struct pmu_mutex *mutex;
1155         u32 data, owner, max_retry;
1156
1157         if (!pmu->initialized)
1158                 return -EINVAL;
1159
1160         BUG_ON(!token);
1161         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1162         BUG_ON(id > pmu->mutex_cnt);
1163
1164         mutex = &pmu->mutex[id];
1165
1166         owner = pwr_pmu_mutex_value_v(
1167                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1168
1169         if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
1170                 BUG_ON(mutex->ref_cnt == 0);
1171                 gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
1172                 mutex->ref_cnt++;
1173                 return 0;
1174         }
1175
1176         max_retry = 40;
1177         do {
1178                 data = pwr_pmu_mutex_id_value_v(
1179                         gk20a_readl(g, pwr_pmu_mutex_id_r()));
1180                 if (data == pwr_pmu_mutex_id_value_init_v() ||
1181                     data == pwr_pmu_mutex_id_value_not_avail_v()) {
1182                         gk20a_warn(dev_from_gk20a(g),
1183                                 "fail to generate mutex token: val 0x%08x",
1184                                 owner);
1185                         usleep_range(20, 40);
1186                         continue;
1187                 }
1188
1189                 owner = data;
1190                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1191                         pwr_pmu_mutex_value_f(owner));
1192
1193                 data = pwr_pmu_mutex_value_v(
1194                         gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1195
1196                 if (owner == data) {
1197                         mutex->ref_cnt = 1;
1198                         gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
1199                                 mutex->index, *token);
1200                         *token = owner;
1201                         return 0;
1202                 } else {
1203                         gk20a_dbg_info("fail to acquire mutex idx=0x%08x",
1204                                 mutex->index);
1205
1206                         data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1207                         data = set_field(data,
1208                                 pwr_pmu_mutex_id_release_value_m(),
1209                                 pwr_pmu_mutex_id_release_value_f(owner));
1210                         gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1211
1212                         usleep_range(20, 40);
1213                         continue;
1214                 }
1215         } while (max_retry-- > 0);
1216
1217         return -EBUSY;
1218 }
1219
1220 int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
1221 {
1222         struct gk20a *g = pmu->g;
1223         struct pmu_mutex *mutex;
1224         u32 owner, data;
1225
1226         if (!pmu->initialized)
1227                 return -EINVAL;
1228
1229         BUG_ON(!token);
1230         BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
1231         BUG_ON(id > pmu->mutex_cnt);
1232
1233         mutex = &pmu->mutex[id];
1234
1235         owner = pwr_pmu_mutex_value_v(
1236                 gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
1237
1238         if (*token != owner) {
1239                 gk20a_err(dev_from_gk20a(g),
1240                         "requester 0x%08x NOT match owner 0x%08x",
1241                         *token, owner);
1242                 return -EINVAL;
1243         }
1244
1245         if (--mutex->ref_cnt == 0) {
1246                 gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
1247                         pwr_pmu_mutex_value_initial_lock_f());
1248
1249                 data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
1250                 data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
1251                         pwr_pmu_mutex_id_release_value_f(owner));
1252                 gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
1253
1254                 gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
1255                         mutex->index, *token);
1256         }
1257
1258         return 0;
1259 }
1260
1261 static int pmu_queue_lock(struct pmu_gk20a *pmu,
1262                         struct pmu_queue *queue)
1263 {
1264         int err;
1265
1266         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1267                 return 0;
1268
1269         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1270                 mutex_lock(&queue->mutex);
1271                 return 0;
1272         }
1273
1274         err = pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock);
1275         return err;
1276 }
1277
1278 static int pmu_queue_unlock(struct pmu_gk20a *pmu,
1279                         struct pmu_queue *queue)
1280 {
1281         int err;
1282
1283         if (PMU_IS_MESSAGE_QUEUE(queue->id))
1284                 return 0;
1285
1286         if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
1287                 mutex_unlock(&queue->mutex);
1288                 return 0;
1289         }
1290
1291         err = pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock);
1292         return err;
1293 }
1294
1295 /* called by pmu_read_message, no lock */
1296 static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
1297                         struct pmu_queue *queue)
1298 {
1299         u32 head, tail;
1300
1301         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1302         if (queue->opened && queue->oflag == OFLAG_READ)
1303                 tail = queue->position;
1304         else
1305                 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1306
1307         return head == tail;
1308 }
1309
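/*
 * Queues are ring buffers in DMEM.  When the write pointer is ahead of
 * the read pointer, only the contiguous space up to the end of the queue
 * (minus one command header reserved for the REWIND marker) counts as
 * free; if the request does not fit there, the caller must rewind the
 * write position back to the start of the queue.
 */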
1310 static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
1311                         struct pmu_queue *queue, u32 size, bool *need_rewind)
1312 {
1313         u32 head, tail, free;
1314         bool rewind = false;
1315
1316         size = ALIGN(size, QUEUE_ALIGNMENT);
1317
1318         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1319         pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
1320
1321         if (head >= tail) {
1322                 free = queue->offset + queue->size - head;
1323                 free -= PMU_CMD_HDR_SIZE;
1324
1325                 if (size > free) {
1326                         rewind = true;
1327                         head = queue->offset;
1328                 }
1329         }
1330
1331         if (head < tail)
1332                 free = tail - head - 1;
1333
1334         if (need_rewind)
1335                 *need_rewind = rewind;
1336
1337         return size <= free;
1338 }
1339
1340 static int pmu_queue_push(struct pmu_gk20a *pmu,
1341                         struct pmu_queue *queue, void *data, u32 size)
1342 {
1343         gk20a_dbg_fn("");
1344
1345         if (!queue->opened || queue->oflag != OFLAG_WRITE) {
1346                 gk20a_err(dev_from_gk20a(pmu->g),
1347                         "queue not opened for write");
1348                 return -EINVAL;
1349         }
1350
1351         pmu_queue_write(pmu, queue->position, data, size);
1352         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1353         return 0;
1354 }
1355
1356 static int pmu_queue_pop(struct pmu_gk20a *pmu,
1357                         struct pmu_queue *queue, void *data, u32 size,
1358                         u32 *bytes_read)
1359 {
1360         u32 head, tail, used;
1361
1362         *bytes_read = 0;
1363
1364         if (!queue->opened || queue->oflag != OFLAG_READ) {
1365                 gk20a_err(dev_from_gk20a(pmu->g),
1366                         "queue not opened for read");
1367                 return -EINVAL;
1368         }
1369
1370         pmu_queue_head(pmu, queue, &head, QUEUE_GET);
1371         tail = queue->position;
1372
1373         if (head == tail)
1374                 return 0;
1375
1376         if (head > tail)
1377                 used = head - tail;
1378         else
1379                 used = queue->offset + queue->size - tail;
1380
1381         if (size > used) {
1382                 gk20a_warn(dev_from_gk20a(pmu->g),
1383                         "queue size smaller than request read");
1384                 size = used;
1385         }
1386
1387         pmu_queue_read(pmu, tail, data, size);
1388         queue->position += ALIGN(size, QUEUE_ALIGNMENT);
1389         *bytes_read = size;
1390         return 0;
1391 }
1392
1393 static void pmu_queue_rewind(struct pmu_gk20a *pmu,
1394                         struct pmu_queue *queue)
1395 {
1396         struct pmu_cmd cmd;
1397
1398         gk20a_dbg_fn("");
1399
1400         if (!queue->opened) {
1401                 gk20a_err(dev_from_gk20a(pmu->g),
1402                         "queue not opened");
1403                 return;
1404         }
1405
1406         if (queue->oflag == OFLAG_WRITE) {
1407                 cmd.hdr.unit_id = PMU_UNIT_REWIND;
1408                 cmd.hdr.size = PMU_CMD_HDR_SIZE;
1409                 pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
1410                 gk20a_dbg_pmu("queue %d rewound", queue->id);
1411         }
1412
1413         queue->position = queue->offset;
1414         return;
1415 }
1416
1417 /* open for read and lock the queue */
1418 static int pmu_queue_open_read(struct pmu_gk20a *pmu,
1419                         struct pmu_queue *queue)
1420 {
1421         int err;
1422
1423         err = pmu_queue_lock(pmu, queue);
1424         if (err)
1425                 return err;
1426
1427         if (queue->opened)
1428                 BUG();
1429
1430         pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
1431         queue->oflag = OFLAG_READ;
1432         queue->opened = true;
1433
1434         return 0;
1435 }
1436
1437 /* open for write and lock the queue
1438    make sure there's enough free space for the write */
1439 static int pmu_queue_open_write(struct pmu_gk20a *pmu,
1440                         struct pmu_queue *queue, u32 size)
1441 {
1442         bool rewind = false;
1443         int err;
1444
1445         err = pmu_queue_lock(pmu, queue);
1446         if (err)
1447                 return err;
1448
1449         if (queue->opened)
1450                 BUG();
1451
1452         if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
1453                 gk20a_err(dev_from_gk20a(pmu->g), "queue full");
1454                 return -EAGAIN;
1455         }
1456
1457         pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
1458         queue->oflag = OFLAG_WRITE;
1459         queue->opened = true;
1460
1461         if (rewind)
1462                 pmu_queue_rewind(pmu, queue);
1463
1464         return 0;
1465 }
1466
1467 /* close and unlock the queue */
1468 static int pmu_queue_close(struct pmu_gk20a *pmu,
1469                         struct pmu_queue *queue, bool commit)
1470 {
1471         if (!queue->opened)
1472                 return 0;
1473
1474         if (commit) {
1475                 if (queue->oflag == OFLAG_READ) {
1476                         pmu_queue_tail(pmu, queue,
1477                                 &queue->position, QUEUE_SET);
1478                 }
1479                 else {
1480                         pmu_queue_head(pmu, queue,
1481                                 &queue->position, QUEUE_SET);
1482                 }
1483         }
1484
1485         queue->opened = false;
1486
1487         pmu_queue_unlock(pmu, queue);
1488
1489         return 0;
1490 }
1491
1492 void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
1493 {
1494         gk20a_dbg_fn("");
1495
1496         gk20a_allocator_destroy(&pmu->dmem);
1497 }
1498
1499 int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
1500 {
1501         struct pmu_gk20a *pmu = &g->pmu;
1502
1503         gk20a_dbg_fn("");
1504
1505         pmu_enable_hw(pmu, true);
1506
1507         return 0;
1508 }
1509
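/*
 * One-time software setup: allocate the HW mutex and sequence tables,
 * load the PMU ucode blob via gk20a_request_firmware(), create the PMU
 * VM and allocate the DMA buffers for the ucode image and the sequence
 * buffer.  If sw_ready is already set, only the mutex and sequence
 * bookkeeping is reinitialized.
 */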
1510 int gk20a_init_pmu_setup_sw(struct gk20a *g)
1511 {
1512         struct pmu_gk20a *pmu = &g->pmu;
1513         struct mm_gk20a *mm = &g->mm;
1514         struct vm_gk20a *vm = &mm->pmu.vm;
1515         struct device *d = dev_from_gk20a(g);
1516         int i, err = 0;
1517         u8 *ptr;
1518         void *ucode_ptr;
1519         struct sg_table *sgt_pmu_ucode;
1520         struct sg_table *sgt_seq_buf;
1521         DEFINE_DMA_ATTRS(attrs);
1522         dma_addr_t iova;
1523
1524         gk20a_dbg_fn("");
1525
1526         /* start with elpg disabled until first enable call */
1527         mutex_init(&pmu->elpg_mutex);
1528         pmu->elpg_refcnt = 0;
1529
1530         if (pmu->sw_ready) {
1531                 for (i = 0; i < pmu->mutex_cnt; i++) {
1532                         pmu->mutex[i].id    = i;
1533                         pmu->mutex[i].index = i;
1534                 }
1535                 pmu_seq_init(pmu);
1536
1537                 gk20a_dbg_fn("skip init");
1538                 goto skip_init;
1539         }
1540
1541         /* no infoRom script from vbios? */
1542
1543         /* TBD: sysmon subtask */
1544
1545         pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
1546         pmu->mutex = kzalloc(pmu->mutex_cnt *
1547                 sizeof(struct pmu_mutex), GFP_KERNEL);
1548         if (!pmu->mutex) {
1549                 err = -ENOMEM;
1550                 goto err;
1551         }
1552
1553         for (i = 0; i < pmu->mutex_cnt; i++) {
1554                 pmu->mutex[i].id    = i;
1555                 pmu->mutex[i].index = i;
1556         }
1557
1558         pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
1559                 sizeof(struct pmu_sequence), GFP_KERNEL);
1560         if (!pmu->seq) {
1561                 err = -ENOMEM;
1562                 goto err_free_mutex;
1563         }
1564
1565         pmu_seq_init(pmu);
1566
1567         if (!g->pmu_fw) {
1568                 g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
1569                 if (!g->pmu_fw) {
1570                         gk20a_err(d, "failed to load pmu ucode!!");
1571                         err = -ENOENT;
1572                         goto err_free_seq;
1573                 }
1574         }
1575
1576         gk20a_dbg_fn("firmware loaded");
1577
1578         pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
1579         pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
1580                         pmu->desc->descriptor_size);
1581
1582         INIT_WORK(&pmu->pg_init, pmu_setup_hw);
1583
1584         gk20a_init_pmu_vm(mm);
1585
1586         dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1587         pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1588                                         &iova,
1589                                         GFP_KERNEL,
1590                                         &attrs);
1591         if (!pmu->ucode.cpuva) {
1592                 gk20a_err(d, "failed to allocate memory\n");
1593                 err = -ENOMEM;
1594                 goto err_release_fw;
1595         }
1596
1597         pmu->ucode.iova = iova;
1598         pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1599                                         &iova,
1600                                         GFP_KERNEL);
1601         if (!pmu->seq_buf.cpuva) {
1602                 gk20a_err(d, "failed to allocate memory\n");
1603                 err = -ENOMEM;
1604                 goto err_free_pmu_ucode;
1605         }
1606
1607         pmu->seq_buf.iova = iova;
1608
1609         err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1610                                 pmu->ucode.cpuva,
1611                                 pmu->ucode.iova,
1612                                 GK20A_PMU_UCODE_SIZE_MAX);
1613         if (err) {
1614                 gk20a_err(d, "failed to allocate sg table\n");
1615                 goto err_free_seq_buf;
1616         }
1617
1618         pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1619                                         GK20A_PMU_UCODE_SIZE_MAX,
1620                                         0, /* flags */
1621                                         gk20a_mem_flag_read_only);
1622         if (!pmu->ucode.pmu_va) {
1623                 gk20a_err(d, "failed to map pmu ucode memory!!");
                err = -ENOMEM;
1624                 goto err_free_ucode_sgt;
1625         }
1626
1627         err = gk20a_get_sgtable(d, &sgt_seq_buf,
1628                                 pmu->seq_buf.cpuva,
1629                                 pmu->seq_buf.iova,
1630                                 GK20A_PMU_SEQ_BUF_SIZE);
1631         if (err) {
1632                 gk20a_err(d, "failed to allocate sg table\n");
1633                 goto err_unmap_ucode;
1634         }
1635
1636         pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
1637                                         GK20A_PMU_SEQ_BUF_SIZE,
1638                                         0, /* flags */
1639                                         gk20a_mem_flag_none);
1640         if (!pmu->seq_buf.pmu_va) {
1641                 gk20a_err(d, "failed to map pmu seq buffer memory!!");
                err = -ENOMEM;
1642                 goto err_free_seq_buf_sgt;
1643         }
1644
1645         ptr = (u8 *)pmu->seq_buf.cpuva;
1646         if (!ptr) {
1647                 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
                err = -ENOMEM;
1648                 goto err_unmap_seq_buf;
1649         }
1650
1651         /* TBD: remove this if ZBC save/restore is handled by PMU
1652          * send an empty ZBC sequence for now */
1653         ptr[0] = 0x16; /* opcode EXIT */
1654         ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
1655         ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
1656
1657         pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
1658
1659         ucode_ptr = pmu->ucode.cpuva;
1660
1661         for (i = 0; i < (pmu->desc->app_start_offset +
1662                         pmu->desc->app_size) >> 2; i++)
1663                 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
1664
1665         gk20a_free_sgtable(&sgt_pmu_ucode);
1666         gk20a_free_sgtable(&sgt_seq_buf);
1667
1668         pmu->sw_ready = true;
1669
1670 skip_init:
1671         mutex_init(&pmu->isr_mutex);
1672         mutex_init(&pmu->isr_enable_lock);
1673         mutex_init(&pmu->pmu_copy_lock);
1674         mutex_init(&pmu->pmu_seq_lock);
1675
1676         pmu->perfmon_counter.index = 3; /* GR & CE2 */
1677         pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
1678
1679         pmu->remove_support = gk20a_remove_pmu_support;
1680         err = gk20a_init_pmu(pmu);
1681         if (err) {
1682                 gk20a_err(d, "failed to set function pointers\n");
1683                 return err;
1684         }
1685
1686         gk20a_dbg_fn("done");
1687         return 0;
1688
1689  err_unmap_seq_buf:
1690         gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
1691                 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
1692  err_free_seq_buf_sgt:
1693         gk20a_free_sgtable(&sgt_seq_buf);
1694  err_unmap_ucode:
1695         gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1696                 GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
1697  err_free_ucode_sgt:
1698         gk20a_free_sgtable(&sgt_pmu_ucode);
1699  err_free_seq_buf:
1700         dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
1701                 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
1702         pmu->seq_buf.cpuva = NULL;
1703         pmu->seq_buf.iova = 0;
1704  err_free_pmu_ucode:
1705         dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1706                 pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
1707         pmu->ucode.cpuva = NULL;
1708         pmu->ucode.iova = 0;
1709  err_release_fw:
1710         release_firmware(g->pmu_fw);
1711  err_free_seq:
1712         kfree(pmu->seq);
1713  err_free_mutex:
1714         kfree(pmu->mutex);
1715  err:
1716         gk20a_dbg_fn("fail");
1717         return err;
1718 }
1719
1720 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
1721                         void *param, u32 handle, u32 status);
1722
1723 static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
1724                         void *param, u32 handle, u32 status)
1725 {
1726         struct pmu_gk20a *pmu = param;
1727         struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;
1728
1729         gk20a_dbg_fn("");
1730
1731         gk20a_dbg_pmu("reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1732         if (status != 0) {
1733                 gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
1734                 /* TBD: disable ELPG */
1735                 return;
1736         }
1737
1738         if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) {
1739                 gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
1740         }
1741
1742         pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
1743         schedule_work(&pmu->pg_init);
1744 }
1745
1746 int gk20a_init_pmu_setup_hw1(struct gk20a *g)
1747 {
1748         struct pmu_gk20a *pmu = &g->pmu;
1749         int err;
1750
1751         gk20a_dbg_fn("");
1752
1753         mutex_lock(&pmu->isr_enable_lock);
1754         pmu_reset(pmu);
1755         pmu->isr_enabled = true;
1756         mutex_unlock(&pmu->isr_enable_lock);
1757
1758         /* setup apertures - virtual */
1759         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1760                 pwr_fbif_transcfg_mem_type_virtual_f());
1761         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1762                 pwr_fbif_transcfg_mem_type_virtual_f());
1763         /* setup apertures - physical */
1764         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1765                 pwr_fbif_transcfg_mem_type_physical_f() |
1766                 pwr_fbif_transcfg_target_local_fb_f());
1767         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1768                 pwr_fbif_transcfg_mem_type_physical_f() |
1769                 pwr_fbif_transcfg_target_coherent_sysmem_f());
1770         gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1771                 pwr_fbif_transcfg_mem_type_physical_f() |
1772                 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1773
1774         /* TBD: load pmu ucode */
1775         err = pmu_bootstrap(pmu);
1776         if (err)
1777                 return err;
1778
1779         return 0;
1781 }
1782
1783 static int gk20a_aelpg_init(struct gk20a *g);
1784 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
1785
1786 static void pmu_setup_hw_load_zbc(struct gk20a *g);
1787 static void pmu_setup_hw_enable_elpg(struct gk20a *g);
1788
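/*
 * pg_init work: advances PMU power-gating bring-up one step at a time.
 * The sequence, driven by the command completion callbacks in this file, is:
 *   PMU_STATE_ELPG_BOOTING -> PMU_STATE_ELPG_BOOTED   (DISALLOW acked)
 *   -> PMU_STATE_LOADING_PG_BUF                       (FECS PG buffer load posted)
 *   -> PMU_STATE_LOADING_ZBC                          (ZBC seq buffer load posted)
 *   -> PMU_STATE_STARTED                              (ELPG/AELPG enabled).
 */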
1789 static void pmu_setup_hw(struct work_struct *work)
1790 {
1791         struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
1792         struct gk20a *g = pmu->g;
1793
1794         switch (pmu->pmu_state) {
1795         case PMU_STATE_ELPG_BOOTED:
1796                 gk20a_dbg_pmu("elpg booted");
1797                 gk20a_init_pmu_bind_fecs(g);
1798                 break;
1799         case PMU_STATE_LOADING_PG_BUF:
1800                 gk20a_dbg_pmu("loaded pg buf");
1801                 pmu_setup_hw_load_zbc(g);
1802                 break;
1803         case PMU_STATE_LOADING_ZBC:
1804                 gk20a_dbg_pmu("loaded zbc");
1805                 pmu_setup_hw_enable_elpg(g);
1806                 break;
1807         case PMU_STATE_STARTED:
1808                 gk20a_dbg_pmu("PMU booted");
1809                 break;
1810         default:
1811                 gk20a_dbg_pmu("invalid state");
1812                 break;
1813         }
1814 }
1815
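/*
 * Allocate and map the FECS power-gating register-list buffer (size queried
 * from FECS), bind the PMU instance block to FECS, and post an ENG_BUF_LOAD
 * command for the buffer.  The reply handler advances the pg_init state
 * machine to the next step.
 */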
1816 int gk20a_init_pmu_bind_fecs(struct gk20a *g)
1817 {
1818         struct pmu_gk20a *pmu = &g->pmu;
1819         struct mm_gk20a *mm = &g->mm;
1820         struct vm_gk20a *vm = &mm->pmu.vm;
1821         struct device *d = dev_from_gk20a(g);
1822         struct pmu_cmd cmd;
1823         u32 desc;
1824         int err;
1825         u32 size;
1826         struct sg_table *sgt_pg_buf;
1827         dma_addr_t iova;
1828
1829         gk20a_dbg_fn("");
1830
1831         size = 0;
1832         gk20a_gr_wait_initialized(g);
1833         err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
1834         if (err) {
1835                 gk20a_err(dev_from_gk20a(g),
1836                         "fail to query fecs pg buffer size");
1837                 return err;
1838         }
1839
1840         if (!pmu->pg_buf.cpuva) {
1841                 pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
1842                                                 &iova,
1843                                                 GFP_KERNEL);
1844                 if (!pmu->pg_buf.cpuva) {
1845                         gk20a_err(d, "failed to allocate memory\n");
1846                         return -ENOMEM;
1847                 }
1848
1849                 pmu->pg_buf.iova = iova;
1850                 pmu->pg_buf.size = size;
1851
1852                 err = gk20a_get_sgtable(d, &sgt_pg_buf,
1853                                         pmu->pg_buf.cpuva,
1854                                         pmu->pg_buf.iova,
1855                                         size);
1856                 if (err) {
1857                         gk20a_err(d, "failed to create sg table\n");
1858                         goto err_free_pg_buf;
1859                 }
1860
1861                 pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
1862                                         &sgt_pg_buf,
1863                                         size,
1864                                         0, /* flags */
1865                                         gk20a_mem_flag_none);
1866                 if (!pmu->pg_buf.pmu_va) {
1867                         gk20a_err(d, "failed to map fecs pg buffer");
1868                         err = -ENOMEM;
1869                         goto err_free_sgtable;
1870                 }
1871
1872                 gk20a_free_sgtable(&sgt_pg_buf);
1873         }
1874
1875         err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
1876         if (err) {
1877                 gk20a_err(dev_from_gk20a(g),
1878                         "fail to bind pmu inst to gr");
1879                 return err;
1880         }
1881
1882         err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va);
1883         if (err) {
1884                 gk20a_err(dev_from_gk20a(g),
1885                         "fail to set pg buffer pmu va");
1886                 return err;
1887         }
1888
1889         memset(&cmd, 0, sizeof(struct pmu_cmd));
1890         cmd.hdr.unit_id = PMU_UNIT_PG;
1891         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1892         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1893         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1894         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
1895         cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
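        /*
         * The PG buffer GPU VA is passed to the PMU as a 256-byte aligned
         * base (va >> 8) plus the byte offset within that 256-byte unit.
         */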
1896         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
1897         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
1898         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1899
1900         pmu->buf_loaded = false;
1901         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
1902         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1903                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1904         pmu->pmu_state = PMU_STATE_LOADING_PG_BUF;
1905         return err;
1906
1907 err_free_sgtable:
1908         gk20a_free_sgtable(&sgt_pg_buf);
1909 err_free_pg_buf:
1910         dma_free_coherent(d, size,
1911                 pmu->pg_buf.cpuva, pmu->pg_buf.iova);
1912         pmu->pg_buf.cpuva = NULL;
1913         pmu->pg_buf.iova = 0;
1914         return err;
1915 }
1916
1917 static void pmu_setup_hw_load_zbc(struct gk20a *g)
1918 {
1919         struct pmu_gk20a *pmu = &g->pmu;
1920         struct pmu_cmd cmd;
1921         u32 desc;
1922
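        /*
         * The ZBC engine buffer is the seq_buf set up in
         * gk20a_init_pmu_setup_sw(), which for now holds only an EXIT opcode
         * as a placeholder sequence.
         */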
1923         memset(&cmd, 0, sizeof(struct pmu_cmd));
1924         cmd.hdr.unit_id = PMU_UNIT_PG;
1925         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
1926         cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
1927         cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
1928         cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
1929         cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
1930         cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
1931         cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
1932         cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
1933
1934         pmu->buf_loaded = false;
1935         gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC");
1936         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
1937                         pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
1938         pmu->pmu_state = PMU_STATE_LOADING_ZBC;
1939 }
1940
1941 static void pmu_setup_hw_enable_elpg(struct gk20a *g)
1942 {
1943         struct pmu_gk20a *pmu = &g->pmu;
1944
1945         /*
1946          * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
1947          * 7. This prevents PMU stalling on Host register accesses. Once the
1948          * cause for this hang is discovered and fixed, this WAR should be
1949          * removed.
1950          */
1951         gk20a_writel(g, 0x10a164, 0x109ff);
1952
1953         pmu->initialized = true;
1954         pmu->pmu_state = PMU_STATE_STARTED;
1955
1956         pmu->zbc_ready = true;
1957         /* Save zbc table after PMU is initialized. */
1958         gr_gk20a_pmu_save_zbc(g, 0xf);
1959
1960         if (g->elpg_enabled)
1961                 gk20a_pmu_enable_elpg(g);
1962
1963         udelay(50);
1964
1965         /* Enable AELPG */
1966         if (g->aelpg_enabled) {
1967                 gk20a_aelpg_init(g);
1968                 gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
1969         }
1970
1971         wake_up(&g->pmu.boot_wq);
1972 }
1973
1974 int gk20a_init_pmu_support(struct gk20a *g)
1975 {
1976         struct pmu_gk20a *pmu = &g->pmu;
1977         int err;
1978
1979         gk20a_dbg_fn("");
1980
1981         if (pmu->initialized)
1982                 return 0;
1983
1984         pmu->g = g;
1985
1986         err = gk20a_init_pmu_reset_enable_hw(g);
1987         if (err)
1988                 return err;
1989
1990         if (support_gk20a_pmu()) {
1991                 err = gk20a_init_pmu_setup_sw(g);
1992                 if (err)
1993                         return err;
1994
1995                 err = gk20a_init_pmu_setup_hw1(g);
1996                 if (err)
1997                         return err;
1998
1999                 pmu->pmu_state = PMU_STATE_STARTING;
2000         }
2001
2002         return err;
2003 }
2004
2005 static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
2006                         void *param, u32 handle, u32 status)
2007 {
2008         struct pmu_gk20a *pmu = param;
2009         struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
2010
2011         gk20a_dbg_fn("");
2012
2013         if (status != 0) {
2014                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2015                 /* TBD: disable ELPG */
2016                 return;
2017         }
2018
2019         switch (elpg_msg->msg) {
2020         case PMU_PG_ELPG_MSG_INIT_ACK:
2021                 gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
2022                 break;
2023         case PMU_PG_ELPG_MSG_ALLOW_ACK:
2024                 gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
2025                 pmu->elpg_stat = PMU_ELPG_STAT_ON;
2026                 break;
2027         case PMU_PG_ELPG_MSG_DISALLOW_ACK:
2028                 gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
2029                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
2030                 if (pmu->pmu_state == PMU_STATE_ELPG_BOOTING)
2031                         pmu->pmu_state = PMU_STATE_ELPG_BOOTED;
2032                 schedule_work(&pmu->pg_init);
2033                 break;
2034         default:
2035                 gk20a_err(dev_from_gk20a(g),
2036                         "unsupported ELPG message : 0x%04x", elpg_msg->msg);
2037         }
2038
2039         return;
2040 }
2041
2042 static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
2043                         void *param, u32 handle, u32 status)
2044 {
2045         struct pmu_gk20a *pmu = param;
2046
2047         gk20a_dbg_fn("");
2048
2049         if (status != 0) {
2050                 gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
2051                 /* TBD: disable ELPG */
2052                 return;
2053         }
2054
2055         switch (msg->msg.pg.stat.sub_msg_id) {
2056         case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
2057                 gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
2058                 pmu->stat_dmem_offset = msg->msg.pg.stat.data;
2059                 break;
2060         default:
2061                 break;
2062         }
2063 }
2064
2065 static int pmu_init_powergating(struct pmu_gk20a *pmu)
2066 {
2067         struct gk20a *g = pmu->g;
2068         struct pmu_cmd cmd;
2069         u32 seq;
2070
2071         gk20a_dbg_fn("");
2072
2073         if (tegra_cpu_is_asim()) {
2074                 /* TBD: calculate threshold for silicon */
2075                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2076                                 PMU_PG_IDLE_THRESHOLD_SIM);
2077                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2078                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
2079         } else {
2080                 /* TBD: calculate threshold for silicon */
2081                 gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
2082                                 PMU_PG_IDLE_THRESHOLD);
2083                 gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
2084                                 PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
2085         }
2086
2087         /* init ELPG */
2088         memset(&cmd, 0, sizeof(struct pmu_cmd));
2089         cmd.hdr.unit_id = PMU_UNIT_PG;
2090         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2091         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2092         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2093         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
2094
2095         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT");
2096         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2097                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2098
2099         /* alloc dmem for powergating state log */
2100         pmu->stat_dmem_offset = 0;
2101         memset(&cmd, 0, sizeof(struct pmu_cmd));
2102         cmd.hdr.unit_id = PMU_UNIT_PG;
2103         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
2104         cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
2105         cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
2106         cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
2107         cmd.cmd.pg.stat.data = 0;
2108
2109         gk20a_dbg_pmu("cmd post PMU_PG_STAT_CMD_ALLOC_DMEM");
2110         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2111                         pmu_handle_pg_stat_msg, pmu, &seq, ~0);
2112
2113         /* disallow ELPG initially
2114            PMU ucode requires a disallow cmd before allow cmd */
2115         pmu->elpg_stat = PMU_ELPG_STAT_OFF; /* set for wait_event PMU_ELPG_STAT_OFF */
2116         memset(&cmd, 0, sizeof(struct pmu_cmd));
2117         cmd.hdr.unit_id = PMU_UNIT_PG;
2118         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
2119         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
2120         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
2121         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
2122
2123         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
2124         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2125                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
2126
2127         /* start with elpg disabled until first enable call */
2128         pmu->elpg_refcnt = 0;
2129
2130         pmu->pmu_state = PMU_STATE_ELPG_BOOTING;
2131
2132         return 0;
2133 }
2134
2135 static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2136 {
2137         struct gk20a *g = pmu->g;
2138         struct pmu_v *pv = &g->ops.pmu_ver;
2139         struct pmu_cmd cmd;
2140         struct pmu_payload payload;
2141         u32 seq;
2142         u32 data;
2143         int err = 0;
2144
2145         gk20a_dbg_fn("");
2146
2147         pmu->perfmon_ready = 0;
2148
2149         /* use counter #3 for GR && CE2 busy cycles */
2150         gk20a_writel(g, pwr_pmu_idle_mask_r(3),
2151                 pwr_pmu_idle_mask_gr_enabled_f() |
2152                 pwr_pmu_idle_mask_ce_2_enabled_f());
2153
2154         /* disable idle filtering for counters 3 and 6 */
2155         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
2156         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2157                         pwr_pmu_idle_ctrl_filter_m(),
2158                         pwr_pmu_idle_ctrl_value_busy_f() |
2159                         pwr_pmu_idle_ctrl_filter_disabled_f());
2160         gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
2161
2162         /* use counter #6 for total cycles */
2163         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
2164         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2165                         pwr_pmu_idle_ctrl_filter_m(),
2166                         pwr_pmu_idle_ctrl_value_always_f() |
2167                         pwr_pmu_idle_ctrl_filter_disabled_f());
2168         gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
2169
2170         /*
2171          * We don't want to disturb counters #3 and #6, which are used by
2172          * perfmon, so we add wiring also to counters #1 and #2 for
2173          * exposing raw counter readings.
2174          */
2175         gk20a_writel(g, pwr_pmu_idle_mask_r(1),
2176                 pwr_pmu_idle_mask_gr_enabled_f() |
2177                 pwr_pmu_idle_mask_ce_2_enabled_f());
2178
2179         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
2180         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2181                         pwr_pmu_idle_ctrl_filter_m(),
2182                         pwr_pmu_idle_ctrl_value_busy_f() |
2183                         pwr_pmu_idle_ctrl_filter_disabled_f());
2184         gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
2185
2186         data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
2187         data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
2188                         pwr_pmu_idle_ctrl_filter_m(),
2189                         pwr_pmu_idle_ctrl_value_always_f() |
2190                         pwr_pmu_idle_ctrl_filter_disabled_f());
2191         gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
2192
2193         if (!pmu->sample_buffer)
2194                 err = pmu->dmem.alloc(&pmu->dmem,
2195                                       &pmu->sample_buffer, 2 * sizeof(u16));
2196         if (err) {
2197                 gk20a_err(dev_from_gk20a(g),
2198                         "failed to allocate perfmon sample buffer");
2199                 return -ENOMEM;
2200         }
2201
2202         /* init PERFMON */
2203         memset(&cmd, 0, sizeof(struct pmu_cmd));
2204         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2205         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
2206         cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
2207         /* buffer to save counter values for pmu perfmon */
2208         pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
2209         (u16)pmu->sample_buffer);
2210         /* number of sample periods below lower threshold
2211            before pmu triggers perfmon decrease event
2212            TBD: = 15 */
2213         pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
2214         /* index of base counter, aka. always ticking counter */
2215         pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
2216         /* microseconds interval between pmu polls perf counters */
2217         pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
2218         /* number of perfmon counters
2219            counter #3 (GR and CE2) for gk20a */
2220         pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
2221         /* moving average window for sample periods
2222            TBD: = 3000000 / sample_period_us = 17 */
2223         pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
2224
2225         memset(&payload, 0, sizeof(struct pmu_payload));
2226         payload.in.buf = &pmu->perfmon_counter;
2227         payload.in.size = sizeof(struct pmu_perfmon_counter);
2228         payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
2229
2230         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_INIT");
2231         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2232                         NULL, NULL, &seq, ~0);
2233
2234         return 0;
2235 }
2236
2237 static int pmu_process_init_msg(struct pmu_gk20a *pmu,
2238                         struct pmu_msg *msg)
2239 {
2240         struct gk20a *g = pmu->g;
2241         struct pmu_v *pv = &g->ops.pmu_ver;
2242         union pmu_init_msg_pmu *init;
2243         struct pmu_sha1_gid_data gid_data;
2244         u32 i, tail = 0;
2245
2246         tail = pwr_pmu_msgq_tail_val_v(
2247                 gk20a_readl(g, pwr_pmu_msgq_tail_r()));
2248
2249         pmu_copy_from_dmem(pmu, tail,
2250                 (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
2251
2252         if (msg->hdr.unit_id != PMU_UNIT_INIT) {
2253                 gk20a_err(dev_from_gk20a(g),
2254                         "expecting init msg");
2255                 return -EINVAL;
2256         }
2257
2258         pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
2259                 (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
2260
2261         if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
2262                 gk20a_err(dev_from_gk20a(g),
2263                         "expecting init msg");
2264                 return -EINVAL;
2265         }
2266
2267         tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
2268         gk20a_writel(g, pwr_pmu_msgq_tail_r(),
2269                 pwr_pmu_msgq_tail_val_f(tail));
2270
2271         init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
2272         if (!pmu->gid_info.valid) {
2273
2274                 pmu_copy_from_dmem(pmu,
2275                         pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2276                         (u8 *)&gid_data,
2277                         sizeof(struct pmu_sha1_gid_data), 0);
2278
2279                 pmu->gid_info.valid =
2280                         (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
2281
2282                 if (pmu->gid_info.valid) {
2283
2284                         BUG_ON(sizeof(pmu->gid_info.gid) !=
2285                                 sizeof(gid_data.gid));
2286
2287                         memcpy(pmu->gid_info.gid, gid_data.gid,
2288                                 sizeof(pmu->gid_info.gid));
2289                 }
2290         }
2291
2292         for (i = 0; i < PMU_QUEUE_COUNT; i++)
2293                 pmu_queue_init(pmu, i, init);
2294
2295         if (!pmu->dmem.alloc)
2296                 gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
2297                                 pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2298                                 pv->get_pmu_init_msg_pmu_sw_mg_size(init),
2299                                 PMU_DMEM_ALLOC_ALIGNMENT);
2300
2301         pmu->pmu_ready = true;
2302
2303         return 0;
2304 }
2305
2306 static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
2307                         struct pmu_msg *msg, int *status)
2308 {
2309         struct gk20a *g = pmu->g;
2310         u32 read_size, bytes_read;
2311         int err;
2312
2313         *status = 0;
2314
2315         if (pmu_queue_is_empty(pmu, queue))
2316                 return false;
2317
2318         err = pmu_queue_open_read(pmu, queue);
2319         if (err) {
2320                 gk20a_err(dev_from_gk20a(g),
2321                         "fail to open queue %d for read", queue->id);
2322                 *status = err;
2323                 return false;
2324         }
2325
2326         err = pmu_queue_pop(pmu, queue, &msg->hdr,
2327                         PMU_MSG_HDR_SIZE, &bytes_read);
2328         if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2329                 gk20a_err(dev_from_gk20a(g),
2330                         "fail to read msg from queue %d", queue->id);
2331                 *status = err | -EINVAL;
2332                 goto clean_up;
2333         }
2334
2335         if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
2336                 pmu_queue_rewind(pmu, queue);
2337                 /* read again after rewind */
2338                 err = pmu_queue_pop(pmu, queue, &msg->hdr,
2339                                 PMU_MSG_HDR_SIZE, &bytes_read);
2340                 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
2341                         gk20a_err(dev_from_gk20a(g),
2342                                 "fail to read msg from queue %d", queue->id);
2343                         *status = err | -EINVAL;
2344                         goto clean_up;
2345                 }
2346         }
2347
2348         if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
2349                 gk20a_err(dev_from_gk20a(g),
2350                         "read invalid unit_id %d from queue %d",
2351                         msg->hdr.unit_id, queue->id);
2352                 *status = -EINVAL;
2353                 goto clean_up;
2354         }
2355
2356         if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
2357                 read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
2358                 err = pmu_queue_pop(pmu, queue, &msg->msg,
2359                         read_size, &bytes_read);
2360                 if (err || bytes_read != read_size) {
2361                         gk20a_err(dev_from_gk20a(g),
2362                                 "fail to read msg from queue %d", queue->id);
2363                         *status = err;
2364                         goto clean_up;
2365                 }
2366         }
2367
2368         err = pmu_queue_close(pmu, queue, true);
2369         if (err) {
2370                 gk20a_err(dev_from_gk20a(g),
2371                         "fail to close queue %d", queue->id);
2372                 *status = err;
2373                 return false;
2374         }
2375
2376         return true;
2377
2378 clean_up:
2379         err = pmu_queue_close(pmu, queue, false);
2380         if (err)
2381                 gk20a_err(dev_from_gk20a(g),
2382                         "fail to close queue %d", queue->id);
2383         return false;
2384 }
2385
2386 static int pmu_response_handle(struct pmu_gk20a *pmu,
2387                         struct pmu_msg *msg)
2388 {
2389         struct gk20a *g = pmu->g;
2390         struct pmu_sequence *seq;
2391         struct pmu_v *pv = &g->ops.pmu_ver;
2392         int ret = 0;
2393
2394         gk20a_dbg_fn("");
2395
2396         seq = &pmu->seq[msg->hdr.seq_id];
2397         if (seq->state != PMU_SEQ_STATE_USED &&
2398             seq->state != PMU_SEQ_STATE_CANCELLED) {
2399                 gk20a_err(dev_from_gk20a(g),
2400                         "msg for an unknown sequence %d", seq->id);
2401                 return -EINVAL;
2402         }
2403
2404         if (msg->hdr.unit_id == PMU_UNIT_RC &&
2405             msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
2406                 gk20a_err(dev_from_gk20a(g),
2407                         "unhandled cmd: seq %d", seq->id);
2408         }
2409         else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
2410                 if (seq->msg) {
2411                         if (seq->msg->hdr.size >= msg->hdr.size) {
2412                                 memcpy(seq->msg, msg, msg->hdr.size);
2413                                 if (pv->pmu_allocation_get_dmem_size(pmu,
2414                                 pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
2415                                         pmu_copy_from_dmem(pmu,
2416                                         pv->pmu_allocation_get_dmem_offset(pmu,
2417                                         pv->get_pmu_seq_out_a_ptr(seq)),
2418                                         seq->out_payload,
2419                                         pv->pmu_allocation_get_dmem_size(pmu,
2420                                         pv->get_pmu_seq_out_a_ptr(seq)), 0);
2421                                 }
2422                         } else {
2423                                 gk20a_err(dev_from_gk20a(g),
2424                                         "sequence %d msg buffer too small",
2425                                         seq->id);
2426                         }
2427                 }
2428         } else
2429                 seq->callback = NULL;
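        /*
         * Free the DMEM blocks that were allocated for this sequence's
         * in/out payloads when the command was posted.
         */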
2430         if (pv->pmu_allocation_get_dmem_size(pmu,
2431                         pv->get_pmu_seq_in_a_ptr(seq)) != 0)
2432                 pmu->dmem.free(&pmu->dmem,
2433                 pv->pmu_allocation_get_dmem_offset(pmu,
2434                 pv->get_pmu_seq_in_a_ptr(seq)),
2435                 pv->pmu_allocation_get_dmem_size(pmu,
2436                 pv->get_pmu_seq_in_a_ptr(seq)));
2437         if (pv->pmu_allocation_get_dmem_size(pmu,
2438                         pv->get_pmu_seq_out_a_ptr(seq)) != 0)
2439                 pmu->dmem.free(&pmu->dmem,
2440                 pv->pmu_allocation_get_dmem_offset(pmu,
2441                 pv->get_pmu_seq_out_a_ptr(seq)),
2442                 pv->pmu_allocation_get_dmem_size(pmu,
2443                 pv->get_pmu_seq_out_a_ptr(seq)));
2444
2445         if (seq->callback)
2446                 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
2447
2448         pmu_seq_release(pmu, seq);
2449
2450         /* TBD: notify client waiting for available dmem */
2451
2452         gk20a_dbg_fn("done");
2453
2454         return 0;
2455 }
2456
2457 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2458                                  u32 *var, u32 val);
2459
2460 static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
2461                         void *param, u32 handle, u32 status)
2462 {
2463         struct pmu_gk20a *pmu = param;
2464         gk20a_dbg_pmu("reply ZBC_TABLE_UPDATE");
2465         pmu->zbc_save_done = 1;
2466 }
2467
2468 void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
2469 {
2470         struct pmu_gk20a *pmu = &g->pmu;
2471         struct pmu_cmd cmd;
2472         u32 seq;
2473
2474         if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
2475                 return;
2476
2477         memset(&cmd, 0, sizeof(struct pmu_cmd));
2478         cmd.hdr.unit_id = PMU_UNIT_PG;
2479         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
2480         cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
2481         cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
2482
2483         pmu->zbc_save_done = 0;
2484
2485         gk20a_dbg_pmu("cmd post ZBC_TABLE_UPDATE");
2486         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
2487                            pmu_handle_zbc_msg, pmu, &seq, ~0);
2488         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
2489                               &pmu->zbc_save_done, 1);
2490         if (!pmu->zbc_save_done)
2491                 gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
2492 }
2493
2494 static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2495 {
2496         struct gk20a *g = pmu->g;
2497         struct pmu_v *pv = &g->ops.pmu_ver;
2498         struct pmu_cmd cmd;
2499         struct pmu_payload payload;
2500         u32 current_rate = 0;
2501         u32 seq;
2502
2503         /* PERFMON Start */
2504         memset(&cmd, 0, sizeof(struct pmu_cmd));
2505         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2506         cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
2507         pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
2508                 PMU_PERFMON_CMD_ID_START);
2509         pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
2510                 PMU_DOMAIN_GROUP_PSTATE);
2511         pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
2512                 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
2513
2514         current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
2515         if (current_rate >= gpc_pll_params.max_freq)
2516                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2517                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2518         else if (current_rate <= gpc_pll_params.min_freq)
2519                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2520                 PMU_PERFMON_FLAG_ENABLE_INCREASE);
2521         else
2522                 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2523                 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2524                 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2525
2526         pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2527                 pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
2528                 PMU_PERFMON_FLAG_CLEAR_PREV);
2529
2530         memset(&payload, 0, sizeof(struct pmu_payload));
2531
2532         /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
2533         pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
2534         /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
2535         pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
2536         pmu->perfmon_counter.valid = true;
2537
2538         payload.in.buf = &pmu->perfmon_counter;
2539         payload.in.size = sizeof(pmu->perfmon_counter);
2540         payload.in.offset =
2541                 pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
2542
2543         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_START");
2544         gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
2545                         NULL, NULL, &seq, ~0);
2546
2547         return 0;
2548 }
2549
2550 static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
2551 {
2552         struct gk20a *g = pmu->g;
2553         struct pmu_cmd cmd;
2554         u32 seq;
2555
2556         /* PERFMON Stop */
2557         memset(&cmd, 0, sizeof(struct pmu_cmd));
2558         cmd.hdr.unit_id = PMU_UNIT_PERFMON;
2559         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
2560         cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
2561
2562         gk20a_dbg_pmu("cmd post PMU_PERFMON_CMD_ID_STOP");
2563         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
2564                         NULL, NULL, &seq, ~0);
2565         return 0;
2566 }
2567
2568 static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2569                         struct pmu_perfmon_msg *msg)
2570 {
2571         struct gk20a *g = pmu->g;
2572         u32 rate;
2573
2574         gk20a_dbg_fn("");
2575
2576         switch (msg->msg_type) {
2577         case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
2578                 gk20a_dbg_pmu("perfmon increase event: "
2579                         "state_id %d, group_id %d, pct %d",
2580                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2581                 /* increase gk20a clock freq by 20% */
2582                 rate = gk20a_clk_get_rate(g);
2583                 gk20a_clk_set_rate(g, rate * 6 / 5);
2584                 break;
2585         case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
2586                 gk20a_dbg_pmu("perfmon decrease event: "
2587                         "state_id %d, group_id %d, pct %d",
2588                         msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2589                 /* decrease gk20a clock freq by 30% */
2590                 rate = gk20a_clk_get_rate(g);
2591                 gk20a_clk_set_rate(g, (rate / 10) * 7);
2592                 break;
2593         case PMU_PERFMON_MSG_ID_INIT_EVENT:
2594                 pmu->perfmon_ready = 1;
2595                 gk20a_dbg_pmu("perfmon init event");
2596                 break;
2597         default:
2598                 break;
2599         }
2600
2601         /* restart sampling */
2602         if (IS_ENABLED(CONFIG_GK20A_PERFMON))
2603                 return pmu_perfmon_start_sampling(pmu);
2604         return 0;
2605 }
2606
2608 static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
2609 {
2610         int err = 0;
2611
2612         gk20a_dbg_fn("");
2613
2614         switch (msg->hdr.unit_id) {
2615         case PMU_UNIT_PERFMON:
2616                 err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
2617                 break;
2618         default:
2619                 break;
2620         }
2621
2622         return err;
2623 }
2624
2625 static int pmu_process_message(struct pmu_gk20a *pmu)
2626 {
2627         struct pmu_msg msg;
2628         int status;
2629
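        /*
         * Until pmu_ready is set, the only message expected is the PMU INIT
         * message; consume it, then kick off power-gating and perfmon init.
         */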
2630         if (unlikely(!pmu->pmu_ready)) {
2631                 pmu_process_init_msg(pmu, &msg);
2632                 pmu_init_powergating(pmu);
2633                 pmu_init_perfmon(pmu);
2634                 return 0;
2635         }
2636
2637         while (pmu_read_message(pmu,
2638                 &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
2639
2640                 gk20a_dbg_pmu("read msg hdr: "
2641                                 "unit_id = 0x%08x, size = 0x%08x, "
2642                                 "ctrl_flags = 0x%08x, seq_id = 0x%08x",
2643                                 msg.hdr.unit_id, msg.hdr.size,
2644                                 msg.hdr.ctrl_flags, msg.hdr.seq_id);
2645
2646                 msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
2647
2648                 if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
2649                         pmu_handle_event(pmu, &msg);
2650                 } else {
2651                         pmu_response_handle(pmu, &msg);
2652                 }
2653         }
2654
2655         return 0;
2656 }
2657
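/*
 * Poll *var with exponential back-off until it reaches val, servicing any
 * pending PMU interrupt directly, and give up after 'timeout' ms (never on
 * pre-silicon platforms).
 */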
2658 static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
2659                                  u32 *var, u32 val)
2660 {
2661         struct gk20a *g = pmu->g;
2662         unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
2663         unsigned long delay = GR_IDLE_CHECK_DEFAULT;
2664
2665         do {
2666                 if (*var == val)
2667                         return 0;
2668
2669                 if (gk20a_readl(g, pwr_falcon_irqstat_r()))
2670                         gk20a_pmu_isr(g);
2671
2672                 usleep_range(delay, delay * 2);
2673                 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
2674         } while (time_before(jiffies, end_jiffies) ||
2675                         !tegra_platform_is_silicon());
2676
2677         return -ETIMEDOUT;
2678 }
2679
2680 static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
2681 {
2682         struct gk20a *g = pmu->g;
2683         struct pmu_pg_stats stats;
2684
2685         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
2686                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
2687
2688         gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
2689                 stats.pg_entry_start_timestamp);
2690         gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
2691                 stats.pg_exit_start_timestamp);
2692         gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
2693                 stats.pg_ingating_start_timestamp);
2694         gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
2695                 stats.pg_ungating_start_timestamp);
2696         gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
2697                 stats.pg_avg_entry_time_us);
2698         gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
2699                 stats.pg_avg_exit_time_us);
2700         gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
2701                 stats.pg_ingating_cnt);
2702         gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
2703                 stats.pg_ingating_time_us);
2704         gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
2705                 stats.pg_ungating_count);
2706         gk20a_dbg_pmu("pg_ungating_time_us : 0x%08x",
2707                 stats.pg_ungating_time_us);
2708         gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
2709                 stats.pg_gating_cnt);
2710         gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
2711                 stats.pg_gating_deny_cnt);
2712
2713         /*
2714            Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
2715            in .nm file, e.g. 0x1000066c. use 0x66c.
2716         u32 i, val[20];
2717         pmu_copy_from_dmem(pmu, 0x66c,
2718                 (u8 *)val, sizeof(val), 0);
2719         gk20a_dbg_pmu("elpg log begin");
2720         for (i = 0; i < 20; i++)
2721                 gk20a_dbg_pmu("0x%08x", val[i]);
2722         gk20a_dbg_pmu("elpg log end");
2723         */
2724
2725         gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
2726                 gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
2727         gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
2728                 gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
2729         gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
2730                 gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
2731         gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
2732                 gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
2733         gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
2734                 gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
2735
2736         gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
2737                 gk20a_readl(g, pwr_pmu_idle_count_r(3)));
2738         gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
2739                 gk20a_readl(g, pwr_pmu_idle_count_r(4)));
2740         gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
2741                 gk20a_readl(g, pwr_pmu_idle_count_r(7)));
2742
2743         /*
2744          TBD: script can't generate those registers correctly
2745         gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
2746                 gk20a_readl(g, pwr_pmu_idle_status_r()));
2747         gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
2748                 gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
2749         */
2750 }
2751
2752 static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
2753 {
2754         struct gk20a *g = pmu->g;
2755         int i;
2756
2757         gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
2758                 gk20a_readl(g, pwr_falcon_os_r()));
2759         gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
2760                 gk20a_readl(g, pwr_falcon_cpuctl_r()));
2761         gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
2762                 gk20a_readl(g, pwr_falcon_idlestate_r()));
2763         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
2764                 gk20a_readl(g, pwr_falcon_mailbox0_r()));
2765         gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
2766                 gk20a_readl(g, pwr_falcon_mailbox1_r()));
2767         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
2768                 gk20a_readl(g, pwr_falcon_irqstat_r()));
2769         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
2770                 gk20a_readl(g, pwr_falcon_irqmode_r()));
2771         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
2772                 gk20a_readl(g, pwr_falcon_irqmask_r()));
2773         gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
2774                 gk20a_readl(g, pwr_falcon_irqdest_r()));
2775
2776         for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
2777                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
2778                         i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
2779
2780         for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
2781                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
2782                         i, gk20a_readl(g, pwr_pmu_debug_r(i)));
2783
2784         for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
2785                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2786                         pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
2787                         pwr_pmu_falcon_icd_cmd_idx_f(i));
2788                 gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
2789                         i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2790         }
2791
2792         i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
2793         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
2794         if (i != 0) {
2795                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
2796                         gk20a_readl(g, pwr_pmu_bar0_addr_r()));
2797                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
2798                         gk20a_readl(g, pwr_pmu_bar0_data_r()));
2799                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
2800                         gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
2801                 gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
2802                         gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
2803         }
2804
2805         i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
2806         gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
2807
2808         i = gk20a_readl(g, pwr_falcon_exterrstat_r());
2809         gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
2810         if (pwr_falcon_exterrstat_valid_v(i) ==
2811                         pwr_falcon_exterrstat_valid_true_v()) {
2812                 gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
2813                         gk20a_readl(g, pwr_falcon_exterraddr_r()));
2814                 gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
2815                         gk20a_readl(g, mc_enable_r()));
2816         }
2817
2818         gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
2819                 gk20a_readl(g, pwr_falcon_engctl_r()));
2820         gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
2821                 gk20a_readl(g, pwr_falcon_curctx_r()));
2822         gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
2823                 gk20a_readl(g, pwr_falcon_nxtctx_r()));
2824
2825         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2826                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2827                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
2828         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
2829                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2830
2831         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2832                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2833                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
2834         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
2835                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2836
2837         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2838                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2839                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
2840         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
2841                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2842
2843         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2844                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2845                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
2846         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
2847                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2848
2849         gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2850                 pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2851                 pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
2852         gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
2853                 gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2854
2855         for (i = 0; i < 4; i++) {
2856                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2857                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2858                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
2859                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
2860                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2861
2862                 gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
2863                         pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
2864                         pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
2865                 gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
2866                         gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
2867         }
2868         gk20a_err(dev_from_gk20a(g), "elpg stat: %d\n",
2869                         pmu->elpg_stat);
2870
2871         /* PMU may crash due to FECS crash. Dump FECS status */
2872         gk20a_fecs_dump_falcon_stats(g);
2873 }
2874
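/*
 * PMU interrupt service routine.
 *
 * Pending interrupts are irqstat masked by both irqmask and irqdest, so only
 * sources that are enabled and routed to the CPU are handled here.  Halt and
 * external-error interrupts dump the falcon state (full handling is not
 * implemented; exterr additionally clears the valid bit in exterrstat),
 * swgen0 triggers message-queue processing, and the handled bits are cleared
 * via irqsclr.  If new messages arrived meanwhile, swgen0 is re-raised
 * through irqsset so the ISR runs again.
 */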
2875 void gk20a_pmu_isr(struct gk20a *g)
2876 {
2877         struct pmu_gk20a *pmu = &g->pmu;
2878         struct pmu_queue *queue;
2879         u32 intr, mask;
2880         bool recheck = false;
2881
2882         gk20a_dbg_fn("");
2883
2884         mutex_lock(&pmu->isr_enable_lock);
2885         if (!pmu->isr_enabled) {
2886                 mutex_unlock(&pmu->isr_enable_lock);
2887                 return;
2888         }
2889
2890         mutex_lock(&pmu->isr_mutex);
2891
2892         mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
2893                 gk20a_readl(g, pwr_falcon_irqdest_r());
2894
2895         intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
2896
2897         gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);
2898
2899         if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
2900                 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2901                 mutex_unlock(&pmu->isr_mutex);
2902                 mutex_unlock(&pmu->isr_enable_lock);
2903                 return;
2904         }
2905
2906         if (intr & pwr_falcon_irqstat_halt_true_f()) {
2907                 gk20a_err(dev_from_gk20a(g),
2908                         "pmu halt intr not implemented");
2909                 pmu_dump_falcon_stats(pmu);
2910         }
2911         if (intr & pwr_falcon_irqstat_exterr_true_f()) {
2912                 gk20a_err(dev_from_gk20a(g),
2913                         "pmu exterr intr not implemented. Clearing interrupt.");
2914                 pmu_dump_falcon_stats(pmu);
2915
2916                 gk20a_writel(g, pwr_falcon_exterrstat_r(),
2917                         gk20a_readl(g, pwr_falcon_exterrstat_r()) &
2918                                 ~pwr_falcon_exterrstat_valid_m());
2919         }
2920         if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
2921                 pmu_process_message(pmu);
2922                 recheck = true;
2923         }
2924
2925         gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
2926
2927         if (recheck) {
2928                 queue = &pmu->queue[PMU_MESSAGE_QUEUE];
2929                 if (!pmu_queue_is_empty(pmu, queue))
2930                         gk20a_writel(g, pwr_falcon_irqsset_r(),
2931                                 pwr_falcon_irqsset_swgen0_set_f());
2932         }
2933
2934         mutex_unlock(&pmu->isr_mutex);
2935         mutex_unlock(&pmu->isr_enable_lock);
2936 }
2937
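/*
 * Sanity-check a command before posting it: the target queue must be a
 * software command queue, the command header must be at least
 * PMU_CMD_HDR_SIZE and no larger than half the queue size, the unit id must
 * be valid, an optional reply message must be at least PMU_MSG_HDR_SIZE, and
 * any in/out payload needs a non-NULL buffer with a non-zero size whose
 * allocation descriptor fits inside the command body at the given offset.
 * Returns true when the command is acceptable, false otherwise.
 */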
2938 static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
2939                         struct pmu_msg *msg, struct pmu_payload *payload,
2940                         u32 queue_id)
2941 {
2942         struct gk20a *g = pmu->g;
2943         struct pmu_queue *queue;
2944         u32 in_size, out_size;
2945
2946         if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
2947                 goto invalid_cmd;
2948
2949         queue = &pmu->queue[queue_id];
2950         if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
2951                 goto invalid_cmd;
2952
2953         if (cmd->hdr.size > (queue->size >> 1))
2954                 goto invalid_cmd;
2955
2956         if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
2957                 goto invalid_cmd;
2958
2959         if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
2960                 goto invalid_cmd;
2961
2962         if (payload == NULL)
2963                 return true;
2964
2965         if (payload->in.buf == NULL && payload->out.buf == NULL)
2966                 goto invalid_cmd;
2967
2968         if ((payload->in.buf != NULL && payload->in.size == 0) ||
2969             (payload->out.buf != NULL && payload->out.size == 0))
2970                 goto invalid_cmd;
2971
2972         in_size = PMU_CMD_HDR_SIZE;
2973         if (payload->in.buf) {
2974                 in_size += payload->in.offset;
2975                 in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
2976         }
2977
2978         out_size = PMU_CMD_HDR_SIZE;
2979         if (payload->out.buf) {
2980                 out_size += payload->out.offset;
2981                 out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
2982         }
2983
2984         if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
2985                 goto invalid_cmd;
2986
2987
2988         if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
2989             (payload->out.offset != 0 && payload->out.buf == NULL))
2990                 goto invalid_cmd;
2991
2992         return true;
2993
2994 invalid_cmd:
2995         gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
2996                 "queue_id=%d,\n"
2997                 "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
2998                 "payload in=%p, in_size=%d, in_offset=%d,\n"
2999                 "payload out=%p, out_size=%d, out_offset=%d",
3000                 queue_id, cmd->hdr.size, cmd->hdr.unit_id,
3001                 msg, msg ? msg->hdr.size : ~0,
3002                 &payload->in, payload->in.size, payload->in.offset,
3003                 &payload->out, payload->out.size, payload->out.offset);
3004
3005         return false;
3006 }
3007
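/*
 * Reserve space in the target queue, copy the command in and close the
 * queue.  If the queue is temporarily full (-EAGAIN), the open is retried
 * with a 1-2 ms sleep until 'timeout' milliseconds have elapsed.
 */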
3008 static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
3009                         u32 queue_id, unsigned long timeout)
3010 {
3011         struct gk20a *g = pmu->g;
3012         struct pmu_queue *queue;
3013         unsigned long end_jiffies = jiffies +
3014                 msecs_to_jiffies(timeout);
3015         int err;
3016
3017         gk20a_dbg_fn("");
3018
3019         queue = &pmu->queue[queue_id];
3020
3021         do {
3022                 err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
3023                 if (err == -EAGAIN && time_before(jiffies, end_jiffies))
3024                         usleep_range(1000, 2000);
3025                 else
3026                         break;
3027         } while (1);
3028
3029         if (err)
3030                 goto clean_up;
3031
3032         pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
3033
3034         err = pmu_queue_close(pmu, queue, true);
3035
3036 clean_up:
3037         if (err)
3038                 gk20a_err(dev_from_gk20a(g),
3039                         "fail to write cmd to queue %d", queue_id);
3040         else
3041                 gk20a_dbg_fn("done");
3042
3043         return err;
3044 }
3045
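/*
 * Post a command to one of the PMU command queues.
 *
 * A free sequence slot tracks the command: its descriptor is returned via
 * 'seq_desc' and 'callback' (if any) is invoked from message processing once
 * the PMU replies.  When a payload is supplied, DMEM is allocated for the
 * in/out buffers, the input buffer is copied to DMEM and the allocation
 * descriptors are patched into the command body at the caller's offsets.
 *
 * Minimal usage sketch, modelled on gk20a_pmu_enable_elpg_locked() below
 * (error handling omitted):
 *
 *	struct pmu_cmd cmd;
 *	u32 seq;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.hdr.unit_id = PMU_UNIT_PG;
 *	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
 *	cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
 *	cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
 *	cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
 *	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
 *			pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
 */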
3046 int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3047                 struct pmu_msg *msg, struct pmu_payload *payload,
3048                 u32 queue_id, pmu_callback callback, void* cb_param,
3049                 u32 *seq_desc, unsigned long timeout)
3050 {
3051         struct pmu_gk20a *pmu = &g->pmu;
3052         struct pmu_v *pv = &g->ops.pmu_ver;
3053         struct pmu_sequence *seq;
3054         void *in = NULL, *out = NULL;
3055         int err;
3056
3057         gk20a_dbg_fn("");
3058
3059         BUG_ON(!cmd);
3060         BUG_ON(!seq_desc);
3061         BUG_ON(!pmu->pmu_ready);
3062
3063         if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
3064                 return -EINVAL;
3065
3066         err = pmu_seq_acquire(pmu, &seq);
3067         if (err)
3068                 return err;
3069
3070         cmd->hdr.seq_id = seq->id;
3071
3072         cmd->hdr.ctrl_flags = 0;
3073         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
3074         cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
3075
3076         seq->callback = callback;
3077         seq->cb_params = cb_param;
3078         seq->msg = msg;
3079         seq->out_payload = NULL;
3080         seq->desc = pmu->next_seq_desc++;
3081
3082         if (payload)
3083                 seq->out_payload = payload->out.buf;
3084
3085         *seq_desc = seq->desc;
3086
3087         if (payload && payload->in.offset != 0) {
3088                 pv->set_pmu_allocation_ptr(pmu, &in,
3089                 ((u8 *)&cmd->cmd + payload->in.offset));
3090
3091                 if (payload->in.buf != payload->out.buf)
3092                         pv->pmu_allocation_set_dmem_size(pmu, in,
3093                         (u16)payload->in.size);
3094                 else
3095                         pv->pmu_allocation_set_dmem_size(pmu, in,
3096                         (u16)max(payload->in.size, payload->out.size));
3097
3098                 err = pmu->dmem.alloc(&pmu->dmem,
3099                 pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
3100                 pv->pmu_allocation_get_dmem_size(pmu, in));
3101                 if (err)
3102                         goto clean_up;
3103
3104                 pmu_copy_to_dmem(pmu,
3105                         pv->pmu_allocation_get_dmem_offset(pmu, in),
3106                         payload->in.buf, payload->in.size, 0);
3107                 pv->pmu_allocation_set_dmem_size(pmu,
3108                 pv->get_pmu_seq_in_a_ptr(seq),
3109                 pv->pmu_allocation_get_dmem_size(pmu, in));
3110                 pv->pmu_allocation_set_dmem_offset(pmu,
3111                 pv->get_pmu_seq_in_a_ptr(seq),
3112                 pv->pmu_allocation_get_dmem_offset(pmu, in));
3113         }
3114
3115         if (payload && payload->out.offset != 0) {
3116                 pv->set_pmu_allocation_ptr(pmu, &out,
3117                 ((u8 *)&cmd->cmd + payload->out.offset));
3118                 pv->pmu_allocation_set_dmem_size(pmu, out,
3119                 (u16)payload->out.size);
3120
3121                 if (payload->out.buf != payload->in.buf) {
3122                         err = pmu->dmem.alloc(&pmu->dmem,
3123                         pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
3124                         pv->pmu_allocation_get_dmem_size(pmu, out));
3125                         if (err)
3126                                 goto clean_up;
3127                 } else {
3128                         BUG_ON(in == NULL);
3129                         pv->pmu_allocation_set_dmem_offset(pmu, out,
3130                         pv->pmu_allocation_get_dmem_offset(pmu, in));
3131                 }
3132
3133                 pv->pmu_allocation_set_dmem_size(pmu,
3134                 pv->get_pmu_seq_out_a_ptr(seq),
3135                 pv->pmu_allocation_get_dmem_size(pmu, out));
3136                 pv->pmu_allocation_set_dmem_offset(pmu,
3137                 pv->get_pmu_seq_out_a_ptr(seq),
3138                 pv->pmu_allocation_get_dmem_offset(pmu, out));
3139         }
3140
3141         seq->state = PMU_SEQ_STATE_USED;
3142         err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
3143         if (err)
3144                 seq->state = PMU_SEQ_STATE_PENDING;
3145
3146         gk20a_dbg_fn("done");
3147
3148         return 0;
3149
3150 clean_up:
3151         gk20a_dbg_fn("fail");
3152         if (in)
3153                 pmu->dmem.free(&pmu->dmem,
3154                 pv->pmu_allocation_get_dmem_offset(pmu, in),
3155                 pv->pmu_allocation_get_dmem_size(pmu, in));
3156         if (out)
3157                 pmu->dmem.free(&pmu->dmem,
3158                 pv->pmu_allocation_get_dmem_offset(pmu, out),
3159                 pv->pmu_allocation_get_dmem_size(pmu, out));
3160
3161         pmu_seq_release(pmu, seq);
3162         return err;
3163 }
3164
3165 static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
3166 {
3167         struct pmu_gk20a *pmu = &g->pmu;
3168         struct pmu_cmd cmd;
3169         u32 seq, status;
3170
3171         gk20a_dbg_fn("");
3172
3173         memset(&cmd, 0, sizeof(struct pmu_cmd));
3174         cmd.hdr.unit_id = PMU_UNIT_PG;
3175         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3176         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3177         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3178         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
3179
3180         /* No need to wait for an ack on ELPG enable, but mark the state
3181            pending so a follow-up ELPG disable can synchronize with it. */
3182         pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
3183
3184         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_ALLOW");
3185         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3186                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3187
3188         BUG_ON(status != 0);
3189
3190         gk20a_dbg_fn("done");
3191         return 0;
3192 }
3193
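/*
 * Reference-counted ELPG enable.  The ALLOW command is sent only when the
 * refcount is positive, the golden context image has been created and ELPG
 * is currently fully off; a refcount above one is reported as a probable
 * mismatch.  Serialized against disable via elpg_mutex.
 */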
3194 int gk20a_pmu_enable_elpg(struct gk20a *g)
3195 {
3196         struct pmu_gk20a *pmu = &g->pmu;
3197         struct gr_gk20a *gr = &g->gr;
3198
3199         int ret = 0;
3200
3201         gk20a_dbg_fn("");
3202
3203         mutex_lock(&pmu->elpg_mutex);
3204
3205         pmu->elpg_refcnt++;
3206         if (pmu->elpg_refcnt <= 0)
3207                 goto exit_unlock;
3208
3209         /* something is not right if we end up in the following code path */
3210         if (unlikely(pmu->elpg_refcnt > 1)) {
3211                 gk20a_warn(dev_from_gk20a(g),
3212                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3213                 __func__, pmu->elpg_refcnt);
3214                 WARN_ON(1);
3215         }
3216
3217         /* do NOT enable elpg until the golden ctx is created, since it
3218            is tied to the ctx that ELPG saves and restores. */
3219         if (unlikely(!gr->ctx_vars.golden_image_initialized))
3220                 goto exit_unlock;
3221
3222         /* return if ELPG is already on or on_pending or off_on_pending */
3223         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
3224                 goto exit_unlock;
3225
3226         ret = gk20a_pmu_enable_elpg_locked(g);
3227
3228 exit_unlock:
3229         mutex_unlock(&pmu->elpg_mutex);
3230         gk20a_dbg_fn("done");
3231         return ret;
3232 }
3233
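/*
 * Reference-counted ELPG disable.  If other users still hold references the
 * call warns about the probable mismatch and returns.  A pending off->on
 * transition is simply cancelled, a pending enable is first waited on
 * (ELPG_ALLOW_ACK), and only when ELPG is actually on is the DISALLOW
 * command posted and its acknowledgement awaited.
 */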
3234 int gk20a_pmu_disable_elpg(struct gk20a *g)
3235 {
3236         struct pmu_gk20a *pmu = &g->pmu;
3237         struct pmu_cmd cmd;
3238         u32 seq;
3239         int ret = 0;
3240
3241         gk20a_dbg_fn("");
3242
3243         mutex_lock(&pmu->elpg_mutex);
3244
3245         pmu->elpg_refcnt--;
3246         if (pmu->elpg_refcnt > 0) {
3247                 gk20a_warn(dev_from_gk20a(g),
3248                 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
3249                 __func__, pmu->elpg_refcnt);
3250                 WARN_ON(1);
3251                 ret = 0;
3252                 goto exit_unlock;
3253         }
3254
3255         /* cancel off_on_pending and return */
3256         if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
3257                 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
3258                 ret = 0;
3259                 goto exit_reschedule;
3260         }
3261         /* wait if on_pending */
3262         else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
3263
3264                 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3265                                       &pmu->elpg_stat, PMU_ELPG_STAT_ON);
3266
3267                 if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3268                         gk20a_err(dev_from_gk20a(g),
3269                                 "ELPG_ALLOW_ACK failed, elpg_stat=%d",
3270                                 pmu->elpg_stat);
3271                         pmu_dump_elpg_stats(pmu);
3272                         pmu_dump_falcon_stats(pmu);
3273                         ret = -EBUSY;
3274                         goto exit_unlock;
3275                 }
3276         }
3277         /* return if ELPG is already off */
3278         else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
3279                 ret = 0;
3280                 goto exit_reschedule;
3281         }
3282
3283         memset(&cmd, 0, sizeof(struct pmu_cmd));
3284         cmd.hdr.unit_id = PMU_UNIT_PG;
3285         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
3286         cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
3287         cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
3288         cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
3289
3290         pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
3291
3292         gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW");
3293         gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3294                         pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
3295
3296         pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
3297                               &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
3298         if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
3299                 gk20a_err(dev_from_gk20a(g),
3300                         "ELPG_DISALLOW_ACK failed");
3301                 pmu_dump_elpg_stats(pmu);
3302                 pmu_dump_falcon_stats(pmu);
3303                 ret = -EBUSY;
3304                 goto exit_unlock;
3305         }
3306
3307 exit_reschedule:
3308 exit_unlock:
3309         mutex_unlock(&pmu->elpg_mutex);
3310         gk20a_dbg_fn("done");
3311         return ret;
3312 }
3313
3314 int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
3315 {
3316         struct pmu_gk20a *pmu = &g->pmu;
3317         int err;
3318
3319         gk20a_dbg_fn("");
3320
3321         if (enable)
3322                 err = pmu_perfmon_start_sampling(pmu);
3323         else
3324                 err = pmu_perfmon_stop_sampling(pmu);
3325
3326         return err;
3327 }
3328
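/*
 * Tear the PMU down: cancel pending pg_init work, fold the hardware ELPG
 * residency counters into the software totals, disable ELPG, disable the
 * falcon and its ISR, and reset the PMU software state to OFF.
 */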
3329 int gk20a_pmu_destroy(struct gk20a *g)
3330 {
3331         struct pmu_gk20a *pmu = &g->pmu;
3332         u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;
3333
3334         gk20a_dbg_fn("");
3335
3336         if (!support_gk20a_pmu())
3337                 return 0;
3338
3339         /* make sure the pending operations are finished before we continue */
3340         cancel_work_sync(&pmu->pg_init);
3341
3342         gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
3343                 &elpg_ungating_time, &gating_cnt);
3344
3345         gk20a_pmu_disable_elpg(g);
3346         pmu->initialized = false;
3347
3348         /* update the s/w ELPG residency counters */
3349         g->pg_ingating_time_us += (u64)elpg_ingating_time;
3350         g->pg_ungating_time_us += (u64)elpg_ungating_time;
3351         g->pg_gating_cnt += gating_cnt;
3352
3353         mutex_lock(&pmu->isr_enable_lock);
3354         pmu_enable(pmu, false);
3355         pmu->isr_enabled = false;
3356         mutex_unlock(&pmu->isr_enable_lock);
3357
3358         pmu->pmu_state = PMU_STATE_OFF;
3359         pmu->pmu_ready = false;
3360         pmu->perfmon_ready = false;
3361         pmu->zbc_ready = false;
3362
3363         gk20a_dbg_fn("done");
3364         return 0;
3365 }
3366
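/*
 * Read the perfmon load sample from DMEM.  The raw 16-bit sample appears to
 * be scaled by 10 (see the division below), so 'load' presumably ends up as
 * a percentage.  Reports zero load until perfmon sampling is ready.
 */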
3367 int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
3368 {
3369         struct pmu_gk20a *pmu = &g->pmu;
3370         u16 _load = 0;
3371
3372         if (!pmu->perfmon_ready) {
3373                 *load = 0;
3374                 return 0;
3375         }
3376
3377         pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3378         *load = _load / 10;
3379
3380         return 0;
3381 }
3382
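/*
 * Snapshot the PMU idle counters used for load computation: counter 1 is
 * read as the busy-cycle count and counter 2 as the total-cycle count.  The
 * rmb() orders the two register reads.
 */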
3383 void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
3384                                  u32 *total_cycles)
3385 {
3386         if (!g->power_on) {
3387                 *busy_cycles = 0;
3388                 *total_cycles = 0;
3389                 return;
3390         }
3391
3392         gk20a_busy(g->dev);
3393         *busy_cycles = pwr_pmu_idle_count_value_v(
3394                 gk20a_readl(g, pwr_pmu_idle_count_r(1)));
3395         rmb();
3396         *total_cycles = pwr_pmu_idle_count_value_v(
3397                 gk20a_readl(g, pwr_pmu_idle_count_r(2)));
3398         gk20a_idle(g->dev);
3399 }
3400
3401 void gk20a_pmu_reset_load_counters(struct gk20a *g)
3402 {
3403         u32 reg_val = pwr_pmu_idle_count_reset_f(1);
3404
3405         if (!g->power_on)
3406                 return;
3407
3408         gk20a_busy(g->dev);
3409         gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
3410         wmb();
3411         gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
3412         gk20a_idle(g->dev);
3413 }
3414
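/*
 * Fetch the ELPG gating statistics the PMU keeps in DMEM: time spent gated,
 * time spent ungated and the number of gating events.  All three read back
 * as zero while the PMU is not initialized.
 */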
3415 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
3416                         u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
3417 {
3418         struct pmu_gk20a *pmu = &g->pmu;
3419         struct pmu_pg_stats stats;
3420
3421         if (!pmu->initialized) {
3422                 *ingating_time = 0;
3423                 *ungating_time = 0;
3424                 *gating_cnt = 0;
3425                 return 0;
3426         }
3427
3428         pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
3429                 (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
3430
3431         *ingating_time = stats.pg_ingating_time_us;
3432         *ungating_time = stats.pg_ungating_time_us;
3433         *gating_cnt = stats.pg_gating_cnt;
3434
3435         return 0;
3436 }
3437
3438 /* Send an Adaptive Power (AP) related command to PMU */
3439 static int gk20a_pmu_ap_send_command(struct gk20a *g,
3440                         union pmu_ap_cmd *p_ap_cmd, bool b_block)
3441 {
3442         struct pmu_gk20a *pmu = &g->pmu;
3443         /* FIXME: where is the PG structure defined?? */
3444         u32 status = 0;
3445         struct pmu_cmd cmd;
3446         u32 seq;
3447         pmu_callback p_callback = NULL;
3448
3449         memset(&cmd, 0, sizeof(struct pmu_cmd));
3450
3451         /* Copy common members */
3452         cmd.hdr.unit_id = PMU_UNIT_PG;
3453         cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
3454
3455         cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
3456         cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
3457
3458         /* Copy other members of command */
3459         switch (p_ap_cmd->cmn.cmd_id) {
3460         case PMU_AP_CMD_ID_INIT:
3461                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT");
3462                 cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
3463                         p_ap_cmd->init.pg_sampling_period_us;
3464                 p_callback = ap_callback_init_and_enable_ctrl;
3465                 break;
3466
3467         case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
3468                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL");
3469                 cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
3470                 p_ap_cmd->init_and_enable_ctrl.ctrl_id;
3471                 memcpy(
3472                 (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
3473                         (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
3474                         sizeof(struct pmu_ap_ctrl_init_params));
3475
3476                 p_callback = ap_callback_init_and_enable_ctrl;
3477                 break;
3478
3479         case PMU_AP_CMD_ID_ENABLE_CTRL:
3480                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_ENABLE_CTRL");
3481                 cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
3482                         p_ap_cmd->enable_ctrl.ctrl_id;
3483                 break;
3484
3485         case PMU_AP_CMD_ID_DISABLE_CTRL:
3486                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_DISABLE_CTRL");
3487                 cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
3488                         p_ap_cmd->disable_ctrl.ctrl_id;
3489                 break;
3490
3491         case PMU_AP_CMD_ID_KICK_CTRL:
3492                 gk20a_dbg_pmu("cmd post PMU_AP_CMD_ID_KICK_CTRL");
3493                 cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
3494                         p_ap_cmd->kick_ctrl.ctrl_id;
3495                 cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
3496                         p_ap_cmd->kick_ctrl.skip_count;
3497                 break;
3498
3499         default:
3500                 gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
3501                         __func__, p_ap_cmd->cmn.cmd_id);
3502                 return 0x2f;
3503         }
3504
3505         status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
3506                         p_callback, pmu, &seq, ~0);
3507
3508         if (status) {
3509                 gk20a_dbg_pmu(
3510                         "%s: Unable to submit Adaptive Power Command %d\n",
3511                         __func__, p_ap_cmd->cmn.cmd_id);
3512                 goto err_return;
3513         }
3514
3515         /* TODO: Implement blocking calls (b_block) */
3516
3517 err_return:
3518         return status;
3519 }
3520
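/*
 * Callback invoked from PMU message processing when the PMU acknowledges an
 * AP INIT / INIT_AND_ENABLE_CTRL command; currently it only logs the ack.
 */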
3521 static void ap_callback_init_and_enable_ctrl(
3522                 struct gk20a *g, struct pmu_msg *msg,
3523                 void *param, u32 seq_desc, u32 status)
3524 {
3525         /* Define p_ap (i.e. a pointer to the pmu_ap structure) */
3526         WARN_ON(!msg);
3527
3528         if (!status) {
3529                 switch (msg->msg.pg.ap_msg.cmn.msg_id) {
3530                 case PMU_AP_MSG_ID_INIT_ACK:
3531                         gk20a_dbg_pmu("reply PMU_AP_CMD_ID_INIT");
3532                         break;
3533
3534                 default:
3535                         gk20a_dbg_pmu(
3536                         "%s: Invalid Adaptive Power Message: %x\n",
3537                         __func__, msg->msg.pg.ap_msg.cmn.msg_id);
3538                         break;
3539                 }
3540         }
3541 }
3542
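/*
 * Adaptive ELPG (AELPG) bring-up is a two-step sequence: a PMU_AP_CMD_ID_INIT
 * command first sets the sampling period, then
 * PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL programs and enables a controller.
 * A likely call order (the caller lives outside this file):
 *
 *	gk20a_aelpg_init(g);
 *	gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
 */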
3543 static int gk20a_aelpg_init(struct gk20a *g)
3544 {
3545         int status = 0;
3546
3547         /* Remove reliance on app_ctrl field. */
3548         union pmu_ap_cmd ap_cmd;
3549
3550         /* TODO: Check for elpg being ready? */
3551         ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
3552         ap_cmd.init.pg_sampling_period_us =
3553                 APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
3554
3555         status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
3556         return status;
3557 }
3558
3559 static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
3560 {
3561         int status = 0;
3562         union pmu_ap_cmd ap_cmd;
3563
3564         /* TODO: Probably check if ELPG is ready? */
3565
3566         ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
3567         ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
3568         ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
3569                 APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
3570         ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
3571                 APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
3572         ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
3573                 APCTRL_POWER_BREAKEVEN_DEFAULT_US;
3574         ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
3575                 APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
3576
3577         switch (ctrl_id) {
3578         case PMU_AP_CTRL_ID_GRAPHICS:
3579                 break;
3580         default:
3581                 break;
3582         }
3583
3584         status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
3585         return status;
3586 }
3587
3588 #ifdef CONFIG_DEBUG_FS
3589 static int elpg_residency_show(struct seq_file *s, void *data)
3590 {
3591         struct gk20a *g = s->private;
3592         u32 ingating_time = 0;
3593         u32 ungating_time = 0;
3594         u32 gating_cnt;
3595         u64 total_ingating, total_ungating, residency, divisor, dividend;
3596
3597         /* Don't unnecessarily power on the device */
3598         if (g->power_on) {
3599                 gk20a_busy(g->dev);
3600                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3601                         &ungating_time, &gating_cnt);
3602                 gk20a_idle(g->dev);
3603         }
3604         total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
3605         total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
3606         divisor = total_ingating + total_ungating;
3607
3608         /* We compute the residency on a scale of 1000 (998 == 99.8%) */
3609         dividend = total_ingating * 1000;
3610
3611         if (divisor)
3612                 residency = div64_u64(dividend, divisor);
3613         else
3614                 residency = 0;
3615
3616         seq_printf(s, "Time in ELPG: %llu us\n"
3617                         "Time out of ELPG: %llu us\n"
3618                         "ELPG residency ratio: %llu\n",
3619                         total_ingating, total_ungating, residency);
3620         return 0;
3621
3622 }
3623
3624 static int elpg_residency_open(struct inode *inode, struct file *file)
3625 {
3626         return single_open(file, elpg_residency_show, inode->i_private);
3627 }
3628
3629 static const struct file_operations elpg_residency_fops = {
3630         .open           = elpg_residency_open,
3631         .read           = seq_read,
3632         .llseek         = seq_lseek,
3633         .release        = single_release,
3634 };
3635
3636 static int elpg_transitions_show(struct seq_file *s, void *data)
3637 {
3638         struct gk20a *g = s->private;
3639         u32 ingating_time, ungating_time, total_gating_cnt;
3640         u32 gating_cnt = 0;
3641
3642         if (g->power_on) {
3643                 gk20a_busy(g->dev);
3644                 gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
3645                         &ungating_time, &gating_cnt);
3646                 gk20a_idle(g->dev);
3647         }
3648         total_gating_cnt = g->pg_gating_cnt + gating_cnt;
3649
3650         seq_printf(s, "%u\n", total_gating_cnt);
3651         return 0;
3652
3653 }
3654
3655 static int elpg_transitions_open(struct inode *inode, struct file *file)
3656 {
3657         return single_open(file, elpg_transitions_show, inode->i_private);
3658 }
3659
3660 static const struct file_operations elpg_transitions_fops = {
3661         .open           = elpg_transitions_open,
3662         .read           = seq_read,
3663         .llseek         = seq_lseek,
3664         .release        = single_release,
3665 };
3666
3667 int gk20a_pmu_debugfs_init(struct platform_device *dev)
3668 {
3669         struct dentry *d;
3670         struct gk20a_platform *platform = platform_get_drvdata(dev);
3671         struct gk20a *g = get_gk20a(dev);
3672
3673         d = debugfs_create_file(
3674                 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
3675                                                 &elpg_residency_fops);
3676         if (!d)
3677                 goto err_out;
3678
3679         d = debugfs_create_file(
3680                 "elpg_transitions", S_IRUGO, platform->debugfs, g,
3681                                                 &elpg_transitions_fops);
3682         if (!d)
3683                 goto err_out;
3684
3685         return 0;
3686
3687 err_out:
3688         pr_err("%s: Failed to make debugfs node\n", __func__);
3689         debugfs_remove_recursive(platform->debugfs);
3690         return -ENOMEM;
3691 }
3692 #endif