/*
 * Tegra GK20A GPU Debugger/Profiler Driver
 *
 * Copyright (c) 2013-2014, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/fs.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/nvhost.h>
#include <linux/nvhost_dbg_gpu_ioctl.h>

#include "dev.h"
#include "nvhost_acm.h"
#include "gk20a.h"
#include "gr_gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "regops_gk20a.h"
#include "hw_therm_gk20a.h"

struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
        .exec_reg_ops = exec_regops_gk20a,
};

/* silly allocator - just increment session id */
static atomic_t session_id = ATOMIC_INIT(0);
static int generate_session_id(void)
{
        return atomic_add_return(1, &session_id);
}

static int alloc_session(struct dbg_session_gk20a **_dbg_s)
{
        struct dbg_session_gk20a *dbg_s;
        *_dbg_s = NULL;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        dbg_s = kzalloc(sizeof(*dbg_s), GFP_KERNEL);
        if (!dbg_s)
                return -ENOMEM;

        dbg_s->id = generate_session_id();
        dbg_s->ops = &dbg_gpu_session_ops_gk20a;
        *_dbg_s = dbg_s;
        return 0;
}

int gk20a_dbg_gpu_do_dev_open(struct inode *inode, struct file *filp, bool is_profiler)
{
        struct dbg_session_gk20a *dbg_session;
        struct gk20a *g;

        struct platform_device *pdev;
        struct device *dev;

        int err;

        if (!is_profiler)
                g = container_of(inode->i_cdev,
                                 struct gk20a, dbg.cdev);
        else
                g = container_of(inode->i_cdev,
                                 struct gk20a, prof.cdev);
        pdev = g->dev;
        dev  = &pdev->dev;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", dev_name(dev));

        err  = alloc_session(&dbg_session);
        if (err)
                return err;

        filp->private_data = dbg_session;
        dbg_session->pdev  = pdev;
        dbg_session->dev   = dev;
        dbg_session->g     = g;
        dbg_session->is_profiler = is_profiler;
        dbg_session->is_pg_disabled = false;

        INIT_LIST_HEAD(&dbg_session->dbg_s_list_node);
        init_waitqueue_head(&dbg_session->dbg_events.wait_queue);
        dbg_session->dbg_events.events_enabled = false;
        dbg_session->dbg_events.num_pending_events = 0;

        return 0;
}

/* Used where the debugger session can take just the per-channel lock (shared
 * by the sessions bound to that channel) for performance, but the profiler
 * session must take the per-GPU lock since it might not have an associated
 * channel. */
static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s)
{
        if (dbg_s->is_profiler)
                mutex_lock(&dbg_s->g->dbg_sessions_lock);
        else
                mutex_lock(&dbg_s->ch->dbg_s_lock);
}

static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s)
{
        if (dbg_s->is_profiler)
                mutex_unlock(&dbg_s->g->dbg_sessions_lock);
        else
                mutex_unlock(&dbg_s->ch->dbg_s_lock);
}

static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        gk20a_dbg_session_mutex_lock(dbg_s);

        dbg_s->dbg_events.events_enabled = true;
        dbg_s->dbg_events.num_pending_events = 0;

        gk20a_dbg_session_mutex_unlock(dbg_s);
}

static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        gk20a_dbg_session_mutex_lock(dbg_s);

        dbg_s->dbg_events.events_enabled = false;
        dbg_s->dbg_events.num_pending_events = 0;

        gk20a_dbg_session_mutex_unlock(dbg_s);
}

static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        gk20a_dbg_session_mutex_lock(dbg_s);

        if (dbg_s->dbg_events.events_enabled &&
                        dbg_s->dbg_events.num_pending_events > 0)
                dbg_s->dbg_events.num_pending_events--;

        gk20a_dbg_session_mutex_unlock(dbg_s);
}

static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
                          struct nvhost_dbg_gpu_events_ctrl_args *args)
{
        int ret = 0;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd);

        if (!dbg_s->ch) {
                gk20a_err(dev_from_gk20a(dbg_s->g),
                           "no channel bound to dbg session\n");
                return -EINVAL;
        }

        switch (args->cmd) {
        case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
                gk20a_dbg_gpu_events_enable(dbg_s);
                break;

        case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_DISABLE:
                gk20a_dbg_gpu_events_disable(dbg_s);
                break;

        case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_CLEAR:
                gk20a_dbg_gpu_events_clear(dbg_s);
                break;

        default:
                gk20a_err(dev_from_gk20a(dbg_s->g),
                           "unrecognized dbg gpu events ctrl cmd: 0x%x",
                           args->cmd);
                ret = -EINVAL;
                break;
        }

        return ret;
}

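/*
 * Illustrative sketch (not part of the driver): a userspace client that has
 * already bound a channel would typically enable debug events through the
 * EVENTS_CTRL ioctl and then wait for them with poll(). The ioctl, struct and
 * command names are the ones handled above; the fd handling around them
 * (dbg_fd, timeout_ms) is assumed.
 *
 *   struct nvhost_dbg_gpu_events_ctrl_args ctrl = {
 *           .cmd = NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE,
 *   };
 *   ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL, &ctrl);
 *
 *   struct pollfd pfd = { .fd = dbg_fd, .events = POLLIN | POLLPRI };
 *   if (poll(&pfd, 1, timeout_ms) > 0 && (pfd.revents & (POLLIN | POLLPRI))) {
 *           // consume one pending event
 *           ctrl.cmd = NVHOST_DBG_GPU_EVENTS_CTRL_CMD_CLEAR;
 *           ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL, &ctrl);
 *   }
 */
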
unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
{
        unsigned int mask = 0;
        struct dbg_session_gk20a *dbg_s = filep->private_data;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait);

        gk20a_dbg_session_mutex_lock(dbg_s);

        if (dbg_s->dbg_events.events_enabled &&
                        dbg_s->dbg_events.num_pending_events > 0) {
                gk20a_dbg(gpu_dbg_gpu_dbg, "found pending event on session id %d",
                                dbg_s->id);
                gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
                                dbg_s->dbg_events.num_pending_events);
                mask = (POLLPRI | POLLIN);
        }

        gk20a_dbg_session_mutex_unlock(dbg_s);

        return mask;
}

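/*
 * Illustrative sketch (not part of the driver): the two open entry points
 * below back the debugger and profiler character devices, which are created
 * elsewhere in the nvhost code. The device node paths shown here are an
 * assumption; a userspace client would open one of them roughly like this:
 *
 *   int dbg_fd  = open("/dev/nvhost-dbg-gpu", O_RDWR);   // debugger session
 *   int prof_fd = open("/dev/nvhost-prof-gpu", O_RDWR);  // profiler session
 */
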
int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
        return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
}

int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
        return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */);
}

void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
{
        struct dbg_session_gk20a *dbg_s;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        /* guard against the session list being modified */
        mutex_lock(&ch->dbg_s_lock);

        list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
                if (dbg_s->dbg_events.events_enabled) {
                        gk20a_dbg(gpu_dbg_gpu_dbg, "posting event on session id %d",
                                        dbg_s->id);
                        gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
                                        dbg_s->dbg_events.num_pending_events);

                        dbg_s->dbg_events.num_pending_events++;

                        wake_up_interruptible_all(&dbg_s->dbg_events.wait_queue);
                }
        }

        mutex_unlock(&ch->dbg_s_lock);
}


static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
                                __u32  powermode);

static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s)
{
        struct channel_gk20a *ch_gk20a = dbg_s->ch;
        struct gk20a *g = dbg_s->g;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        /* wasn't bound to start with? */
        if (!ch_gk20a) {
                gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "not bound already?");
                return -ENODEV;
        }

        mutex_lock(&g->dbg_sessions_lock);
        mutex_lock(&ch_gk20a->dbg_s_lock);

        --g->dbg_sessions;

        /* Powergate enable is called here because a dbg_session that called
         * the powergate-disable ioctl may be killed without ever calling the
         * powergate-enable ioctl.
         */
        dbg_set_powergate(dbg_s, NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE);

        dbg_s->ch = NULL;
        fput(dbg_s->ch_f);
        dbg_s->ch_f = NULL;

        list_del_init(&dbg_s->dbg_s_list_node);

        mutex_unlock(&ch_gk20a->dbg_s_lock);
        mutex_unlock(&g->dbg_sessions_lock);

        return 0;
}

int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
{
        struct dbg_session_gk20a *dbg_s = filp->private_data;

        gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));

        /* unbind if it was bound */
        if (dbg_s->ch)
                dbg_unbind_channel_gk20a(dbg_s);

        kfree(dbg_s);
        return 0;
}

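/*
 * Illustrative sketch (not part of the driver): a debugger client binds its
 * channel to the session by passing the channel fd through the BIND_CHANNEL
 * ioctl; a channel_fd of ~0 unbinds. The struct and ioctl names are the ones
 * handled below; dbg_fd and ch_fd are assumed to come from earlier open()
 * calls.
 *
 *   struct nvhost_dbg_gpu_bind_channel_args bind = {
 *           .channel_fd = ch_fd,
 *   };
 *   if (ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL, &bind) < 0)
 *           perror("bind channel");
 */
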
static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
                          struct nvhost_dbg_gpu_bind_channel_args *args)
{
        struct file *f;
        struct gk20a *g;
        struct channel_gk20a *ch;

        gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
                   dev_name(dbg_s->dev), args->channel_fd);

        if (args->channel_fd == ~0)
                return dbg_unbind_channel_gk20a(dbg_s);

        /* The channel lookup below takes and releases its own file reference;
         * hold an extra reference here so the channel can't disappear while
         * the debugger session is bound to it. */
        f = fget(args->channel_fd);
        if (!f)
                return -ENODEV;

        ch = gk20a_get_channel_from_file(args->channel_fd);
        if (!ch) {
                gk20a_dbg_fn("no channel found for fd");
                fput(f);
                return -EINVAL;
        }

        g = dbg_s->g;
        gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid);

        mutex_lock(&g->dbg_sessions_lock);
        mutex_lock(&ch->dbg_s_lock);

        dbg_s->ch_f = f;
        dbg_s->ch = ch;
        list_add(&dbg_s->dbg_s_list_node, &dbg_s->ch->dbg_s_list);

        g->dbg_sessions++;

        mutex_unlock(&ch->dbg_s_lock);
        mutex_unlock(&g->dbg_sessions_lock);
        return 0;
}

static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_exec_reg_ops_args *args);

static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_powergate_args *args);

static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
                              struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args);

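/* Top-level ioctl dispatcher. Argument structs are marshalled through the
 * on-stack buf[]: copied in from userspace for _IOC_WRITE commands, handed to
 * the per-command handler, and copied back out for _IOC_READ commands on
 * success. */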
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg)
{
        struct dbg_session_gk20a *dbg_s = filp->private_data;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        u8 buf[NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE];
        int err = 0;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        if ((_IOC_TYPE(cmd) != NVHOST_DBG_GPU_IOCTL_MAGIC) ||
            (_IOC_NR(cmd) == 0) ||
            (_IOC_NR(cmd) > NVHOST_DBG_GPU_IOCTL_LAST))
                return -EFAULT;

        BUG_ON(_IOC_SIZE(cmd) > NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE);

        if (_IOC_DIR(cmd) & _IOC_WRITE) {
                if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
                        return -EFAULT;
        }

        if (!g->gr.sw_ready) {
                err = gk20a_busy(g->dev);
                if (err)
                        return err;

                gk20a_idle(g->dev);
        }

        switch (cmd) {
        case NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL:
                err = dbg_bind_channel_gk20a(dbg_s,
                             (struct nvhost_dbg_gpu_bind_channel_args *)buf);
                gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
                break;

        case NVHOST_DBG_GPU_IOCTL_REG_OPS:
                err = nvhost_ioctl_channel_reg_ops(dbg_s,
                           (struct nvhost_dbg_gpu_exec_reg_ops_args *)buf);
                gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
                break;

        case NVHOST_DBG_GPU_IOCTL_POWERGATE:
                err = nvhost_ioctl_powergate_gk20a(dbg_s,
                           (struct nvhost_dbg_gpu_powergate_args *)buf);
                gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
                break;

        case NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL:
                err = gk20a_dbg_gpu_events_ctrl(dbg_s,
                           (struct nvhost_dbg_gpu_events_ctrl_args *)buf);
                break;

        case NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE:
                err = nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s,
                           (struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *)buf);
                break;

        default:
                gk20a_err(dev_from_gk20a(g),
                           "unrecognized dbg gpu ioctl cmd: 0x%x",
                           cmd);
                err = -ENOTTY;
                break;
        }

        if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
                err = copy_to_user((void __user *)arg,
                                   buf, _IOC_SIZE(cmd));

        return err;
}

/* In order to perform a context relative op the context has
 * to be created already... which would imply that the
 * context switch mechanism has already been put in place.
 * So by the time we perform such an operation it should always
 * be possible to query for the appropriate context offsets, etc.
 *
 * But note: while the dbg_gpu bind requires a channel fd,
 * it doesn't require an allocated gr/compute obj at that point...
 */
static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
                                      struct gr_gk20a *gr)
{
        int err;

        mutex_lock(&gr->ctx_mutex);
        err = !gr->ctx_vars.golden_image_initialized;
        mutex_unlock(&gr->ctx_mutex);
        if (err)
                return false;
        return true;
}

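/*
 * Illustrative sketch (not part of the driver): to execute register
 * operations, userspace fills an array of struct nvhost_dbg_gpu_reg_op and
 * passes its address plus the op count through the REG_OPS ioctl. The struct
 * fields match what the handler below copies in and out; the READ_32 op code
 * is assumed from the ioctl header, and the register offset is a placeholder.
 *
 *   struct nvhost_dbg_gpu_reg_op op = {
 *           .op     = NVHOST_DBG_GPU_REG_OP_READ_32,
 *           .type   = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX,
 *           .offset = 0x00419e08,            // example offset only
 *   };
 *   struct nvhost_dbg_gpu_exec_reg_ops_args args = {
 *           .ops     = (uintptr_t)&op,
 *           .num_ops = 1,
 *   };
 *   ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_REG_OPS, &args);
 *   // on success, op.value_lo holds the value read back
 */
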
static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_exec_reg_ops_args *args)
{
        int err;
        struct device *dev = dbg_s->dev;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        struct nvhost_dbg_gpu_reg_op *ops;
        u64 ops_size = sizeof(ops[0]) * args->num_ops;

        gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size);

        if (!dbg_s->ops) {
                gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
                return -EINVAL;
        }

        if (!dbg_s->is_profiler && !dbg_s->ch) {
                gk20a_err(dev, "bind a channel before regops for a debugging session");
                return -EINVAL;
        }

        /* be sure that ctx info is in place */
        if (!gr_context_info_available(dbg_s, &g->gr)) {
                gk20a_err(dev, "gr context data not available\n");
                return -ENODEV;
        }

        ops = kzalloc(ops_size, GFP_KERNEL);
        if (!ops) {
                gk20a_err(dev, "Allocating memory failed!");
                return -ENOMEM;
        }

        gk20a_dbg_fn("Copying regops from userspace");

        if (copy_from_user(ops, (void *)(uintptr_t)args->ops, ops_size)) {
                dev_err(dev, "copy_from_user failed!");
                err = -EFAULT;
                goto clean_up;
        }

        /* since exec_reg_ops sends methods to the ucode, it must take the
         * global gpu lock to protect against mixing methods from debug sessions
         * on other channels */
        mutex_lock(&g->dbg_sessions_lock);

        err = dbg_s->ops->exec_reg_ops(dbg_s, ops, args->num_ops);

        mutex_unlock(&g->dbg_sessions_lock);

        if (err) {
                gk20a_err(dev, "dbg regops failed");
                goto clean_up;
        }

        gk20a_dbg_fn("Copying result to userspace");

        if (copy_to_user((void *)(uintptr_t)args->ops, ops, ops_size)) {
                dev_err(dev, "copy_to_user failed!");
                err = -EFAULT;
                goto clean_up;
        }

 clean_up:
        kfree(ops);
        return err;
}

static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
                                __u32  powermode)
{
        int err = 0;
        struct gk20a *g = get_gk20a(dbg_s->pdev);

        /* This function must be called with g->dbg_sessions_lock held */

        gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %d",
                   dev_name(dbg_s->dev), powermode);

        switch (powermode) {
        case NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE:
                /* save off current powergate, clk state.
                 * set gpu module's can_powergate = 0.
                 * set gpu module's clk to max.
                 * while *a* debug session is active there will be no power or
                 * clocking state changes allowed from mainline code (but they
                 * should be saved).
                 */
                /* Only disable powergating if the current dbg_session hasn't
                 * already requested it and the global
                 * powergating_disabled_refcount is zero.
                 */

                if ((dbg_s->is_pg_disabled == false) &&
                    (g->dbg_powergating_disabled_refcount++ == 0)) {

                        gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module busy");
                        gk20a_busy(g->dev);
                        err = gk20a_busy(dbg_s->pdev);
                        if (err)
                                return -EPERM;

                        g->ops.clock_gating.slcg_gr_load_gating_prod(g,
                                        false);
                        g->ops.clock_gating.slcg_perf_load_gating_prod(g,
                                        false);
                        gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A);

                        g->elcg_enabled = false;
                        gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
                        gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);

                        gk20a_pmu_disable_elpg(g);
                }

                dbg_s->is_pg_disabled = true;
                break;

        case NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE:
                /* restore (can) powergate, clk state */
                /* release pending exceptions to fault/be handled as usual */
                /* TBD: ordering of these? */

                /* Re-enable powergating only when the current dbg_session had
                 * previously requested the disable through this ioctl and no
                 * other session still wants powergating disabled.
                 */
                if (dbg_s->is_pg_disabled &&
                    --g->dbg_powergating_disabled_refcount == 0) {

                        g->elcg_enabled = true;
                        gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
                        gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
                        gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A);

                        g->ops.clock_gating.slcg_gr_load_gating_prod(g,
                                        g->slcg_enabled);
                        g->ops.clock_gating.slcg_perf_load_gating_prod(g,
                                        g->slcg_enabled);

                        gk20a_pmu_enable_elpg(g);

                        gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
                        gk20a_idle(dbg_s->pdev);
                        gk20a_idle(g->dev);
                }

                dbg_s->is_pg_disabled = false;
                break;

        default:
                gk20a_err(dev_from_gk20a(g),
                           "unrecognized dbg gpu powergate mode: 0x%x",
                           powermode);
                err = -ENOTTY;
                break;
        }

        return err;
}

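/*
 * Illustrative sketch (not part of the driver): a debugger that needs the GPU
 * to stay powered and clocked while it inspects state would disable
 * powergating through the POWERGATE ioctl and re-enable it when done (or rely
 * on the unbind/release path doing so on its behalf). dbg_fd is assumed to
 * come from an earlier open().
 *
 *   struct nvhost_dbg_gpu_powergate_args pg = {
 *           .mode = NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE,
 *   };
 *   ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_POWERGATE, &pg);
 *   // ... debug work ...
 *   pg.mode = NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE;
 *   ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_POWERGATE, &pg);
 */
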
static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_powergate_args *args)
{
        int err;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        gk20a_dbg_fn("%s  powergate mode = %d",
                      dev_name(dbg_s->dev), args->mode);

        mutex_lock(&g->dbg_sessions_lock);
        err = dbg_set_powergate(dbg_s, args->mode);
        mutex_unlock(&g->dbg_sessions_lock);
        return err;
}

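/*
 * Illustrative sketch (not part of the driver): a profiler enables
 * context-switched SMPC collection for its bound channel with the
 * SMPC_CTXSW_MODE ioctl. dbg_fd is assumed to come from an earlier open(),
 * and a channel must have been bound first.
 *
 *   struct nvhost_dbg_gpu_smpc_ctxsw_mode_args smpc = {
 *           .mode = NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW,
 *   };
 *   ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE, &smpc);
 */
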
static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
                               struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args)
{
        int err;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        struct channel_gk20a *ch_gk20a;

        gk20a_dbg_fn("%s smpc ctxsw mode = %d",
                     dev_name(dbg_s->dev), args->mode);

        /* Take the global lock, since we'll be doing global regops */
        mutex_lock(&g->dbg_sessions_lock);

        ch_gk20a = dbg_s->ch;

        if (!ch_gk20a) {
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "no bound channel for smpc ctxsw mode update\n");
                err = -EINVAL;
                goto clean_up;
        }

        err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
                      args->mode == NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
        if (err) {
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "error (%d) during smpc ctxsw mode update\n", err);
                goto clean_up;
        }
        /* The following regops are a hack/WAR to make up for the fact that we
         * just scribbled into the ctxsw image without really knowing whether
         * it had already been swapped in/out, etc.
         */
        {
                struct nvhost_dbg_gpu_reg_op ops[4];
                int i;
                for (i = 0; i < ARRAY_SIZE(ops); i++) {
                        ops[i].op     = NVHOST_DBG_GPU_REG_OP_WRITE_32;
                        ops[i].type   = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX;
                        ops[i].status = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS;
                        ops[i].value_hi      = 0;
                        ops[i].and_n_mask_lo = 0;
                        ops[i].and_n_mask_hi = 0;
                }
                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r(); */
                ops[0].offset   = 0x00419e08;
                ops[0].value_lo = 0x1d;

                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */
                ops[1].offset   = 0x00419e58;
                ops[1].value_lo = 0x1;

                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */
                ops[2].offset   = 0x00419e68;
                ops[2].value_lo = 0xaaaa;

                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */
                ops[3].offset   = 0x00419f40;
                ops[3].value_lo = 0x18;

                err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops));
        }

 clean_up:
        mutex_unlock(&g->dbg_sessions_lock);
        return err;
}