drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c

   1 /*
   2  * Tegra GK20A GPU Debugger/Profiler Driver
   3  *
   4  * Copyright (c) 2013-2014, NVIDIA CORPORATION.  All rights reserved.
   5  *
   6  * This program is free software; you can redistribute it and/or modify it
   7  * under the terms and conditions of the GNU General Public License,
   8  * version 2, as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13  * more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  17  */
  18
  19 #include <linux/fs.h>
  20 #include <linux/file.h>
  21 #include <linux/cdev.h>
  22 #include <linux/uaccess.h>
  23 #include <linux/nvhost.h>
  24 #include <linux/nvhost_dbg_gpu_ioctl.h>
  25
  26 #include "gk20a.h"
  27 #include "gr_gk20a.h"
  28 #include "dbg_gpu_gk20a.h"
  29 #include "regops_gk20a.h"
  30 #include "hw_therm_gk20a.h"
  31
  32 struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
  33         .exec_reg_ops = exec_regops_gk20a,
  34 };
  35
  36 /* silly allocator - just increment session id */
  37 static atomic_t session_id = ATOMIC_INIT(0);
  38 static int generate_session_id(void)
  39 {
  40         return atomic_add_return(1, &session_id);
  41 }
  42
  43 static int alloc_session(struct dbg_session_gk20a **_dbg_s)
  44 {
  45         struct dbg_session_gk20a *dbg_s;
  46         *_dbg_s = NULL;
  47
  48         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
  49
  50         dbg_s = kzalloc(sizeof(*dbg_s), GFP_KERNEL);
  51         if (!dbg_s)
  52                 return -ENOMEM;
  53
  54         dbg_s->id = generate_session_id();
  55         dbg_s->ops = &dbg_gpu_session_ops_gk20a;
  56         *_dbg_s = dbg_s;
  57         return 0;
  58 }
  59
  60 int gk20a_dbg_gpu_do_dev_open(struct inode *inode, struct file *filp, bool is_profiler)
  61 {
  62         struct dbg_session_gk20a *dbg_session;
  63         struct gk20a *g;
  64
  65         struct platform_device *pdev;
  66         struct device *dev;
  67
  68         int err;
  69
  70         if (!is_profiler)
  71                 g = container_of(inode->i_cdev,
  72                                  struct gk20a, dbg.cdev);
  73         else
  74                 g = container_of(inode->i_cdev,
  75                                  struct gk20a, prof.cdev);
  76         pdev = g->dev;
  77         dev  = &pdev->dev;
  78
  79         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", dev_name(dev));
  80
  81         err  = alloc_session(&dbg_session);
  82         if (err)
  83                 return err;
  84
  85         filp->private_data = dbg_session;
  86         dbg_session->pdev  = pdev;
  87         dbg_session->dev   = dev;
  88         dbg_session->g     = g;
  89         dbg_session->is_profiler = is_profiler;
  90         dbg_session->is_pg_disabled = false;
  91
  92         INIT_LIST_HEAD(&dbg_session->dbg_s_list_node);
  93         init_waitqueue_head(&dbg_session->dbg_events.wait_queue);
  94         dbg_session->dbg_events.events_enabled = false;
  95         dbg_session->dbg_events.num_pending_events = 0;
  96
  97         return 0;
  98 }
  99
 100 /* used in scenarios where the debugger session can take just the inter-session
 101  * lock for performance, but the profiler session must take the per-gpu lock
 102  * since it might not have an associated channel. */
 103 static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s)
 104 {
 105         if (dbg_s->is_profiler)
 106                 mutex_lock(&dbg_s->g->dbg_sessions_lock);
 107         else
 108                 mutex_lock(&dbg_s->ch->dbg_s_lock);
 109 }
 110
 111 static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s)
 112 {
 113         if (dbg_s->is_profiler)
 114                 mutex_unlock(&dbg_s->g->dbg_sessions_lock);
 115         else
 116                 mutex_unlock(&dbg_s->ch->dbg_s_lock);
 117 }
 118
 119 static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s)
 120 {
 121         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 122
 123         gk20a_dbg_session_mutex_lock(dbg_s);
 124
 125         dbg_s->dbg_events.events_enabled = true;
 126         dbg_s->dbg_events.num_pending_events = 0;
 127
 128         gk20a_dbg_session_mutex_unlock(dbg_s);
 129 }
 130
 131 static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s)
 132 {
 133         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 134
 135         gk20a_dbg_session_mutex_lock(dbg_s);
 136
 137         dbg_s->dbg_events.events_enabled = false;
 138         dbg_s->dbg_events.num_pending_events = 0;
 139
 140         gk20a_dbg_session_mutex_unlock(dbg_s);
 141 }
 142
 143 static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s)
 144 {
 145         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 146
 147         gk20a_dbg_session_mutex_lock(dbg_s);
 148
 149         if (dbg_s->dbg_events.events_enabled &&
 150                         dbg_s->dbg_events.num_pending_events > 0)
 151                 dbg_s->dbg_events.num_pending_events--;
 152
 153         gk20a_dbg_session_mutex_unlock(dbg_s);
 154 }
 155
 156 static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
 157                           struct nvhost_dbg_gpu_events_ctrl_args *args)
 158 {
 159         int ret = 0;
 160
 161         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd);
 162
 163         if (!dbg_s->ch) {
 164                 gk20a_err(dev_from_gk20a(dbg_s->g),
 165                            "no channel bound to dbg session\n");
 166                 return -EINVAL;
 167         }
 168
 169         switch (args->cmd) {
 170         case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
 171                 gk20a_dbg_gpu_events_enable(dbg_s);
 172                 break;
 173
 174         case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_DISABLE:
 175                 gk20a_dbg_gpu_events_disable(dbg_s);
 176                 break;
 177
 178         case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_CLEAR:
 179                 gk20a_dbg_gpu_events_clear(dbg_s);
 180                 break;
 181
 182         default:
 183                 gk20a_err(dev_from_gk20a(dbg_s->g),
 184                            "unrecognized dbg gpu events ctrl cmd: 0x%x",
 185                            args->cmd);
 186                 ret = -EINVAL;
 187                 break;
 188         }
 189
 190         return ret;
 191 }
 192
 193 unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
 194 {
 195         unsigned int mask = 0;
 196         struct dbg_session_gk20a *dbg_s = filep->private_data;
 197
 198         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 199
 200         poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait);
 201
 202         gk20a_dbg_session_mutex_lock(dbg_s);
 203
 204         if (dbg_s->dbg_events.events_enabled &&
 205                         dbg_s->dbg_events.num_pending_events > 0) {
 206                 gk20a_dbg(gpu_dbg_gpu_dbg, "found pending event on session id %d",
 207                                 dbg_s->id);
 208                 gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
 209                                 dbg_s->dbg_events.num_pending_events);
 210                 mask = (POLLPRI | POLLIN);
 211         }
 212
 213         gk20a_dbg_session_mutex_unlock(dbg_s);
 214
 215         return mask;
 216 }
 217
 218 int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
 219 {
 220         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 221         return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
 222 }
 223
 224 int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp)
 225 {
 226         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 227         return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */);
 228 }
 229
 230 void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
 231 {
 232         struct dbg_session_gk20a *dbg_s;
 233
 234         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 235
 236         /* guard against the session list being modified */
 237         mutex_lock(&ch->dbg_s_lock);
 238
 239         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
 240                 if (dbg_s->dbg_events.events_enabled) {
 241                         gk20a_dbg(gpu_dbg_gpu_dbg, "posting event on session id %d",
 242                                         dbg_s->id);
 243                         gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
 244                                         dbg_s->dbg_events.num_pending_events);
 245
 246                         dbg_s->dbg_events.num_pending_events++;
 247
 248                         wake_up_interruptible_all(&dbg_s->dbg_events.wait_queue);
 249                 }
 250         }
 251
 252         mutex_unlock(&ch->dbg_s_lock);
 253 }
 254
 255
 256 static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
 257                                 __u32  powermode);
 258
 259 static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s)
 260 {
 261         struct channel_gk20a *ch_gk20a = dbg_s->ch;
 262         struct gk20a *g = dbg_s->g;
 263
 264         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 265
 266         /* wasn't bound to start with ? */
 267         if (!ch_gk20a) {
 268                 gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "not bound already?");
 269                 return -ENODEV;
 270         }
 271
 272         mutex_lock(&g->dbg_sessions_lock);
 273         mutex_lock(&ch_gk20a->dbg_s_lock);
 274
 275         --g->dbg_sessions;
 276
 277         dbg_s->ch = NULL;
 278         fput(dbg_s->ch_f);
 279         dbg_s->ch_f = NULL;
 280
 281         list_del_init(&dbg_s->dbg_s_list_node);
 282
 283         mutex_unlock(&ch_gk20a->dbg_s_lock);
 284         mutex_unlock(&g->dbg_sessions_lock);
 285
 286         return 0;
 287 }
 288
 289 int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 290 {
 291         struct dbg_session_gk20a *dbg_s = filp->private_data;
 292         struct gk20a *g = dbg_s->g;
 293
 294         gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));
 295
 296         /* unbind if it was bound */
 297         if (dbg_s->ch)
 298                 dbg_unbind_channel_gk20a(dbg_s);
 299
 300         /* Powergate enable is called here as possibility of dbg_session
 301          * which called powergate disable ioctl, to be killed without calling
 302          * powergate enable ioctl
 303          */
 304         mutex_lock(&g->dbg_sessions_lock);
 305         dbg_set_powergate(dbg_s, NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE);
 306         mutex_unlock(&g->dbg_sessions_lock);
 307
 308         kfree(dbg_s);
 309         return 0;
 310 }
 311
 312 static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 313                           struct nvhost_dbg_gpu_bind_channel_args *args)
 314 {
 315         struct file *f;
 316         struct gk20a *g;
 317         struct channel_gk20a *ch;
 318
 319         gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
 320                    dev_name(dbg_s->dev), args->channel_fd);
 321
 322         if (args->channel_fd == ~0)
 323                 return dbg_unbind_channel_gk20a(dbg_s);
 324
 325         /* even though get_file_channel is doing this it releases it as well */
 326         /* by holding it here we'll keep it from disappearing while the
 327          * debugger is in session */
 328         f = fget(args->channel_fd);
 329         if (!f)
 330                 return -ENODEV;
 331
 332         ch = gk20a_get_channel_from_file(args->channel_fd);
 333         if (!ch) {
 334                 gk20a_dbg_fn("no channel found for fd");
 335                 fput(f);
 336                 return -EINVAL;
 337         }
 338
 339         g = dbg_s->g;
 340         gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid);
 341
 342         mutex_lock(&g->dbg_sessions_lock);
 343         mutex_lock(&ch->dbg_s_lock);
 344
 345         dbg_s->ch_f = f;
 346         dbg_s->ch = ch;
 347         list_add(&dbg_s->dbg_s_list_node, &dbg_s->ch->dbg_s_list);
 348
 349         g->dbg_sessions++;
 350
 351         mutex_unlock(&ch->dbg_s_lock);
 352         mutex_unlock(&g->dbg_sessions_lock);
 353         return 0;
 354 }
 355
 356 static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 357                                 struct nvhost_dbg_gpu_exec_reg_ops_args *args);
 358
 359 static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
 360                                 struct nvhost_dbg_gpu_powergate_args *args);
 361
 362 static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 363                               struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args);
 364
 365 long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 366                              unsigned long arg)
 367 {
 368         struct dbg_session_gk20a *dbg_s = filp->private_data;
 369         struct gk20a *g = get_gk20a(dbg_s->pdev);
 370         u8 buf[NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE];
 371         int err = 0;
 372
 373         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 374
 375         if ((_IOC_TYPE(cmd) != NVHOST_DBG_GPU_IOCTL_MAGIC) ||
 376             (_IOC_NR(cmd) == 0) ||
 377             (_IOC_NR(cmd) > NVHOST_DBG_GPU_IOCTL_LAST))
 378                 return -EFAULT;
 379
 380         BUG_ON(_IOC_SIZE(cmd) > NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE);
 381
 382         if (_IOC_DIR(cmd) & _IOC_WRITE) {
 383                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
 384                         return -EFAULT;
 385         }
 386
 387         if (!g->gr.sw_ready) {
 388                 err = gk20a_busy(g->dev);
 389                 if (err)
 390                         return err;
 391
 392                 gk20a_idle(g->dev);
 393         }
 394
 395         switch (cmd) {
 396         case NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL:
 397                 err = dbg_bind_channel_gk20a(dbg_s,
 398                              (struct nvhost_dbg_gpu_bind_channel_args *)buf);
 399                 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 400                 break;
 401
 402         case NVHOST_DBG_GPU_IOCTL_REG_OPS:
 403                 err = nvhost_ioctl_channel_reg_ops(dbg_s,
 404                            (struct nvhost_dbg_gpu_exec_reg_ops_args *)buf);
 405                 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 406                 break;
 407
 408         case NVHOST_DBG_GPU_IOCTL_POWERGATE:
 409                 err = nvhost_ioctl_powergate_gk20a(dbg_s,
 410                            (struct nvhost_dbg_gpu_powergate_args *)buf);
 411                 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 412                 break;
 413
 414         case NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL:
 415                 err = gk20a_dbg_gpu_events_ctrl(dbg_s,
 416                            (struct nvhost_dbg_gpu_events_ctrl_args *)buf);
 417                 break;
 418
 419         case NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE:
 420                 err = nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s,
 421                            (struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *)buf);
 422                 break;
 423
 424         default:
 425                 gk20a_err(dev_from_gk20a(g),
 426                            "unrecognized dbg gpu ioctl cmd: 0x%x",
 427                            cmd);
 428                 err = -ENOTTY;
 429                 break;
 430         }
 431
 432         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
 433                 err = copy_to_user((void __user *)arg,
 434                                    buf, _IOC_SIZE(cmd));
 435
 436         return err;
 437 }
 438
 439 /* In order to perform a context relative op the context has
 440  * to be created already... which would imply that the
 441  * context switch mechanism has already been put in place.
 442  * So by the time we perform such an opertation it should always
 443  * be possible to query for the appropriate context offsets, etc.
 444  *
 445  * But note: while the dbg_gpu bind requires the a channel fd,
 446  * it doesn't require an allocated gr/compute obj at that point...
 447  */
 448 static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
 449                                       struct gr_gk20a *gr)
 450 {
 451         int err;
 452
 453         mutex_lock(&gr->ctx_mutex);
 454         err = !gr->ctx_vars.golden_image_initialized;
 455         mutex_unlock(&gr->ctx_mutex);
 456         if (err)
 457                 return false;
 458         return true;
 459
 460 }
 461
 462 static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 463                                 struct nvhost_dbg_gpu_exec_reg_ops_args *args)
 464 {
 465         int err;
 466         struct device *dev = dbg_s->dev;
 467         struct gk20a *g = get_gk20a(dbg_s->pdev);
 468         struct nvhost_dbg_gpu_reg_op *ops;
 469         u64 ops_size = sizeof(ops[0]) * args->num_ops;
 470
 471         gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size);
 472
 473         if (!dbg_s->ops) {
 474                 gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
 475                 return -EINVAL;
 476         }
 477
 478         if (!dbg_s->is_profiler && !dbg_s->ch) {
 479                 gk20a_err(dev, "bind a channel before regops for a debugging session");
 480                 return -EINVAL;
 481         }
 482
 483         /* be sure that ctx info is in place */
 484         if (!gr_context_info_available(dbg_s, &g->gr)) {
 485                 gk20a_err(dev, "gr context data not available\n");
 486                 return -ENODEV;
 487         }
 488
 489         ops = kzalloc(ops_size, GFP_KERNEL);
 490         if (!ops) {
 491                 gk20a_err(dev, "Allocating memory failed!");
 492                 return -ENOMEM;
 493         }
 494
 495         gk20a_dbg_fn("Copying regops from userspace");
 496
 497         if (copy_from_user(ops, (void *)(uintptr_t)args->ops, ops_size)) {
 498                 dev_err(dev, "copy_from_user failed!");
 499                 err = -EFAULT;
 500                 goto clean_up;
 501         }
 502
 503         /* since exec_reg_ops sends methods to the ucode, it must take the
 504          * global gpu lock to protect against mixing methods from debug sessions
 505          * on other channels */
 506         mutex_lock(&g->dbg_sessions_lock);
 507
 508         err = dbg_s->ops->exec_reg_ops(dbg_s, ops, args->num_ops);
 509
 510         mutex_unlock(&g->dbg_sessions_lock);
 511
 512         if (err) {
 513                 gk20a_err(dev, "dbg regops failed");
 514                 goto clean_up;
 515         }
 516
 517         gk20a_dbg_fn("Copying result to userspace");
 518
 519         if (copy_to_user((void *)(uintptr_t)args->ops, ops, ops_size)) {
 520                 dev_err(dev, "copy_to_user failed!");
 521                 err = -EFAULT;
 522                 goto clean_up;
 523         }
 524
 525  clean_up:
 526         kfree(ops);
 527         return err;
 528 }
 529
 530 static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
 531                                 __u32  powermode)
 532 {
 533         int err = 0;
 534         struct gk20a *g = get_gk20a(dbg_s->pdev);
 535
 536          /* This function must be called with g->dbg_sessions_lock held */
 537
 538         gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %d",
 539                    dev_name(dbg_s->dev), powermode);
 540
 541         switch (powermode) {
 542         case NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE:
 543                 /* save off current powergate, clk state.
 544                  * set gpu module's can_powergate = 0.
 545                  * set gpu module's clk to max.
 546                  * while *a* debug session is active there will be no power or
 547                  * clocking state changes allowed from mainline code (but they
 548                  * should be saved).
 549                  */
 550                 /* Allow powergate disable if the current dbg_session doesn't
 551                  * call a powergate disable ioctl and the global
 552                  * powergating_disabled_refcount is zero
 553                  */
 554
 555                 if ((dbg_s->is_pg_disabled == false) &&
 556                     (g->dbg_powergating_disabled_refcount++ == 0)) {
 557
 558                         gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module busy");
 559                         gk20a_busy(g->dev);
 560                         err = gk20a_busy(dbg_s->pdev);
 561                         if (err)
 562                                 return -EPERM;
 563
 564                         /*do elpg disable before clock gating disable*/
 565                         gk20a_pmu_disable_elpg(g);
 566                         g->ops.clock_gating.slcg_gr_load_gating_prod(g,
 567                                         false);
 568                         g->ops.clock_gating.slcg_perf_load_gating_prod(g,
 569                                         false);
 570                         gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A);
 571
 572                         g->elcg_enabled = false;
 573                         gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
 574                         gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);
 575
 576                 }
 577
 578                 dbg_s->is_pg_disabled = true;
 579                 break;
 580
 581         case NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE:
 582                 /* restore (can) powergate, clk state */
 583                 /* release pending exceptions to fault/be handled as usual */
 584                 /*TBD: ordering of these? */
 585
 586                 /* Re-enabling powergate as no other sessions want
 587                  * powergate disabled and the current dbg-sessions had
 588                  * requested the powergate disable through ioctl
 589                 */
 590                 if (dbg_s->is_pg_disabled &&
 591                     --g->dbg_powergating_disabled_refcount == 0) {
 592
 593                         g->elcg_enabled = true;
 594                         gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
 595                         gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
 596                         gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A);
 597
 598                         g->ops.clock_gating.slcg_gr_load_gating_prod(g,
 599                                         g->slcg_enabled);
 600                         g->ops.clock_gating.slcg_perf_load_gating_prod(g,
 601                                         g->slcg_enabled);
 602
 603                         gk20a_pmu_enable_elpg(g);
 604
 605                         gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
 606                         gk20a_idle(dbg_s->pdev);
 607                         gk20a_idle(g->dev);
 608                 }
 609
 610                 dbg_s->is_pg_disabled = false;
 611                 break;
 612
 613         default:
 614                 gk20a_err(dev_from_gk20a(g),
 615                            "unrecognized dbg gpu powergate mode: 0x%x",
 616                            powermode);
 617                 err = -ENOTTY;
 618                 break;
 619         }
 620
 621         gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %d done",
 622                    dev_name(dbg_s->dev), powermode);
 623         return err;
 624 }
 625
 626 static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
 627                                 struct nvhost_dbg_gpu_powergate_args *args)
 628 {
 629         int err;
 630         struct gk20a *g = get_gk20a(dbg_s->pdev);
 631         gk20a_dbg_fn("%s  powergate mode = %d",
 632                       dev_name(dbg_s->dev), args->mode);
 633
 634         mutex_lock(&g->dbg_sessions_lock);
 635         err = dbg_set_powergate(dbg_s, args->mode);
 636         mutex_unlock(&g->dbg_sessions_lock);
 637         return  err;
 638 }
 639
 640 static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 641                                struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args)
 642 {
 643         int err;
 644         struct gk20a *g = get_gk20a(dbg_s->pdev);
 645         struct channel_gk20a *ch_gk20a;
 646
 647         gk20a_dbg_fn("%s smpc ctxsw mode = %d",
 648                      dev_name(dbg_s->dev), args->mode);
 649
 650         /* Take the global lock, since we'll be doing global regops */
 651         mutex_lock(&g->dbg_sessions_lock);
 652
 653         ch_gk20a = dbg_s->ch;
 654
 655         if (!ch_gk20a) {
 656                 gk20a_err(dev_from_gk20a(dbg_s->g),
 657                           "no bound channel for smpc ctxsw mode update\n");
 658                 err = -EINVAL;
 659                 goto clean_up;
 660         }
 661
 662         err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
 663                       args->mode == NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
 664         if (err) {
 665                 gk20a_err(dev_from_gk20a(dbg_s->g),
 666                           "error (%d) during smpc ctxsw mode update\n", err);
 667                 goto clean_up;
 668         }
 669         /* The following regops are a hack/war to make up for the fact that we
 670          * just scribbled into the ctxsw image w/o really knowing whether
 671          * it was already swapped out in/out once or not, etc.
 672          */
 673         {
 674                 struct nvhost_dbg_gpu_reg_op ops[4];
 675                 int i;
 676                 for (i = 0; i < ARRAY_SIZE(ops); i++) {
 677                         ops[i].op     = NVHOST_DBG_GPU_REG_OP_WRITE_32;
 678                         ops[i].type   = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX;
 679                         ops[i].status = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS;
 680                         ops[i].value_hi      = 0;
 681                         ops[i].and_n_mask_lo = 0;
 682                         ops[i].and_n_mask_hi = 0;
 683                 }
 684                 /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r();*/
 685                 ops[0].offset   = 0x00419e08;
 686                 ops[0].value_lo = 0x1d;
 687
 688                 /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */
 689                 ops[1].offset   = 0x00419e58;
 690                 ops[1].value_lo = 0x1;
 691
 692                 /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */
 693                 ops[2].offset   = 0x00419e68;
 694                 ops[2].value_lo = 0xaaaa;
 695
 696                 /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */
 697                 ops[3].offset   = 0x00419f40;
 698                 ops[3].value_lo = 0x18;
 699
 700                 err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops));
 701         }
 702
 703  clean_up:
 704         mutex_unlock(&g->dbg_sessions_lock);
 705         return  err;
 706 }