drivers/gpu/nvgpu/gk20a/channel_gk20a.c
1 /*
2  * GK20A Graphics channel
3  *
4  * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/nvhost.h>
20 #include <linux/list.h>
21 #include <linux/delay.h>
22 #include <linux/highmem.h> /* needed for nvmap.h */
23 #include <trace/events/gk20a.h>
24 #include <linux/scatterlist.h>
25 #include <linux/file.h>
26 #include <linux/anon_inodes.h>
27 #include <linux/dma-buf.h>
28 #include <linux/vmalloc.h>
29
30 #include "debug_gk20a.h"
31
32 #include "gk20a.h"
33 #include "dbg_gpu_gk20a.h"
34 #include "fence_gk20a.h"
35 #include "semaphore_gk20a.h"
36
37 #include "hw_ram_gk20a.h"
38 #include "hw_fifo_gk20a.h"
39 #include "hw_pbdma_gk20a.h"
40 #include "hw_ccsr_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42
43 #define NVMAP_HANDLE_PARAM_SIZE 1
44
45 #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT       64      /* channels */
46
47 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
48 static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
49
50 static void free_priv_cmdbuf(struct channel_gk20a *c,
51                              struct priv_cmd_entry *e);
52
53 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
54 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
55
56 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
57 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
58
59 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
60
61 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
62                                         bool add);
63 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
64
65 static void gk20a_channel_clean_up_jobs(struct work_struct *work);
66
67 /* allocate GPU channel */
68 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
69 {
70         struct channel_gk20a *ch = NULL;
71         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
72
73         mutex_lock(&f->free_chs_mutex);
74         if (!list_empty(&f->free_chs)) {
75                 ch = list_first_entry(&f->free_chs, struct channel_gk20a,
76                                 free_chs);
77                 list_del(&ch->free_chs);
78                 WARN_ON(atomic_read(&ch->ref_count));
79                 WARN_ON(ch->referenceable);
80                 f->used_channels++;
81         }
82         mutex_unlock(&f->free_chs_mutex);
83
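        /* With this many channels in use, free each channel's sync object as
         * soon as its jobs complete (see the aggressive_sync_destroy check in
         * gk20a_channel_clean_up_jobs()) instead of keeping it allocated. */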
84         if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
85                 platform->aggressive_sync_destroy = true;
86
87         return ch;
88 }
89
90 static void free_channel(struct fifo_gk20a *f,
91                 struct channel_gk20a *ch)
92 {
93         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
94
95         trace_gk20a_release_used_channel(ch->hw_chid);
96         /* refcount is zero here and channel is in a freed/dead state */
97         mutex_lock(&f->free_chs_mutex);
98         /* add to head to increase visibility of timing-related bugs */
99         list_add(&ch->free_chs, &f->free_chs);
100         f->used_channels--;
101         mutex_unlock(&f->free_chs_mutex);
102
103         if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
104                 platform->aggressive_sync_destroy = false;
105 }
106
107 int channel_gk20a_commit_va(struct channel_gk20a *c)
108 {
109         gk20a_dbg_fn("");
110
111         if (!c->inst_block.cpu_va)
112                 return -ENOMEM;
113
114         gk20a_init_inst_block(&c->inst_block, c->vm,
115                         c->vm->gmmu_page_sizes[gmmu_page_size_big]);
116
117         return 0;
118 }
119
120 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
121 {
122         u32 addr_lo;
123         u32 addr_hi;
124         void *inst_ptr;
125
126         gk20a_dbg_fn("");
127
128         inst_ptr = c->inst_block.cpu_va;
129         if (!inst_ptr)
130                 return -ENOMEM;
131
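        /* USERD is referenced by its bus address: the low word is stored
         * shifted right by ram_userd_base_shift_v() (so the address must be
         * suitably aligned), the high word holds the upper 32 bits. */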
132         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
133         addr_hi = u64_hi32(c->userd_iova);
134
135         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
136                 c->hw_chid, (u64)c->userd_iova);
137
138         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
139                  pbdma_userd_target_vid_mem_f() |
140                  pbdma_userd_addr_f(addr_lo));
141
142         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
143                  pbdma_userd_target_vid_mem_f() |
144                  pbdma_userd_hi_addr_f(addr_hi));
145
146         return 0;
147 }
148
149 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
150                                 u32 timeslice_timeout)
151 {
152         void *inst_ptr;
153         int shift = 3;
154         int value = timeslice_timeout;
155
156         inst_ptr = c->inst_block.cpu_va;
157         if (!inst_ptr)
158                 return -ENOMEM;
159
160         /* disable channel */
161         c->g->ops.fifo.disable_channel(c);
162
163         /* preempt the channel */
164         WARN_ON(gk20a_fifo_preempt(c->g, c));
165
166         /* value field is 8 bits long */
167         while (value >= 1 << 8) {
168                 value >>= 1;
169                 shift++;
170         }
171
172         /* the time slice register is only 18 bits long */
173         if ((value << shift) >= 1<<19) {
174                 pr_err("Requested timeslice value is clamped to 18 bits\n");
175                 value = 255;
176                 shift = 10;
177         }
178
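        /* the requested timeout is encoded as value * 2^shift: value goes in
         * the low bits of the register and the timescale shift at bit 12,
         * alongside the enable flag. */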
179         /* set new timeslice */
180         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
181                 value | (shift << 12) |
182                 fifo_runlist_timeslice_enable_true_f());
183
184         /* enable channel */
185         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
186                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
187                 ccsr_channel_enable_set_true_f());
188
189         return 0;
190 }
191
192 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
193                         u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
194 {
195         void *inst_ptr;
196
197         gk20a_dbg_fn("");
198
199         inst_ptr = c->inst_block.cpu_va;
200         if (!inst_ptr)
201                 return -ENOMEM;
202
203         memset(inst_ptr, 0, ram_fc_size_val_v());
204
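        /* program the GPFIFO base (the address is stored shifted right by the
         * reserved low bits) and its size as log2 of the entry count. */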
205         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
206                 pbdma_gp_base_offset_f(
207                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
208
209         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
210                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
211                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
212
213         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
214                  c->g->ops.fifo.get_pbdma_signature(c->g));
215
216         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
217                 pbdma_formats_gp_fermi0_f() |
218                 pbdma_formats_pb_fermi1_f() |
219                 pbdma_formats_mp_fermi0_f());
220
221         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
222                 pbdma_pb_header_priv_user_f() |
223                 pbdma_pb_header_method_zero_f() |
224                 pbdma_pb_header_subchannel_zero_f() |
225                 pbdma_pb_header_level_main_f() |
226                 pbdma_pb_header_first_true_f() |
227                 pbdma_pb_header_type_inc_f());
228
229         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
230                 pbdma_subdevice_id_f(1) |
231                 pbdma_subdevice_status_active_f() |
232                 pbdma_subdevice_channel_dma_enable_f());
233
234         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
235
236         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
237                 pbdma_acquire_retry_man_2_f() |
238                 pbdma_acquire_retry_exp_2_f() |
239                 pbdma_acquire_timeout_exp_max_f() |
240                 pbdma_acquire_timeout_man_max_f() |
241                 pbdma_acquire_timeout_en_disable_f());
242
243         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
244                 fifo_runlist_timeslice_timeout_128_f() |
245                 fifo_runlist_timeslice_timescale_3_f() |
246                 fifo_runlist_timeslice_enable_true_f());
247
248         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
249                 fifo_pb_timeslice_timeout_16_f() |
250                 fifo_pb_timeslice_timescale_0_f() |
251                 fifo_pb_timeslice_enable_true_f());
252
253         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
254
255         return channel_gk20a_commit_userd(c);
256 }
257
258 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
259 {
260         BUG_ON(!c->userd_cpu_va);
261
262         gk20a_dbg_fn("");
263
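        /* clear all USERD pointers (GET/PUT, GP_GET/GP_PUT, etc.) so the new
         * channel starts from a known empty state. */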
264         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
265         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
266         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
267         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
268         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
269         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
270         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
271         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
272         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
273         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
274
275         return 0;
276 }
277
278 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
279 {
280         struct gk20a *g = ch_gk20a->g;
281         struct fifo_gk20a *f = &g->fifo;
282         struct fifo_engine_info_gk20a *engine_info =
283                 f->engine_info + ENGINE_GR_GK20A;
284
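        /* the hardware refers to the instance block by its physical address
         * shifted right by ram_in_base_shift_v(). */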
285         u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block)
286                 >> ram_in_base_shift_v();
287
288         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
289                 ch_gk20a->hw_chid, inst_ptr);
290
291         ch_gk20a->bound = true;
292
293         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
294                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
295                  ~ccsr_channel_runlist_f(~0)) |
296                  ccsr_channel_runlist_f(engine_info->runlist_id));
297
298         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
299                 ccsr_channel_inst_ptr_f(inst_ptr) |
300                 ccsr_channel_inst_target_vid_mem_f() |
301                 ccsr_channel_inst_bind_true_f());
302
303         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
304                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
305                  ~ccsr_channel_enable_set_f(~0)) |
306                  ccsr_channel_enable_set_true_f());
307 }
308
309 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
310 {
311         struct gk20a *g = ch_gk20a->g;
312
313         gk20a_dbg_fn("");
314
315         if (ch_gk20a->bound)
316                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
317                         ccsr_channel_inst_ptr_f(0) |
318                         ccsr_channel_inst_bind_false_f());
319
320         ch_gk20a->bound = false;
321 }
322
323 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
324 {
325         int err;
326
327         gk20a_dbg_fn("");
328
329         err = gk20a_alloc_inst_block(g, &ch->inst_block);
330         if (err)
331                 return err;
332
333         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
334                 ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block));
335
336         gk20a_dbg_fn("done");
337         return 0;
338 }
339
340 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
341 {
342         gk20a_free_inst_block(g, &ch->inst_block);
343 }
344
345 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
346 {
347         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
348 }
349
350 void channel_gk20a_enable(struct channel_gk20a *ch)
351 {
352         /* enable channel */
353         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
354                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
355                 ccsr_channel_enable_set_true_f());
356 }
357
358 void channel_gk20a_disable(struct channel_gk20a *ch)
359 {
360         /* disable channel */
361         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
362                 gk20a_readl(ch->g,
363                         ccsr_channel_r(ch->hw_chid)) |
364                         ccsr_channel_enable_clr_true_f());
365 }
366
367 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
368 {
369         struct channel_gk20a_job *job, *n;
370         bool released_job_semaphore = false;
371
372         gk20a_dbg_fn("");
373
374         /* make sure new kickoffs are prevented */
375         ch->has_timedout = true;
376
377         ch->g->ops.fifo.disable_channel(ch);
378
379         if (channel_preempt)
380                 gk20a_fifo_preempt(ch->g, ch);
381
382         gk20a_channel_cancel_job_clean_up(ch, true);
383
384         /* ensure no fences are pending */
385         mutex_lock(&ch->sync_lock);
386         if (ch->sync)
387                 ch->sync->set_min_eq_max(ch->sync);
388         mutex_unlock(&ch->sync_lock);
389
390         /* release all job semaphores (applies only to jobs that use
391            semaphore synchronization) */
392         spin_lock(&ch->jobs_lock);
393         list_for_each_entry_safe(job, n, &ch->jobs, list) {
394                 if (job->post_fence->semaphore) {
395                         gk20a_semaphore_release(job->post_fence->semaphore);
396                         released_job_semaphore = true;
397                 }
398         }
399         spin_unlock(&ch->jobs_lock);
400
401         if (released_job_semaphore)
402                 wake_up_interruptible_all(&ch->semaphore_wq);
403
404         gk20a_channel_update(ch, 0);
405 }
406
407 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
408 {
409         bool channel_idle = false;
410         unsigned long end_jiffies = jiffies +
411                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
412
413         do {
414                 spin_lock(&ch->jobs_lock);
415                 channel_idle = list_empty(&ch->jobs);
416                 spin_unlock(&ch->jobs_lock);
417                 if (channel_idle)
418                         break;
419
420                 usleep_range(1000, 3000);
421         } while (time_before(jiffies, end_jiffies)
422                         || !tegra_platform_is_silicon());
423
424         if (!channel_idle) {
425                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
426                                 ch->hw_chid);
427                 return -EBUSY;
428         }
429
430         return 0;
431 }
432
433 void gk20a_disable_channel(struct channel_gk20a *ch)
434 {
435         gk20a_channel_abort(ch, true);
436         channel_gk20a_update_runlist(ch, false);
437 }
438
439 #if defined(CONFIG_GK20A_CYCLE_STATS)
440
441 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
442 {
443         /* disable existing cyclestats buffer */
444         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
445         if (ch->cyclestate.cyclestate_buffer_handler) {
446                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
447                                 ch->cyclestate.cyclestate_buffer);
448                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
449                 ch->cyclestate.cyclestate_buffer_handler = NULL;
450                 ch->cyclestate.cyclestate_buffer = NULL;
451                 ch->cyclestate.cyclestate_buffer_size = 0;
452         }
453         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
454 }
455
456 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
457                        struct nvgpu_cycle_stats_args *args)
458 {
459         struct dma_buf *dmabuf;
460         void *virtual_address;
461
462         /* is this call supported on the current GPU? */
463         if (0 == (ch->g->gpu_characteristics.flags &
464                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
465                 return -ENOSYS;
466
467         if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
468
469                 /* set up new cyclestats buffer */
470                 dmabuf = dma_buf_get(args->dmabuf_fd);
471                 if (IS_ERR(dmabuf))
472                         return PTR_ERR(dmabuf);
473                 virtual_address = dma_buf_vmap(dmabuf);
474                 if (!virtual_address)
475                         return -ENOMEM;
476
477                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
478                 ch->cyclestate.cyclestate_buffer = virtual_address;
479                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
480                 return 0;
481
482         } else if (!args->dmabuf_fd &&
483                         ch->cyclestate.cyclestate_buffer_handler) {
484                 gk20a_free_cycle_stats_buffer(ch);
485                 return 0;
486
487         } else if (!args->dmabuf_fd &&
488                         !ch->cyclestate.cyclestate_buffer_handler) {
489                 /* no request from GL */
490                 return 0;
491
492         } else {
493                 pr_err("channel already has cyclestats buffer\n");
494                 return -EINVAL;
495         }
496 }
497
498
499 static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
500 {
501         int ret;
502
503         mutex_lock(&ch->cs_client_mutex);
504         if (ch->cs_client)
505                 ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
506         else
507                 ret = -EBADF;
508         mutex_unlock(&ch->cs_client_mutex);
509
510         return ret;
511 }
512
513 static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
514                                 u32 dmabuf_fd,
515                                 u32 perfmon_id_count,
516                                 u32 *perfmon_id_start)
517 {
518         int ret;
519
520         mutex_lock(&ch->cs_client_mutex);
521         if (ch->cs_client) {
522                 ret = -EEXIST;
523         } else {
524                 ret = gr_gk20a_css_attach(ch->g,
525                                         dmabuf_fd,
526                                         perfmon_id_count,
527                                         perfmon_id_start,
528                                         &ch->cs_client);
529         }
530         mutex_unlock(&ch->cs_client_mutex);
531
532         return ret;
533 }
534
535 static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
536 {
537         int ret;
538
539         mutex_lock(&ch->cs_client_mutex);
540         if (ch->cs_client) {
541                 ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
542                 ch->cs_client = NULL;
543         } else {
544                 ret = 0;
545         }
546         mutex_unlock(&ch->cs_client_mutex);
547
548         return ret;
549 }
550
551 static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
552                         struct nvgpu_cycle_stats_snapshot_args *args)
553 {
554         int ret;
555
556         /* is this call supported on the current GPU? */
557         if (0 == (ch->g->gpu_characteristics.flags &
558                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT))
559                 return -ENOSYS;
560
561         if (!args->dmabuf_fd)
562                 return -EINVAL;
563
564         /* handle the command (most frequent cases first) */
565         switch (args->cmd) {
566         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
567                 ret = gk20a_flush_cycle_stats_snapshot(ch);
568                 args->extra = 0;
569                 break;
570
571         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
572                 ret = gk20a_attach_cycle_stats_snapshot(ch,
573                                                 args->dmabuf_fd,
574                                                 args->extra,
575                                                 &args->extra);
576                 break;
577
578         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
579                 ret = gk20a_free_cycle_stats_snapshot(ch);
580                 args->extra = 0;
581                 break;
582
583         default:
584                 pr_err("cyclestats: unknown command %u\n", args->cmd);
585                 ret = -EINVAL;
586                 break;
587         }
588
589         return ret;
590 }
591 #endif
592
593 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
594                 struct nvgpu_set_error_notifier *args)
595 {
596         struct device *dev = dev_from_gk20a(ch->g);
597         struct dma_buf *dmabuf;
598         void *va;
599         u64 end = args->offset + sizeof(struct nvgpu_notification);
600
601         if (!args->mem) {
602                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
603                 return -EINVAL;
604         }
605
606         dmabuf = dma_buf_get(args->mem);
607
608         if (ch->error_notifier_ref)
609                 gk20a_free_error_notifiers(ch);
610
611         if (IS_ERR(dmabuf)) {
612                 pr_err("Invalid handle: %d\n", args->mem);
613                 return -EINVAL;
614         }
615
616         if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) {
617                 dma_buf_put(dmabuf);
618                 gk20a_err(dev, "gk20a_init_error_notifier: invalid offset\n");
619                 return -EINVAL;
620         }
621
622         /* map handle */
623         va = dma_buf_vmap(dmabuf);
624         if (!va) {
625                 dma_buf_put(dmabuf);
626                 pr_err("Cannot map notifier handle\n");
627                 return -ENOMEM;
628         }
629
630         /* set channel notifiers pointer */
631         ch->error_notifier_ref = dmabuf;
632         ch->error_notifier = va + args->offset;
633         ch->error_notifier_va = va;
634         memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
635         return 0;
636 }
637
638 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
639 {
640         if (ch->error_notifier_ref) {
641                 struct timespec time_data;
642                 u64 nsec;
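                /* timestamp the notification in nanoseconds, split into two
                 * 32-bit halves as the notifier layout expects */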
643                 getnstimeofday(&time_data);
644                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
645                                 (u64)time_data.tv_nsec;
646                 ch->error_notifier->time_stamp.nanoseconds[0] =
647                                 (u32)nsec;
648                 ch->error_notifier->time_stamp.nanoseconds[1] =
649                                 (u32)(nsec >> 32);
650                 ch->error_notifier->info32 = error;
651                 ch->error_notifier->status = 0xffff;
652
653                 gk20a_err(dev_from_gk20a(ch->g),
654                     "error notifier set to %d for ch %d", error, ch->hw_chid);
655         }
656 }
657
658 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
659 {
660         if (ch->error_notifier_ref) {
661                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
662                 dma_buf_put(ch->error_notifier_ref);
663                 ch->error_notifier_ref = NULL;
664                 ch->error_notifier = NULL;
665                 ch->error_notifier_va = NULL;
666         }
667 }
668
669 /* Returns delta of cyclic integers a and b. If a is ahead of b, delta
670  * is positive */
671 static int cyclic_delta(int a, int b)
672 {
673         return a - b;
674 }
675
676 static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
677 {
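        /* snapshot how many hw interrupts have been raised so far; the
         * *_last_handled counters are bumped by the deferred handlers, so once
         * they catch up to this snapshot all previously raised interrupts
         * have been fully processed. */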
678         int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
679         int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);
680
681         /* wait until all stalling irqs are handled */
682         wait_event(g->sw_irq_stall_last_handled_wq,
683                    cyclic_delta(stall_irq_threshold,
684                                 atomic_read(&g->sw_irq_stall_last_handled))
685                    <= 0);
686
687         /* wait until all non-stalling irqs are handled */
688         wait_event(g->sw_irq_nonstall_last_handled_wq,
689                    cyclic_delta(nonstall_irq_threshold,
690                                 atomic_read(&g->sw_irq_nonstall_last_handled))
691                    <= 0);
692 }
693
694 static void gk20a_wait_until_counter_is_N(
695         struct channel_gk20a *ch, atomic_t *counter, int wait_value,
696         wait_queue_head_t *wq, const char *caller, const char *counter_name)
697 {
698         while (true) {
699                 if (wait_event_timeout(
700                             *wq,
701                             atomic_read(counter) == wait_value,
702                             msecs_to_jiffies(5000)) > 0)
703                         break;
704
705                 gk20a_warn(dev_from_gk20a(ch->g),
706                            "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
707                            caller, ch->hw_chid, counter_name,
708                            atomic_read(counter), wait_value);
709         }
710 }
711
712
713
714 /* call ONLY when no references to the channel exist: after the last put */
715 static void gk20a_free_channel(struct channel_gk20a *ch)
716 {
717         struct gk20a *g = ch->g;
718         struct fifo_gk20a *f = &g->fifo;
719         struct gr_gk20a *gr = &g->gr;
720         struct vm_gk20a *ch_vm = ch->vm;
721         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
722         struct dbg_session_gk20a *dbg_s;
723         bool was_reset;
724         gk20a_dbg_fn("");
725
726         WARN_ON(ch->g == NULL);
727
728         trace_gk20a_free_channel(ch->hw_chid);
729
730         /* abort channel and remove from runlist */
731         gk20a_disable_channel(ch);
732
733         /* wait until there's only our ref to the channel */
734         gk20a_wait_until_counter_is_N(
735                 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
736                 __func__, "references");
737
738         /* wait until all pending interrupts for recently completed
739          * jobs are handled */
740         gk20a_wait_for_deferred_interrupts(g);
741
742         /* prevent new refs */
743         spin_lock(&ch->ref_obtain_lock);
744         if (!ch->referenceable) {
745                 spin_unlock(&ch->ref_obtain_lock);
746                 gk20a_err(dev_from_gk20a(ch->g),
747                           "Extra %s() called to channel %u",
748                           __func__, ch->hw_chid);
749                 return;
750         }
751         ch->referenceable = false;
752         spin_unlock(&ch->ref_obtain_lock);
753
754         /* matches with the initial reference in gk20a_open_new_channel() */
755         atomic_dec(&ch->ref_count);
756
757         /* wait until no more refs to the channel */
758         gk20a_wait_until_counter_is_N(
759                 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
760                 __func__, "references");
761
762         /* if engine reset was deferred, perform it now */
763         mutex_lock(&f->deferred_reset_mutex);
764         if (g->fifo.deferred_reset_pending) {
765                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
766                            " deferred, running now");
767                 was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
768                 mutex_lock(&g->fifo.gr_reset_mutex);
769                 /* if the lock was already taken, a reset is already taking
770                  * place, so there is no need to repeat it */
771                 if (!was_reset) {
772                         gk20a_fifo_reset_engine(g,
773                                 g->fifo.deferred_fault_engines);
774                 }
775                 mutex_unlock(&g->fifo.gr_reset_mutex);
776                 g->fifo.deferred_fault_engines = 0;
777                 g->fifo.deferred_reset_pending = false;
778         }
779         mutex_unlock(&f->deferred_reset_mutex);
780
781         if (!ch->bound)
782                 goto release;
783
784         if (!gk20a_channel_as_bound(ch))
785                 goto unbind;
786
787         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
788                         timeout);
789
790         gk20a_free_error_notifiers(ch);
791
792         /* release channel ctx */
793         g->ops.gr.free_channel_ctx(ch);
794
795         gk20a_gr_flush_channel_tlb(gr);
796
797         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
798
799         gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
800
801         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
802
803 #if defined(CONFIG_GK20A_CYCLE_STATS)
804         gk20a_free_cycle_stats_buffer(ch);
805         gk20a_free_cycle_stats_snapshot(ch);
806 #endif
807
808         channel_gk20a_free_priv_cmdbuf(ch);
809
810         /* sync must be destroyed before releasing channel vm */
811         mutex_lock(&ch->sync_lock);
812         if (ch->sync) {
813                 gk20a_channel_sync_destroy(ch->sync);
814                 ch->sync = NULL;
815         }
816         mutex_unlock(&ch->sync_lock);
817
818         /* release channel binding to the as_share */
819         if (ch_vm->as_share)
820                 gk20a_as_release_share(ch_vm->as_share);
821         else
822                 gk20a_vm_put(ch_vm);
823
824         spin_lock(&ch->update_fn_lock);
825         ch->update_fn = NULL;
826         ch->update_fn_data = NULL;
827         spin_unlock(&ch->update_fn_lock);
828         cancel_work_sync(&ch->update_fn_work);
829
830         /* make sure we don't have deferred interrupts pending that
831          * could still touch the channel */
832         gk20a_wait_for_deferred_interrupts(g);
833
834 unbind:
835         if (gk20a_is_channel_marked_as_tsg(ch))
836                 gk20a_tsg_unbind_channel(ch);
837
838         g->ops.fifo.unbind_channel(ch);
839         g->ops.fifo.free_inst(g, ch);
840
841         ch->vpr = false;
842         ch->vm = NULL;
843
844         mutex_lock(&ch->last_submit.fence_lock);
845         gk20a_fence_put(ch->last_submit.pre_fence);
846         gk20a_fence_put(ch->last_submit.post_fence);
847         ch->last_submit.pre_fence = NULL;
848         ch->last_submit.post_fence = NULL;
849         mutex_unlock(&ch->last_submit.fence_lock);
850         WARN_ON(ch->sync);
851
852         /* unlink all debug sessions */
853         mutex_lock(&ch->dbg_s_lock);
854
855         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
856                 dbg_s->ch = NULL;
857                 list_del_init(&dbg_s->dbg_s_list_node);
858         }
859
860         mutex_unlock(&ch->dbg_s_lock);
861
862 release:
863         /* make sure we catch accesses to unopened channels in case
864          * there are non-refcounted channel pointers hanging around */
865         ch->g = NULL;
866         wmb();
867
868         /* ALWAYS last */
869         free_channel(f, ch);
870 }
871
872 /* Try to get a reference to the channel. Returns NULL if it fails; in that
873  * case the channel is dead or being freed elsewhere and you must not touch it.
874  *
875  * Whenever a channel_gk20a pointer is about to be used, a reference must be
876  * held to it - either by you or by the caller, which should be documented
877  * well or otherwise made clear. This usually boils down to the
878  * file from ioctls directly, or an explicit get in exception handlers when the
879  * channel is found by a hw_chid.
880  *
881  * Most global functions in this file require a reference to be held by the
882  * caller.
883  */
884 struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
885                                          const char *caller) {
886         struct channel_gk20a *ret;
887
888         spin_lock(&ch->ref_obtain_lock);
889
890         if (likely(ch->referenceable)) {
891                 atomic_inc(&ch->ref_count);
892                 ret = ch;
893         } else
894                 ret = NULL;
895
896         spin_unlock(&ch->ref_obtain_lock);
897
898         if (ret)
899                 trace_gk20a_channel_get(ch->hw_chid, caller);
900
901         return ret;
902 }
903
904 void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
905 {
906         trace_gk20a_channel_put(ch->hw_chid, caller);
907         atomic_dec(&ch->ref_count);
908         wake_up_all(&ch->ref_count_dec_wq);
909
910         /* More puts than gets. Channel is probably going to get
911          * stuck. */
912         WARN_ON(atomic_read(&ch->ref_count) < 0);
913
914         /* Also, more puts than gets. ref_count can go to 0 only if
915          * the channel is closing. Channel is probably going to get
916          * stuck. */
917         WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
918 }
919
920 void gk20a_channel_close(struct channel_gk20a *ch)
921 {
922         gk20a_free_channel(ch);
923 }
924
925 int gk20a_channel_release(struct inode *inode, struct file *filp)
926 {
927         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
928         struct gk20a *g = ch ? ch->g : NULL;
929         int err;
930
931         if (!ch)
932                 return 0;
933
934         trace_gk20a_channel_release(dev_name(&g->dev->dev));
935
936         err = gk20a_busy(g->dev);
937         if (err) {
938                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
939                         ch->hw_chid);
940                 return err;
941         }
942         gk20a_channel_close(ch);
943         gk20a_idle(g->dev);
944
945         filp->private_data = NULL;
946         return 0;
947 }
948
949 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
950 {
951         struct channel_gk20a *ch =
952                 container_of(work, struct channel_gk20a, update_fn_work);
953         void (*update_fn)(struct channel_gk20a *, void *);
954         void *update_fn_data;
955
956         spin_lock(&ch->update_fn_lock);
957         update_fn = ch->update_fn;
958         update_fn_data = ch->update_fn_data;
959         spin_unlock(&ch->update_fn_lock);
960
961         if (update_fn)
962                 update_fn(ch, update_fn_data);
963 }
964
965 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
966                 void (*update_fn)(struct channel_gk20a *, void *),
967                 void *update_fn_data)
968 {
969         struct channel_gk20a *ch = gk20a_open_new_channel(g);
970
971         if (ch) {
972                 spin_lock(&ch->update_fn_lock);
973                 ch->update_fn = update_fn;
974                 ch->update_fn_data = update_fn_data;
975                 spin_unlock(&ch->update_fn_lock);
976         }
977
978         return ch;
979 }
980
981 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
982 {
983         struct fifo_gk20a *f = &g->fifo;
984         struct channel_gk20a *ch;
985
986         gk20a_dbg_fn("");
987
988         ch = allocate_channel(f);
989         if (ch == NULL) {
990                 /* TBD: we want to make this virtualizable */
991                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
992                 return NULL;
993         }
994
995         trace_gk20a_open_new_channel(ch->hw_chid);
996
997         BUG_ON(ch->g);
998         ch->g = g;
999
1000         if (g->ops.fifo.alloc_inst(g, ch)) {
1001                 ch->g = NULL;
1002                 free_channel(f, ch);
1003                 gk20a_err(dev_from_gk20a(g),
1004                            "failed to open gk20a channel, out of inst mem");
1005                 return NULL;
1006         }
1007
1008         /* now the channel is in limbo: off the free list but not yet marked
1009          * as alive and used (i.e. get-able) */
1010
1011         ch->pid = current->pid;
1012
1013         /* By default, channel is regular (non-TSG) channel */
1014         ch->tsgid = NVGPU_INVALID_TSG_ID;
1015
1016         /* reset timeout counter and update timestamp */
1017         ch->timeout_accumulated_ms = 0;
1018         ch->timeout_gpfifo_get = 0;
1019         /* set gr host default timeout */
1020         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
1021         ch->timeout_debug_dump = true;
1022         ch->has_timedout = false;
1023         ch->obj_class = 0;
1024         ch->clean_up.scheduled = false;
1025
1026         /* The channel is *not* runnable at this point. It still needs to have
1027          * an address space bound and a gpfifo and grctx allocated. */
1028
1029         init_waitqueue_head(&ch->notifier_wq);
1030         init_waitqueue_head(&ch->semaphore_wq);
1031         init_waitqueue_head(&ch->submit_wq);
1032
1033         mutex_init(&ch->poll_events.lock);
1034         ch->poll_events.events_enabled = false;
1035         ch->poll_events.num_pending_events = 0;
1036
1037         ch->update_fn = NULL;
1038         ch->update_fn_data = NULL;
1039         spin_lock_init(&ch->update_fn_lock);
1040         INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
1041
1042         /* Mark the channel alive, get-able, with one initial use
1043          * reference. The initial reference will be dropped in
1044          * gk20a_free_channel() */
1045         ch->referenceable = true;
1046         atomic_set(&ch->ref_count, 1);
1047         wmb();
1048
1049         return ch;
1050 }
1051
1052 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
1053 {
1054         int err;
1055         struct channel_gk20a *ch;
1056
1057         trace_gk20a_channel_open(dev_name(&g->dev->dev));
1058
1059         err = gk20a_busy(g->dev);
1060         if (err) {
1061                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
1062                 return err;
1063         }
1064         ch = gk20a_open_new_channel(g);
1065         gk20a_idle(g->dev);
1066         if (!ch) {
1067                 gk20a_err(dev_from_gk20a(g),
1068                         "failed to get f");
1069                 return -ENOMEM;
1070         }
1071
1072         filp->private_data = ch;
1073         return 0;
1074 }
1075
1076 int gk20a_channel_open(struct inode *inode, struct file *filp)
1077 {
1078         struct gk20a *g = container_of(inode->i_cdev,
1079                         struct gk20a, channel.cdev);
1080         int ret;
1081
1082         gk20a_dbg_fn("start");
1083         ret = __gk20a_channel_open(g, filp);
1084
1085         gk20a_dbg_fn("end");
1086         return ret;
1087 }
1088
1089 int gk20a_channel_open_ioctl(struct gk20a *g,
1090                 struct nvgpu_channel_open_args *args)
1091 {
1092         int err;
1093         int fd;
1094         struct file *file;
1095         char *name;
1096
1097         err = get_unused_fd_flags(O_RDWR);
1098         if (err < 0)
1099                 return err;
1100         fd = err;
1101
1102         name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
1103                         dev_name(&g->dev->dev), fd);
1104         if (!name) {
1105                 err = -ENOMEM;
1106                 goto clean_up;
1107         }
1108
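        /* back the new fd with an anonymous inode that uses the channel
         * cdev's file operations, so it behaves like an fd obtained by
         * opening the device node directly */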
1109         file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
1110         kfree(name);
1111         if (IS_ERR(file)) {
1112                 err = PTR_ERR(file);
1113                 goto clean_up;
1114         }
1115
1116         err = __gk20a_channel_open(g, file);
1117         if (err)
1118                 goto clean_up_file;
1119
1120         fd_install(fd, file);
1121         args->channel_fd = fd;
1122         return 0;
1123
1124 clean_up_file:
1125         fput(file);
1126 clean_up:
1127         put_unused_fd(fd);
1128         return err;
1129 }
1130
1131 /* allocate the private cmd buffer,
1132    used for inserting commands before/after user-submitted buffers. */
1133 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
1134 {
1135         struct device *d = dev_from_gk20a(c->g);
1136         struct vm_gk20a *ch_vm = c->vm;
1137         struct priv_cmd_queue *q = &c->priv_cmd_q;
1138         u32 size;
1139         int err = 0;
1140
1141         /* Kernel can insert gpfifos before and after user gpfifos.
1142            Before user gpfifos, kernel inserts fence_wait, which takes
1143            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
1144            After user gpfifos, kernel inserts fence_get, which takes
1145            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
1146            = 6 dwords.
1147            Worst case, if the kernel adds both of them for every user gpfifo,
1148            the max size of the priv_cmdbuf is:
1149            gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
1150         size = roundup_pow_of_two(
1151                 c->gpfifo.entry_num * 2 * 12 * sizeof(u32) / 3);
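        /* the size must be a power of two: gk20a_channel_alloc_priv_cmdbuf()
         * wraps the put pointer with q->put & (q->size - 1) */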
1152
1153         err = gk20a_gmmu_alloc_map(ch_vm, size, &q->mem);
1154         if (err) {
1155                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1156                 goto clean_up;
1157         }
1158
1159         q->size = q->mem.size / sizeof (u32);
1160
1161         return 0;
1162
1163 clean_up:
1164         channel_gk20a_free_priv_cmdbuf(c);
1165         return err;
1166 }
1167
1168 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
1169 {
1170         struct vm_gk20a *ch_vm = c->vm;
1171         struct priv_cmd_queue *q = &c->priv_cmd_q;
1172
1173         if (q->size == 0)
1174                 return;
1175
1176         gk20a_gmmu_unmap_free(ch_vm, &q->mem);
1177
1178         memset(q, 0, sizeof(struct priv_cmd_queue));
1179 }
1180
1181 /* allocate a cmd buffer with given size. size is number of u32 entries */
1182 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1183                              struct priv_cmd_entry **entry)
1184 {
1185         struct priv_cmd_queue *q = &c->priv_cmd_q;
1186         struct priv_cmd_entry *e;
1187         u32 free_count;
1188         u32 size = orig_size;
1189
1190         gk20a_dbg_fn("size %d", orig_size);
1191
1192         *entry = NULL;
1193
1194         /* if the free space at the end is less than requested, increase the
1195          * size so that the real allocation starts at the beginning. */
1196         if (q->put + size > q->size)
1197                 size = orig_size + (q->size - q->put);
1198
1199         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
1200                         c->hw_chid, q->get, q->put);
1201
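        /* one slot is always left unused so that put == get can only mean an
         * empty queue, never a full one */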
1202         free_count = (q->size - (q->put - q->get) - 1) % q->size;
1203
1204         if (size > free_count)
1205                 return -ENOSPC;
1206
1207         e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1208         if (!e) {
1209                 gk20a_err(dev_from_gk20a(c->g),
1210                         "ch %d: fail to allocate priv cmd entry",
1211                         c->hw_chid);
1212                 return -ENOMEM;
1213         }
1214
1215         e->size = orig_size;
1216         e->gp_get = c->gpfifo.get;
1217         e->gp_put = c->gpfifo.put;
1218         e->gp_wrap = c->gpfifo.wrap;
1219
1220         /* if we increased the size to skip the free space at the end, set put
1221            to the beginning of the cmd buffer (0) + size */
1222         if (size != orig_size) {
1223                 e->ptr = (u32 *)q->mem.cpu_va;
1224                 e->gva = q->mem.gpu_va;
1225                 q->put = orig_size;
1226         } else {
1227                 e->ptr = (u32 *)q->mem.cpu_va + q->put;
1228                 e->gva = q->mem.gpu_va + q->put * sizeof(u32);
1229                 q->put = (q->put + orig_size) & (q->size - 1);
1230         }
1231
1232         /* we already handled q->put + size > q->size so BUG_ON this */
1233         BUG_ON(q->put > q->size);
1234
1235         *entry = e;
1236
1237         gk20a_dbg_fn("done");
1238
1239         return 0;
1240 }
1241
1242 /* Don't call this to free an explicit cmd entry.
1243  * It doesn't update priv_cmd_queue get/put */
1244 static void free_priv_cmdbuf(struct channel_gk20a *c,
1245                              struct priv_cmd_entry *e)
1246 {
1247         kfree(e);
1248 }
1249
1250 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1251                 struct nvgpu_alloc_gpfifo_args *args)
1252 {
1253         struct gk20a *g = c->g;
1254         struct device *d = dev_from_gk20a(g);
1255         struct vm_gk20a *ch_vm;
1256         u32 gpfifo_size;
1257         int err = 0;
1258
1259         /* The kernel can insert one extra gpfifo entry before user-submitted
1260            gpfifos and another one after, for internal use. Triple the requested size. */
1261         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
1262
1263         if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1264                 c->vpr = true;
1265
1266         /* an address space needs to have been bound at this point. */
1267         if (!gk20a_channel_as_bound(c)) {
1268                 gk20a_err(d,
1269                             "not bound to an address space at time of gpfifo"
1270                             " allocation.");
1271                 return -EINVAL;
1272         }
1273         ch_vm = c->vm;
1274
1275         c->cmds_pending = false;
1276         mutex_lock(&c->last_submit.fence_lock);
1277         gk20a_fence_put(c->last_submit.pre_fence);
1278         gk20a_fence_put(c->last_submit.post_fence);
1279         c->last_submit.pre_fence = NULL;
1280         c->last_submit.post_fence = NULL;
1281         mutex_unlock(&c->last_submit.fence_lock);
1282
1283         c->ramfc.offset = 0;
1284         c->ramfc.size = ram_in_ramfc_s() / 8;
1285
1286         if (c->gpfifo.mem.cpu_va) {
1287                 gk20a_err(d, "channel %d :"
1288                            "gpfifo already allocated", c->hw_chid);
1289                 return -EEXIST;
1290         }
1291
1292         err = gk20a_gmmu_alloc_map(ch_vm, gpfifo_size * sizeof(struct gpfifo),
1293                         &c->gpfifo.mem);
1294         if (err) {
1295                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1296                 goto clean_up;
1297         }
1298
1299         c->gpfifo.entry_num = gpfifo_size;
1300         c->gpfifo.get = c->gpfifo.put = 0;
1301
1302         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1303                 c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);
1304
1305         channel_gk20a_setup_userd(c);
1306
1307         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
1308                                         c->gpfifo.entry_num, args->flags);
1309         if (err)
1310                 goto clean_up_unmap;
1311
1312         /* TBD: setup engine contexts */
1313
1314         err = channel_gk20a_alloc_priv_cmdbuf(c);
1315         if (err)
1316                 goto clean_up_unmap;
1317
1318         err = channel_gk20a_update_runlist(c, true);
1319         if (err)
1320                 goto clean_up_unmap;
1321
1322         g->ops.fifo.bind_channel(c);
1323
1324         gk20a_dbg_fn("done");
1325         return 0;
1326
1327 clean_up_unmap:
1328         gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
1329 clean_up:
1330         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1331         gk20a_err(d, "fail");
1332         return err;
1333 }
1334
1335 /* Call this periodically to refresh the cached GP_GET and see how the gpfifo is draining. */
1336 static inline u32 update_gp_get(struct gk20a *g,
1337                                 struct channel_gk20a *c)
1338 {
1339         u32 new_get = gk20a_bar1_readl(g,
1340                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
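        /* GP_GET only moves forward; if the value read from USERD went down,
         * the hardware wrapped around the end of the gpfifo ring */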
1341         if (new_get < c->gpfifo.get)
1342                 c->gpfifo.wrap = !c->gpfifo.wrap;
1343         c->gpfifo.get = new_get;
1344         return new_get;
1345 }
1346
1347 static inline u32 gp_free_count(struct channel_gk20a *c)
1348 {
1349         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1350                 c->gpfifo.entry_num;
1351 }
1352
1353 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1354                 u32 timeout_delta_ms)
1355 {
1356         u32 gpfifo_get = update_gp_get(ch->g, ch);
1357         /* Count consecutive timeout ISRs */
1358         if (gpfifo_get == ch->timeout_gpfifo_get) {
1359                 /* we didn't advance since previous channel timeout check */
1360                 ch->timeout_accumulated_ms += timeout_delta_ms;
1361         } else {
1362                 /* first timeout isr encountered */
1363                 ch->timeout_accumulated_ms = timeout_delta_ms;
1364         }
1365
1366         ch->timeout_gpfifo_get = gpfifo_get;
1367
1368         return ch->g->timeouts_enabled &&
1369                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1370 }
1371
1372 static u32 get_gp_free_count(struct channel_gk20a *c)
1373 {
1374         update_gp_get(c->g, c);
1375         return gp_free_count(c);
1376 }
1377
1378 static void trace_write_pushbuffer(struct channel_gk20a *c,
1379                                    struct nvgpu_gpfifo *g)
1380 {
1381         void *mem = NULL;
1382         unsigned int words;
1383         u64 offset;
1384         struct dma_buf *dmabuf = NULL;
1385
1386         if (gk20a_debug_trace_cmdbuf) {
1387                 u64 gpu_va = (u64)g->entry0 |
1388                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1389                 int err;
1390
1391                 words = pbdma_gp_entry1_length_v(g->entry1);
1392                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1393                 if (!err)
1394                         mem = dma_buf_vmap(dmabuf);
1395         }
1396
1397         if (mem) {
1398                 u32 i;
1399                 /*
1400                  * Write in batches of 128 as there seems to be a limit
1401                  * on how much you can output to ftrace at once.
1402                  */
1403                 for (i = 0; i < words; i += 128U) {
1404                         trace_gk20a_push_cmdbuf(
1405                                 c->g->dev->name,
1406                                 0,
1407                                 min(words - i, 128U),
1408                                 offset + i * sizeof(u32),
1409                                 mem);
1410                 }
1411                 dma_buf_vunmap(dmabuf, mem);
1412         }
1413 }
1414
1415 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1416                                          struct nvgpu_gpfifo *g,
1417                                          struct nvgpu_submit_gpfifo_args *args,
1418                                          int offset,
1419                                          int count)
1420 {
1421         u32 size;
1422         int i;
1423         struct nvgpu_gpfifo *gp;
1424         bool gpfifo_allocated = false;
1425
1426         if (!gk20a_debug_trace_cmdbuf)
1427                 return;
1428
1429         if (!g && !args)
1430                 return;
1431
1432         if (!g) {
1433                 size = args->num_entries * sizeof(struct nvgpu_gpfifo);
1434                 if (size) {
1435                         g = nvgpu_alloc(size, false);
1436                         if (!g)
1437                                 return;
1438
1439                         if (copy_from_user(g,
1440                                 (void __user *)(uintptr_t)args->gpfifo, size)) {
1441                                 return;
1442                         }
1443                 }
1444                 gpfifo_allocated = true;
1445         }
1446
1447         gp = g + offset;
1448         for (i = 0; i < count; i++, gp++)
1449                 trace_write_pushbuffer(c, gp);
1450
1451         if (gpfifo_allocated)
1452                 nvgpu_free(g);
1453 }
1454
1455 static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
1456                                         struct priv_cmd_entry *e)
1457 {
1458         struct priv_cmd_queue *q = &c->priv_cmd_q;
1459         u32 cmd_entry_start;
1460         struct device *d = dev_from_gk20a(c->g);
1461
1462         if (!e)
1463                 return 0;
1464
1465         cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
1466         if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
1467                 gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
1468
1469         q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
1470         free_priv_cmdbuf(c, e);
1471
1472         return 0;
1473 }
1474
1475 static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
1476 {
1477         mutex_lock(&c->clean_up.lock);
1478
1479         if (c->clean_up.scheduled) {
1480                 mutex_unlock(&c->clean_up.lock);
1481                 return;
1482         }
1483
1484         c->clean_up.scheduled = true;
1485         schedule_delayed_work(&c->clean_up.wq, 1);
1486
1487         mutex_unlock(&c->clean_up.lock);
1488 }
1489
1490 void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
1491                                 bool wait_for_completion)
1492 {
1493         if (wait_for_completion)
1494                 cancel_delayed_work_sync(&c->clean_up.wq);
1495
1496         mutex_lock(&c->clean_up.lock);
1497         c->clean_up.scheduled = false;
1498         mutex_unlock(&c->clean_up.lock);
1499 }
1500
1501 static int gk20a_channel_add_job(struct channel_gk20a *c,
1502                                  struct gk20a_fence *pre_fence,
1503                                  struct gk20a_fence *post_fence,
1504                                  struct priv_cmd_entry *wait_cmd,
1505                                  struct priv_cmd_entry *incr_cmd,
1506                                  bool skip_buffer_refcounting)
1507 {
1508         struct vm_gk20a *vm = c->vm;
1509         struct channel_gk20a_job *job = NULL;
1510         struct mapped_buffer_node **mapped_buffers = NULL;
1511         int err = 0, num_mapped_buffers = 0;
1512
1513         /* job needs reference to this vm (released in channel_update) */
1514         gk20a_vm_get(vm);
1515
1516         if (!skip_buffer_refcounting) {
1517                 err = gk20a_vm_get_buffers(vm, &mapped_buffers,
1518                                         &num_mapped_buffers);
1519                 if (err) {
1520                         gk20a_vm_put(vm);
1521                         return err;
1522                 }
1523         }
1524
1525         job = kzalloc(sizeof(*job), GFP_KERNEL);
1526         if (!job) {
1527                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1528                 gk20a_vm_put(vm);
1529                 return -ENOMEM;
1530         }
1531
1532         /* put() is done in gk20a_channel_update() when the job is done */
1533         c = gk20a_channel_get(c);
1534
1535         if (c) {
1536                 job->num_mapped_buffers = num_mapped_buffers;
1537                 job->mapped_buffers = mapped_buffers;
1538                 job->pre_fence = gk20a_fence_get(pre_fence);
1539                 job->post_fence = gk20a_fence_get(post_fence);
1540                 job->wait_cmd = wait_cmd;
1541                 job->incr_cmd = incr_cmd;
1542
1543                 spin_lock(&c->jobs_lock);
1544                 list_add_tail(&job->list, &c->jobs);
1545                 spin_unlock(&c->jobs_lock);
1546         } else {
                     /* channel could not be referenced: undo the allocations
                      * and references taken above before bailing out */
                     gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
                     gk20a_vm_put(vm);
                     kfree(job);
1547                 return -ETIMEDOUT;
1548         }
1549
1550         return 0;
1551 }
1552
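/*
 * Editor's note (illustrative): each queued job pins several things until
 * gk20a_channel_clean_up_jobs() retires it: the channel's vm, the
 * currently mapped buffers (unless skip_buffer_refcounting), the pre/post
 * fences, and one channel reference. A rough sketch of the ownership
 * handed over per job:
 *
 *     job->mapped_buffers   ->  gk20a_vm_put_buffers() on completion
 *     job->pre_fence,
 *     job->post_fence       ->  gk20a_fence_put() on completion
 *     job->wait_cmd,
 *     job->incr_cmd         ->  gk20a_free_priv_cmdbuf() on completion
 */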
1553 static void gk20a_channel_clean_up_jobs(struct work_struct *work)
1554 {
1555         struct channel_gk20a *c = container_of(to_delayed_work(work),
1556                         struct channel_gk20a, clean_up.wq);
1557         struct vm_gk20a *vm;
1558         struct channel_gk20a_job *job;
1559         struct gk20a_platform *platform;
1560         struct gk20a *g;
1561
1562         c = gk20a_channel_get(c);
1563         if (!c)
1564                 return;
1565
1566         if (!c->g->power_on) { /* shutdown case */
1567                 gk20a_channel_put(c);
1568                 return;
1569         }
1570
1571         vm = c->vm;
1572         g = c->g;
1573         platform = gk20a_get_platform(g->dev);
1574
1575         gk20a_channel_cancel_job_clean_up(c, false);
1576
1577         while (1) {
1578                 bool completed;
1579
1580                 spin_lock(&c->jobs_lock);
1581                 if (list_empty(&c->jobs)) {
1582                         spin_unlock(&c->jobs_lock);
1583                         break;
1584                 }
1585                 job = list_first_entry(&c->jobs,
1586                                        struct channel_gk20a_job, list);
1587                 spin_unlock(&c->jobs_lock);
1588
1589                 completed = gk20a_fence_is_expired(job->post_fence);
1590                 if (!completed)
1591                         break;
1592
1593                 mutex_lock(&c->sync_lock);
1594                 if (c->sync) {
1595                         c->sync->signal_timeline(c->sync);
1596                         if (atomic_dec_and_test(&c->sync->refcount) &&
1597                                         platform->aggressive_sync_destroy) {
1598                                 gk20a_channel_sync_destroy(c->sync);
1599                                 c->sync = NULL;
1600                         }
1601                 } else {
1602                         WARN_ON(1);
1603                 }
1604                 mutex_unlock(&c->sync_lock);
1605
1606                 if (job->num_mapped_buffers)
1607                         gk20a_vm_put_buffers(vm, job->mapped_buffers,
1608                                 job->num_mapped_buffers);
1609
1610                 /* Close the fences (this will unref the semaphores and release
1611                  * them to the pool). */
1612                 gk20a_fence_put(job->pre_fence);
1613                 gk20a_fence_put(job->post_fence);
1614
1615                 /* Free the private command buffers (wait_cmd first, then
1616                  * incr_cmd, i.e. in order of allocation) */
1617                 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
1618                 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
1619
1620                 /* job is done. release its vm reference (taken in add_job) */
1621                 gk20a_vm_put(vm);
1622                 /* drop the bookkeeping reference taken in add_job; the caller
1623                  * must hold its own reference, so the channel cannot be freed here. */
1624                 gk20a_channel_put(c);
1625
1626                 spin_lock(&c->jobs_lock);
1627                 list_del_init(&job->list);
1628                 spin_unlock(&c->jobs_lock);
1629
1630                 kfree(job);
1631
1632                 gk20a_idle(g->dev);
1633         }
1634
1635         if (c->update_fn)
1636                 schedule_work(&c->update_fn_work);
1637
1638         gk20a_channel_put(c);
1639 }
1640
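/*
 * Editor's note (illustrative): jobs are retired strictly in submission
 * order; the loop above stops at the first job whose post fence has not
 * yet expired. For every completed job the worker releases the references
 * taken in gk20a_channel_add_job() and calls gk20a_idle() once to balance
 * the gk20a_busy() taken in gk20a_submit_channel_gpfifo().
 */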
1641 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1642 {
1643         c = gk20a_channel_get(c);
1644         if (!c)
1645                 return;
1646
1647         if (!c->g->power_on) { /* shutdown case */
1648                 gk20a_channel_put(c);
1649                 return;
1650         }
1651
1652         update_gp_get(c->g, c);
1653         wake_up(&c->submit_wq);
1654
1655         trace_gk20a_channel_update(c->hw_chid);
1656         gk20a_channel_schedule_job_clean_up(c);
1657
1658         gk20a_channel_put(c);
1659 }
1660
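/*
 * Editor's note (illustrative): gk20a_channel_update() is called from
 * completion paths (e.g. gk20a_channel_semaphore_wakeup() further below);
 * it only refreshes gp_get, wakes submitters blocked on gpfifo space, and
 * defers the heavier reference dropping to the clean-up worker above.
 */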
1661 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1662                                 struct nvgpu_gpfifo *gpfifo,
1663                                 struct nvgpu_submit_gpfifo_args *args,
1664                                 u32 num_entries,
1665                                 u32 flags,
1666                                 struct nvgpu_fence *fence,
1667                                 struct gk20a_fence **fence_out,
1668                                 bool force_need_sync_fence)
1669 {
1670         struct gk20a *g = c->g;
1671         struct device *d = dev_from_gk20a(g);
1672         int err = 0;
1673         int start, end;
1674         int wait_fence_fd = -1;
1675         struct priv_cmd_entry *wait_cmd = NULL;
1676         struct priv_cmd_entry *incr_cmd = NULL;
1677         struct gk20a_fence *pre_fence = NULL;
1678         struct gk20a_fence *post_fence = NULL;
1679         /* we might need two extra gpfifo entries - one for pre fence
1680          * and one for post fence. */
1681         const int extra_entries = 2;
1682         bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1683         bool skip_buffer_refcounting = (flags &
1684                         NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
1685         bool need_sync_fence = false;
1686
1687         /*
1688          * If user wants to allocate sync_fence_fd always, then respect that;
1689          * otherwise, allocate sync_fence_fd based on user flags only
1690          */
1691         if (force_need_sync_fence)
1692                 need_sync_fence = true;
1693
1694         if (c->has_timedout)
1695                 return -ETIMEDOUT;
1696
1697         /* fifo not large enough for request; return an error immediately.
1698          * The kernel can insert gpfifo entries before and after the user
1699          * gpfifos, so add extra_entries to the user request. Also, HW with
1700          * fifo size N can accept only N-1 entries, hence the condition below. */
1701         if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
1702                 gk20a_err(d, "not enough gpfifo space allocated");
1703                 return -ENOMEM;
1704         }
1705
1706         if (!gpfifo && !args)
1707                 return -EINVAL;
1708
1709         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1710                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1711             !fence)
1712                 return -EINVAL;
1713
1714         /* an address space needs to have been bound at this point. */
1715         if (!gk20a_channel_as_bound(c)) {
1716                 gk20a_err(d,
1717                             "not bound to an address space at time of gpfifo"
1718                             " submission.");
1719                 return -EINVAL;
1720         }
1721
1722 #ifdef CONFIG_DEBUG_FS
1723         /* update debug settings */
1724         if (g->ops.ltc.sync_debugfs)
1725                 g->ops.ltc.sync_debugfs(g);
1726 #endif
1727
1728         gk20a_dbg_info("channel %d", c->hw_chid);
1729
1730         /* gk20a_channel_update releases this ref. */
1731         err = gk20a_busy(g->dev);
1732         if (err) {
1733                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1734                 return err;
1735         }
1736
1737         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1738                                           c->hw_chid,
1739                                           num_entries,
1740                                           flags,
1741                                           fence ? fence->id : 0,
1742                                           fence ? fence->value : 0);
1743
1744         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1745                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1746
1747         /* Make sure we have enough space for gpfifo entries. If not,
1748          * wait for signals from completed submits */
1749         if (gp_free_count(c) < num_entries + extra_entries) {
1750                 /* we can get here via locked ioctl and other paths too */
1751                 int locked_path = mutex_is_locked(&c->ioctl_lock);
1752                 if (locked_path)
1753                         mutex_unlock(&c->ioctl_lock);
1754
1755                 trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
1756                 err = wait_event_interruptible(c->submit_wq,
1757                         get_gp_free_count(c) >= num_entries + extra_entries ||
1758                         c->has_timedout);
1759                 trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);
1760
1761                 if (locked_path)
1762                         mutex_lock(&c->ioctl_lock);
1763         }
1764
1765         if (c->has_timedout) {
1766                 err = -ETIMEDOUT;
1767                 goto clean_up;
1768         }
1769
1770         if (err) {
1771                 err = -ENOSPC;
1772                 goto clean_up;
1773         }
1774
1775         mutex_lock(&c->sync_lock);
1776         if (!c->sync) {
1777                 c->sync = gk20a_channel_sync_create(c);
1778                 if (!c->sync) {
1779                         err = -ENOMEM;
1780                         mutex_unlock(&c->sync_lock);
1781                         goto clean_up;
1782                 }
1783                 if (g->ops.fifo.resetup_ramfc)
1784                         err = g->ops.fifo.resetup_ramfc(c);
1785                 if (err) {
1786                         mutex_unlock(&c->sync_lock);
1787                         goto clean_up;
1788                 }
1789         }
1790         atomic_inc(&c->sync->refcount);
1791         mutex_unlock(&c->sync_lock);
1792
1793         /*
1794          * Optionally insert a syncpt wait at the beginning of the gpfifo
1795          * submission when the user requested one and the wait has not yet
1796          * expired. Validate that the id makes sense and elide the wait if it
1797          * does not; the only reason this case is not rejected outright is to
1798          * keep running some tests which trigger this condition.
1799          */
1800         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1801                 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1802                         wait_fence_fd = fence->id;
1803                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1804                                         &wait_cmd, &pre_fence);
1805                 } else {
1806                         err = c->sync->wait_syncpt(c->sync, fence->id,
1807                                         fence->value, &wait_cmd, &pre_fence);
1808                 }
1809         }
1810         if (err) {
1811                 goto clean_up;
1812         }
1813
1814         if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
1815                         (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
1816                 need_sync_fence = true;
1817
1818         /* always insert syncpt increment at end of gpfifo submission
1819            to keep track of method completion for idle railgating */
1820         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1821                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1822                                  &post_fence, need_wfi, need_sync_fence);
1823         else
1824                 err = c->sync->incr(c->sync, &incr_cmd,
1825                                     &post_fence, need_sync_fence);
1826         if (err) {
1827                 goto clean_up;
1828         }
1829
1830         if (wait_cmd) {
1831                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1832                         u64_lo32(wait_cmd->gva);
1833                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1834                         u64_hi32(wait_cmd->gva) |
1835                         pbdma_gp_entry1_length_f(wait_cmd->size);
1836                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1837                         0, wait_cmd->size, 0, wait_cmd->ptr);
1838
1839                 c->gpfifo.put = (c->gpfifo.put + 1) &
1840                         (c->gpfifo.entry_num - 1);
1841
1842                 /* save gp_put */
1843                 wait_cmd->gp_put = c->gpfifo.put;
1844         }
1845
1846         /*
1847          * Copy source gpfifo entries into the gpfifo ring buffer,
1848          * potentially splitting into two memcpies to handle the
1849          * ring buffer wrap-around case.
1850          */
1851         start = c->gpfifo.put;
1852         end = start + num_entries;
1853
1854         if (gpfifo) {
1855                 if (end > c->gpfifo.entry_num) {
1856                         int length0 = c->gpfifo.entry_num - start;
1857                         int length1 = num_entries - length0;
1858
1859                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1860                                 gpfifo,
1861                                 length0 * sizeof(*gpfifo));
1862
1863                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va,
1864                                 gpfifo + length0,
1865                                 length1 * sizeof(*gpfifo));
1866
1867                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1868                                         0, length0);
1869                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1870                                         length0, length1);
1871                 } else {
1872                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1873                                 gpfifo,
1874                                 num_entries * sizeof(*gpfifo));
1875
1876                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1877                                         0, num_entries);
1878                 }
1879         } else {
1880                 struct nvgpu_gpfifo __user *user_gpfifo =
1881                         (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
1882                 if (end > c->gpfifo.entry_num) {
1883                         int length0 = c->gpfifo.entry_num - start;
1884                         int length1 = num_entries - length0;
1885
1886                         if (copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1887                                 user_gpfifo,
1888                                 length0 * sizeof(*user_gpfifo))) {
1889                                 err = -EFAULT;
1890                                 goto clean_up;
1891                         }
1892
1893                         if (copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va,
1894                                 user_gpfifo + length0,
1895                                 length1 * sizeof(*user_gpfifo))) {
1896                                 err = -EFAULT;
1897                                 goto clean_up;
1898                         }
1899
1900                         trace_write_pushbuffer_range(c, NULL, args,
1901                                         0, length0);
1902                         trace_write_pushbuffer_range(c, NULL, args,
1903                                         length0, length1);
1904                 } else {
1905                         if (copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1906                                 user_gpfifo,
1907                                 num_entries * sizeof(*user_gpfifo))) {
1908                                 err = -EFAULT;
1909                                 goto clean_up;
1910                         }
1911
1912                         trace_write_pushbuffer_range(c, NULL, args,
1913                                         0, num_entries);
1914                 }
1915         }
1916
1917         c->gpfifo.put = (c->gpfifo.put + num_entries) &
1918                 (c->gpfifo.entry_num - 1);
1919
1920         if (incr_cmd) {
1921                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1922                         u64_lo32(incr_cmd->gva);
1923                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1924                         u64_hi32(incr_cmd->gva) |
1925                         pbdma_gp_entry1_length_f(incr_cmd->size);
1926                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1927                         0, incr_cmd->size, 0, incr_cmd->ptr);
1928
1929                 c->gpfifo.put = (c->gpfifo.put + 1) &
1930                         (c->gpfifo.entry_num - 1);
1931
1932                 /* save gp_put */
1933                 incr_cmd->gp_put = c->gpfifo.put;
1934         }
1935
1936         mutex_lock(&c->last_submit.fence_lock);
1937         gk20a_fence_put(c->last_submit.pre_fence);
1938         gk20a_fence_put(c->last_submit.post_fence);
1939         c->last_submit.pre_fence = pre_fence;
1940         c->last_submit.post_fence = post_fence;
1941         if (fence_out)
1942                 *fence_out = gk20a_fence_get(post_fence);
1943         mutex_unlock(&c->last_submit.fence_lock);
1944
1945         /* TODO! Check for errors... */
1946         gk20a_channel_add_job(c, pre_fence, post_fence,
1947                                 wait_cmd, incr_cmd,
1948                                 skip_buffer_refcounting);
1949
1950         c->cmds_pending = true;
1951         gk20a_bar1_writel(g,
1952                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1953                 c->gpfifo.put);
1954
1955         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1956                                              c->hw_chid,
1957                                              num_entries,
1958                                              flags,
1959                                              post_fence->syncpt_id,
1960                                              post_fence->syncpt_value);
1961
1962         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1963                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1964
1965         gk20a_dbg_fn("done");
1966         return err;
1967
1968 clean_up:
1969         gk20a_dbg_fn("fail");
1970         free_priv_cmdbuf(c, wait_cmd);
1971         free_priv_cmdbuf(c, incr_cmd);
1972         gk20a_fence_put(pre_fence);
1973         gk20a_fence_put(post_fence);
1974         gk20a_idle(g->dev);
1975         return err;
1976 }
1977
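/*
 * Editor's note: a minimal in-kernel submission sketch, for illustration
 * only. The 'entries' array below is hypothetical; the real caller in this
 * file is gk20a_ioctl_channel_submit_gpfifo(), which passes the user-space
 * args instead of a kernel gpfifo array.
 *
 *     struct nvgpu_gpfifo entries[2] = { };   // filled with pushbuf refs
 *     struct nvgpu_fence fence = { 0 };
 *     struct gk20a_fence *out = NULL;
 *     int err;
 *
 *     err = gk20a_submit_channel_gpfifo(ch, entries, NULL, 2,
 *                     NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET,
 *                     &fence, &out, false);
 *     if (!err)
 *             gk20a_fence_put(out);   // drop the ref taken for fence_out
 */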
1978 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1979 {
1980         struct channel_gk20a *c = g->fifo.channel + chid;
1981         c->g = NULL;
1982         c->hw_chid = chid;
1983         c->bound = false;
1984         spin_lock_init(&c->ref_obtain_lock);
1985         atomic_set(&c->ref_count, 0);
1986         c->referenceable = false;
1987         init_waitqueue_head(&c->ref_count_dec_wq);
1988         mutex_init(&c->ioctl_lock);
1989         spin_lock_init(&c->jobs_lock);
1990         mutex_init(&c->last_submit.fence_lock);
1991         INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
1992         mutex_init(&c->clean_up.lock);
1993         mutex_init(&c->sync_lock);
1994         INIT_LIST_HEAD(&c->jobs);
1995 #if defined(CONFIG_GK20A_CYCLE_STATS)
1996         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1997         mutex_init(&c->cs_client_mutex);
1998 #endif
1999         INIT_LIST_HEAD(&c->dbg_s_list);
2000         mutex_init(&c->dbg_s_lock);
2001         list_add(&c->free_chs, &g->fifo.free_chs);
2002
2003         return 0;
2004 }
2005
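/*
 * Editor's note (illustrative): at this point the channel is only a
 * bookkeeping slot; c->g stays NULL and the channel sits on
 * g->fifo.free_chs (see the list_add above) until it is handed out to an
 * opener and bound to the GPU.
 */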
2006 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
2007 {
2008         int err = 0;
2009         struct gk20a_fence *fence;
2010
2011         if (!ch->cmds_pending)
2012                 return 0;
2013
2014         mutex_lock(&ch->last_submit.fence_lock);
2015         fence = ch->last_submit.post_fence;
2016         if (!fence) {
2017                 mutex_unlock(&ch->last_submit.fence_lock);
2018                 return -EINVAL;
2019         }
2020         mutex_unlock(&ch->last_submit.fence_lock);
2021
2022         /* Do not wait for a timedout channel */
2023         if (ch->has_timedout)
2024                 return -ETIMEDOUT;
2025
2026         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
2027                      fence->syncpt_value, fence->semaphore);
2028
2029         err = gk20a_fence_wait(fence, timeout);
2030         if (WARN_ON(err))
2031                 dev_warn(dev_from_gk20a(ch->g),
2032                        "timed out waiting for gk20a channel to finish");
2033         else
2034                 ch->cmds_pending = false;
2035
2036         return err;
2037 }
2038
2039 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
2040                                         ulong id, u32 offset,
2041                                         u32 payload, long timeout)
2042 {
2043         struct platform_device *pdev = ch->g->dev;
2044         struct dma_buf *dmabuf;
2045         void *data;
2046         u32 *semaphore;
2047         int ret = 0;
2048         long remain;
2049
2050         /* do not wait if channel has timed out */
2051         if (ch->has_timedout)
2052                 return -ETIMEDOUT;
2053
2054         dmabuf = dma_buf_get(id);
2055         if (IS_ERR(dmabuf)) {
2056                 gk20a_err(&pdev->dev, "invalid semaphore nvmap handle 0x%lx",
2057                            id);
2058                 return -EINVAL;
2059         }
2060
2061         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
2062         if (!data) {
2063                 gk20a_err(&pdev->dev, "failed to map semaphore memory");
2064                 ret = -EINVAL;
2065                 goto cleanup_put;
2066         }
2067
2068         semaphore = data + (offset & ~PAGE_MASK);
2069
2070         remain = wait_event_interruptible_timeout(
2071                         ch->semaphore_wq,
2072                         *semaphore == payload || ch->has_timedout,
2073                         timeout);
2074
2075         if (remain == 0 && *semaphore != payload)
2076                 ret = -ETIMEDOUT;
2077         else if (remain < 0)
2078                 ret = remain;
2079
2080         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
2081 cleanup_put:
2082         dma_buf_put(dmabuf);
2083         return ret;
2084 }
2085
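/*
 * Editor's note (illustrative): the semaphore lives in a user-supplied
 * dma-buf, so the offset is split into a page index for dma_buf_kmap()
 * and an in-page byte offset. For example, with PAGE_SIZE == 4096 and
 * offset == 0x1010:
 *
 *     offset >> PAGE_SHIFT   == 1      (the second page is mapped)
 *     offset & ~PAGE_MASK    == 0x10   (semaphore word within that page)
 */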
2086 static int gk20a_channel_wait(struct channel_gk20a *ch,
2087                               struct nvgpu_wait_args *args)
2088 {
2089         struct device *d = dev_from_gk20a(ch->g);
2090         struct dma_buf *dmabuf;
2091         struct notification *notif;
2092         struct timespec tv;
2093         u64 jiffies;
2094         ulong id;
2095         u32 offset;
2096         unsigned long timeout;
2097         int remain, ret = 0;
2098         u64 end;
2099
2100         gk20a_dbg_fn("");
2101
2102         if (ch->has_timedout)
2103                 return -ETIMEDOUT;
2104
2105         if (args->timeout == NVGPU_NO_TIMEOUT)
2106                 timeout = MAX_SCHEDULE_TIMEOUT;
2107         else
2108                 timeout = (u32)msecs_to_jiffies(args->timeout);
2109
2110         switch (args->type) {
2111         case NVGPU_WAIT_TYPE_NOTIFIER:
2112                 id = args->condition.notifier.dmabuf_fd;
2113                 offset = args->condition.notifier.offset;
2114                 end = offset + sizeof(struct notification);
2115
2116                 dmabuf = dma_buf_get(id);
2117                 if (IS_ERR(dmabuf)) {
2118                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
2119                                    id);
2120                         return -EINVAL;
2121                 }
2122
2123                 if (end > dmabuf->size || end < sizeof(struct notification)) {
2124                         dma_buf_put(dmabuf);
2125                         gk20a_err(d, "invalid notifier offset\n");
2126                         return -EINVAL;
2127                 }
2128
2129                 notif = dma_buf_vmap(dmabuf);
2130                 if (!notif) {
2131                         gk20a_err(d, "failed to map notifier memory");
2132                         return -ENOMEM;
2133                 }
2134
2135                 notif = (struct notification *)((uintptr_t)notif + offset);
2136
2137                 /* user should set status pending before
2138                  * calling this ioctl */
2139                 remain = wait_event_interruptible_timeout(
2140                                 ch->notifier_wq,
2141                                 notif->status == 0 || ch->has_timedout,
2142                                 timeout);
2143
2144                 if (remain == 0 && notif->status != 0) {
2145                         ret = -ETIMEDOUT;
2146                         goto notif_clean_up;
2147                 } else if (remain < 0) {
2148                         ret = -EINTR;
2149                         goto notif_clean_up;
2150                 }
2151
2152                 /* TBD: fill in correct information */
2153                 jiffies = get_jiffies_64();
2154                 jiffies_to_timespec(jiffies, &tv);
2155                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
2156                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
2157                 notif->info32 = 0xDEADBEEF; /* should be object name */
2158                 notif->info16 = ch->hw_chid; /* should be method offset */
2159
2160 notif_clean_up:
2161                 dma_buf_vunmap(dmabuf, notif);
2162                 return ret;
2163
2164         case NVGPU_WAIT_TYPE_SEMAPHORE:
2165                 ret = gk20a_channel_wait_semaphore(ch,
2166                                 args->condition.semaphore.dmabuf_fd,
2167                                 args->condition.semaphore.offset,
2168                                 args->condition.semaphore.payload,
2169                                 timeout);
2170
2171                 break;
2172
2173         default:
2174                 ret = -EINVAL;
2175                 break;
2176         }
2177
2178         return ret;
2179 }
2180
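/*
 * Editor's note: a sketch of how user space is assumed to drive the
 * notifier variant of NVGPU_IOCTL_CHANNEL_WAIT (field names taken from the
 * struct accesses above; 'notif_fd' and 'channel_fd' are hypothetical):
 *
 *     struct nvgpu_wait_args args = { 0 };
 *
 *     args.type = NVGPU_WAIT_TYPE_NOTIFIER;
 *     args.timeout = 2000;                          // ms, or NVGPU_NO_TIMEOUT
 *     args.condition.notifier.dmabuf_fd = notif_fd; // status set pending first
 *     args.condition.notifier.offset = 0;
 *     // ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_WAIT, &args);
 */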
2181 /* poll events for semaphores */
2182
2183 static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
2184 {
2185         gk20a_dbg_fn("");
2186
2187         mutex_lock(&ev->lock);
2188
2189         ev->events_enabled = true;
2190         ev->num_pending_events = 0;
2191
2192         mutex_unlock(&ev->lock);
2193 }
2194
2195 static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
2196 {
2197         gk20a_dbg_fn("");
2198
2199         mutex_lock(&ev->lock);
2200
2201         ev->events_enabled = false;
2202         ev->num_pending_events = 0;
2203
2204         mutex_unlock(&ev->lock);
2205 }
2206
2207 static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
2208 {
2209         gk20a_dbg_fn("");
2210
2211         mutex_lock(&ev->lock);
2212
2213         if (ev->events_enabled &&
2214                         ev->num_pending_events > 0)
2215                 ev->num_pending_events--;
2216
2217         mutex_unlock(&ev->lock);
2218 }
2219
2220 static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
2221                           struct nvgpu_channel_events_ctrl_args *args)
2222 {
2223         int ret = 0;
2224
2225         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
2226                         "channel events ctrl cmd %d", args->cmd);
2227
2228         switch (args->cmd) {
2229         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
2230                 gk20a_channel_events_enable(&ch->poll_events);
2231                 break;
2232
2233         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
2234                 gk20a_channel_events_disable(&ch->poll_events);
2235                 break;
2236
2237         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
2238                 gk20a_channel_events_clear(&ch->poll_events);
2239                 break;
2240
2241         default:
2242                 gk20a_err(dev_from_gk20a(ch->g),
2243                            "unrecognized channel events ctrl cmd: 0x%x",
2244                            args->cmd);
2245                 ret = -EINVAL;
2246                 break;
2247         }
2248
2249         return ret;
2250 }
2251
2252 void gk20a_channel_event(struct channel_gk20a *ch)
2253 {
2254         mutex_lock(&ch->poll_events.lock);
2255
2256         if (ch->poll_events.events_enabled) {
2257                 gk20a_dbg_info("posting event on channel id %d",
2258                                 ch->hw_chid);
2259                 gk20a_dbg_info("%d channel events pending",
2260                                 ch->poll_events.num_pending_events);
2261
2262                 ch->poll_events.num_pending_events++;
2263                 /* not waking up here, caller does that */
2264         }
2265
2266         mutex_unlock(&ch->poll_events.lock);
2267 }
2268
2269 unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
2270 {
2271         unsigned int mask = 0;
2272         struct channel_gk20a *ch = filep->private_data;
2273
2274         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
2275
2276         poll_wait(filep, &ch->semaphore_wq, wait);
2277
2278         mutex_lock(&ch->poll_events.lock);
2279
2280         if (ch->poll_events.events_enabled &&
2281                         ch->poll_events.num_pending_events > 0) {
2282                 gk20a_dbg_info("found pending event on channel id %d",
2283                                 ch->hw_chid);
2284                 gk20a_dbg_info("%d channel events pending",
2285                                 ch->poll_events.num_pending_events);
2286                 mask = (POLLPRI | POLLIN);
2287         }
2288
2289         mutex_unlock(&ch->poll_events.lock);
2290
2291         return mask;
2292 }
2293
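/*
 * Editor's note (illustrative): the expected event flow is
 *
 *     NVGPU_IOCTL_CHANNEL_EVENTS_CTRL(ENABLE)
 *         -> gk20a_channel_event() bumps num_pending_events on wakeups
 *         -> poll()/select() on the channel fd returns POLLPRI | POLLIN
 *         -> NVGPU_IOCTL_CHANNEL_EVENTS_CTRL(CLEAR) consumes one event
 *
 * The wakeup itself comes from gk20a_channel_semaphore_wakeup() further
 * below, which also wakes semaphore_wq pollers.
 */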
2294 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2295                 u32 priority)
2296 {
2297         u32 timeslice_timeout;
2298         /* set priority of graphics channel */
2299         switch (priority) {
2300         case NVGPU_PRIORITY_LOW:
2301                 /* 64 << 3 = 512us */
2302                 timeslice_timeout = 64;
2303                 break;
2304         case NVGPU_PRIORITY_MEDIUM:
2305                 /* 128 << 3 = 1024us */
2306                 timeslice_timeout = 128;
2307                 break;
2308         case NVGPU_PRIORITY_HIGH:
2309                 /* 255 << 3 = 2048us */
2310                 timeslice_timeout = 255;
2311                 break;
2312         default:
2313                 pr_err("Unsupported priority");
2314                 return -EINVAL;
2315         }
2316         channel_gk20a_set_schedule_params(ch,
2317                         timeslice_timeout);
2318         return 0;
2319 }
2320
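/*
 * Editor's note (illustrative): per the comments above, the raw value is
 * later scaled by a factor of 8 (<< 3), so the effective timeslices are
 * approximately:
 *
 *     NVGPU_PRIORITY_LOW     64  -> ~512 us
 *     NVGPU_PRIORITY_MEDIUM  128 -> ~1024 us
 *     NVGPU_PRIORITY_HIGH    255 -> ~2048 us
 *
 * channel_gk20a_set_schedule_params() is assumed to apply that scaling
 * when it programs the hardware timeslice.
 */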
2321 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
2322                             struct nvgpu_zcull_bind_args *args)
2323 {
2324         struct gk20a *g = ch->g;
2325         struct gr_gk20a *gr = &g->gr;
2326
2327         gk20a_dbg_fn("");
2328
2329         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
2330                                 args->gpu_va, args->mode);
2331 }
2332
2333 /* in this context the "channel" is the host1x channel which
2334  * maps to *all* gk20a channels */
2335 int gk20a_channel_suspend(struct gk20a *g)
2336 {
2337         struct fifo_gk20a *f = &g->fifo;
2338         u32 chid;
2339         bool channels_in_use = false;
2340         int err;
2341
2342         gk20a_dbg_fn("");
2343
2344         /* wait for engine idle */
2345         err = g->ops.fifo.wait_engine_idle(g);
2346         if (err)
2347                 return err;
2348
2349         for (chid = 0; chid < f->num_channels; chid++) {
2350                 struct channel_gk20a *ch = &f->channel[chid];
2351                 if (gk20a_channel_get(ch)) {
2352                         gk20a_dbg_info("suspend channel %d", chid);
2353                         /* disable channel */
2354                         g->ops.fifo.disable_channel(ch);
2355                         /* preempt the channel */
2356                         gk20a_fifo_preempt(ch->g, ch);
2357                         gk20a_channel_cancel_job_clean_up(ch, true);
2358                         /* wait for channel update notifiers */
2359                         if (ch->update_fn)
2360                                 cancel_work_sync(&ch->update_fn_work);
2361
2362                         channels_in_use = true;
2363
2364                         gk20a_channel_put(ch);
2365                 }
2366         }
2367
2368         if (channels_in_use) {
2369                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2370
2371                 for (chid = 0; chid < f->num_channels; chid++) {
2372                         if (gk20a_channel_get(&f->channel[chid])) {
2373                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
2374                                 gk20a_channel_put(&f->channel[chid]);
2375                         }
2376                 }
2377         }
2378
2379         gk20a_dbg_fn("done");
2380         return 0;
2381 }
2382
2383 int gk20a_channel_resume(struct gk20a *g)
2384 {
2385         struct fifo_gk20a *f = &g->fifo;
2386         u32 chid;
2387         bool channels_in_use = false;
2388
2389         gk20a_dbg_fn("");
2390
2391         for (chid = 0; chid < f->num_channels; chid++) {
2392                 if (gk20a_channel_get(&f->channel[chid])) {
2393                         gk20a_dbg_info("resume channel %d", chid);
2394                         g->ops.fifo.bind_channel(&f->channel[chid]);
2395                         channels_in_use = true;
2396                         gk20a_channel_put(&f->channel[chid]);
2397                 }
2398         }
2399
2400         if (channels_in_use)
2401                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
2402
2403         gk20a_dbg_fn("done");
2404         return 0;
2405 }
2406
2407 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2408 {
2409         struct fifo_gk20a *f = &g->fifo;
2410         u32 chid;
2411
2412         gk20a_dbg_fn("");
2413
2414         for (chid = 0; chid < f->num_channels; chid++) {
2415                 struct channel_gk20a *c = g->fifo.channel + chid;
2416                 if (gk20a_channel_get(c)) {
2417                         gk20a_channel_event(c);
2418                         wake_up_interruptible_all(&c->semaphore_wq);
2419                         gk20a_channel_update(c, 0);
2420                         gk20a_channel_put(c);
2421                 }
2422         }
2423 }
2424
2425 static int gk20a_ioctl_channel_submit_gpfifo(
2426         struct channel_gk20a *ch,
2427         struct nvgpu_submit_gpfifo_args *args)
2428 {
2429         struct gk20a_fence *fence_out;
2430         int ret = 0;
2431
2432         gk20a_dbg_fn("");
2433
2434         if (ch->has_timedout)
2435                 return -ETIMEDOUT;
2436
2437         ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
2438                                           args->flags, &args->fence,
2439                                           &fence_out, false);
2440
2441         if (ret)
2442                 goto clean_up;
2443
2444         /* Convert fence_out to something we can pass back to user space. */
2445         if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2446                 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2447                         int fd = gk20a_fence_install_fd(fence_out);
2448                         if (fd < 0)
2449                                 ret = fd;
2450                         else
2451                                 args->fence.id = fd;
2452                 } else {
2453                         args->fence.id = fence_out->syncpt_id;
2454                         args->fence.value = fence_out->syncpt_value;
2455                 }
2456         }
2457         gk20a_fence_put(fence_out);
2458
2459 clean_up:
2460         return ret;
2461 }
2462
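/*
 * Editor's note (illustrative): the returned fence is reported back in one
 * of two forms, matching the flags handling above:
 *
 *     FENCE_GET | SYNC_FENCE  -> args->fence.id is a sync_fence fd
 *                                (installed via gk20a_fence_install_fd())
 *     FENCE_GET only          -> args->fence.id/value carry the raw
 *                                syncpoint id and threshold
 *
 * In both cases the local gk20a_fence reference is dropped with
 * gk20a_fence_put() before returning to user space.
 */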
2463 void gk20a_init_channel(struct gpu_ops *gops)
2464 {
2465         gops->fifo.bind_channel = channel_gk20a_bind;
2466         gops->fifo.unbind_channel = channel_gk20a_unbind;
2467         gops->fifo.disable_channel = channel_gk20a_disable;
2468         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2469         gops->fifo.free_inst = channel_gk20a_free_inst;
2470         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2471 }
2472
2473 long gk20a_channel_ioctl(struct file *filp,
2474         unsigned int cmd, unsigned long arg)
2475 {
2476         struct channel_gk20a *ch = filp->private_data;
2477         struct platform_device *dev = ch->g->dev;
2478         u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0};
2479         int err = 0;
2480
2481         gk20a_dbg_fn("start %d", _IOC_NR(cmd));
2482
2483         if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
2484                 (_IOC_NR(cmd) == 0) ||
2485                 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
2486                 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
2487                 return -EINVAL;
2488
2489         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2490                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2491                         return -EFAULT;
2492         }
2493
2494         /* take a ref or return timeout if channel refs can't be taken */
2495         ch = gk20a_channel_get(ch);
2496         if (!ch)
2497                 return -ETIMEDOUT;
2498
2499         /* protect our sanity for threaded userspace - most of the channel is
2500          * not thread safe */
2501         mutex_lock(&ch->ioctl_lock);
2502
2503         /* this ioctl call keeps a ref to the file which keeps a ref to the
2504          * channel */
2505
2506         switch (cmd) {
2507         case NVGPU_IOCTL_CHANNEL_OPEN:
2508                 err = gk20a_channel_open_ioctl(ch->g,
2509                         (struct nvgpu_channel_open_args *)buf);
2510                 break;
2511         case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
2512                 break;
2513         case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2514                 err = gk20a_busy(dev);
2515                 if (err) {
2516                         dev_err(&dev->dev,
2517                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2518                                 __func__, cmd);
2519                         break;
2520                 }
2521                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2522                                 (struct nvgpu_alloc_obj_ctx_args *)buf);
2523                 gk20a_idle(dev);
2524                 break;
2525         case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
2526                 err = gk20a_busy(dev);
2527                 if (err) {
2528                         dev_err(&dev->dev,
2529                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2530                                 __func__, cmd);
2531                         break;
2532                 }
2533                 err = ch->g->ops.gr.free_obj_ctx(ch,
2534                                 (struct nvgpu_free_obj_ctx_args *)buf);
2535                 gk20a_idle(dev);
2536                 break;
2537         case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
2538                 err = gk20a_busy(dev);
2539                 if (err) {
2540                         dev_err(&dev->dev,
2541                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2542                                 __func__, cmd);
2543                         break;
2544                 }
2545                 err = gk20a_alloc_channel_gpfifo(ch,
2546                                 (struct nvgpu_alloc_gpfifo_args *)buf);
2547                 gk20a_idle(dev);
2548                 break;
2549         case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2550                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2551                                 (struct nvgpu_submit_gpfifo_args *)buf);
2552                 break;
2553         case NVGPU_IOCTL_CHANNEL_WAIT:
2554                 err = gk20a_busy(dev);
2555                 if (err) {
2556                         dev_err(&dev->dev,
2557                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2558                                 __func__, cmd);
2559                         break;
2560                 }
2561
2562                 /* waiting is thread-safe, not dropping this mutex could
2563                  * deadlock in certain conditions */
2564                 mutex_unlock(&ch->ioctl_lock);
2565
2566                 err = gk20a_channel_wait(ch,
2567                                 (struct nvgpu_wait_args *)buf);
2568
2569                 mutex_lock(&ch->ioctl_lock);
2570
2571                 gk20a_idle(dev);
2572                 break;
2573         case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
2574                 err = gk20a_busy(dev);
2575                 if (err) {
2576                         dev_err(&dev->dev,
2577                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2578                                 __func__, cmd);
2579                         break;
2580                 }
2581                 err = gk20a_channel_zcull_bind(ch,
2582                                 (struct nvgpu_zcull_bind_args *)buf);
2583                 gk20a_idle(dev);
2584                 break;
2585         case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2586                 err = gk20a_busy(dev);
2587                 if (err) {
2588                         dev_err(&dev->dev,
2589                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2590                                 __func__, cmd);
2591                         break;
2592                 }
2593                 err = gk20a_init_error_notifier(ch,
2594                                 (struct nvgpu_set_error_notifier *)buf);
2595                 gk20a_idle(dev);
2596                 break;
2597 #ifdef CONFIG_GK20A_CYCLE_STATS
2598         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
2599                 err = gk20a_busy(dev);
2600                 if (err) {
2601                         dev_err(&dev->dev,
2602                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2603                                 __func__, cmd);
2604                         break;
2605                 }
2606                 err = gk20a_channel_cycle_stats(ch,
2607                                 (struct nvgpu_cycle_stats_args *)buf);
2608                 gk20a_idle(dev);
2609                 break;
2610 #endif
2611         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
2612         {
2613                 u32 timeout =
2614                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2615                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2616                            timeout, ch->hw_chid);
2617                 ch->timeout_ms_max = timeout;
2618                 break;
2619         }
2620         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2621         {
2622                 u32 timeout =
2623                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2624                 bool timeout_debug_dump = !((u32)
2625                         ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
2626                         (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
2627                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2628                            timeout, ch->hw_chid);
2629                 ch->timeout_ms_max = timeout;
2630                 ch->timeout_debug_dump = timeout_debug_dump;
2631                 break;
2632         }
2633         case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
2634                 ((struct nvgpu_get_param_args *)buf)->value =
2635                         ch->has_timedout;
2636                 break;
2637         case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
2638                 err = gk20a_busy(dev);
2639                 if (err) {
2640                         dev_err(&dev->dev,
2641                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2642                                 __func__, cmd);
2643                         break;
2644                 }
2645                 gk20a_channel_set_priority(ch,
2646                         ((struct nvgpu_set_priority_args *)buf)->priority);
2647                 gk20a_idle(dev);
2648                 break;
2649         case NVGPU_IOCTL_CHANNEL_ENABLE:
2650                 err = gk20a_busy(dev);
2651                 if (err) {
2652                         dev_err(&dev->dev,
2653                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2654                                 __func__, cmd);
2655                         break;
2656                 }
2657                 /* enable channel */
2658                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2659                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2660                         ccsr_channel_enable_set_true_f());
2661                 gk20a_idle(dev);
2662                 break;
2663         case NVGPU_IOCTL_CHANNEL_DISABLE:
2664                 err = gk20a_busy(dev);
2665                 if (err) {
2666                         dev_err(&dev->dev,
2667                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2668                                 __func__, cmd);
2669                         break;
2670                 }
2671                 /* disable channel */
2672                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2673                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2674                         ccsr_channel_enable_clr_true_f());
2675                 gk20a_idle(dev);
2676                 break;
2677         case NVGPU_IOCTL_CHANNEL_PREEMPT:
2678                 err = gk20a_busy(dev);
2679                 if (err) {
2680                         dev_err(&dev->dev,
2681                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2682                                 __func__, cmd);
2683                         break;
2684                 }
2685                 err = gk20a_fifo_preempt(ch->g, ch);
2686                 gk20a_idle(dev);
2687                 break;
2688         case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
2689                 err = gk20a_busy(dev);
2690                 if (err) {
2691                         dev_err(&dev->dev,
2692                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2693                                 __func__, cmd);
2694                         break;
2695                 }
2696                 err = gk20a_fifo_force_reset_ch(ch, true);
2697                 gk20a_idle(dev);
2698                 break;
2699         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
2700                 err = gk20a_channel_events_ctrl(ch,
2701                            (struct nvgpu_channel_events_ctrl_args *)buf);
2702                 break;
2703 #ifdef CONFIG_GK20A_CYCLE_STATS
2704         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
2705                 err = gk20a_busy(dev);
2706                 if (err) {
2707                         dev_err(&dev->dev,
2708                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2709                                 __func__, cmd);
2710                         break;
2711                 }
2712                 err = gk20a_channel_cycle_stats_snapshot(ch,
2713                                 (struct nvgpu_cycle_stats_snapshot_args *)buf);
2714                 gk20a_idle(dev);
2715                 break;
2716 #endif
2717         default:
2718                 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2719                 err = -ENOTTY;
2720                 break;
2721         }
2722
2723         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) &&
2724             copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
                     err = -EFAULT;
2725
2726         mutex_unlock(&ch->ioctl_lock);
2727
2728         gk20a_channel_put(ch);
2729
2730         gk20a_dbg_fn("end");
2731
2732         return err;
2733 }