1 /*
2  * GK20A Graphics channel
3  *
4  * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/nvhost.h>
20 #include <linux/list.h>
21 #include <linux/delay.h>
22 #include <linux/highmem.h> /* needed for nvmap.h */
23 #include <trace/events/gk20a.h>
24 #include <linux/scatterlist.h>
25 #include <linux/file.h>
26 #include <linux/anon_inodes.h>
27 #include <linux/dma-buf.h>
28 #include <linux/vmalloc.h>
29
30 #include "debug_gk20a.h"
31
32 #include "gk20a.h"
33 #include "dbg_gpu_gk20a.h"
34 #include "fence_gk20a.h"
35 #include "semaphore_gk20a.h"
36
37 #include "hw_ram_gk20a.h"
38 #include "hw_fifo_gk20a.h"
39 #include "hw_pbdma_gk20a.h"
40 #include "hw_ccsr_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42
43 #define NVMAP_HANDLE_PARAM_SIZE 1
44
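/*
 * Once more than this many channels are in use, per-channel sync resources
 * are destroyed aggressively on unbind instead of being kept around until
 * channel_free() (see allocate_channel()/free_channel() below).
 */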
45 #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT       64      /* channels */
46
47 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
48 static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
49
50 static void free_priv_cmdbuf(struct channel_gk20a *c,
51                              struct priv_cmd_entry *e);
52
53 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
54 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
55
56 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
57 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
58
59 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
60
61 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
62                                         bool add);
63 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
64
65 static void gk20a_channel_clean_up_jobs(struct work_struct *work);
66
67 /* allocate GPU channel */
68 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
69 {
70         struct channel_gk20a *ch = NULL;
71         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
72
73         mutex_lock(&f->free_chs_mutex);
74         if (!list_empty(&f->free_chs)) {
75                 ch = list_first_entry(&f->free_chs, struct channel_gk20a,
76                                 free_chs);
77                 list_del(&ch->free_chs);
78                 WARN_ON(atomic_read(&ch->ref_count));
79                 WARN_ON(ch->referenceable);
80                 f->used_channels++;
81         }
82         mutex_unlock(&f->free_chs_mutex);
83
84         if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
85                 platform->aggressive_sync_destroy = true;
86
87         return ch;
88 }
89
90 static void free_channel(struct fifo_gk20a *f,
91                 struct channel_gk20a *ch)
92 {
93         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
94
95         trace_gk20a_release_used_channel(ch->hw_chid);
96         /* refcount is zero here and channel is in a freed/dead state */
97         mutex_lock(&f->free_chs_mutex);
98         /* add to head to increase visibility of timing-related bugs */
99         list_add(&ch->free_chs, &f->free_chs);
100         f->used_channels--;
101         mutex_unlock(&f->free_chs_mutex);
102
103         if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
104                 platform->aggressive_sync_destroy = false;
105 }
106
107 int channel_gk20a_commit_va(struct channel_gk20a *c)
108 {
109         gk20a_dbg_fn("");
110
111         if (!c->inst_block.cpu_va)
112                 return -ENOMEM;
113
114         gk20a_init_inst_block(&c->inst_block, c->vm,
115                         c->vm->gmmu_page_sizes[gmmu_page_size_big]);
116
117         return 0;
118 }
119
120 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
121 {
122         u32 addr_lo;
123         u32 addr_hi;
124         void *inst_ptr;
125
126         gk20a_dbg_fn("");
127
128         inst_ptr = c->inst_block.cpu_va;
129         if (!inst_ptr)
130                 return -ENOMEM;
131
132         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
133         addr_hi = u64_hi32(c->userd_iova);
134
135         gk20a_dbg_info("channel %d : set ramfc userd 0x%016llx",
136                 c->hw_chid, (u64)c->userd_iova);
137
138         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
139                  pbdma_userd_target_vid_mem_f() |
140                  pbdma_userd_addr_f(addr_lo));
141
142         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
143                  pbdma_userd_target_vid_mem_f() |
144                  pbdma_userd_hi_addr_f(addr_hi));
145
146         return 0;
147 }
148
149 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
150                                 u32 timeslice_timeout)
151 {
152         void *inst_ptr;
153         int shift = 3;
154         int value = timeslice_timeout;
155
156         inst_ptr = c->inst_block.cpu_va;
157         if (!inst_ptr)
158                 return -ENOMEM;
159
160         /* disable channel */
161         c->g->ops.fifo.disable_channel(c);
162
163         /* preempt the channel */
164         WARN_ON(gk20a_fifo_preempt(c->g, c));
165
166         /* value field is 8 bits long */
167         while (value >= 1 << 8) {
168                 value >>= 1;
169                 shift++;
170         }
171
172         /* the time slice register is only 18 bits long */
173         if ((value << shift) >= 1<<19) {
174                 pr_err("Requested timeslice value is clamped to 18 bits\n");
175                 value = 255;
176                 shift = 10;
177         }
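        /* for example, a requested timeslice_timeout of 1000 ends up encoded
         * as value = 250 with shift = 5 after two passes of the loop above
         * (250 << 5 = 8000, well within range, so no clamping is needed) */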
178
179         /* set new timeslice */
180         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
181                 value | (shift << 12) |
182                 fifo_runlist_timeslice_enable_true_f());
183
184         /* enable channel */
185         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
186                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
187                 ccsr_channel_enable_set_true_f());
188
189         return 0;
190 }
191
192 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
193                         u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
194 {
195         void *inst_ptr;
196
197         gk20a_dbg_fn("");
198
199         inst_ptr = c->inst_block.cpu_va;
200         if (!inst_ptr)
201                 return -ENOMEM;
202
203         memset(inst_ptr, 0, ram_fc_size_val_v());
204
205         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
206                 pbdma_gp_base_offset_f(
207                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
208
209         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
210                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
211                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
212
213         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
214                  c->g->ops.fifo.get_pbdma_signature(c->g));
215
216         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
217                 pbdma_formats_gp_fermi0_f() |
218                 pbdma_formats_pb_fermi1_f() |
219                 pbdma_formats_mp_fermi0_f());
220
221         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
222                 pbdma_pb_header_priv_user_f() |
223                 pbdma_pb_header_method_zero_f() |
224                 pbdma_pb_header_subchannel_zero_f() |
225                 pbdma_pb_header_level_main_f() |
226                 pbdma_pb_header_first_true_f() |
227                 pbdma_pb_header_type_inc_f());
228
229         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
230                 pbdma_subdevice_id_f(1) |
231                 pbdma_subdevice_status_active_f() |
232                 pbdma_subdevice_channel_dma_enable_f());
233
234         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
235
236         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
237                 pbdma_acquire_retry_man_2_f() |
238                 pbdma_acquire_retry_exp_2_f() |
239                 pbdma_acquire_timeout_exp_max_f() |
240                 pbdma_acquire_timeout_man_max_f() |
241                 pbdma_acquire_timeout_en_disable_f());
242
243         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
244                 fifo_runlist_timeslice_timeout_128_f() |
245                 fifo_runlist_timeslice_timescale_3_f() |
246                 fifo_runlist_timeslice_enable_true_f());
247
248         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
249                 fifo_pb_timeslice_timeout_16_f() |
250                 fifo_pb_timeslice_timescale_0_f() |
251                 fifo_pb_timeslice_enable_true_f());
252
253         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
254
255         return channel_gk20a_commit_userd(c);
256 }
257
258 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
259 {
260         BUG_ON(!c->userd_cpu_va);
261
262         gk20a_dbg_fn("");
263
264         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
265         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
266         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
267         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
268         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
269         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
270         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
271         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
272         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
273         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
274
275         return 0;
276 }
277
278 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
279 {
280         struct gk20a *g = ch_gk20a->g;
281         struct fifo_gk20a *f = &g->fifo;
282         struct fifo_engine_info_gk20a *engine_info =
283                 f->engine_info + ENGINE_GR_GK20A;
284
285         u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block)
286                 >> ram_in_base_shift_v();
287
288         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
289                 ch_gk20a->hw_chid, inst_ptr);
290
291         ch_gk20a->bound = true;
292
293         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
294                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
295                  ~ccsr_channel_runlist_f(~0)) |
296                  ccsr_channel_runlist_f(engine_info->runlist_id));
297
298         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
299                 ccsr_channel_inst_ptr_f(inst_ptr) |
300                 ccsr_channel_inst_target_vid_mem_f() |
301                 ccsr_channel_inst_bind_true_f());
302
303         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
304                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
305                  ~ccsr_channel_enable_set_f(~0)) |
306                  ccsr_channel_enable_set_true_f());
307 }
308
309 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
310 {
311         struct gk20a *g = ch_gk20a->g;
312         struct gk20a_platform *platform = gk20a_get_platform(g->dev);
313
314         gk20a_dbg_fn("");
315
316         if (ch_gk20a->bound)
317                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
318                         ccsr_channel_inst_ptr_f(0) |
319                         ccsr_channel_inst_bind_false_f());
320
321         ch_gk20a->bound = false;
322
323         /*
324          * if we are in aggressive sync destroy mode, the syncpt
325          * resource can be destroyed at this point;
326          * otherwise it will be destroyed at channel_free()
327          */
328         mutex_lock(&ch_gk20a->sync_lock);
329         if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
330
331                 ch_gk20a->sync->destroy(ch_gk20a->sync);
332                 ch_gk20a->sync = NULL;
333         }
334         mutex_unlock(&ch_gk20a->sync_lock);
335 }
336
337 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
338 {
339         int err;
340
341         gk20a_dbg_fn("");
342
343         err = gk20a_alloc_inst_block(g, &ch->inst_block);
344         if (err)
345                 return err;
346
347         gk20a_dbg_info("channel %d inst block physical addr: 0x%016llx",
348                 ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block));
349
350         gk20a_dbg_fn("done");
351         return 0;
352 }
353
354 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
355 {
356         gk20a_free_inst_block(g, &ch->inst_block);
357 }
358
359 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
360 {
361         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
362 }
363
364 void channel_gk20a_enable(struct channel_gk20a *ch)
365 {
366         /* enable channel */
367         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
368                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
369                 ccsr_channel_enable_set_true_f());
370 }
371
372 void channel_gk20a_disable(struct channel_gk20a *ch)
373 {
374         /* disable channel */
375         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
376                 gk20a_readl(ch->g,
377                         ccsr_channel_r(ch->hw_chid)) |
378                         ccsr_channel_enable_clr_true_f());
379 }
380
381 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
382 {
383         struct channel_gk20a_job *job, *n;
384         bool released_job_semaphore = false;
385
386         gk20a_dbg_fn("");
387
388         /* make sure new kickoffs are prevented */
389         ch->has_timedout = true;
390
391         ch->g->ops.fifo.disable_channel(ch);
392
393         if (channel_preempt)
394                 gk20a_fifo_preempt(ch->g, ch);
395
396         /* ensure no fences are pending */
397         mutex_lock(&ch->sync_lock);
398         if (ch->sync)
399                 ch->sync->set_min_eq_max(ch->sync);
400         mutex_unlock(&ch->sync_lock);
401
402         /* release all job semaphores (applies only to jobs that use
403            semaphore synchronization) */
404         mutex_lock(&ch->jobs_lock);
405         list_for_each_entry_safe(job, n, &ch->jobs, list) {
406                 if (job->post_fence->semaphore) {
407                         gk20a_semaphore_release(job->post_fence->semaphore);
408                         released_job_semaphore = true;
409                 }
410         }
411         mutex_unlock(&ch->jobs_lock);
412
413         if (released_job_semaphore)
414                 wake_up_interruptible_all(&ch->semaphore_wq);
415
416         gk20a_channel_update(ch, 0);
417 }
418
419 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
420 {
421         bool channel_idle = false;
422         unsigned long end_jiffies = jiffies +
423                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
424
425         do {
426                 mutex_lock(&ch->jobs_lock);
427                 channel_idle = list_empty(&ch->jobs);
428                 mutex_unlock(&ch->jobs_lock);
429                 if (channel_idle)
430                         break;
431
432                 usleep_range(1000, 3000);
433         } while (time_before(jiffies, end_jiffies)
434                         || !tegra_platform_is_silicon());
435
436         if (!channel_idle) {
437                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
438                                 ch->hw_chid);
439                 return -EBUSY;
440         }
441
442         return 0;
443 }
444
445 void gk20a_disable_channel(struct channel_gk20a *ch)
446 {
447         gk20a_channel_abort(ch, true);
448         channel_gk20a_update_runlist(ch, false);
449 }
450
451 #if defined(CONFIG_GK20A_CYCLE_STATS)
452
453 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
454 {
455         /* disable existing cyclestats buffer */
456         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
457         if (ch->cyclestate.cyclestate_buffer_handler) {
458                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
459                                 ch->cyclestate.cyclestate_buffer);
460                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
461                 ch->cyclestate.cyclestate_buffer_handler = NULL;
462                 ch->cyclestate.cyclestate_buffer = NULL;
463                 ch->cyclestate.cyclestate_buffer_size = 0;
464         }
465         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
466 }
467
468 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
469                        struct nvgpu_cycle_stats_args *args)
470 {
471         struct dma_buf *dmabuf;
472         void *virtual_address;
473
474         /* are cycle stats calls supported on the current GPU? */
475         if (0 == (ch->g->gpu_characteristics.flags &
476                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
477                 return -ENOSYS;
478
479         if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
480
481                 /* set up new cyclestats buffer */
482                 dmabuf = dma_buf_get(args->dmabuf_fd);
483                 if (IS_ERR(dmabuf))
484                         return PTR_ERR(dmabuf);
485                 virtual_address = dma_buf_vmap(dmabuf);
486                 if (!virtual_address)
487                         return -ENOMEM;
488
489                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
490                 ch->cyclestate.cyclestate_buffer = virtual_address;
491                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
492                 return 0;
493
494         } else if (!args->dmabuf_fd &&
495                         ch->cyclestate.cyclestate_buffer_handler) {
496                 gk20a_free_cycle_stats_buffer(ch);
497                 return 0;
498
499         } else if (!args->dmabuf_fd &&
500                         !ch->cyclestate.cyclestate_buffer_handler) {
501                 /* no request from GL */
502                 return 0;
503
504         } else {
505                 pr_err("channel already has cyclestats buffer\n");
506                 return -EINVAL;
507         }
508 }
509
510
511 static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
512 {
513         int ret;
514
515         mutex_lock(&ch->cs_client_mutex);
516         if (ch->cs_client)
517                 ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
518         else
519                 ret = -EBADF;
520         mutex_unlock(&ch->cs_client_mutex);
521
522         return ret;
523 }
524
525 static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
526                                 u32 dmabuf_fd,
527                                 u32 perfmon_id_count,
528                                 u32 *perfmon_id_start)
529 {
530         int ret;
531
532         mutex_lock(&ch->cs_client_mutex);
533         if (ch->cs_client) {
534                 ret = -EEXIST;
535         } else {
536                 ret = gr_gk20a_css_attach(ch->g,
537                                         dmabuf_fd,
538                                         perfmon_id_count,
539                                         perfmon_id_start,
540                                         &ch->cs_client);
541         }
542         mutex_unlock(&ch->cs_client_mutex);
543
544         return ret;
545 }
546
547 static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
548 {
549         int ret;
550
551         mutex_lock(&ch->cs_client_mutex);
552         if (ch->cs_client) {
553                 ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
554                 ch->cs_client = NULL;
555         } else {
556                 ret = 0;
557         }
558         mutex_unlock(&ch->cs_client_mutex);
559
560         return ret;
561 }
562
563 static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
564                         struct nvgpu_cycle_stats_snapshot_args *args)
565 {
566         int ret;
567
568         /* are cycle stats snapshot calls supported on the current GPU? */
569         if (0 == (ch->g->gpu_characteristics.flags &
570                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT))
571                 return -ENOSYS;
572
573         if (!args->dmabuf_fd)
574                 return -EINVAL;
575
576         /* handle the command (most frequent cases first) */
577         switch (args->cmd) {
578         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
579                 ret = gk20a_flush_cycle_stats_snapshot(ch);
580                 args->extra = 0;
581                 break;
582
583         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
584                 ret = gk20a_attach_cycle_stats_snapshot(ch,
585                                                 args->dmabuf_fd,
586                                                 args->extra,
587                                                 &args->extra);
588                 break;
589
590         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
591                 ret = gk20a_free_cycle_stats_snapshot(ch);
592                 args->extra = 0;
593                 break;
594
595         default:
596                 pr_err("cyclestats: unknown command %u\n", args->cmd);
597                 ret = -EINVAL;
598                 break;
599         }
600
601         return ret;
602 }
603 #endif
604
605 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
606                 struct nvgpu_set_error_notifier *args)
607 {
608         struct device *dev = dev_from_gk20a(ch->g);
609         struct dma_buf *dmabuf;
610         void *va;
611         u64 end = args->offset + sizeof(struct nvgpu_notification);
612
613         if (!args->mem) {
614                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
615                 return -EINVAL;
616         }
617
618         dmabuf = dma_buf_get(args->mem);
619
620         if (ch->error_notifier_ref)
621                 gk20a_free_error_notifiers(ch);
622
623         if (IS_ERR(dmabuf)) {
624                 pr_err("Invalid handle: %d\n", args->mem);
625                 return -EINVAL;
626         }
627
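        /* make sure the notification fits entirely inside the buffer; the
         * second comparison also catches the case where offset + size wraps
         * around the u64 range */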
628         if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) {
629                 dma_buf_put(dmabuf);
630                 gk20a_err(dev, "gk20a_init_error_notifier: invalid offset\n");
631                 return -EINVAL;
632         }
633
634         /* map handle */
635         va = dma_buf_vmap(dmabuf);
636         if (!va) {
637                 dma_buf_put(dmabuf);
638                 pr_err("Cannot map notifier handle\n");
639                 return -ENOMEM;
640         }
641
642         /* set channel notifiers pointer */
643         ch->error_notifier_ref = dmabuf;
644         ch->error_notifier = va + args->offset;
645         ch->error_notifier_va = va;
646         memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
647         return 0;
648 }
649
650 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
651 {
652         if (ch->error_notifier_ref) {
653                 struct timespec time_data;
654                 u64 nsec;
655                 getnstimeofday(&time_data);
656                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
657                                 (u64)time_data.tv_nsec;
658                 ch->error_notifier->time_stamp.nanoseconds[0] =
659                                 (u32)nsec;
660                 ch->error_notifier->time_stamp.nanoseconds[1] =
661                                 (u32)(nsec >> 32);
662                 ch->error_notifier->info32 = error;
663                 ch->error_notifier->status = 0xffff;
664
665                 gk20a_err(dev_from_gk20a(ch->g),
666                     "error notifier set to %d for ch %d", error, ch->hw_chid);
667         }
668 }
669
670 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
671 {
672         if (ch->error_notifier_ref) {
673                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
674                 dma_buf_put(ch->error_notifier_ref);
675                 ch->error_notifier_ref = NULL;
676                 ch->error_notifier = NULL;
677                 ch->error_notifier_va = NULL;
678         }
679 }
680
681 /* Returns delta of cyclic integers a and b. If a is ahead of b, delta
682  * is positive */
683 static int cyclic_delta(int a, int b)
684 {
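        /* the wrapping subtraction keeps the sign of the delta meaningful
         * even after the underlying counters have wrapped around */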
685         return a - b;
686 }
687
688 static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
689 {
690         int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
691         int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);
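        /* the hardware interrupt counters read above act as targets: once the
         * software handler counters have caught up to them, every interrupt
         * raised before this point has been fully handled */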
692
693         /* wait until all stalling irqs are handled */
694         wait_event(g->sw_irq_stall_last_handled_wq,
695                    cyclic_delta(stall_irq_threshold,
696                                 atomic_read(&g->sw_irq_stall_last_handled))
697                    <= 0);
698
699         /* wait until all non-stalling irqs are handled */
700         wait_event(g->sw_irq_nonstall_last_handled_wq,
701                    cyclic_delta(nonstall_irq_threshold,
702                                 atomic_read(&g->sw_irq_nonstall_last_handled))
703                    <= 0);
704 }
705
706 static void gk20a_wait_until_counter_is_N(
707         struct channel_gk20a *ch, atomic_t *counter, int wait_value,
708         wait_queue_head_t *wq, const char *caller, const char *counter_name)
709 {
710         while (true) {
711                 if (wait_event_timeout(
712                             *wq,
713                             atomic_read(counter) == wait_value,
714                             msecs_to_jiffies(5000)) > 0)
715                         break;
716
717                 gk20a_warn(dev_from_gk20a(ch->g),
718                            "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
719                            caller, ch->hw_chid, counter_name,
720                            atomic_read(counter), wait_value);
721         }
722 }
723
724
725
726 /* call ONLY when no references to the channel exist: after the last put */
727 static void gk20a_free_channel(struct channel_gk20a *ch)
728 {
729         struct gk20a *g = ch->g;
730         struct fifo_gk20a *f = &g->fifo;
731         struct gr_gk20a *gr = &g->gr;
732         struct vm_gk20a *ch_vm = ch->vm;
733         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
734         struct dbg_session_gk20a *dbg_s;
735         bool was_reset;
736         gk20a_dbg_fn("");
737
738         WARN_ON(ch->g == NULL);
739
740         trace_gk20a_free_channel(ch->hw_chid);
741
742         /* abort channel and remove from runlist */
743         gk20a_disable_channel(ch);
744
745         /* wait until there's only our ref to the channel */
746         gk20a_wait_until_counter_is_N(
747                 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
748                 __func__, "references");
749
750         /* wait until all pending interrupts for recently completed
751          * jobs are handled */
752         gk20a_wait_for_deferred_interrupts(g);
753
754         /* prevent new refs */
755         spin_lock(&ch->ref_obtain_lock);
756         if (!ch->referenceable) {
757                 spin_unlock(&ch->ref_obtain_lock);
758                 gk20a_err(dev_from_gk20a(ch->g),
759                           "Extra %s() called to channel %u",
760                           __func__, ch->hw_chid);
761                 return;
762         }
763         ch->referenceable = false;
764         spin_unlock(&ch->ref_obtain_lock);
765
766         /* matches with the initial reference in gk20a_open_new_channel() */
767         atomic_dec(&ch->ref_count);
768
769         /* wait until no more refs to the channel */
770         gk20a_wait_until_counter_is_N(
771                 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
772                 __func__, "references");
773
774         /* if engine reset was deferred, perform it now */
775         mutex_lock(&f->deferred_reset_mutex);
776         if (g->fifo.deferred_reset_pending) {
777                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
778                            " deferred, running now");
779                 was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
780                 mutex_lock(&g->fifo.gr_reset_mutex);
781                 /* if the lock is already taken, a reset is in progress,
782                  * so there is no need to repeat it */
783                 if (!was_reset) {
784                         gk20a_fifo_reset_engine(g,
785                                 g->fifo.deferred_fault_engines);
786                 }
787                 mutex_unlock(&g->fifo.gr_reset_mutex);
788                 g->fifo.deferred_fault_engines = 0;
789                 g->fifo.deferred_reset_pending = false;
790         }
791         mutex_unlock(&f->deferred_reset_mutex);
792
793         if (!ch->bound)
794                 goto release;
795
796         if (!gk20a_channel_as_bound(ch))
797                 goto unbind;
798
799         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
800                         timeout);
801
802         gk20a_free_error_notifiers(ch);
803
804         /* release channel ctx */
805         g->ops.gr.free_channel_ctx(ch);
806
807         gk20a_gr_flush_channel_tlb(gr);
808
809         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
810
811         gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
812
813         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
814
815 #if defined(CONFIG_GK20A_CYCLE_STATS)
816         gk20a_free_cycle_stats_buffer(ch);
817         gk20a_free_cycle_stats_snapshot(ch);
818 #endif
819
820         channel_gk20a_free_priv_cmdbuf(ch);
821
822         /* sync must be destroyed before releasing channel vm */
823         mutex_lock(&ch->sync_lock);
824         if (ch->sync) {
825                 ch->sync->destroy(ch->sync);
826                 ch->sync = NULL;
827         }
828         mutex_unlock(&ch->sync_lock);
829
830         /* release channel binding to the as_share */
831         if (ch_vm->as_share)
832                 gk20a_as_release_share(ch_vm->as_share);
833         else
834                 gk20a_vm_put(ch_vm);
835
836         spin_lock(&ch->update_fn_lock);
837         ch->update_fn = NULL;
838         ch->update_fn_data = NULL;
839         spin_unlock(&ch->update_fn_lock);
840         cancel_work_sync(&ch->update_fn_work);
841
842         /* make sure we don't have deferred interrupts pending that
843          * could still touch the channel */
844         gk20a_wait_for_deferred_interrupts(g);
845
846 unbind:
847         if (gk20a_is_channel_marked_as_tsg(ch))
848                 gk20a_tsg_unbind_channel(ch);
849
850         g->ops.fifo.unbind_channel(ch);
851         g->ops.fifo.free_inst(g, ch);
852
853         ch->vpr = false;
854         ch->vm = NULL;
855
856         mutex_lock(&ch->submit_lock);
857         gk20a_fence_put(ch->last_submit.pre_fence);
858         gk20a_fence_put(ch->last_submit.post_fence);
859         ch->last_submit.pre_fence = NULL;
860         ch->last_submit.post_fence = NULL;
861         mutex_unlock(&ch->submit_lock);
862         WARN_ON(ch->sync);
863
864         /* unlink all debug sessions */
865         mutex_lock(&ch->dbg_s_lock);
866
867         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
868                 dbg_s->ch = NULL;
869                 list_del_init(&dbg_s->dbg_s_list_node);
870         }
871
872         mutex_unlock(&ch->dbg_s_lock);
873
874 release:
875         /* make sure we catch accesses to unopened channels in case
876          * there are non-refcounted channel pointers hanging around */
877         ch->g = NULL;
878         wmb();
879
880         /* ALWAYS last */
881         free_channel(f, ch);
882 }
883
884 /* Try to get a reference to the channel. Returns the channel pointer on
885  * success, or NULL if the channel is dead or being freed: do not touch it.
886  *
887  * Whenever a channel_gk20a pointer is about to be used, a reference must be
888  * held to it - either by you or the caller, which should be documented well
889  * or otherwise clearly visible. This usually boils down to the file from
890  * ioctls directly, or an explicit get in exception handlers when the channel
891  * is found by a hw_chid.
892  *
893  * Most global functions in this file require a reference to be held by the
894  * caller.
895  */
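/*
 * Typical usage, as a sketch:
 *
 *      ch = gk20a_channel_get(ch);
 *      if (!ch)
 *              return;  (the channel is dead, do not touch it)
 *      ... use ch ...
 *      gk20a_channel_put(ch);
 */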
896 struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
897                                          const char *caller) {
898         struct channel_gk20a *ret;
899
900         spin_lock(&ch->ref_obtain_lock);
901
902         if (likely(ch->referenceable)) {
903                 atomic_inc(&ch->ref_count);
904                 ret = ch;
905         } else
906                 ret = NULL;
907
908         spin_unlock(&ch->ref_obtain_lock);
909
910         if (ret)
911                 trace_gk20a_channel_get(ch->hw_chid, caller);
912
913         return ret;
914 }
915
916 void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
917 {
918         trace_gk20a_channel_put(ch->hw_chid, caller);
919         atomic_dec(&ch->ref_count);
920         wake_up_all(&ch->ref_count_dec_wq);
921
922         /* More puts than gets. Channel is probably going to get
923          * stuck. */
924         WARN_ON(atomic_read(&ch->ref_count) < 0);
925
926         /* Also, more puts than gets. ref_count can go to 0 only if
927          * the channel is closing. Channel is probably going to get
928          * stuck. */
929         WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
930 }
931
932 void gk20a_channel_close(struct channel_gk20a *ch)
933 {
934         gk20a_free_channel(ch);
935 }
936
937 int gk20a_channel_release(struct inode *inode, struct file *filp)
938 {
939         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
940         struct gk20a *g = ch ? ch->g : NULL;
941         int err;
942
943         if (!ch)
944                 return 0;
945
946         trace_gk20a_channel_release(dev_name(&g->dev->dev));
947
948         err = gk20a_busy(g->dev);
949         if (err) {
950                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
951                         ch->hw_chid);
952                 return err;
953         }
954         gk20a_channel_close(ch);
955         gk20a_idle(g->dev);
956
957         filp->private_data = NULL;
958         return 0;
959 }
960
961 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
962 {
963         struct channel_gk20a *ch =
964                 container_of(work, struct channel_gk20a, update_fn_work);
965         void (*update_fn)(struct channel_gk20a *, void *);
966         void *update_fn_data;
967
968         spin_lock(&ch->update_fn_lock);
969         update_fn = ch->update_fn;
970         update_fn_data = ch->update_fn_data;
971         spin_unlock(&ch->update_fn_lock);
972
973         if (update_fn)
974                 update_fn(ch, update_fn_data);
975 }
976
977 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
978                 void (*update_fn)(struct channel_gk20a *, void *),
979                 void *update_fn_data)
980 {
981         struct channel_gk20a *ch = gk20a_open_new_channel(g);
982
983         if (ch) {
984                 spin_lock(&ch->update_fn_lock);
985                 ch->update_fn = update_fn;
986                 ch->update_fn_data = update_fn_data;
987                 spin_unlock(&ch->update_fn_lock);
988         }
989
990         return ch;
991 }
992
993 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
994 {
995         struct fifo_gk20a *f = &g->fifo;
996         struct channel_gk20a *ch;
997
998         gk20a_dbg_fn("");
999
1000         ch = allocate_channel(f);
1001         if (ch == NULL) {
1002                 /* TBD: we want to make this virtualizable */
1003                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
1004                 return NULL;
1005         }
1006
1007         trace_gk20a_open_new_channel(ch->hw_chid);
1008
1009         BUG_ON(ch->g);
1010         ch->g = g;
1011
1012         if (g->ops.fifo.alloc_inst(g, ch)) {
1013                 ch->g = NULL;
1014                 free_channel(f, ch);
1015                 gk20a_err(dev_from_gk20a(g),
1016                            "failed to open gk20a channel, out of inst mem");
1017                 return NULL;
1018         }
1019
1020         /* now the channel is in limbo: off the free list but not yet marked
1021          * as alive and usable (i.e. get-able) */
1022
1023         ch->pid = current->pid;
1024
1025         /* By default, channel is regular (non-TSG) channel */
1026         ch->tsgid = NVGPU_INVALID_TSG_ID;
1027
1028         /* reset timeout counter and update timestamp */
1029         ch->timeout_accumulated_ms = 0;
1030         ch->timeout_gpfifo_get = 0;
1031         /* set gr host default timeout */
1032         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
1033         ch->timeout_debug_dump = true;
1034         ch->has_timedout = false;
1035         ch->obj_class = 0;
1036         ch->clean_up.scheduled = false;
1037
1038         /* The channel is *not* runnable at this point. It still needs to have
1039          * an address space bound and a gpfifo and grctx allocated. */
1040
1041         init_waitqueue_head(&ch->notifier_wq);
1042         init_waitqueue_head(&ch->semaphore_wq);
1043         init_waitqueue_head(&ch->submit_wq);
1044
1045         mutex_init(&ch->poll_events.lock);
1046         ch->poll_events.events_enabled = false;
1047         ch->poll_events.num_pending_events = 0;
1048
1049         ch->update_fn = NULL;
1050         ch->update_fn_data = NULL;
1051         spin_lock_init(&ch->update_fn_lock);
1052         INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
1053
1054         /* Mark the channel alive and get-able, with one initial
1055          * reference. The initial reference is dropped in
1056          * gk20a_free_channel() */
1057         ch->referenceable = true;
1058         atomic_set(&ch->ref_count, 1);
1059         wmb();
1060
1061         return ch;
1062 }
1063
1064 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
1065 {
1066         int err;
1067         struct channel_gk20a *ch;
1068
1069         trace_gk20a_channel_open(dev_name(&g->dev->dev));
1070
1071         err = gk20a_busy(g->dev);
1072         if (err) {
1073                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
1074                 return err;
1075         }
1076         ch = gk20a_open_new_channel(g);
1077         gk20a_idle(g->dev);
1078         if (!ch) {
1079                 gk20a_err(dev_from_gk20a(g),
1080                         "failed to get a new channel");
1081                 return -ENOMEM;
1082         }
1083
1084         filp->private_data = ch;
1085         return 0;
1086 }
1087
1088 int gk20a_channel_open(struct inode *inode, struct file *filp)
1089 {
1090         struct gk20a *g = container_of(inode->i_cdev,
1091                         struct gk20a, channel.cdev);
1092         int ret;
1093
1094         gk20a_dbg_fn("start");
1095         ret = __gk20a_channel_open(g, filp);
1096
1097         gk20a_dbg_fn("end");
1098         return ret;
1099 }
1100
1101 int gk20a_channel_open_ioctl(struct gk20a *g,
1102                 struct nvgpu_channel_open_args *args)
1103 {
1104         int err;
1105         int fd;
1106         struct file *file;
1107         char *name;
1108
1109         err = get_unused_fd_flags(O_RDWR);
1110         if (err < 0)
1111                 return err;
1112         fd = err;
1113
1114         name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
1115                         dev_name(&g->dev->dev), fd);
1116         if (!name) {
1117                 err = -ENOMEM;
1118                 goto clean_up;
1119         }
1120
1121         file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
1122         kfree(name);
1123         if (IS_ERR(file)) {
1124                 err = PTR_ERR(file);
1125                 goto clean_up;
1126         }
1127
1128         err = __gk20a_channel_open(g, file);
1129         if (err)
1130                 goto clean_up_file;
1131
1132         fd_install(fd, file);
1133         args->channel_fd = fd;
1134         return 0;
1135
1136 clean_up_file:
1137         fput(file);
1138 clean_up:
1139         put_unused_fd(fd);
1140         return err;
1141 }
1142
1143 /* allocate private cmd buffer.
1144    used for inserting commands before/after user submitted buffers. */
1145 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
1146 {
1147         struct device *d = dev_from_gk20a(c->g);
1148         struct vm_gk20a *ch_vm = c->vm;
1149         struct priv_cmd_queue *q = &c->priv_cmd_q;
1150         u32 size;
1151         int err = 0;
1152
1153         /* Kernel can insert gpfifos before and after user gpfifos.
1154            Before user gpfifos, kernel inserts fence_wait, which takes
1155            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
1156            After user gpfifos, kernel inserts fence_get, which takes
1157            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
1158            = 6 dwords.
1159            Worst case, if the kernel adds both of them for every user gpfifo,
1160            the max size of priv_cmdbuf is:
1161            (gpfifo entry number) * (2 / 3) * (4 + 6) * 4 bytes */
1162         size = roundup_pow_of_two(
1163                 c->gpfifo.entry_num * 2 * 12 * sizeof(u32) / 3);
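        /* for example, with entry_num = 4096 this evaluates to
         * 4096 * 2 * 12 * 4 / 3 = 131072 bytes (128 KiB), which is
         * already a power of two */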
1164
1165         err = gk20a_gmmu_alloc_map(ch_vm, size, &q->mem);
1166         if (err) {
1167                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1168                 goto clean_up;
1169         }
1170
1171         q->size = q->mem.size / sizeof (u32);
1172
1173         INIT_LIST_HEAD(&q->free);
1174
1175         return 0;
1176
1177 clean_up:
1178         channel_gk20a_free_priv_cmdbuf(c);
1179         return err;
1180 }
1181
1182 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
1183 {
1184         struct vm_gk20a *ch_vm = c->vm;
1185         struct priv_cmd_queue *q = &c->priv_cmd_q;
1186         struct priv_cmd_entry *e;
1187         struct list_head *pos, *tmp, *head;
1188
1189         if (q->size == 0)
1190                 return;
1191
1192         gk20a_gmmu_unmap_free(ch_vm, &q->mem);
1193
1194         /* free free list */
1195         head = &q->free;
1196         list_for_each_safe(pos, tmp, head) {
1197                 e = container_of(pos, struct priv_cmd_entry, list);
1198                 kfree(e);
1199         }
1200
1201         memset(q, 0, sizeof(struct priv_cmd_queue));
1202 }
1203
1204 /* allocate a cmd buffer with given size. size is number of u32 entries */
1205 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1206                              struct priv_cmd_entry **entry)
1207 {
1208         struct priv_cmd_queue *q = &c->priv_cmd_q;
1209         struct priv_cmd_entry *e;
1210         u32 free_count;
1211         u32 size = orig_size;
1212
1213         gk20a_dbg_fn("size %d", orig_size);
1214
1215         *entry = NULL;
1216
1217         /* if the free space at the end is less than requested, increase the
1218          * size so that the real allocation starts at the beginning. */
1219         if (q->put + size > q->size)
1220                 size = orig_size + (q->size - q->put);
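        /* for example, with q->size = 1024, q->put = 1000 and orig_size = 50,
         * size becomes 50 + 24 = 74: the 24 words left at the end are skipped
         * and the entry is placed at the start of the buffer (see below) */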
1221
1222         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
1223                         c->hw_chid, q->get, q->put);
1224
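        /* classic ring-buffer free count: one slot is always kept unused so
         * that put == get unambiguously means the queue is empty */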
1225         free_count = (q->size - (q->put - q->get) - 1) % q->size;
1226
1227         if (size > free_count)
1228                 return -EAGAIN;
1229
1230         if (list_empty(&q->free))
1231                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1232         else {
1233                 e = container_of((&q->free)->next,
1234                                  struct priv_cmd_entry, list);
1235                 list_del(&e->list);
1236         }
1237         if (!e) {
1238                 gk20a_err(dev_from_gk20a(c->g),
1239                         "ch %d: fail to allocate priv cmd entry",
1240                         c->hw_chid);
1241                 return -ENOMEM;
1242         }
1243
1244         e->size = orig_size;
1245         e->gp_get = c->gpfifo.get;
1246         e->gp_put = c->gpfifo.put;
1247         e->gp_wrap = c->gpfifo.wrap;
1248
1249         /* if we have increased the size to skip the free space at the end,
1250            set put to the beginning of the cmd buffer (0) + size */
1251         if (size != orig_size) {
1252                 e->ptr = (u32 *)q->mem.cpu_va;
1253                 e->gva = q->mem.gpu_va;
1254                 q->put = orig_size;
1255         } else {
1256                 e->ptr = (u32 *)q->mem.cpu_va + q->put;
1257                 e->gva = q->mem.gpu_va + q->put * sizeof(u32);
1258                 q->put = (q->put + orig_size) & (q->size - 1);
1259         }
1260
1261         /* we already handled q->put + size > q->size so BUG_ON this */
1262         BUG_ON(q->put > q->size);
1263
1264         *entry = e;
1265
1266         gk20a_dbg_fn("done");
1267
1268         return 0;
1269 }
1270
1271 /* Don't call this to free an explicit cmd entry.
1272  * It doesn't update priv_cmd_queue get/put */
1273 static void free_priv_cmdbuf(struct channel_gk20a *c,
1274                              struct priv_cmd_entry *e)
1275 {
1276         struct priv_cmd_queue *q = &c->priv_cmd_q;
1277
1278         list_add(&e->list, &q->free);
1279 }
1280
1281 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1282                 struct nvgpu_alloc_gpfifo_args *args)
1283 {
1284         struct gk20a *g = c->g;
1285         struct device *d = dev_from_gk20a(g);
1286         struct vm_gk20a *ch_vm;
1287         u32 gpfifo_size;
1288         int err = 0;
1289
1290         /* The kernel can insert one extra gpfifo entry before each user-submitted
1291            gpfifo and another one after it, for internal use; triple the requested size. */
1292         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
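        /* e.g. a request for 1024 entries yields roundup_pow_of_two(3072) = 4096 */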
1293
1294         if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1295                 c->vpr = true;
1296
1297         /* an address space needs to have been bound at this point. */
1298         if (!gk20a_channel_as_bound(c)) {
1299                 gk20a_err(d,
1300                             "not bound to an address space at time of gpfifo"
1301                             " allocation.");
1302                 return -EINVAL;
1303         }
1304         ch_vm = c->vm;
1305
1306         c->cmds_pending = false;
1307         mutex_lock(&c->submit_lock);
1308         gk20a_fence_put(c->last_submit.pre_fence);
1309         gk20a_fence_put(c->last_submit.post_fence);
1310         c->last_submit.pre_fence = NULL;
1311         c->last_submit.post_fence = NULL;
1312         mutex_unlock(&c->submit_lock);
1313
1314         c->ramfc.offset = 0;
1315         c->ramfc.size = ram_in_ramfc_s() / 8;
1316
1317         if (c->gpfifo.mem.cpu_va) {
1318                 gk20a_err(d, "channel %d :"
1319                            "gpfifo already allocated", c->hw_chid);
1320                 return -EEXIST;
1321         }
1322
1323         err = gk20a_gmmu_alloc_map(ch_vm, gpfifo_size * sizeof(struct gpfifo),
1324                         &c->gpfifo.mem);
1325         if (err) {
1326                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1327                 goto clean_up;
1328         }
1329
1330         c->gpfifo.entry_num = gpfifo_size;
1331         c->gpfifo.get = c->gpfifo.put = 0;
1332
1333         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1334                 c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);
1335
1336         channel_gk20a_setup_userd(c);
1337
1338         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
1339                                         c->gpfifo.entry_num, args->flags);
1340         if (err)
1341                 goto clean_up_unmap;
1342
1343         /* TBD: setup engine contexts */
1344
1345         err = channel_gk20a_alloc_priv_cmdbuf(c);
1346         if (err)
1347                 goto clean_up_unmap;
1348
1349         err = channel_gk20a_update_runlist(c, true);
1350         if (err)
1351                 goto clean_up_unmap;
1352
1353         g->ops.fifo.bind_channel(c);
1354
1355         gk20a_dbg_fn("done");
1356         return 0;
1357
1358 clean_up_unmap:
1359         gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
1360 clean_up:
1361         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1362         gk20a_err(d, "fail");
1363         return err;
1364 }
1365
1366 static inline bool check_gp_put(struct gk20a *g,
1367                                 struct channel_gk20a *c)
1368 {
1369         u32 put;
1370         /* gp_put changed unexpectedly since last update? */
1371         put = gk20a_bar1_readl(g,
1372                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1373         if (c->gpfifo.put != put) {
1374                 /*TBD: BUG_ON/teardown on this*/
1375                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1376                           "since last update, channel put = %u, ram put = %u\n",
1377                           c->gpfifo.put, put);
1378                 c->gpfifo.put = put;
1379                 return false; /* surprise! */
1380         }
1381         return true; /* checked out ok */
1382 }
1383
1384 /* Call this periodically to update the cached gp_get and see how the gpfifo is draining. */
1385 static inline u32 update_gp_get(struct gk20a *g,
1386                                 struct channel_gk20a *c)
1387 {
1388         u32 new_get = gk20a_bar1_readl(g,
1389                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
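        /* GP_GET only advances through the circular gpfifo, so a value smaller
         * than the one we read last time is taken to mean the hardware wrapped
         * around the end of the gpfifo */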
1390         if (new_get < c->gpfifo.get)
1391                 c->gpfifo.wrap = !c->gpfifo.wrap;
1392         c->gpfifo.get = new_get;
1393         return new_get;
1394 }
1395
1396 static inline u32 gp_free_count(struct channel_gk20a *c)
1397 {
1398         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1399                 c->gpfifo.entry_num;
1400 }
1401
1402 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1403                 u32 timeout_delta_ms)
1404 {
1405         u32 gpfifo_get = update_gp_get(ch->g, ch);
1406         /* Count consecutive timeout ISRs */
1407         if (gpfifo_get == ch->timeout_gpfifo_get) {
1408                 /* we didn't advance since previous channel timeout check */
1409                 ch->timeout_accumulated_ms += timeout_delta_ms;
1410         } else {
1411                 /* first timeout isr encountered */
1412                 ch->timeout_accumulated_ms = timeout_delta_ms;
1413         }
1414
1415         ch->timeout_gpfifo_get = gpfifo_get;
1416
1417         return ch->g->timeouts_enabled &&
1418                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1419 }
1420
1421 static u32 get_gp_free_count(struct channel_gk20a *c)
1422 {
1423         update_gp_get(c->g, c);
1424         return gp_free_count(c);
1425 }
1426
1427 static void trace_write_pushbuffer(struct channel_gk20a *c,
1428                                    struct nvgpu_gpfifo *g)
1429 {
1430         void *mem = NULL;
1431         unsigned int words;
1432         u64 offset;
1433         struct dma_buf *dmabuf = NULL;
1434
1435         if (gk20a_debug_trace_cmdbuf) {
1436                 u64 gpu_va = (u64)g->entry0 |
1437                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1438                 int err;
1439
1440                 words = pbdma_gp_entry1_length_v(g->entry1);
1441                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1442                 if (!err)
1443                         mem = dma_buf_vmap(dmabuf);
1444         }
1445
1446         if (mem) {
1447                 u32 i;
1448                 /*
1449                  * Write in batches of 128 as there seems to be a limit
1450                  * of how much you can output to ftrace at once.
1451                  */
1452                 for (i = 0; i < words; i += 128U) {
1453                         trace_gk20a_push_cmdbuf(
1454                                 c->g->dev->name,
1455                                 0,
1456                                 min(words - i, 128U),
1457                                 offset + i * sizeof(u32),
1458                                 mem);
1459                 }
1460                 dma_buf_vunmap(dmabuf, mem);
1461         }
1462 }
1463
1464 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1465                                          struct nvgpu_gpfifo *g,
1466                                          struct nvgpu_submit_gpfifo_args *args,
1467                                          int offset,
1468                                          int count)
1469 {
1470         u32 size;
1471         int i;
1472         struct nvgpu_gpfifo *gp;
1473         bool gpfifo_allocated = false;
1474
1475         if (!gk20a_debug_trace_cmdbuf)
1476                 return;
1477
1478         if (!g && !args)
1479                 return;
1480
1481         if (!g) {
1482                 size = args->num_entries * sizeof(struct nvgpu_gpfifo);
1483                 if (size) {
1484                         g = nvgpu_alloc(size, false);
1485                         if (!g)
1486                                 return;
1487
1488                         if (copy_from_user(g,
1489                                 (void __user *)(uintptr_t)args->gpfifo, size)) {
                                nvgpu_free(g); /* don't leak the temporary copy */
1490                                 return;
1491                         }
1492                 }
1493                 gpfifo_allocated = true;
1494         }
1495
1496         gp = g + offset;
1497         for (i = 0; i < count; i++, gp++)
1498                 trace_write_pushbuffer(c, gp);
1499
1500         if (gpfifo_allocated)
1501                 nvgpu_free(g);
1502 }
1503
1504 static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
1505                                         struct priv_cmd_entry *e)
1506 {
1507         struct priv_cmd_queue *q = &c->priv_cmd_q;
1508         u32 cmd_entry_start;
1509         struct device *d = dev_from_gk20a(c->g);
1510
1511         if (!e)
1512                 return 0;
1513
1514         cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
1515         if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
1516                 gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
1517
1518         q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
1519         free_priv_cmdbuf(c, e);
1520
1521         return 0;
1522 }
1523
1524 static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
1525 {
1526         mutex_lock(&c->clean_up.lock);
1527
1528         if (c->clean_up.scheduled) {
1529                 mutex_unlock(&c->clean_up.lock);
1530                 return;
1531         }
1532
1533         c->clean_up.scheduled = true;
1534         schedule_delayed_work(&c->clean_up.wq, 1);
1535
1536         mutex_unlock(&c->clean_up.lock);
1537 }
1538
1539 void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
1540                                 bool wait_for_completion)
1541 {
1542         if (wait_for_completion)
1543                 cancel_delayed_work_sync(&c->clean_up.wq);
1544
1545         mutex_lock(&c->clean_up.lock);
1546         c->clean_up.scheduled = false;
1547         mutex_unlock(&c->clean_up.lock);
1548 }
1549
1550 static int gk20a_channel_add_job(struct channel_gk20a *c,
1551                                  struct gk20a_fence *pre_fence,
1552                                  struct gk20a_fence *post_fence,
1553                                  struct priv_cmd_entry *wait_cmd,
1554                                  struct priv_cmd_entry *incr_cmd,
1555                                  bool skip_buffer_refcounting)
1556 {
1557         struct vm_gk20a *vm = c->vm;
1558         struct channel_gk20a_job *job = NULL;
1559         struct mapped_buffer_node **mapped_buffers = NULL;
1560         int err = 0, num_mapped_buffers = 0;
1561
1562         /* job needs a reference to this vm (released in gk20a_channel_clean_up_jobs()) */
1563         gk20a_vm_get(vm);
1564
1565         if (!skip_buffer_refcounting) {
1566                 err = gk20a_vm_get_buffers(vm, &mapped_buffers,
1567                                         &num_mapped_buffers);
1568                 if (err) {
1569                         gk20a_vm_put(vm);
1570                         return err;
1571                 }
1572         }
1573
1574         job = kzalloc(sizeof(*job), GFP_KERNEL);
1575         if (!job) {
1576                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1577                 gk20a_vm_put(vm);
1578                 return -ENOMEM;
1579         }
1580
1581         /* put() is done in gk20a_channel_clean_up_jobs() when the job is done */
1582         c = gk20a_channel_get(c);
1583
1584         if (c) {
1585                 job->num_mapped_buffers = num_mapped_buffers;
1586                 job->mapped_buffers = mapped_buffers;
1587                 job->pre_fence = gk20a_fence_get(pre_fence);
1588                 job->post_fence = gk20a_fence_get(post_fence);
1589                 job->wait_cmd = wait_cmd;
1590                 job->incr_cmd = incr_cmd;
1591
1592                 mutex_lock(&c->jobs_lock);
1593                 list_add_tail(&job->list, &c->jobs);
1594                 mutex_unlock(&c->jobs_lock);
1595         } else {
                     /* channel is being torn down; undo the references taken above */
                     gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
                     gk20a_vm_put(vm);
                     kfree(job);
1596                 return -ETIMEDOUT;
1597         }
1598
1599         return 0;
1600 }
1601
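     /* Deferred work: walk the channel's job list and, for every job whose
      * post fence has expired, release its fences, private command buffers,
      * mapped buffers and vm/channel references. Once the list is empty the
      * sync object may also be destroyed (aggressive_sync_destroy). */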
1602 static void gk20a_channel_clean_up_jobs(struct work_struct *work)
1603 {
1604         struct channel_gk20a *c = container_of(to_delayed_work(work),
1605                         struct channel_gk20a, clean_up.wq);
1606         struct vm_gk20a *vm;
1607         struct channel_gk20a_job *job, *n;
1608         struct gk20a_platform *platform;
1609
1610         c = gk20a_channel_get(c);
1611         if (!c)
1612                 return;
1613
1614         if (!c->g->power_on) { /* shutdown case */
1615                 gk20a_channel_put(c);
1616                 return;
1617         }
1618
1619         vm = c->vm;
1620         platform = gk20a_get_platform(c->g->dev);
1621
1622         mutex_lock(&c->submit_lock);
1623
1624         /* gp_put check needs to be done inside submit lock */
1625         check_gp_put(c->g, c);
1626
1627         gk20a_channel_cancel_job_clean_up(c, false);
1628
1629         mutex_lock(&c->jobs_lock);
1630         list_for_each_entry_safe(job, n, &c->jobs, list) {
1631                 struct gk20a *g = c->g;
1632
1633                 bool completed = gk20a_fence_is_expired(job->post_fence);
1634                 if (!completed)
1635                         break;
1636
1637                 if (c->sync)
1638                         c->sync->signal_timeline(c->sync);
1639
1640                 if (job->num_mapped_buffers)
1641                         gk20a_vm_put_buffers(vm, job->mapped_buffers,
1642                                 job->num_mapped_buffers);
1643
1644                 /* Close the fences (this will unref the semaphores and release
1645                  * them to the pool). */
1646                 gk20a_fence_put(job->pre_fence);
1647                 gk20a_fence_put(job->post_fence);
1648
1649                 /* Free the private command buffers (wait_cmd first and
1650                  * then incr_cmd i.e. order of allocation) */
1651                 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
1652                 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
1653
1654                 /* job is done. release its vm reference (taken in add_job) */
1655                 gk20a_vm_put(vm);
1656                 /* drop the extra channel reference taken in add_job; the caller
1657                  * holds its own reference, so the channel won't be freed here. */
1658                 gk20a_channel_put(c);
1659
1660                 list_del_init(&job->list);
1661                 kfree(job);
1662                 gk20a_idle(g->dev);
1663         }
1664
1665         /*
1666          * If job list is empty then channel is idle and we can free
1667          * the syncpt here (given aggressive_destroy flag is set)
1668          * Note: check if last submit is complete before destroying
1669          * the sync resource
1670          */
1671         if (list_empty(&c->jobs)) {
1672                 mutex_lock(&c->sync_lock);
1673                 if (c->sync && platform->aggressive_sync_destroy &&
1674                           gk20a_fence_is_expired(c->last_submit.post_fence)) {
1675                         c->sync->destroy(c->sync);
1676                         c->sync = NULL;
1677                 }
1678                 mutex_unlock(&c->sync_lock);
1679         }
1680         mutex_unlock(&c->jobs_lock);
1681         mutex_unlock(&c->submit_lock);
1682
1683         if (c->update_fn)
1684                 schedule_work(&c->update_fn_work);
1685
1686         gk20a_channel_put(c);
1687 }
1688
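     /* Job completion notification: refresh gp_get, wake up submitters waiting
      * for gpfifo space and schedule the deferred job clean-up. */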
1689 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1690 {
1691         c = gk20a_channel_get(c);
1692         if (!c)
1693                 return;
1694
1695         if (!c->g->power_on) { /* shutdown case */
1696                 gk20a_channel_put(c);
1697                 return;
1698         }
1699
1700         update_gp_get(c->g, c);
1701         wake_up(&c->submit_wq);
1702
1703         trace_gk20a_channel_update(c->hw_chid);
1704         gk20a_channel_schedule_job_clean_up(c);
1705
1706         gk20a_channel_put(c);
1707 }
1708
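     /*
      * Submit gpfifo entries on a channel. Optionally prepends a fence/syncpt
      * wait command and appends a syncpt or semaphore increment, copies the
      * entries into the gpfifo ring buffer (handling wrap-around), records the
      * job for deferred clean-up and finally kicks the channel by writing the
      * new GP_PUT through BAR1.
      */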
1709 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1710                                 struct nvgpu_gpfifo *gpfifo,
1711                                 struct nvgpu_submit_gpfifo_args *args,
1712                                 u32 num_entries,
1713                                 u32 flags,
1714                                 struct nvgpu_fence *fence,
1715                                 struct gk20a_fence **fence_out,
1716                                 bool force_need_sync_fence)
1717 {
1718         struct gk20a *g = c->g;
1719         struct device *d = dev_from_gk20a(g);
1720         int err = 0;
1721         int start, end;
1722         int wait_fence_fd = -1;
1723         struct priv_cmd_entry *wait_cmd = NULL;
1724         struct priv_cmd_entry *incr_cmd = NULL;
1725         struct gk20a_fence *pre_fence = NULL;
1726         struct gk20a_fence *post_fence = NULL;
1727         /* we might need two extra gpfifo entries - one for pre fence
1728          * and one for post fence. */
1729         const int extra_entries = 2;
1730         bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1731         bool skip_buffer_refcounting = (flags &
1732                         NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
1733         bool need_sync_fence = false;
1734
1735         /*
1736          * If user wants to allocate sync_fence_fd always, then respect that;
1737          * otherwise, allocate sync_fence_fd based on user flags only
1738          */
1739         if (force_need_sync_fence)
1740                 need_sync_fence = true;
1741
1742         if (c->has_timedout)
1743                 return -ETIMEDOUT;
1744
1745         /* fifo not large enough for request. Return error immediately.
1746          * Kernel can insert gpfifo entries before and after user gpfifos.
1747          * So, add extra_entries to the user request. Also, HW with fifo size N
1748          * can accept only N-1 entries, hence the check below. */
1749         if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
1750                 gk20a_err(d, "not enough gpfifo space allocated");
1751                 return -ENOMEM;
1752         }
1753
1754         if (!gpfifo && !args)
1755                 return -EINVAL;
1756
1757         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1758                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1759             !fence)
1760                 return -EINVAL;
1761
1762         /* an address space needs to have been bound at this point. */
1763         if (!gk20a_channel_as_bound(c)) {
1764                 gk20a_err(d,
1765                             "not bound to an address space at time of gpfifo"
1766                             " submission.");
1767                 return -EINVAL;
1768         }
1769
1770 #ifdef CONFIG_DEBUG_FS
1771         /* update debug settings */
1772         if (g->ops.ltc.sync_debugfs)
1773                 g->ops.ltc.sync_debugfs(g);
1774 #endif
1775
1776         gk20a_dbg_info("channel %d", c->hw_chid);
1777
1778         /* gk20a_channel_update releases this ref. */
1779         err = gk20a_busy(g->dev);
1780         if (err) {
1781                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1782                 return err;
1783         }
1784
1785         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1786                                           c->hw_chid,
1787                                           num_entries,
1788                                           flags,
1789                                           fence ? fence->id : 0,
1790                                           fence ? fence->value : 0);
1791
1792         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1793                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1794
1795         /* Make sure we have enough space for gpfifo entries. If not,
1796          * wait for signals from completed submits */
1797         if (gp_free_count(c) < num_entries + extra_entries) {
1798                 /* we can get here via locked ioctl and other paths too */
1799                 int locked_path = mutex_is_locked(&c->ioctl_lock);
1800                 if (locked_path)
1801                         mutex_unlock(&c->ioctl_lock);
1802
1803                 trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
1804                 err = wait_event_interruptible(c->submit_wq,
1805                         get_gp_free_count(c) >= num_entries + extra_entries ||
1806                         c->has_timedout);
1807                 trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);
1808
1809                 if (locked_path)
1810                         mutex_lock(&c->ioctl_lock);
1811         }
1812
1813         if (c->has_timedout) {
1814                 err = -ETIMEDOUT;
1815                 goto clean_up;
1816         }
1817
1818         if (err) {
1819                 gk20a_err(d, "timeout waiting for gpfifo space");
1820                 err = -EAGAIN;
1821                 goto clean_up;
1822         }
1823
1824         mutex_lock(&c->submit_lock);
1825
1826         mutex_lock(&c->sync_lock);
1827         if (!c->sync) {
1828                 c->sync = gk20a_channel_sync_create(c);
1829                 if (!c->sync) {
1830                         err = -ENOMEM;
1831                         mutex_unlock(&c->submit_lock);
1832                         goto clean_up;
1833                 }
1834                 if (g->ops.fifo.resetup_ramfc)
1835                         err = g->ops.fifo.resetup_ramfc(c);
1836                 if (err) {
1837                         /* don't return with submit_lock and sync_lock held */
                             mutex_unlock(&c->sync_lock);
                             mutex_unlock(&c->submit_lock);
                             goto clean_up;
                     }
1838         }
1839         mutex_unlock(&c->sync_lock);
1840
1841         /*
1842          * Optionally insert a syncpt wait at the beginning of the gpfifo
1843          * submission when the user requested one and the wait hasn't expired.
1844          * Validate that the id makes sense and elide the wait if it doesn't;
1845          * the only reason this path isn't removed outright is that some tests
1846          * still rely on triggering this condition.
1847          */
1848         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1849                 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1850                         wait_fence_fd = fence->id;
1851                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1852                                         &wait_cmd, &pre_fence);
1853                 } else {
1854                         err = c->sync->wait_syncpt(c->sync, fence->id,
1855                                         fence->value, &wait_cmd, &pre_fence);
1856                 }
1857         }
1858         if (err) {
1859                 mutex_unlock(&c->submit_lock);
1860                 goto clean_up;
1861         }
1862
1863         if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
1864                         (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
1865                 need_sync_fence = true;
1866
1867         /* always insert syncpt increment at end of gpfifo submission
1868            to keep track of method completion for idle railgating */
1869         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1870                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1871                                  &post_fence, need_wfi, need_sync_fence);
1872         else
1873                 err = c->sync->incr(c->sync, &incr_cmd,
1874                                     &post_fence, need_sync_fence);
1875         if (err) {
1876                 mutex_unlock(&c->submit_lock);
1877                 goto clean_up;
1878         }
1879
1880         if (wait_cmd) {
1881                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1882                         u64_lo32(wait_cmd->gva);
1883                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1884                         u64_hi32(wait_cmd->gva) |
1885                         pbdma_gp_entry1_length_f(wait_cmd->size);
1886                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1887                         0, wait_cmd->size, 0, wait_cmd->ptr);
1888
1889                 c->gpfifo.put = (c->gpfifo.put + 1) &
1890                         (c->gpfifo.entry_num - 1);
1891
1892                 /* save gp_put */
1893                 wait_cmd->gp_put = c->gpfifo.put;
1894         }
1895
1896         /*
1897          * Copy source gpfifo entries into the gpfifo ring buffer,
1898          * potentially splitting into two memcpies to handle the
1899          * ring buffer wrap-around case.
1900          */
1901         start = c->gpfifo.put;
1902         end = start + num_entries;
1903
1904         if (gpfifo) {
1905                 if (end > c->gpfifo.entry_num) {
1906                         int length0 = c->gpfifo.entry_num - start;
1907                         int length1 = num_entries - length0;
1908
1909                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1910                                 gpfifo,
1911                                 length0 * sizeof(*gpfifo));
1912
1913                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va,
1914                                 gpfifo + length0,
1915                                 length1 * sizeof(*gpfifo));
1916
1917                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1918                                         0, length0);
1919                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1920                                         length0, length1);
1921                 } else {
1922                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1923                                 gpfifo,
1924                                 num_entries * sizeof(*gpfifo));
1925
1926                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1927                                         0, num_entries);
1928                 }
1929         } else {
1930                 struct nvgpu_gpfifo __user *user_gpfifo =
1931                         (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
1932                 if (end > c->gpfifo.entry_num) {
1933                         int length0 = c->gpfifo.entry_num - start;
1934                         int length1 = num_entries - length0;
1935
1936                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1937                                 user_gpfifo,
1938                                 length0 * sizeof(*user_gpfifo));
1939                         if (err) {
                                     err = -EFAULT;
1940                                 mutex_unlock(&c->submit_lock);
1941                                 goto clean_up;
1942                         }
1943
1944                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va,
1945                                 user_gpfifo + length0,
1946                                 length1 * sizeof(*user_gpfifo));
1947                         if (err) {
                                     err = -EFAULT;
1948                                 mutex_unlock(&c->submit_lock);
1949                                 goto clean_up;
1950                         }
1951
1952                         trace_write_pushbuffer_range(c, NULL, args,
1953                                         0, length0);
1954                         trace_write_pushbuffer_range(c, NULL, args,
1955                                         length0, length1);
1956                 } else {
1957                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1958                                 user_gpfifo,
1959                                 num_entries * sizeof(*user_gpfifo));
1960                         if (err) {
                                     err = -EFAULT;
1961                                 mutex_unlock(&c->submit_lock);
1962                                 goto clean_up;
1963                         }
1964
1965                         trace_write_pushbuffer_range(c, NULL, args,
1966                                         0, num_entries);
1967                 }
1968         }
1969
1970         c->gpfifo.put = (c->gpfifo.put + num_entries) &
1971                 (c->gpfifo.entry_num - 1);
1972
1973         if (incr_cmd) {
1974                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1975                         u64_lo32(incr_cmd->gva);
1976                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1977                         u64_hi32(incr_cmd->gva) |
1978                         pbdma_gp_entry1_length_f(incr_cmd->size);
1979                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1980                         0, incr_cmd->size, 0, incr_cmd->ptr);
1981
1982                 c->gpfifo.put = (c->gpfifo.put + 1) &
1983                         (c->gpfifo.entry_num - 1);
1984
1985                 /* save gp_put */
1986                 incr_cmd->gp_put = c->gpfifo.put;
1987         }
1988
1989         gk20a_fence_put(c->last_submit.pre_fence);
1990         gk20a_fence_put(c->last_submit.post_fence);
1991         c->last_submit.pre_fence = pre_fence;
1992         c->last_submit.post_fence = post_fence;
1993         if (fence_out)
1994                 *fence_out = gk20a_fence_get(post_fence);
1995
1996         /* TODO! Check for errors... */
1997         gk20a_channel_add_job(c, pre_fence, post_fence,
1998                                 wait_cmd, incr_cmd,
1999                                 skip_buffer_refcounting);
2000
2001         c->cmds_pending = true;
2002         gk20a_bar1_writel(g,
2003                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
2004                 c->gpfifo.put);
2005
2006         mutex_unlock(&c->submit_lock);
2007
2008         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
2009                                              c->hw_chid,
2010                                              num_entries,
2011                                              flags,
2012                                              post_fence->syncpt_id,
2013                                              post_fence->syncpt_value);
2014
2015         gk20a_dbg_info("post-submit put %d, get %d, size %d",
2016                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
2017
2018         gk20a_dbg_fn("done");
2019         return err;
2020
2021 clean_up:
2022         gk20a_err(d, "fail");
2023         free_priv_cmdbuf(c, wait_cmd);
2024         free_priv_cmdbuf(c, incr_cmd);
2025         gk20a_fence_put(pre_fence);
2026         gk20a_fence_put(post_fence);
2027         gk20a_idle(g->dev);
2028         return err;
2029 }
2030
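     /* One-time software initialization of channel chid: set up its locks,
      * work items and lists, then place it on the fifo's free channel list. */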
2031 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
2032 {
2033         struct channel_gk20a *c = g->fifo.channel+chid;
2034         c->g = NULL;
2035         c->hw_chid = chid;
2036         c->bound = false;
2037         spin_lock_init(&c->ref_obtain_lock);
2038         atomic_set(&c->ref_count, 0);
2039         c->referenceable = false;
2040         init_waitqueue_head(&c->ref_count_dec_wq);
2041         mutex_init(&c->ioctl_lock);
2042         mutex_init(&c->jobs_lock);
2043         mutex_init(&c->submit_lock);
2044         INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
2045         mutex_init(&c->clean_up.lock);
2046         mutex_init(&c->sync_lock);
2047         INIT_LIST_HEAD(&c->jobs);
2048 #if defined(CONFIG_GK20A_CYCLE_STATS)
2049         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
2050         mutex_init(&c->cs_client_mutex);
2051 #endif
2052         INIT_LIST_HEAD(&c->dbg_s_list);
2053         mutex_init(&c->dbg_s_lock);
2054         list_add(&c->free_chs, &g->fifo.free_chs);
2055
2056         return 0;
2057 }
2058
2059 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
2060 {
2061         int err = 0;
2062         struct gk20a_fence *fence = ch->last_submit.post_fence;
2063
2064         if (!ch->cmds_pending)
2065                 return 0;
2066
2067         /* Do not wait for a timedout channel */
2068         if (ch->has_timedout)
2069                 return -ETIMEDOUT;
2070
2071         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
2072                      fence->syncpt_value, fence->semaphore);
2073
2074         err = gk20a_fence_wait(fence, timeout);
2075         if (WARN_ON(err))
2076                 dev_warn(dev_from_gk20a(ch->g),
2077                        "timed out waiting for gk20a channel to finish");
2078         else
2079                 ch->cmds_pending = false;
2080
2081         return err;
2082 }
2083
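     /* Sleep (interruptibly, with timeout) until the 32-bit semaphore at the
      * given offset within the dma-buf equals the expected payload, or the
      * channel times out. */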
2084 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
2085                                         ulong id, u32 offset,
2086                                         u32 payload, long timeout)
2087 {
2088         struct platform_device *pdev = ch->g->dev;
2089         struct dma_buf *dmabuf;
2090         void *data;
2091         u32 *semaphore;
2092         int ret = 0;
2093         long remain;
2094
2095         /* do not wait if channel has timed out */
2096         if (ch->has_timedout)
2097                 return -ETIMEDOUT;
2098
2099         dmabuf = dma_buf_get(id);
2100         if (IS_ERR(dmabuf)) {
2101                 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
2102                            id);
2103                 return -EINVAL;
2104         }
2105
2106         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
2107         if (!data) {
2108                 gk20a_err(&pdev->dev, "failed to map notifier memory");
2109                 ret = -EINVAL;
2110                 goto cleanup_put;
2111         }
2112
2113         semaphore = data + (offset & ~PAGE_MASK);
2114
2115         remain = wait_event_interruptible_timeout(
2116                         ch->semaphore_wq,
2117                         *semaphore == payload || ch->has_timedout,
2118                         timeout);
2119
2120         if (remain == 0 && *semaphore != payload)
2121                 ret = -ETIMEDOUT;
2122         else if (remain < 0)
2123                 ret = remain;
2124
2125         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
2126 cleanup_put:
2127         dma_buf_put(dmabuf);
2128         return ret;
2129 }
2130
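     /* Backend for NVGPU_IOCTL_CHANNEL_WAIT: wait either for the channel's
      * notifier status to be cleared or for a semaphore to reach the expected
      * payload, honouring the user-supplied timeout. */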
2131 static int gk20a_channel_wait(struct channel_gk20a *ch,
2132                               struct nvgpu_wait_args *args)
2133 {
2134         struct device *d = dev_from_gk20a(ch->g);
2135         struct dma_buf *dmabuf;
2136         struct notification *notif;
2137         struct timespec tv;
2138         u64 jiffies;
2139         ulong id;
2140         u32 offset;
2141         unsigned long timeout;
2142         int remain, ret = 0;
2143
2144         gk20a_dbg_fn("");
2145
2146         if (ch->has_timedout)
2147                 return -ETIMEDOUT;
2148
2149         if (args->timeout == NVGPU_NO_TIMEOUT)
2150                 timeout = MAX_SCHEDULE_TIMEOUT;
2151         else
2152                 timeout = (u32)msecs_to_jiffies(args->timeout);
2153
2154         switch (args->type) {
2155         case NVGPU_WAIT_TYPE_NOTIFIER:
2156                 id = args->condition.notifier.dmabuf_fd;
2157                 offset = args->condition.notifier.offset;
2158
2159                 dmabuf = dma_buf_get(id);
2160                 if (IS_ERR(dmabuf)) {
2161                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
2162                                    id);
2163                         return -EINVAL;
2164                 }
2165
2166                 notif = dma_buf_vmap(dmabuf);
2167                 if (!notif) {
2168                         gk20a_err(d, "failed to map notifier memory");
                             dma_buf_put(dmabuf);
2169                         return -ENOMEM;
2170                 }
2171
2172                 notif = (struct notification *)((uintptr_t)notif + offset);
2173
2174                 /* user should set status pending before
2175                  * calling this ioctl */
2176                 remain = wait_event_interruptible_timeout(
2177                                 ch->notifier_wq,
2178                                 notif->status == 0 || ch->has_timedout,
2179                                 timeout);
2180
2181                 if (remain == 0 && notif->status != 0) {
2182                         ret = -ETIMEDOUT;
2183                         goto notif_clean_up;
2184                 } else if (remain < 0) {
2185                         ret = -EINTR;
2186                         goto notif_clean_up;
2187                 }
2188
2189                 /* TBD: fill in correct information */
2190                 jiffies = get_jiffies_64();
2191                 jiffies_to_timespec(jiffies, &tv);
2192                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
2193                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
2194                 notif->info32 = 0xDEADBEEF; /* should be object name */
2195                 notif->info16 = ch->hw_chid; /* should be method offset */
2196
2197 notif_clean_up:
2198                 dma_buf_vunmap(dmabuf, notif);
                     dma_buf_put(dmabuf);
2199                 return ret;
2200
2201         case NVGPU_WAIT_TYPE_SEMAPHORE:
2202                 ret = gk20a_channel_wait_semaphore(ch,
2203                                 args->condition.semaphore.dmabuf_fd,
2204                                 args->condition.semaphore.offset,
2205                                 args->condition.semaphore.payload,
2206                                 timeout);
2207
2208                 break;
2209
2210         default:
2211                 ret = -EINVAL;
2212                 break;
2213         }
2214
2215         return ret;
2216 }
2217
2218 /* poll events for semaphores */
2219
2220 static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
2221 {
2222         gk20a_dbg_fn("");
2223
2224         mutex_lock(&ev->lock);
2225
2226         ev->events_enabled = true;
2227         ev->num_pending_events = 0;
2228
2229         mutex_unlock(&ev->lock);
2230 }
2231
2232 static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
2233 {
2234         gk20a_dbg_fn("");
2235
2236         mutex_lock(&ev->lock);
2237
2238         ev->events_enabled = false;
2239         ev->num_pending_events = 0;
2240
2241         mutex_unlock(&ev->lock);
2242 }
2243
2244 static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
2245 {
2246         gk20a_dbg_fn("");
2247
2248         mutex_lock(&ev->lock);
2249
2250         if (ev->events_enabled &&
2251                         ev->num_pending_events > 0)
2252                 ev->num_pending_events--;
2253
2254         mutex_unlock(&ev->lock);
2255 }
2256
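     /* Handle NVGPU_IOCTL_CHANNEL_EVENTS_CTRL: enable, disable or clear the
      * channel's pending poll events. */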
2257 static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
2258                           struct nvgpu_channel_events_ctrl_args *args)
2259 {
2260         int ret = 0;
2261
2262         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
2263                         "channel events ctrl cmd %d", args->cmd);
2264
2265         switch (args->cmd) {
2266         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
2267                 gk20a_channel_events_enable(&ch->poll_events);
2268                 break;
2269
2270         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
2271                 gk20a_channel_events_disable(&ch->poll_events);
2272                 break;
2273
2274         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
2275                 gk20a_channel_events_clear(&ch->poll_events);
2276                 break;
2277
2278         default:
2279                 gk20a_err(dev_from_gk20a(ch->g),
2280                            "unrecognized channel events ctrl cmd: 0x%x",
2281                            args->cmd);
2282                 ret = -EINVAL;
2283                 break;
2284         }
2285
2286         return ret;
2287 }
2288
2289 void gk20a_channel_event(struct channel_gk20a *ch)
2290 {
2291         mutex_lock(&ch->poll_events.lock);
2292
2293         if (ch->poll_events.events_enabled) {
2294                 gk20a_dbg_info("posting event on channel id %d",
2295                                 ch->hw_chid);
2296                 gk20a_dbg_info("%d channel events pending",
2297                                 ch->poll_events.num_pending_events);
2298
2299                 ch->poll_events.num_pending_events++;
2300                 /* not waking up here, caller does that */
2301         }
2302
2303         mutex_unlock(&ch->poll_events.lock);
2304 }
2305
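     /* poll() on a channel fd: report POLLPRI | POLLIN when events are enabled
      * and at least one event is pending. */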
2306 unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
2307 {
2308         unsigned int mask = 0;
2309         struct channel_gk20a *ch = filep->private_data;
2310
2311         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
2312
2313         poll_wait(filep, &ch->semaphore_wq, wait);
2314
2315         mutex_lock(&ch->poll_events.lock);
2316
2317         if (ch->poll_events.events_enabled &&
2318                         ch->poll_events.num_pending_events > 0) {
2319                 gk20a_dbg_info("found pending event on channel id %d",
2320                                 ch->hw_chid);
2321                 gk20a_dbg_info("%d channel events pending",
2322                                 ch->poll_events.num_pending_events);
2323                 mask = (POLLPRI | POLLIN);
2324         }
2325
2326         mutex_unlock(&ch->poll_events.lock);
2327
2328         return mask;
2329 }
2330
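     /* Map a user-visible priority level onto a channel timeslice value (in
      * units of 8 us) and program the channel's scheduling parameters. */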
2331 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2332                 u32 priority)
2333 {
2334         u32 timeslice_timeout;
2335         /* set priority of graphics channel */
2336         switch (priority) {
2337         case NVGPU_PRIORITY_LOW:
2338                 /* 64 << 3 = 512us */
2339                 timeslice_timeout = 64;
2340                 break;
2341         case NVGPU_PRIORITY_MEDIUM:
2342                 /* 128 << 3 = 1024us */
2343                 timeslice_timeout = 128;
2344                 break;
2345         case NVGPU_PRIORITY_HIGH:
2346                 /* 255 << 3 = 2040us */
2347                 timeslice_timeout = 255;
2348                 break;
2349         default:
2350                 pr_err("Unsupported priority");
2351                 return -EINVAL;
2352         }
2353         channel_gk20a_set_schedule_params(ch,
2354                         timeslice_timeout);
2355         return 0;
2356 }
2357
2358 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
2359                             struct nvgpu_zcull_bind_args *args)
2360 {
2361         struct gk20a *g = ch->g;
2362         struct gr_gk20a *gr = &g->gr;
2363
2364         gk20a_dbg_fn("");
2365
2366         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
2367                                 args->gpu_va, args->mode);
2368 }
2369
2370 /* in this context the "channel" is the host1x channel which
2371  * maps to *all* gk20a channels */
2372 int gk20a_channel_suspend(struct gk20a *g)
2373 {
2374         struct fifo_gk20a *f = &g->fifo;
2375         u32 chid;
2376         bool channels_in_use = false;
2377         int err;
2378
2379         gk20a_dbg_fn("");
2380
2381         /* wait for engine idle */
2382         err = g->ops.fifo.wait_engine_idle(g);
2383         if (err)
2384                 return err;
2385
2386         for (chid = 0; chid < f->num_channels; chid++) {
2387                 struct channel_gk20a *ch = &f->channel[chid];
2388                 if (gk20a_channel_get(ch)) {
2389                         gk20a_dbg_info("suspend channel %d", chid);
2390                         /* disable channel */
2391                         g->ops.fifo.disable_channel(ch);
2392                         /* preempt the channel */
2393                         gk20a_fifo_preempt(ch->g, ch);
2394                         /* wait for channel update notifiers */
2395                         if (ch->update_fn &&
2396                                         work_pending(&ch->update_fn_work))
2397                                 flush_work(&ch->update_fn_work);
2398                         gk20a_channel_cancel_job_clean_up(ch, true);
2399
2400                         channels_in_use = true;
2401
2402                         gk20a_channel_put(ch);
2403                 }
2404         }
2405
2406         if (channels_in_use) {
2407                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2408
2409                 for (chid = 0; chid < f->num_channels; chid++) {
2410                         if (gk20a_channel_get(&f->channel[chid])) {
2411                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
2412                                 gk20a_channel_put(&f->channel[chid]);
2413                         }
2414                 }
2415         }
2416
2417         gk20a_dbg_fn("done");
2418         return 0;
2419 }
2420
2421 int gk20a_channel_resume(struct gk20a *g)
2422 {
2423         struct fifo_gk20a *f = &g->fifo;
2424         u32 chid;
2425         bool channels_in_use = false;
2426
2427         gk20a_dbg_fn("");
2428
2429         for (chid = 0; chid < f->num_channels; chid++) {
2430                 if (gk20a_channel_get(&f->channel[chid])) {
2431                         gk20a_dbg_info("resume channel %d", chid);
2432                         g->ops.fifo.bind_channel(&f->channel[chid]);
2433                         channels_in_use = true;
2434                         gk20a_channel_put(&f->channel[chid]);
2435                 }
2436         }
2437
2438         if (channels_in_use)
2439                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
2440
2441         gk20a_dbg_fn("done");
2442         return 0;
2443 }
2444
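     /* Wake everything waiting on semaphores: post a poll event, wake all
      * semaphore waiters and run the update path for every channel that can
      * still be referenced. */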
2445 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2446 {
2447         struct fifo_gk20a *f = &g->fifo;
2448         u32 chid;
2449
2450         gk20a_dbg_fn("");
2451
2452         for (chid = 0; chid < f->num_channels; chid++) {
2453                 struct channel_gk20a *c = g->fifo.channel+chid;
2454                 if (gk20a_channel_get(c)) {
2455                         gk20a_channel_event(c);
2456                         wake_up_interruptible_all(&c->semaphore_wq);
2457                         gk20a_channel_update(c, 0);
2458                         gk20a_channel_put(c);
2459                 }
2460         }
2461 }
2462
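     /* SUBMIT_GPFIFO ioctl wrapper: perform the submit and translate the
      * resulting fence into either an installed sync fence fd or a syncpoint
      * id/value pair for user space. */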
2463 static int gk20a_ioctl_channel_submit_gpfifo(
2464         struct channel_gk20a *ch,
2465         struct nvgpu_submit_gpfifo_args *args)
2466 {
2467         struct gk20a_fence *fence_out;
2468         int ret = 0;
2469
2470         gk20a_dbg_fn("");
2471
2472         if (ch->has_timedout)
2473                 return -ETIMEDOUT;
2474
2475         ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
2476                                           args->flags, &args->fence,
2477                                           &fence_out, false);
2478
2479         if (ret)
2480                 goto clean_up;
2481
2482         /* Convert fence_out to something we can pass back to user space. */
2483         if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2484                 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2485                         int fd = gk20a_fence_install_fd(fence_out);
2486                         if (fd < 0)
2487                                 ret = fd;
2488                         else
2489                                 args->fence.id = fd;
2490                 } else {
2491                         args->fence.id = fence_out->syncpt_id;
2492                         args->fence.value = fence_out->syncpt_value;
2493                 }
2494         }
2495         gk20a_fence_put(fence_out);
2496
2497 clean_up:
2498         return ret;
2499 }
2500
2501 void gk20a_init_channel(struct gpu_ops *gops)
2502 {
2503         gops->fifo.bind_channel = channel_gk20a_bind;
2504         gops->fifo.unbind_channel = channel_gk20a_unbind;
2505         gops->fifo.disable_channel = channel_gk20a_disable;
2506         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2507         gops->fifo.free_inst = channel_gk20a_free_inst;
2508         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2509 }
2510
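     /*
      * Channel ioctl dispatcher. Validates the command, copies the argument
      * buffer in, takes a channel reference and the per-channel ioctl lock,
      * handles the command and copies any results back to user space.
      */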
2511 long gk20a_channel_ioctl(struct file *filp,
2512         unsigned int cmd, unsigned long arg)
2513 {
2514         struct channel_gk20a *ch = filp->private_data;
2515         struct platform_device *dev = ch->g->dev;
2516         u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
2517         int err = 0;
2518
2519         gk20a_dbg_fn("start %d", _IOC_NR(cmd));
2520
2521         if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
2522                 (_IOC_NR(cmd) == 0) ||
2523                 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
2524                 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
2525                 return -EINVAL;
2526
2527         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2528                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2529                         return -EFAULT;
2530         }
2531
2532         /* take a ref or return timeout if channel refs can't be taken */
2533         ch = gk20a_channel_get(ch);
2534         if (!ch)
2535                 return -ETIMEDOUT;
2536
2537         /* protect our sanity for threaded userspace - most of the channel is
2538          * not thread safe */
2539         mutex_lock(&ch->ioctl_lock);
2540
2541         /* this ioctl call keeps a ref to the file which keeps a ref to the
2542          * channel */
2543
2544         switch (cmd) {
2545         case NVGPU_IOCTL_CHANNEL_OPEN:
2546                 err = gk20a_channel_open_ioctl(ch->g,
2547                         (struct nvgpu_channel_open_args *)buf);
2548                 break;
2549         case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
2550                 break;
2551         case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2552                 err = gk20a_busy(dev);
2553                 if (err) {
2554                         dev_err(&dev->dev,
2555                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2556                                 __func__, cmd);
2557                         break;
2558                 }
2559                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2560                                 (struct nvgpu_alloc_obj_ctx_args *)buf);
2561                 gk20a_idle(dev);
2562                 break;
2563         case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
2564                 err = gk20a_busy(dev);
2565                 if (err) {
2566                         dev_err(&dev->dev,
2567                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2568                                 __func__, cmd);
2569                         break;
2570                 }
2571                 err = ch->g->ops.gr.free_obj_ctx(ch,
2572                                 (struct nvgpu_free_obj_ctx_args *)buf);
2573                 gk20a_idle(dev);
2574                 break;
2575         case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
2576                 err = gk20a_busy(dev);
2577                 if (err) {
2578                         dev_err(&dev->dev,
2579                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2580                                 __func__, cmd);
2581                         break;
2582                 }
2583                 err = gk20a_alloc_channel_gpfifo(ch,
2584                                 (struct nvgpu_alloc_gpfifo_args *)buf);
2585                 gk20a_idle(dev);
2586                 break;
2587         case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2588                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2589                                 (struct nvgpu_submit_gpfifo_args *)buf);
2590                 break;
2591         case NVGPU_IOCTL_CHANNEL_WAIT:
2592                 err = gk20a_busy(dev);
2593                 if (err) {
2594                         dev_err(&dev->dev,
2595                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2596                                 __func__, cmd);
2597                         break;
2598                 }
2599
2600                 /* waiting is thread-safe, not dropping this mutex could
2601                  * deadlock in certain conditions */
2602                 mutex_unlock(&ch->ioctl_lock);
2603
2604                 err = gk20a_channel_wait(ch,
2605                                 (struct nvgpu_wait_args *)buf);
2606
2607                 mutex_lock(&ch->ioctl_lock);
2608
2609                 gk20a_idle(dev);
2610                 break;
2611         case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
2612                 err = gk20a_busy(dev);
2613                 if (err) {
2614                         dev_err(&dev->dev,
2615                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2616                                 __func__, cmd);
2617                         break;
2618                 }
2619                 err = gk20a_channel_zcull_bind(ch,
2620                                 (struct nvgpu_zcull_bind_args *)buf);
2621                 gk20a_idle(dev);
2622                 break;
2623         case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2624                 err = gk20a_busy(dev);
2625                 if (err) {
2626                         dev_err(&dev->dev,
2627                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2628                                 __func__, cmd);
2629                         break;
2630                 }
2631                 err = gk20a_init_error_notifier(ch,
2632                                 (struct nvgpu_set_error_notifier *)buf);
2633                 gk20a_idle(dev);
2634                 break;
2635 #ifdef CONFIG_GK20A_CYCLE_STATS
2636         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
2637                 err = gk20a_busy(dev);
2638                 if (err) {
2639                         dev_err(&dev->dev,
2640                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2641                                 __func__, cmd);
2642                         break;
2643                 }
2644                 err = gk20a_channel_cycle_stats(ch,
2645                                 (struct nvgpu_cycle_stats_args *)buf);
2646                 gk20a_idle(dev);
2647                 break;
2648 #endif
2649         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
2650         {
2651                 u32 timeout =
2652                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2653                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2654                            timeout, ch->hw_chid);
2655                 ch->timeout_ms_max = timeout;
2656                 break;
2657         }
2658         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2659         {
2660                 u32 timeout =
2661                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2662                 bool timeout_debug_dump = !((u32)
2663                         ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
2664                         (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
2665                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2666                            timeout, ch->hw_chid);
2667                 ch->timeout_ms_max = timeout;
2668                 ch->timeout_debug_dump = timeout_debug_dump;
2669                 break;
2670         }
2671         case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
2672                 ((struct nvgpu_get_param_args *)buf)->value =
2673                         ch->has_timedout;
2674                 break;
2675         case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
2676                 err = gk20a_busy(dev);
2677                 if (err) {
2678                         dev_err(&dev->dev,
2679                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2680                                 __func__, cmd);
2681                         break;
2682                 }
2683                 gk20a_channel_set_priority(ch,
2684                         ((struct nvgpu_set_priority_args *)buf)->priority);
2685                 gk20a_idle(dev);
2686                 break;
2687         case NVGPU_IOCTL_CHANNEL_ENABLE:
2688                 err = gk20a_busy(dev);
2689                 if (err) {
2690                         dev_err(&dev->dev,
2691                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2692                                 __func__, cmd);
2693                         break;
2694                 }
2695                 /* enable channel */
2696                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2697                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2698                         ccsr_channel_enable_set_true_f());
2699                 gk20a_idle(dev);
2700                 break;
2701         case NVGPU_IOCTL_CHANNEL_DISABLE:
2702                 err = gk20a_busy(dev);
2703                 if (err) {
2704                         dev_err(&dev->dev,
2705                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2706                                 __func__, cmd);
2707                         break;
2708                 }
2709                 /* disable channel */
2710                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2711                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2712                         ccsr_channel_enable_clr_true_f());
2713                 gk20a_idle(dev);
2714                 break;
2715         case NVGPU_IOCTL_CHANNEL_PREEMPT:
2716                 err = gk20a_busy(dev);
2717                 if (err) {
2718                         dev_err(&dev->dev,
2719                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2720                                 __func__, cmd);
2721                         break;
2722                 }
2723                 err = gk20a_fifo_preempt(ch->g, ch);
2724                 gk20a_idle(dev);
2725                 break;
2726         case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
2727                 err = gk20a_busy(dev);
2728                 if (err) {
2729                         dev_err(&dev->dev,
2730                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2731                                 __func__, cmd);
2732                         break;
2733                 }
2734                 err = gk20a_fifo_force_reset_ch(ch, true);
2735                 gk20a_idle(dev);
2736                 break;
2737         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
2738                 err = gk20a_channel_events_ctrl(ch,
2739                            (struct nvgpu_channel_events_ctrl_args *)buf);
2740                 break;
2741 #ifdef CONFIG_GK20A_CYCLE_STATS
2742         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
2743                 err = gk20a_busy(dev);
2744                 if (err) {
2745                         dev_err(&dev->dev,
2746                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2747                                 __func__, cmd);
2748                         break;
2749                 }
2750                 err = gk20a_channel_cycle_stats_snapshot(ch,
2751                                 (struct nvgpu_cycle_stats_snapshot_args *)buf);
2752                 gk20a_idle(dev);
2753                 break;
2754 #endif
2755         default:
2756                 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2757                 err = -ENOTTY;
2758                 break;
2759         }
2760
2761         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2762                 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2763
2764         mutex_unlock(&ch->ioctl_lock);
2765
2766         gk20a_channel_put(ch);
2767
2768         gk20a_dbg_fn("end");
2769
2770         return err;
2771 }