drivers/gpu/nvgpu/gk20a/channel_gk20a.c

   1 /*
   2  * drivers/video/tegra/host/gk20a/channel_gk20a.c
   3  *
   4  * GK20A Graphics channel
   5  *
   6  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
   7  *
   8  * This program is free software; you can redistribute it and/or modify it
   9  * under the terms and conditions of the GNU General Public License,
  10  * version 2, as published by the Free Software Foundation.
  11  *
  12  * This program is distributed in the hope it will be useful, but WITHOUT
  13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  15  * more details.
  16  *
  17  * You should have received a copy of the GNU General Public License along with
  18  * this program; if not, write to the Free Software Foundation, Inc.,
  19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  20  */
  21
  22 #include <linux/nvhost.h>
  23 #include <linux/list.h>
  24 #include <linux/delay.h>
  25 #include <linux/highmem.h> /* need for nvmap.h*/
  26 #include <trace/events/gk20a.h>
  27 #include <linux/scatterlist.h>
  28 #include <linux/file.h>
  29 #include <linux/anon_inodes.h>
  30 #include <linux/dma-buf.h>
  31
  32 #include "debug_gk20a.h"
  33
  34 #include "gk20a.h"
  35 #include "dbg_gpu_gk20a.h"
  36
  37 #include "hw_ram_gk20a.h"
  38 #include "hw_fifo_gk20a.h"
  39 #include "hw_pbdma_gk20a.h"
  40 #include "hw_ccsr_gk20a.h"
  41 #include "hw_ltc_gk20a.h"
  42
  43 #define NVMAP_HANDLE_PARAM_SIZE 1
  44
  45 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
  46 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
  47
  48 static void free_priv_cmdbuf(struct channel_gk20a *c,
  49                              struct priv_cmd_entry *e);
  50 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
  51
  52 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
  53 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
  54
  55 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
  56 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
  57 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
  58                         u64 gpfifo_base, u32 gpfifo_entries);
  59
  60 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
  61 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
  62
  63 static int channel_gk20a_alloc_inst(struct gk20a *g,
  64                                 struct channel_gk20a *ch);
  65 static void channel_gk20a_free_inst(struct gk20a *g,
  66                                 struct channel_gk20a *ch);
  67
  68 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
  69                                         bool add);
  70 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
  71
  72 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
  73 {
  74         struct channel_gk20a *ch = NULL;
  75         int chid;
  76
  77         mutex_lock(&f->ch_inuse_mutex);
  78         for (chid = 0; chid < f->num_channels; chid++) {
  79                 if (!f->channel[chid].in_use) {
  80                         f->channel[chid].in_use = true;
  81                         ch = &f->channel[chid];
  82                         break;
  83                 }
  84         }
  85         mutex_unlock(&f->ch_inuse_mutex);
  86
  87         return ch;
  88 }
  89
  90 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
  91 {
  92         mutex_lock(&f->ch_inuse_mutex);
  93         f->channel[c->hw_chid].in_use = false;
  94         mutex_unlock(&f->ch_inuse_mutex);
  95 }
  96
  97 int channel_gk20a_commit_va(struct channel_gk20a *c)
  98 {
  99         u64 addr;
 100         u32 addr_lo;
 101         u32 addr_hi;
 102         void *inst_ptr;
 103
 104         gk20a_dbg_fn("");
 105
 106         inst_ptr = c->inst_block.cpuva;
 107         if (!inst_ptr)
 108                 return -ENOMEM;
 109
 110         addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
 111         addr_lo = u64_lo32(addr >> 12);
 112         addr_hi = u64_hi32(addr);
 113
 114         gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
 115                    (u64)addr, addr_lo, addr_hi);
 116
 117         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
 118                 ram_in_page_dir_base_target_vid_mem_f() |
 119                 ram_in_page_dir_base_vol_true_f() |
 120                 ram_in_page_dir_base_lo_f(addr_lo));
 121
 122         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
 123                 ram_in_page_dir_base_hi_f(addr_hi));
 124
 125         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
 126                  u64_lo32(c->vm->va_limit) | 0xFFF);
 127
 128         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 129                 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
 130
 131         return 0;
 132 }
 133
 134 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
 135 {
 136         u32 addr_lo;
 137         u32 addr_hi;
 138         void *inst_ptr;
 139
 140         gk20a_dbg_fn("");
 141
 142         inst_ptr = c->inst_block.cpuva;
 143         if (!inst_ptr)
 144                 return -ENOMEM;
 145
 146         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
 147         addr_hi = u64_hi32(c->userd_iova);
 148
 149         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
 150                 c->hw_chid, (u64)c->userd_iova);
 151
 152         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
 153                  pbdma_userd_target_vid_mem_f() |
 154                  pbdma_userd_addr_f(addr_lo));
 155
 156         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
 157                  pbdma_userd_target_vid_mem_f() |
 158                  pbdma_userd_hi_addr_f(addr_hi));
 159
 160         return 0;
 161 }
 162
 163 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 164                                 u32 timeslice_timeout)
 165 {
 166         void *inst_ptr;
 167         int shift = 3;
 168         int value = timeslice_timeout;
 169
 170         inst_ptr = c->inst_block.cpuva;
 171         if (!inst_ptr)
 172                 return -ENOMEM;
 173
 174         /* disable channel */
 175         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
 176                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
 177                 ccsr_channel_enable_clr_true_f());
 178
 179         /* preempt the channel */
 180         WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
 181
 182         /* value field is 8 bits long */
 183         while (value >= 1 << 8) {
 184                 value >>= 1;
 185                 shift++;
 186         }
 187
 188         /* time slice register is only 18bits long */
 189         if ((value << shift) >= 1<<19) {
 190                 pr_err("Requested timeslice value is clamped to 18 bits\n");
 191                 value = 255;
 192                 shift = 10;
 193         }
 194
 195         /* set new timeslice */
 196         gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
 197                 value | (shift << 12) |
 198                 fifo_eng_timeslice_enable_true_f());
 199
 200         /* enable channel */
 201         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
 202                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
 203                 ccsr_channel_enable_set_true_f());
 204
 205         return 0;
 206 }
 207
 208 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 209                                 u64 gpfifo_base, u32 gpfifo_entries)
 210 {
 211         void *inst_ptr;
 212
 213         gk20a_dbg_fn("");
 214
 215         inst_ptr = c->inst_block.cpuva;
 216         if (!inst_ptr)
 217                 return -ENOMEM;
 218
 219         memset(inst_ptr, 0, ram_fc_size_val_v());
 220
 221         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
 222                 pbdma_gp_base_offset_f(
 223                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
 224
 225         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
 226                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
 227                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
 228
 229         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
 230                  pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
 231
 232         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
 233                 pbdma_formats_gp_fermi0_f() |
 234                 pbdma_formats_pb_fermi1_f() |
 235                 pbdma_formats_mp_fermi0_f());
 236
 237         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
 238                 pbdma_pb_header_priv_user_f() |
 239                 pbdma_pb_header_method_zero_f() |
 240                 pbdma_pb_header_subchannel_zero_f() |
 241                 pbdma_pb_header_level_main_f() |
 242                 pbdma_pb_header_first_true_f() |
 243                 pbdma_pb_header_type_inc_f());
 244
 245         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
 246                 pbdma_subdevice_id_f(1) |
 247                 pbdma_subdevice_status_active_f() |
 248                 pbdma_subdevice_channel_dma_enable_f());
 249
 250         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
 251
 252         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
 253                 pbdma_acquire_retry_man_2_f() |
 254                 pbdma_acquire_retry_exp_2_f() |
 255                 pbdma_acquire_timeout_exp_max_f() |
 256                 pbdma_acquire_timeout_man_max_f() |
 257                 pbdma_acquire_timeout_en_disable_f());
 258
 259         gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
 260                 fifo_eng_timeslice_timeout_128_f() |
 261                 fifo_eng_timeslice_timescale_3_f() |
 262                 fifo_eng_timeslice_enable_true_f());
 263
 264         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
 265                 fifo_pb_timeslice_timeout_16_f() |
 266                 fifo_pb_timeslice_timescale_0_f() |
 267                 fifo_pb_timeslice_enable_true_f());
 268
 269         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 270
 271         return 0;
 272 }
 273
 274 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
 275 {
 276         BUG_ON(!c->userd_cpu_va);
 277
 278         gk20a_dbg_fn("");
 279
 280         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
 281         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
 282         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
 283         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
 284         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
 285         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
 286         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
 287         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
 288         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
 289         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
 290
 291         return 0;
 292 }
 293
 294 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
 295 {
 296         struct gk20a *g = ch_gk20a->g;
 297         struct fifo_gk20a *f = &g->fifo;
 298         struct fifo_engine_info_gk20a *engine_info =
 299                 f->engine_info + ENGINE_GR_GK20A;
 300
 301         u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
 302                 >> ram_in_base_shift_v();
 303
 304         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
 305                 ch_gk20a->hw_chid, inst_ptr);
 306
 307         ch_gk20a->bound = true;
 308
 309         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
 310                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
 311                  ~ccsr_channel_runlist_f(~0)) |
 312                  ccsr_channel_runlist_f(engine_info->runlist_id));
 313
 314         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
 315                 ccsr_channel_inst_ptr_f(inst_ptr) |
 316                 ccsr_channel_inst_target_vid_mem_f() |
 317                 ccsr_channel_inst_bind_true_f());
 318
 319         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
 320                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
 321                  ~ccsr_channel_enable_set_f(~0)) |
 322                  ccsr_channel_enable_set_true_f());
 323 }
 324
 325 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
 326 {
 327         struct gk20a *g = ch_gk20a->g;
 328
 329         gk20a_dbg_fn("");
 330
 331         if (ch_gk20a->bound)
 332                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
 333                         ccsr_channel_inst_ptr_f(0) |
 334                         ccsr_channel_inst_bind_false_f());
 335
 336         ch_gk20a->bound = false;
 337
 338         /*
 339          * if we are agrressive then we can destroy the syncpt
 340          * resource at this point
 341          * if not, then it will be destroyed at channel_free()
 342          */
 343         if (ch_gk20a->sync && ch_gk20a->sync->syncpt_aggressive_destroy) {
 344                 ch_gk20a->sync->destroy(ch_gk20a->sync);
 345                 ch_gk20a->sync = NULL;
 346         }
 347 }
 348
 349 static int channel_gk20a_alloc_inst(struct gk20a *g,
 350                                 struct channel_gk20a *ch)
 351 {
 352         struct device *d = dev_from_gk20a(g);
 353         int err = 0;
 354         dma_addr_t iova;
 355
 356         gk20a_dbg_fn("");
 357
 358         ch->inst_block.size = ram_in_alloc_size_v();
 359         ch->inst_block.cpuva = dma_alloc_coherent(d,
 360                                         ch->inst_block.size,
 361                                         &iova,
 362                                         GFP_KERNEL);
 363         if (!ch->inst_block.cpuva) {
 364                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
 365                 err = -ENOMEM;
 366                 goto clean_up;
 367         }
 368
 369         ch->inst_block.iova = iova;
 370         ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
 371                                                         ch->inst_block.iova);
 372         if (!ch->inst_block.cpu_pa) {
 373                 gk20a_err(d, "%s: failed to get physical address\n", __func__);
 374                 err = -ENOMEM;
 375                 goto clean_up;
 376         }
 377
 378         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
 379                 ch->hw_chid, (u64)ch->inst_block.cpu_pa);
 380
 381         gk20a_dbg_fn("done");
 382         return 0;
 383
 384 clean_up:
 385         gk20a_err(d, "fail");
 386         channel_gk20a_free_inst(g, ch);
 387         return err;
 388 }
 389
 390 static void channel_gk20a_free_inst(struct gk20a *g,
 391                                 struct channel_gk20a *ch)
 392 {
 393         struct device *d = dev_from_gk20a(g);
 394
 395         if (ch->inst_block.cpuva)
 396                 dma_free_coherent(d, ch->inst_block.size,
 397                                 ch->inst_block.cpuva, ch->inst_block.iova);
 398         ch->inst_block.cpuva = NULL;
 399         ch->inst_block.iova = 0;
 400         memset(&ch->inst_block, 0, sizeof(struct inst_desc));
 401 }
 402
 403 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
 404 {
 405         return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
 406 }
 407
 408 void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
 409 {
 410         /* ensure no fences are pending */
 411         if (ch->sync)
 412                 ch->sync->set_min_eq_max(ch->sync);
 413
 414         /* disable channel */
 415         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
 416                      gk20a_readl(ch->g,
 417                      ccsr_channel_r(ch->hw_chid)) |
 418                      ccsr_channel_enable_clr_true_f());
 419 }
 420
 421 static void channel_gk20a_enable(struct channel_gk20a *ch)
 422 {
 423         /* enable channel */
 424         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
 425                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
 426                 ccsr_channel_enable_set_true_f());
 427 }
 428
 429 static void channel_gk20a_disable(struct channel_gk20a *ch)
 430 {
 431         /* disable channel */
 432         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
 433                 gk20a_readl(ch->g,
 434                         ccsr_channel_r(ch->hw_chid)) |
 435                         ccsr_channel_enable_clr_true_f());
 436 }
 437
 438 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
 439 {
 440         bool channel_idle = false;
 441         unsigned long end_jiffies = jiffies +
 442                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
 443
 444         do {
 445                 mutex_lock(&ch->jobs_lock);
 446                 channel_idle = list_empty(&ch->jobs);
 447                 mutex_unlock(&ch->jobs_lock);
 448                 if (channel_idle)
 449                         break;
 450
 451                 usleep_range(1000, 3000);
 452         } while (time_before(jiffies, end_jiffies)
 453                         || !tegra_platform_is_silicon());
 454
 455         if (!channel_idle) {
 456                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
 457                                 ch->hw_chid);
 458                 return -EBUSY;
 459         }
 460
 461         return 0;
 462 }
 463
 464 void gk20a_disable_channel(struct channel_gk20a *ch,
 465                            bool finish,
 466                            unsigned long finish_timeout)
 467 {
 468         if (finish) {
 469                 int err = gk20a_channel_finish(ch, finish_timeout);
 470                 WARN_ON(err);
 471         }
 472
 473         /* disable the channel from hw and increment syncpoints */
 474         gk20a_disable_channel_no_update(ch);
 475
 476         gk20a_wait_channel_idle(ch);
 477
 478         /* preempt the channel */
 479         gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
 480
 481         /* remove channel from runlist */
 482         channel_gk20a_update_runlist(ch, false);
 483 }
 484
 485 #if defined(CONFIG_GK20A_CYCLE_STATS)
 486
 487 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
 488 {
 489         /* disable existing cyclestats buffer */
 490         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
 491         if (ch->cyclestate.cyclestate_buffer_handler) {
 492                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
 493                                 ch->cyclestate.cyclestate_buffer);
 494                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
 495                 ch->cyclestate.cyclestate_buffer_handler = NULL;
 496                 ch->cyclestate.cyclestate_buffer = NULL;
 497                 ch->cyclestate.cyclestate_buffer_size = 0;
 498         }
 499         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
 500 }
 501
 502 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
 503                        struct nvhost_cycle_stats_args *args)
 504 {
 505         struct dma_buf *dmabuf;
 506         void *virtual_address;
 507
 508         if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
 509
 510                 /* set up new cyclestats buffer */
 511                 dmabuf = dma_buf_get(args->nvmap_handle);
 512                 if (IS_ERR(dmabuf))
 513                         return PTR_ERR(dmabuf);
 514                 virtual_address = dma_buf_vmap(dmabuf);
 515                 if (!virtual_address)
 516                         return -ENOMEM;
 517
 518                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
 519                 ch->cyclestate.cyclestate_buffer = virtual_address;
 520                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
 521                 return 0;
 522
 523         } else if (!args->nvmap_handle &&
 524                         ch->cyclestate.cyclestate_buffer_handler) {
 525                 gk20a_free_cycle_stats_buffer(ch);
 526                 return 0;
 527
 528         } else if (!args->nvmap_handle &&
 529                         !ch->cyclestate.cyclestate_buffer_handler) {
 530                 /* no requst from GL */
 531                 return 0;
 532
 533         } else {
 534                 pr_err("channel already has cyclestats buffer\n");
 535                 return -EINVAL;
 536         }
 537 }
 538 #endif
 539
 540 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
 541                 struct nvhost_set_error_notifier *args)
 542 {
 543         struct device *dev = dev_from_gk20a(ch->g);
 544         struct dma_buf *dmabuf;
 545         void *va;
 546         u64 end = args->offset + sizeof(struct nvhost_notification);
 547
 548         if (!args->mem) {
 549                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
 550                 return -EINVAL;
 551         }
 552
 553         dmabuf = dma_buf_get(args->mem);
 554
 555         if (ch->error_notifier_ref)
 556                 gk20a_free_error_notifiers(ch);
 557
 558         if (IS_ERR(dmabuf)) {
 559                 pr_err("Invalid handle: %d\n", args->mem);
 560                 return -EINVAL;
 561         }
 562
 563         if (end > dmabuf->size || end < sizeof(struct nvhost_notification)) {
 564                 dma_buf_put(dmabuf);
 565                 gk20a_err(dev, "gk20a_init_error_notifier: invalid offset\n");
 566                 return -EINVAL;
 567         }
 568
 569         /* map handle */
 570         va = dma_buf_vmap(dmabuf);
 571         if (!va) {
 572                 dma_buf_put(dmabuf);
 573                 pr_err("Cannot map notifier handle\n");
 574                 return -ENOMEM;
 575         }
 576
 577         /* set channel notifiers pointer */
 578         ch->error_notifier_ref = dmabuf;
 579         ch->error_notifier = va + args->offset;
 580         ch->error_notifier_va = va;
 581         memset(ch->error_notifier, 0, sizeof(struct nvhost_notification));
 582         return 0;
 583 }
 584
 585 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
 586 {
 587         if (ch->error_notifier_ref) {
 588                 struct timespec time_data;
 589                 u64 nsec;
 590                 getnstimeofday(&time_data);
 591                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
 592                                 (u64)time_data.tv_nsec;
 593                 ch->error_notifier->time_stamp.nanoseconds[0] =
 594                                 (u32)nsec;
 595                 ch->error_notifier->time_stamp.nanoseconds[1] =
 596                                 (u32)(nsec >> 32);
 597                 ch->error_notifier->info32 = error;
 598                 ch->error_notifier->status = 0xffff;
 599                 gk20a_err(dev_from_gk20a(ch->g),
 600                                 "error notifier set to %d\n", error);
 601         }
 602 }
 603
 604 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
 605 {
 606         if (ch->error_notifier_ref) {
 607                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
 608                 dma_buf_put(ch->error_notifier_ref);
 609                 ch->error_notifier_ref = 0;
 610                 ch->error_notifier = 0;
 611                 ch->error_notifier_va = 0;
 612         }
 613 }
 614
 615 void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
 616 {
 617         struct gk20a *g = ch->g;
 618         struct device *d = dev_from_gk20a(g);
 619         struct fifo_gk20a *f = &g->fifo;
 620         struct gr_gk20a *gr = &g->gr;
 621         struct vm_gk20a *ch_vm = ch->vm;
 622         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
 623         struct dbg_session_gk20a *dbg_s;
 624
 625         gk20a_dbg_fn("");
 626
 627         /* if engine reset was deferred, perform it now */
 628         mutex_lock(&f->deferred_reset_mutex);
 629         if (g->fifo.deferred_reset_pending) {
 630                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
 631                            " deferred, running now");
 632                 fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
 633                 g->fifo.mmu_fault_engines = 0;
 634                 g->fifo.deferred_reset_pending = false;
 635         }
 636         mutex_unlock(&f->deferred_reset_mutex);
 637
 638         if (!ch->bound)
 639                 return;
 640
 641         if (!gk20a_channel_as_bound(ch))
 642                 goto unbind;
 643
 644         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
 645                         timeout);
 646
 647         gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
 648
 649         gk20a_free_error_notifiers(ch);
 650
 651         /* release channel ctx */
 652         gk20a_free_channel_ctx(ch);
 653
 654         gk20a_gr_flush_channel_tlb(gr);
 655
 656         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
 657
 658         /* free gpfifo */
 659         if (ch->gpfifo.gpu_va)
 660                 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
 661                         ch->gpfifo.size, gk20a_mem_flag_none);
 662         if (ch->gpfifo.cpu_va)
 663                 dma_free_coherent(d, ch->gpfifo.size,
 664                         ch->gpfifo.cpu_va, ch->gpfifo.iova);
 665         ch->gpfifo.cpu_va = NULL;
 666         ch->gpfifo.iova = 0;
 667
 668         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 669
 670 #if defined(CONFIG_GK20A_CYCLE_STATS)
 671         gk20a_free_cycle_stats_buffer(ch);
 672 #endif
 673
 674         channel_gk20a_free_priv_cmdbuf(ch);
 675
 676         if (ch->sync) {
 677                 ch->sync->destroy(ch->sync);
 678                 ch->sync = NULL;
 679         }
 680
 681         /* release channel binding to the as_share */
 682         gk20a_as_release_share(ch_vm->as_share);
 683
 684 unbind:
 685         channel_gk20a_unbind(ch);
 686         channel_gk20a_free_inst(g, ch);
 687
 688         ch->vpr = false;
 689         ch->vm = NULL;
 690         WARN_ON(ch->sync);
 691
 692         /* unlink all debug sessions */
 693         mutex_lock(&ch->dbg_s_lock);
 694
 695         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
 696                 dbg_s->ch = NULL;
 697                 list_del_init(&dbg_s->dbg_s_list_node);
 698         }
 699
 700         mutex_unlock(&ch->dbg_s_lock);
 701
 702         /* ALWAYS last */
 703         release_used_channel(f, ch);
 704 }
 705
 706 int gk20a_channel_release(struct inode *inode, struct file *filp)
 707 {
 708         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
 709         struct gk20a *g = ch->g;
 710         int err;
 711
 712         trace_gk20a_channel_release(dev_name(&g->dev->dev));
 713
 714         err = gk20a_busy(ch->g->dev);
 715         if (err) {
 716                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
 717                         ch->hw_chid);
 718                 return err;
 719         }
 720         gk20a_free_channel(ch, true);
 721         gk20a_idle(ch->g->dev);
 722
 723         gk20a_put_client(g);
 724         filp->private_data = NULL;
 725         return 0;
 726 }
 727
 728 static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 729 {
 730         struct fifo_gk20a *f = &g->fifo;
 731         struct channel_gk20a *ch;
 732
 733         ch = acquire_unused_channel(f);
 734         if (ch == NULL) {
 735                 /* TBD: we want to make this virtualizable */
 736                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
 737                 return 0;
 738         }
 739
 740         ch->g = g;
 741
 742         if (channel_gk20a_alloc_inst(g, ch)) {
 743                 ch->in_use = false;
 744                 gk20a_err(dev_from_gk20a(g),
 745                            "failed to open gk20a channel, out of inst mem");
 746
 747                 return 0;
 748         }
 749         g->ops.fifo.bind_channel(ch);
 750         ch->pid = current->pid;
 751
 752         /* reset timeout counter and update timestamp */
 753         ch->timeout_accumulated_ms = 0;
 754         ch->timeout_gpfifo_get = 0;
 755         /* set gr host default timeout */
 756         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
 757         ch->timeout_debug_dump = true;
 758         ch->has_timedout = false;
 759         ch->obj_class = 0;
 760
 761         /* The channel is *not* runnable at this point. It still needs to have
 762          * an address space bound and allocate a gpfifo and grctx. */
 763
 764         init_waitqueue_head(&ch->notifier_wq);
 765         init_waitqueue_head(&ch->semaphore_wq);
 766         init_waitqueue_head(&ch->submit_wq);
 767
 768         return ch;
 769 }
 770
 771 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
 772 {
 773         int err;
 774         struct channel_gk20a *ch;
 775
 776         trace_gk20a_channel_open(dev_name(&g->dev->dev));
 777
 778         err = gk20a_get_client(g);
 779         if (err) {
 780                 gk20a_err(dev_from_gk20a(g),
 781                         "failed to get client ref");
 782                 return err;
 783         }
 784
 785         err = gk20a_busy(g->dev);
 786         if (err) {
 787                 gk20a_put_client(g);
 788                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
 789                 return err;
 790         }
 791         ch = gk20a_open_new_channel(g);
 792         gk20a_idle(g->dev);
 793         if (!ch) {
 794                 gk20a_put_client(g);
 795                 gk20a_err(dev_from_gk20a(g),
 796                         "failed to get f");
 797                 return -ENOMEM;
 798         }
 799
 800         filp->private_data = ch;
 801         return 0;
 802 }
 803
 804 int gk20a_channel_open(struct inode *inode, struct file *filp)
 805 {
 806         struct gk20a *g = container_of(inode->i_cdev,
 807                         struct gk20a, channel.cdev);
 808         return __gk20a_channel_open(g, filp);
 809 }
 810
 811 /* allocate private cmd buffer.
 812    used for inserting commands before/after user submitted buffers. */
 813 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
 814 {
 815         struct device *d = dev_from_gk20a(c->g);
 816         struct vm_gk20a *ch_vm = c->vm;
 817         struct priv_cmd_queue *q = &c->priv_cmd_q;
 818         struct priv_cmd_entry *e;
 819         u32 i = 0, size;
 820         int err = 0;
 821         struct sg_table *sgt;
 822         dma_addr_t iova;
 823
 824         /* Kernel can insert gpfifos before and after user gpfifos.
 825            Before user gpfifos, kernel inserts fence_wait, which takes
 826            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
 827            After user gpfifos, kernel inserts fence_get, which takes
 828            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
 829            = 6 dwords.
 830            Worse case if kernel adds both of them for every user gpfifo,
 831            max size of priv_cmdbuf is :
 832            (gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
 833         size = roundup_pow_of_two(
 834                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
 835
 836         q->mem.base_cpuva = dma_alloc_coherent(d, size,
 837                                         &iova,
 838                                         GFP_KERNEL);
 839         if (!q->mem.base_cpuva) {
 840                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
 841                 err = -ENOMEM;
 842                 goto clean_up;
 843         }
 844
 845         q->mem.base_iova = iova;
 846         q->mem.size = size;
 847
 848         err = gk20a_get_sgtable(d, &sgt,
 849                         q->mem.base_cpuva, q->mem.base_iova, size);
 850         if (err) {
 851                 gk20a_err(d, "%s: failed to create sg table\n", __func__);
 852                 goto clean_up;
 853         }
 854
 855         memset(q->mem.base_cpuva, 0, size);
 856
 857         q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
 858                                         size,
 859                                         0, /* flags */
 860                                         gk20a_mem_flag_none);
 861         if (!q->base_gpuva) {
 862                 gk20a_err(d, "ch %d : failed to map gpu va"
 863                            "for priv cmd buffer", c->hw_chid);
 864                 err = -ENOMEM;
 865                 goto clean_up_sgt;
 866         }
 867
 868         q->size = q->mem.size / sizeof (u32);
 869
 870         INIT_LIST_HEAD(&q->head);
 871         INIT_LIST_HEAD(&q->free);
 872
 873         /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
 874         for (i = 0; i < q->size / 4; i++) {
 875                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
 876                 if (!e) {
 877                         gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
 878                                 c->hw_chid);
 879                         err = -ENOMEM;
 880                         goto clean_up_sgt;
 881                 }
 882                 e->pre_alloc = true;
 883                 list_add(&e->list, &q->free);
 884         }
 885
 886         gk20a_free_sgtable(&sgt);
 887
 888         return 0;
 889
 890 clean_up_sgt:
 891         gk20a_free_sgtable(&sgt);
 892 clean_up:
 893         channel_gk20a_free_priv_cmdbuf(c);
 894         return err;
 895 }
 896
 897 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
 898 {
 899         struct device *d = dev_from_gk20a(c->g);
 900         struct vm_gk20a *ch_vm = c->vm;
 901         struct priv_cmd_queue *q = &c->priv_cmd_q;
 902         struct priv_cmd_entry *e;
 903         struct list_head *pos, *tmp, *head;
 904
 905         if (q->size == 0)
 906                 return;
 907
 908         if (q->base_gpuva)
 909                 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
 910                                 q->mem.size, gk20a_mem_flag_none);
 911         if (q->mem.base_cpuva)
 912                 dma_free_coherent(d, q->mem.size,
 913                         q->mem.base_cpuva, q->mem.base_iova);
 914         q->mem.base_cpuva = NULL;
 915         q->mem.base_iova = 0;
 916
 917         /* free used list */
 918         head = &q->head;
 919         list_for_each_safe(pos, tmp, head) {
 920                 e = container_of(pos, struct priv_cmd_entry, list);
 921                 free_priv_cmdbuf(c, e);
 922         }
 923
 924         /* free free list */
 925         head = &q->free;
 926         list_for_each_safe(pos, tmp, head) {
 927                 e = container_of(pos, struct priv_cmd_entry, list);
 928                 e->pre_alloc = false;
 929                 free_priv_cmdbuf(c, e);
 930         }
 931
 932         memset(q, 0, sizeof(struct priv_cmd_queue));
 933 }
 934
 935 /* allocate a cmd buffer with given size. size is number of u32 entries */
 936 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 937                              struct priv_cmd_entry **entry)
 938 {
 939         struct priv_cmd_queue *q = &c->priv_cmd_q;
 940         struct priv_cmd_entry *e;
 941         struct list_head *node;
 942         u32 free_count;
 943         u32 size = orig_size;
 944         bool no_retry = false;
 945
 946         gk20a_dbg_fn("size %d", orig_size);
 947
 948         *entry = NULL;
 949
 950         /* if free space in the end is less than requested, increase the size
 951          * to make the real allocated space start from beginning. */
 952         if (q->put + size > q->size)
 953                 size = orig_size + (q->size - q->put);
 954
 955         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
 956                         c->hw_chid, q->get, q->put);
 957
 958 TRY_AGAIN:
 959         free_count = (q->size - (q->put - q->get) - 1) % q->size;
 960
 961         if (size > free_count) {
 962                 if (!no_retry) {
 963                         recycle_priv_cmdbuf(c);
 964                         no_retry = true;
 965                         goto TRY_AGAIN;
 966                 } else
 967                         return -EAGAIN;
 968         }
 969
 970         if (unlikely(list_empty(&q->free))) {
 971
 972                 gk20a_dbg_info("ch %d: run out of pre-alloc entries",
 973                         c->hw_chid);
 974
 975                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
 976                 if (!e) {
 977                         gk20a_err(dev_from_gk20a(c->g),
 978                                 "ch %d: fail to allocate priv cmd entry",
 979                                 c->hw_chid);
 980                         return -ENOMEM;
 981                 }
 982         } else  {
 983                 node = q->free.next;
 984                 list_del(node);
 985                 e = container_of(node, struct priv_cmd_entry, list);
 986         }
 987
 988         e->size = orig_size;
 989         e->gp_get = c->gpfifo.get;
 990         e->gp_put = c->gpfifo.put;
 991         e->gp_wrap = c->gpfifo.wrap;
 992
 993         /* if we have increased size to skip free space in the end, set put
 994            to beginning of cmd buffer (0) + size */
 995         if (size != orig_size) {
 996                 e->ptr = q->mem.base_cpuva;
 997                 e->gva = q->base_gpuva;
 998                 q->put = orig_size;
 999         } else {
1000                 e->ptr = q->mem.base_cpuva + q->put;
1001                 e->gva = q->base_gpuva + q->put * sizeof(u32);
1002                 q->put = (q->put + orig_size) & (q->size - 1);
1003         }
1004
1005         /* we already handled q->put + size > q->size so BUG_ON this */
1006         BUG_ON(q->put > q->size);
1007
1008         /* add new entry to head since we free from head */
1009         list_add(&e->list, &q->head);
1010
1011         *entry = e;
1012
1013         gk20a_dbg_fn("done");
1014
1015         return 0;
1016 }
1017
1018 /* Don't call this to free an explict cmd entry.
1019  * It doesn't update priv_cmd_queue get/put */
1020 static void free_priv_cmdbuf(struct channel_gk20a *c,
1021                              struct priv_cmd_entry *e)
1022 {
1023         struct priv_cmd_queue *q = &c->priv_cmd_q;
1024
1025         if (!e)
1026                 return;
1027
1028         list_del(&e->list);
1029
1030         if (unlikely(!e->pre_alloc))
1031                 kfree(e);
1032         else {
1033                 memset(e, 0, sizeof(struct priv_cmd_entry));
1034                 e->pre_alloc = true;
1035                 list_add(&e->list, &q->free);
1036         }
1037 }
1038
1039 /* free entries if they're no longer being used */
1040 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1041 {
1042         struct priv_cmd_queue *q = &c->priv_cmd_q;
1043         struct priv_cmd_entry *e, *tmp;
1044         struct list_head *head = &q->head;
1045         bool wrap_around, found = false;
1046
1047         gk20a_dbg_fn("");
1048
1049         /* Find the most recent free entry. Free it and everything before it */
1050         list_for_each_entry(e, head, list) {
1051
1052                 gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1053                         "curr get:put:wrap %d:%d:%d",
1054                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1055                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1056
1057                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
1058                 if (e->gp_get < e->gp_put) {
1059                         if (c->gpfifo.get >= e->gp_put ||
1060                             wrap_around) {
1061                                 found = true;
1062                                 break;
1063                         } else
1064                                 e->gp_get = c->gpfifo.get;
1065                 } else if (e->gp_get > e->gp_put) {
1066                         if (wrap_around &&
1067                             c->gpfifo.get >= e->gp_put) {
1068                                 found = true;
1069                                 break;
1070                         } else
1071                                 e->gp_get = c->gpfifo.get;
1072                 }
1073         }
1074
1075         if (found)
1076                 q->get = (e->ptr - q->mem.base_cpuva) + e->size;
1077         else {
1078                 gk20a_dbg_info("no free entry recycled");
1079                 return;
1080         }
1081
1082         list_for_each_entry_safe_continue(e, tmp, head, list) {
1083                 free_priv_cmdbuf(c, e);
1084         }
1085
1086         gk20a_dbg_fn("done");
1087 }
1088
1089
1090 static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1091                                       struct nvhost_alloc_gpfifo_args *args)
1092 {
1093         struct gk20a *g = c->g;
1094         struct device *d = dev_from_gk20a(g);
1095         struct vm_gk20a *ch_vm;
1096         u32 gpfifo_size;
1097         int err = 0;
1098         struct sg_table *sgt;
1099         dma_addr_t iova;
1100
1101         /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
1102            and another one after, for internal usage. Triple the requested size. */
1103         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
1104
1105         if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1106                 c->vpr = true;
1107
1108         /* an address space needs to have been bound at this point.   */
1109         if (!gk20a_channel_as_bound(c)) {
1110                 gk20a_err(d,
1111                             "not bound to an address space at time of gpfifo"
1112                             " allocation.  Attempting to create and bind to"
1113                             " one...");
1114                 return -EINVAL;
1115         }
1116         ch_vm = c->vm;
1117
1118         c->cmds_pending = false;
1119         c->last_submit_fence.valid = false;
1120
1121         c->ramfc.offset = 0;
1122         c->ramfc.size = ram_in_ramfc_s() / 8;
1123
1124         if (c->gpfifo.cpu_va) {
1125                 gk20a_err(d, "channel %d :"
1126                            "gpfifo already allocated", c->hw_chid);
1127                 return -EEXIST;
1128         }
1129
1130         c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1131         c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1132                                                 c->gpfifo.size,
1133                                                 &iova,
1134                                                 GFP_KERNEL);
1135         if (!c->gpfifo.cpu_va) {
1136                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1137                 err = -ENOMEM;
1138                 goto clean_up;
1139         }
1140
1141         c->gpfifo.iova = iova;
1142         c->gpfifo.entry_num = gpfifo_size;
1143
1144         c->gpfifo.get = c->gpfifo.put = 0;
1145
1146         err = gk20a_get_sgtable(d, &sgt,
1147                         c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1148         if (err) {
1149                 gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
1150                 goto clean_up;
1151         }
1152
1153         c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1154                                         &sgt,
1155                                         c->gpfifo.size,
1156                                         0, /* flags */
1157                                         gk20a_mem_flag_none);
1158         if (!c->gpfifo.gpu_va) {
1159                 gk20a_err(d, "channel %d : failed to map"
1160                            " gpu_va for gpfifo", c->hw_chid);
1161                 err = -ENOMEM;
1162                 goto clean_up_sgt;
1163         }
1164
1165         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1166                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1167
1168         channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1169
1170         channel_gk20a_setup_userd(c);
1171         channel_gk20a_commit_userd(c);
1172
1173         /* TBD: setup engine contexts */
1174
1175         err = channel_gk20a_alloc_priv_cmdbuf(c);
1176         if (err)
1177                 goto clean_up_unmap;
1178
1179         err = channel_gk20a_update_runlist(c, true);
1180         if (err)
1181                 goto clean_up_unmap;
1182
1183         gk20a_free_sgtable(&sgt);
1184
1185         gk20a_dbg_fn("done");
1186         return 0;
1187
1188 clean_up_unmap:
1189         gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1190                 c->gpfifo.size, gk20a_mem_flag_none);
1191 clean_up_sgt:
1192         gk20a_free_sgtable(&sgt);
1193 clean_up:
1194         dma_free_coherent(d, c->gpfifo.size,
1195                 c->gpfifo.cpu_va, c->gpfifo.iova);
1196         c->gpfifo.cpu_va = NULL;
1197         c->gpfifo.iova = 0;
1198         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1199         gk20a_err(d, "fail");
1200         return err;
1201 }
1202
1203 static inline int wfi_cmd_size(void)
1204 {
1205         return 2;
1206 }
1207 void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
1208 {
1209         /* wfi */
1210         cmd->ptr[(*i)++] = 0x2001001E;
1211         /* handle, ignored */
1212         cmd->ptr[(*i)++] = 0x00000000;
1213 }
1214
1215 static inline bool check_gp_put(struct gk20a *g,
1216                                 struct channel_gk20a *c)
1217 {
1218         u32 put;
1219         /* gp_put changed unexpectedly since last update? */
1220         put = gk20a_bar1_readl(g,
1221                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1222         if (c->gpfifo.put != put) {
1223                 /*TBD: BUG_ON/teardown on this*/
1224                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1225                            "since last update");
1226                 c->gpfifo.put = put;
1227                 return false; /* surprise! */
1228         }
1229         return true; /* checked out ok */
1230 }
1231
1232 /* Update with this periodically to determine how the gpfifo is draining. */
1233 static inline u32 update_gp_get(struct gk20a *g,
1234                                 struct channel_gk20a *c)
1235 {
1236         u32 new_get = gk20a_bar1_readl(g,
1237                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1238         if (new_get < c->gpfifo.get)
1239                 c->gpfifo.wrap = !c->gpfifo.wrap;
1240         c->gpfifo.get = new_get;
1241         return new_get;
1242 }
1243
1244 static inline u32 gp_free_count(struct channel_gk20a *c)
1245 {
1246         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1247                 c->gpfifo.entry_num;
1248 }
1249
1250 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1251                 u32 timeout_delta_ms)
1252 {
1253         u32 gpfifo_get = update_gp_get(ch->g, ch);
1254         /* Count consequent timeout isr */
1255         if (gpfifo_get == ch->timeout_gpfifo_get) {
1256                 /* we didn't advance since previous channel timeout check */
1257                 ch->timeout_accumulated_ms += timeout_delta_ms;
1258         } else {
1259                 /* first timeout isr encountered */
1260                 ch->timeout_accumulated_ms = timeout_delta_ms;
1261         }
1262
1263         ch->timeout_gpfifo_get = gpfifo_get;
1264
1265         return ch->g->timeouts_enabled &&
1266                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1267 }
1268
1269
1270 /* Issue a syncpoint increment *preceded* by a wait-for-idle
1271  * command.  All commands on the channel will have been
1272  * consumed at the time the fence syncpoint increment occurs.
1273  */
1274 static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
1275 {
1276         struct priv_cmd_entry *cmd = NULL;
1277         struct gk20a *g = c->g;
1278         u32 free_count;
1279         int err;
1280
1281         if (c->has_timedout)
1282                 return -ETIMEDOUT;
1283
1284         if (!c->sync) {
1285                 c->sync = gk20a_channel_sync_create(c);
1286                 if (!c->sync)
1287                         return -ENOMEM;
1288         }
1289
1290         update_gp_get(g, c);
1291         free_count = gp_free_count(c);
1292         if (unlikely(!free_count)) {
1293                 gk20a_err(dev_from_gk20a(g),
1294                            "not enough gpfifo space");
1295                 return -EAGAIN;
1296         }
1297
1298         err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence);
1299         if (unlikely(err))
1300                 return err;
1301
1302         WARN_ON(!c->last_submit_fence.wfi);
1303
1304         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1305         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1306                 pbdma_gp_entry1_length_f(cmd->size);
1307
1308         c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1309
1310         /* save gp_put */
1311         cmd->gp_put = c->gpfifo.put;
1312
1313         gk20a_bar1_writel(g,
1314                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1315                 c->gpfifo.put);
1316
1317         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1318                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1319
1320         return 0;
1321 }
1322
1323 static u32 get_gp_free_count(struct channel_gk20a *c)
1324 {
1325         update_gp_get(c->g, c);
1326         return gp_free_count(c);
1327 }
1328
1329 static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1330 {
1331         void *mem = NULL;
1332         unsigned int words;
1333         u64 offset;
1334         struct dma_buf *dmabuf = NULL;
1335
1336         if (gk20a_debug_trace_cmdbuf) {
1337                 u64 gpu_va = (u64)g->entry0 |
1338                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1339                 int err;
1340
1341                 words = pbdma_gp_entry1_length_v(g->entry1);
1342                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1343                 if (!err)
1344                         mem = dma_buf_vmap(dmabuf);
1345         }
1346
1347         if (mem) {
1348                 u32 i;
1349                 /*
1350                  * Write in batches of 128 as there seems to be a limit
1351                  * of how much you can output to ftrace at once.
1352                  */
1353                 for (i = 0; i < words; i += 128U) {
1354                         trace_gk20a_push_cmdbuf(
1355                                 c->g->dev->name,
1356                                 0,
1357                                 min(words - i, 128U),
1358                                 offset + i * sizeof(u32),
1359                                 mem);
1360                 }
1361                 dma_buf_vunmap(dmabuf, mem);
1362         }
1363 }
1364
1365 static int gk20a_channel_add_job(struct channel_gk20a *c,
1366                                  struct gk20a_channel_fence *fence)
1367 {
1368         struct vm_gk20a *vm = c->vm;
1369         struct channel_gk20a_job *job = NULL;
1370         struct mapped_buffer_node **mapped_buffers = NULL;
1371         int err = 0, num_mapped_buffers;
1372
1373         /* job needs reference to this vm */
1374         gk20a_vm_get(vm);
1375
1376         err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1377         if (err) {
1378                 gk20a_vm_put(vm);
1379                 return err;
1380         }
1381
1382         job = kzalloc(sizeof(*job), GFP_KERNEL);
1383         if (!job) {
1384                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1385                 gk20a_vm_put(vm);
1386                 return -ENOMEM;
1387         }
1388
1389         job->num_mapped_buffers = num_mapped_buffers;
1390         job->mapped_buffers = mapped_buffers;
1391         job->fence = *fence;
1392
1393         mutex_lock(&c->jobs_lock);
1394         list_add_tail(&job->list, &c->jobs);
1395         mutex_unlock(&c->jobs_lock);
1396
1397         return 0;
1398 }
1399
1400 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1401 {
1402         struct vm_gk20a *vm = c->vm;
1403         struct channel_gk20a_job *job, *n;
1404
1405         wake_up(&c->submit_wq);
1406
1407         mutex_lock(&c->jobs_lock);
1408         list_for_each_entry_safe(job, n, &c->jobs, list) {
1409                 bool completed = WARN_ON(!c->sync) ||
1410                         c->sync->is_expired(c->sync, &job->fence);
1411                 if (!completed)
1412                         break;
1413
1414                 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1415                                 job->num_mapped_buffers);
1416
1417                 /* job is done. release its reference to vm */
1418                 gk20a_vm_put(vm);
1419
1420                 list_del_init(&job->list);
1421                 kfree(job);
1422                 gk20a_idle(c->g->dev);
1423         }
1424         mutex_unlock(&c->jobs_lock);
1425 }
1426
1427 static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1428                                 struct nvhost_gpfifo *gpfifo,
1429                                 u32 num_entries,
1430                                 struct nvhost_fence *fence,
1431                                 u32 flags)
1432 {
1433         struct gk20a *g = c->g;
1434         struct device *d = dev_from_gk20a(g);
1435         int err = 0;
1436         int i;
1437         struct priv_cmd_entry *wait_cmd = NULL;
1438         struct priv_cmd_entry *incr_cmd = NULL;
1439         /* we might need two extra gpfifo entries - one for pre fence
1440          * and one for post fence. */
1441         const int extra_entries = 2;
1442         bool need_wfi = !(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1443
1444         if (c->has_timedout)
1445                 return -ETIMEDOUT;
1446
1447         if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1448                       NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1449             !fence)
1450                 return -EINVAL;
1451
1452         if (!c->sync) {
1453                 c->sync = gk20a_channel_sync_create(c);
1454                 if (!c->sync)
1455                         return -ENOMEM;
1456         }
1457
1458 #ifdef CONFIG_DEBUG_FS
1459         /* update debug settings */
1460         if (g->ops.ltc.sync_debugfs)
1461                 g->ops.ltc.sync_debugfs(g);
1462 #endif
1463
1464         gk20a_dbg_info("channel %d", c->hw_chid);
1465
1466         /* gk20a_channel_update releases this ref. */
1467         err = gk20a_busy(g->dev);
1468         if (err) {
1469                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1470                 return err;
1471         }
1472
1473         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1474                                           c->hw_chid,
1475                                           num_entries,
1476                                           flags,
1477                                           fence->syncpt_id, fence->value);
1478         check_gp_put(g, c);
1479         update_gp_get(g, c);
1480
1481         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1482                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1483
1484         /* Invalidate tlb if it's dirty...                                   */
1485         /* TBD: this should be done in the cmd stream, not with PRIs.        */
1486         /* We don't know what context is currently running...                */
1487         /* Note also: there can be more than one context associated with the */
1488         /* address space (vm).   */
1489         gk20a_mm_tlb_invalidate(c->vm);
1490
1491         /* Make sure we have enough space for gpfifo entries. If not,
1492          * wait for signals from completed submits */
1493         if (gp_free_count(c) < num_entries + extra_entries) {
1494                 err = wait_event_interruptible(c->submit_wq,
1495                         get_gp_free_count(c) >= num_entries + extra_entries ||
1496                         c->has_timedout);
1497         }
1498
1499         if (c->has_timedout) {
1500                 err = -ETIMEDOUT;
1501                 goto clean_up;
1502         }
1503
1504         if (err) {
1505                 gk20a_err(d, "not enough gpfifo space");
1506                 err = -EAGAIN;
1507                 goto clean_up;
1508         }
1509
1510         /*
1511          * optionally insert syncpt wait in the beginning of gpfifo submission
1512          * when user requested and the wait hasn't expired.
1513          * validate that the id makes sense, elide if not
1514          * the only reason this isn't being unceremoniously killed is to
1515          * keep running some tests which trigger this condition
1516          */
1517         if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1518                 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
1519                         err = c->sync->wait_fd(c->sync, fence->syncpt_id,
1520                                         &wait_cmd);
1521                 else
1522                         err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
1523                                         fence->value, &wait_cmd);
1524         }
1525         if (err)
1526                 goto clean_up;
1527
1528
1529         /* always insert syncpt increment at end of gpfifo submission
1530            to keep track of method completion for idle railgating */
1531         if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
1532                         flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
1533                 err = c->sync->incr_user_fd(c->sync, &incr_cmd,
1534                                             &c->last_submit_fence,
1535                                             need_wfi,
1536                                             &fence->syncpt_id);
1537         else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1538                 err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
1539                                                 &c->last_submit_fence,
1540                                                 need_wfi,
1541                                                 &fence->syncpt_id,
1542                                                 &fence->value);
1543         else
1544                 err = c->sync->incr(c->sync, &incr_cmd,
1545                                     &c->last_submit_fence);
1546         if (err)
1547                 goto clean_up;
1548
1549         if (wait_cmd) {
1550                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1551                         u64_lo32(wait_cmd->gva);
1552                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1553                         u64_hi32(wait_cmd->gva) |
1554                         pbdma_gp_entry1_length_f(wait_cmd->size);
1555                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1556                         0, wait_cmd->size, 0, wait_cmd->ptr);
1557
1558                 c->gpfifo.put = (c->gpfifo.put + 1) &
1559                         (c->gpfifo.entry_num - 1);
1560
1561                 /* save gp_put */
1562                 wait_cmd->gp_put = c->gpfifo.put;
1563         }
1564
1565         for (i = 0; i < num_entries; i++) {
1566                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1567                         gpfifo[i].entry0; /* cmd buf va low 32 */
1568                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1569                         gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1570                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1571                 c->gpfifo.put = (c->gpfifo.put + 1) &
1572                         (c->gpfifo.entry_num - 1);
1573         }
1574
1575         if (incr_cmd) {
1576                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1577                         u64_lo32(incr_cmd->gva);
1578                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1579                         u64_hi32(incr_cmd->gva) |
1580                         pbdma_gp_entry1_length_f(incr_cmd->size);
1581                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1582                         0, incr_cmd->size, 0, incr_cmd->ptr);
1583
1584                 c->gpfifo.put = (c->gpfifo.put + 1) &
1585                         (c->gpfifo.entry_num - 1);
1586
1587                 /* save gp_put */
1588                 incr_cmd->gp_put = c->gpfifo.put;
1589         }
1590
1591         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1592                                              c->hw_chid,
1593                                              num_entries,
1594                                              flags,
1595                                              fence->syncpt_id, fence->value);
1596
1597         /* TODO! Check for errors... */
1598         gk20a_channel_add_job(c, &c->last_submit_fence);
1599
1600         c->cmds_pending = true;
1601         gk20a_bar1_writel(g,
1602                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1603                 c->gpfifo.put);
1604
1605         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1606                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1607
1608         gk20a_dbg_fn("done");
1609         return err;
1610
1611 clean_up:
1612         gk20a_err(d, "fail");
1613         free_priv_cmdbuf(c, wait_cmd);
1614         free_priv_cmdbuf(c, incr_cmd);
1615         gk20a_idle(g->dev);
1616         return err;
1617 }
1618
1619 void gk20a_remove_channel_support(struct channel_gk20a *c)
1620 {
1621
1622 }
1623
1624 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1625 {
1626         struct channel_gk20a *c = g->fifo.channel+chid;
1627         c->g = g;
1628         c->in_use = false;
1629         c->hw_chid = chid;
1630         c->bound = false;
1631         c->remove_support = gk20a_remove_channel_support;
1632         mutex_init(&c->jobs_lock);
1633         INIT_LIST_HEAD(&c->jobs);
1634 #if defined(CONFIG_GK20A_CYCLE_STATS)
1635         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1636 #endif
1637         INIT_LIST_HEAD(&c->dbg_s_list);
1638         mutex_init(&c->dbg_s_lock);
1639
1640         return 0;
1641 }
1642
1643 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1644 {
1645         int err = 0;
1646
1647         if (!ch->cmds_pending)
1648                 return 0;
1649
1650         /* Do not wait for a timedout channel */
1651         if (ch->has_timedout)
1652                 return -ETIMEDOUT;
1653
1654         if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
1655                 gk20a_dbg_fn("issuing wfi, incr to finish the channel");
1656                 err = gk20a_channel_submit_wfi(ch);
1657         }
1658         if (err)
1659                 return err;
1660
1661         BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
1662
1663         gk20a_dbg_fn("waiting for channel to finish thresh:%d",
1664                       ch->last_submit_fence.thresh);
1665
1666         err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, timeout);
1667         if (WARN_ON(err))
1668                 dev_warn(dev_from_gk20a(ch->g),
1669                          "timed out waiting for gk20a channel to finish");
1670         else
1671                 ch->cmds_pending = false;
1672
1673         return err;
1674 }
1675
1676 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
1677                                         ulong id, u32 offset,
1678                                         u32 payload, long timeout)
1679 {
1680         struct platform_device *pdev = ch->g->dev;
1681         struct dma_buf *dmabuf;
1682         void *data;
1683         u32 *semaphore;
1684         int ret = 0;
1685         long remain;
1686
1687         /* do not wait if channel has timed out */
1688         if (ch->has_timedout)
1689                 return -ETIMEDOUT;
1690
1691         dmabuf = dma_buf_get(id);
1692         if (IS_ERR(dmabuf)) {
1693                 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
1694                            id);
1695                 return -EINVAL;
1696         }
1697
1698         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
1699         if (!data) {
1700                 gk20a_err(&pdev->dev, "failed to map notifier memory");
1701                 ret = -EINVAL;
1702                 goto cleanup_put;
1703         }
1704
1705         semaphore = data + (offset & ~PAGE_MASK);
1706
1707         remain = wait_event_interruptible_timeout(
1708                         ch->semaphore_wq,
1709                         *semaphore == payload || ch->has_timedout,
1710                         timeout);
1711
1712         if (remain == 0 && *semaphore != payload)
1713                 ret = -ETIMEDOUT;
1714         else if (remain < 0)
1715                 ret = remain;
1716
1717         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
1718 cleanup_put:
1719         dma_buf_put(dmabuf);
1720         return ret;
1721 }
1722
1723 static int gk20a_channel_wait(struct channel_gk20a *ch,
1724                               struct nvhost_wait_args *args)
1725 {
1726         struct device *d = dev_from_gk20a(ch->g);
1727         struct dma_buf *dmabuf;
1728         struct notification *notif;
1729         struct timespec tv;
1730         u64 jiffies;
1731         ulong id;
1732         u32 offset;
1733         unsigned long timeout;
1734         int remain, ret = 0;
1735         u64 end;
1736
1737         gk20a_dbg_fn("");
1738
1739         if (ch->has_timedout)
1740                 return -ETIMEDOUT;
1741
1742         if (args->timeout == NVHOST_NO_TIMEOUT)
1743                 timeout = MAX_SCHEDULE_TIMEOUT;
1744         else
1745                 timeout = (u32)msecs_to_jiffies(args->timeout);
1746
1747         switch (args->type) {
1748         case NVHOST_WAIT_TYPE_NOTIFIER:
1749                 id = args->condition.notifier.nvmap_handle;
1750                 offset = args->condition.notifier.offset;
1751                 end = offset + sizeof(struct notification);
1752
1753                 dmabuf = dma_buf_get(id);
1754                 if (IS_ERR(dmabuf)) {
1755                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
1756                                    id);
1757                         return -EINVAL;
1758                 }
1759
1760                 if (end > dmabuf->size || end < sizeof(struct notification)) {
1761                         dma_buf_put(dmabuf);
1762                         gk20a_err(d, "invalid notifier offset\n");
1763                         return -EINVAL;
1764                 }
1765
1766                 notif = dma_buf_vmap(dmabuf);
1767                 if (!notif) {
1768                         gk20a_err(d, "failed to map notifier memory");
1769                         return -ENOMEM;
1770                 }
1771
1772                 notif = (struct notification *)((uintptr_t)notif + offset);
1773
1774                 /* user should set status pending before
1775                  * calling this ioctl */
1776                 remain = wait_event_interruptible_timeout(
1777                                 ch->notifier_wq,
1778                                 notif->status == 0 || ch->has_timedout,
1779                                 timeout);
1780
1781                 if (remain == 0 && notif->status != 0) {
1782                         ret = -ETIMEDOUT;
1783                         goto notif_clean_up;
1784                 } else if (remain < 0) {
1785                         ret = -EINTR;
1786                         goto notif_clean_up;
1787                 }
1788
1789                 /* TBD: fill in correct information */
1790                 jiffies = get_jiffies_64();
1791                 jiffies_to_timespec(jiffies, &tv);
1792                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1793                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1794                 notif->info32 = 0xDEADBEEF; /* should be object name */
1795                 notif->info16 = ch->hw_chid; /* should be method offset */
1796
1797 notif_clean_up:
1798                 dma_buf_vunmap(dmabuf, notif);
1799                 return ret;
1800
1801         case NVHOST_WAIT_TYPE_SEMAPHORE:
1802                 ret = gk20a_channel_wait_semaphore(ch,
1803                                 args->condition.semaphore.nvmap_handle,
1804                                 args->condition.semaphore.offset,
1805                                 args->condition.semaphore.payload,
1806                                 timeout);
1807
1808                 break;
1809
1810         default:
1811                 ret = -EINVAL;
1812                 break;
1813         }
1814
1815         return ret;
1816 }
1817
1818 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
1819                 u32 priority)
1820 {
1821         u32 timeslice_timeout;
1822         /* set priority of graphics channel */
1823         switch (priority) {
1824         case NVHOST_PRIORITY_LOW:
1825                 /* 64 << 3 = 512us */
1826                 timeslice_timeout = 64;
1827                 break;
1828         case NVHOST_PRIORITY_MEDIUM:
1829                 /* 128 << 3 = 1024us */
1830                 timeslice_timeout = 128;
1831                 break;
1832         case NVHOST_PRIORITY_HIGH:
1833                 /* 255 << 3 = 2048us */
1834                 timeslice_timeout = 255;
1835                 break;
1836         default:
1837                 pr_err("Unsupported priority");
1838                 return -EINVAL;
1839         }
1840         channel_gk20a_set_schedule_params(ch,
1841                         timeslice_timeout);
1842         return 0;
1843 }
1844
1845 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
1846                             struct nvhost_zcull_bind_args *args)
1847 {
1848         struct gk20a *g = ch->g;
1849         struct gr_gk20a *gr = &g->gr;
1850
1851         gk20a_dbg_fn("");
1852
1853         return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
1854                                 args->gpu_va, args->mode);
1855 }
1856
1857 /* in this context the "channel" is the host1x channel which
1858  * maps to *all* gk20a channels */
1859 int gk20a_channel_suspend(struct gk20a *g)
1860 {
1861         struct fifo_gk20a *f = &g->fifo;
1862         u32 chid;
1863         bool channels_in_use = false;
1864         int err;
1865
1866         gk20a_dbg_fn("");
1867
1868         /* wait for engine idle */
1869         err = gk20a_fifo_wait_engine_idle(g);
1870         if (err)
1871                 return err;
1872
1873         for (chid = 0; chid < f->num_channels; chid++) {
1874                 if (f->channel[chid].in_use) {
1875
1876                         gk20a_dbg_info("suspend channel %d", chid);
1877                         /* disable channel */
1878                         gk20a_writel(g, ccsr_channel_r(chid),
1879                                 gk20a_readl(g, ccsr_channel_r(chid)) |
1880                                 ccsr_channel_enable_clr_true_f());
1881                         /* preempt the channel */
1882                         gk20a_fifo_preempt_channel(g, chid);
1883
1884                         channels_in_use = true;
1885                 }
1886         }
1887
1888         if (channels_in_use) {
1889                 gk20a_fifo_update_runlist(g, 0, ~0, false, true);
1890
1891                 for (chid = 0; chid < f->num_channels; chid++) {
1892                         if (f->channel[chid].in_use)
1893                                 channel_gk20a_unbind(&f->channel[chid]);
1894                 }
1895         }
1896
1897         gk20a_dbg_fn("done");
1898         return 0;
1899 }
1900
1901 /* in this context the "channel" is the host1x channel which
1902  * maps to *all* gk20a channels */
1903 int gk20a_channel_resume(struct gk20a *g)
1904 {
1905         struct fifo_gk20a *f = &g->fifo;
1906         u32 chid;
1907         bool channels_in_use = false;
1908
1909         gk20a_dbg_fn("");
1910
1911         for (chid = 0; chid < f->num_channels; chid++) {
1912                 if (f->channel[chid].in_use) {
1913                         gk20a_dbg_info("resume channel %d", chid);
1914                         g->ops.fifo.bind_channel(&f->channel[chid]);
1915                         channels_in_use = true;
1916                 }
1917         }
1918
1919         if (channels_in_use)
1920                 gk20a_fifo_update_runlist(g, 0, ~0, true, true);
1921
1922         gk20a_dbg_fn("done");
1923         return 0;
1924 }
1925
1926 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
1927 {
1928         struct fifo_gk20a *f = &g->fifo;
1929         u32 chid;
1930
1931         gk20a_dbg_fn("");
1932
1933         for (chid = 0; chid < f->num_channels; chid++) {
1934                 struct channel_gk20a *c = g->fifo.channel+chid;
1935                 if (c->in_use)
1936                         wake_up_interruptible_all(&c->semaphore_wq);
1937         }
1938 }
1939
1940 static int gk20a_ioctl_channel_submit_gpfifo(
1941         struct channel_gk20a *ch,
1942         struct nvhost_submit_gpfifo_args *args)
1943 {
1944         void *gpfifo;
1945         u32 size;
1946         int ret = 0;
1947
1948         gk20a_dbg_fn("");
1949
1950         if (ch->has_timedout)
1951                 return -ETIMEDOUT;
1952
1953         size = args->num_entries * sizeof(struct nvhost_gpfifo);
1954
1955         gpfifo = kzalloc(size, GFP_KERNEL);
1956         if (!gpfifo)
1957                 return -ENOMEM;
1958
1959         if (copy_from_user(gpfifo,
1960                            (void __user *)(uintptr_t)args->gpfifo, size)) {
1961                 ret = -EINVAL;
1962                 goto clean_up;
1963         }
1964
1965         ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
1966                                         &args->fence, args->flags);
1967
1968 clean_up:
1969         kfree(gpfifo);
1970         return ret;
1971 }
1972
1973 void gk20a_init_channel(struct gpu_ops *gops)
1974 {
1975         gops->fifo.bind_channel = channel_gk20a_bind;
1976         gops->fifo.disable_channel = channel_gk20a_disable;
1977         gops->fifo.enable_channel = channel_gk20a_enable;
1978 }
1979
1980 long gk20a_channel_ioctl(struct file *filp,
1981         unsigned int cmd, unsigned long arg)
1982 {
1983         struct channel_gk20a *ch = filp->private_data;
1984         struct platform_device *dev = ch->g->dev;
1985         u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE];
1986         int err = 0;
1987
1988         if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) ||
1989                 (_IOC_NR(cmd) == 0) ||
1990                 (_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) ||
1991                 (_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE))
1992                 return -EFAULT;
1993
1994         if (_IOC_DIR(cmd) & _IOC_WRITE) {
1995                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
1996                         return -EFAULT;
1997         }
1998
1999         switch (cmd) {
2000         case NVHOST_IOCTL_CHANNEL_OPEN:
2001         {
2002                 int fd;
2003                 struct file *file;
2004                 char *name;
2005
2006                 err = get_unused_fd_flags(O_RDWR);
2007                 if (err < 0)
2008                         break;
2009                 fd = err;
2010
2011                 name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
2012                                 dev_name(&dev->dev), fd);
2013                 if (!name) {
2014                         err = -ENOMEM;
2015                         put_unused_fd(fd);
2016                         break;
2017                 }
2018
2019                 file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
2020                 kfree(name);
2021                 if (IS_ERR(file)) {
2022                         err = PTR_ERR(file);
2023                         put_unused_fd(fd);
2024                         break;
2025                 }
2026                 fd_install(fd, file);
2027
2028                 err = __gk20a_channel_open(ch->g, file);
2029                 if (err) {
2030                         put_unused_fd(fd);
2031                         fput(file);
2032                         break;
2033                 }
2034
2035                 ((struct nvhost_channel_open_args *)buf)->channel_fd = fd;
2036                 break;
2037         }
2038         case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD:
2039                 break;
2040         case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2041                 err = gk20a_busy(dev);
2042                 if (err) {
2043                         dev_err(&dev->dev,
2044                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2045                                 __func__, cmd);
2046                         return err;
2047                 }
2048                 err = gk20a_alloc_obj_ctx(ch,
2049                                 (struct nvhost_alloc_obj_ctx_args *)buf);
2050                 gk20a_idle(dev);
2051                 break;
2052         case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX:
2053                 err = gk20a_busy(dev);
2054                 if (err) {
2055                         dev_err(&dev->dev,
2056                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2057                                 __func__, cmd);
2058                         return err;
2059                 }
2060                 err = gk20a_free_obj_ctx(ch,
2061                                 (struct nvhost_free_obj_ctx_args *)buf);
2062                 gk20a_idle(dev);
2063                 break;
2064         case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO:
2065                 err = gk20a_busy(dev);
2066                 if (err) {
2067                         dev_err(&dev->dev,
2068                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2069                                 __func__, cmd);
2070                         return err;
2071                 }
2072                 err = gk20a_alloc_channel_gpfifo(ch,
2073                                 (struct nvhost_alloc_gpfifo_args *)buf);
2074                 gk20a_idle(dev);
2075                 break;
2076         case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2077                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2078                                 (struct nvhost_submit_gpfifo_args *)buf);
2079                 break;
2080         case NVHOST_IOCTL_CHANNEL_WAIT:
2081                 err = gk20a_busy(dev);
2082                 if (err) {
2083                         dev_err(&dev->dev,
2084                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2085                                 __func__, cmd);
2086                         return err;
2087                 }
2088                 err = gk20a_channel_wait(ch,
2089                                 (struct nvhost_wait_args *)buf);
2090                 gk20a_idle(dev);
2091                 break;
2092         case NVHOST_IOCTL_CHANNEL_ZCULL_BIND:
2093                 err = gk20a_busy(dev);
2094                 if (err) {
2095                         dev_err(&dev->dev,
2096                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2097                                 __func__, cmd);
2098                         return err;
2099                 }
2100                 err = gk20a_channel_zcull_bind(ch,
2101                                 (struct nvhost_zcull_bind_args *)buf);
2102                 gk20a_idle(dev);
2103                 break;
2104         case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2105                 err = gk20a_busy(dev);
2106                 if (err) {
2107                         dev_err(&dev->dev,
2108                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2109                                 __func__, cmd);
2110                         return err;
2111                 }
2112                 err = gk20a_init_error_notifier(ch,
2113                                 (struct nvhost_set_error_notifier *)buf);
2114                 gk20a_idle(dev);
2115                 break;
2116 #ifdef CONFIG_GK20A_CYCLE_STATS
2117         case NVHOST_IOCTL_CHANNEL_CYCLE_STATS:
2118                 err = gk20a_busy(dev);
2119                 if (err) {
2120                         dev_err(&dev->dev,
2121                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2122                                 __func__, cmd);
2123                         return err;
2124                 }
2125                 err = gk20a_channel_cycle_stats(ch,
2126                                 (struct nvhost_cycle_stats_args *)buf);
2127                 gk20a_idle(dev);
2128                 break;
2129 #endif
2130         case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
2131         {
2132                 u32 timeout =
2133                         (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2134                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2135                            timeout, ch->hw_chid);
2136                 ch->timeout_ms_max = timeout;
2137                 break;
2138         }
2139         case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2140         {
2141                 u32 timeout =
2142                         (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2143                 bool timeout_debug_dump = !((u32)
2144                         ((struct nvhost_set_timeout_ex_args *)buf)->flags &
2145                         (1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP));
2146                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2147                            timeout, ch->hw_chid);
2148                 ch->timeout_ms_max = timeout;
2149                 ch->timeout_debug_dump = timeout_debug_dump;
2150                 break;
2151         }
2152         case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
2153                 ((struct nvhost_get_param_args *)buf)->value =
2154                         ch->has_timedout;
2155                 break;
2156         case NVHOST_IOCTL_CHANNEL_SET_PRIORITY:
2157                 err = gk20a_busy(dev);
2158                 if (err) {
2159                         dev_err(&dev->dev,
2160                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2161                                 __func__, cmd);
2162                         return err;
2163                 }
2164                 gk20a_channel_set_priority(ch,
2165                         ((struct nvhost_set_priority_args *)buf)->priority);
2166                 gk20a_idle(dev);
2167                 break;
2168         default:
2169                 dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2170                 err = -ENOTTY;
2171                 break;
2172         }
2173
2174         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2175                 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2176
2177         return err;
2178 }