/*
 * drivers/video/tegra/host/gk20a/channel_gk20a.c
 *
 * GK20A Graphics channel
 *
 * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/highmem.h> /* needed for nvmap.h */
#include <trace/events/nvhost.h>
#include <linux/scatterlist.h>

#include "nvhost_as.h"
#include "nvhost_sync.h"

#include "dbg_gpu_gk20a.h"

#include "hw_ram_gk20a.h"
#include "hw_fifo_gk20a.h"
#include "hw_pbdma_gk20a.h"
#include "hw_ccsr_gk20a.h"
#include "hw_ltc_gk20a.h"
#include "chip_support.h"

#define NVMAP_HANDLE_PARAM_SIZE 1
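/* NVMAP_HANDLE_PARAM_SIZE is the nvmap parameter id handed to
 * nvhost_memmgr_get_param() (see gk20a_channel_cycle_stats() below) to query
 * a handle's size in bytes. */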

static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);

static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
			     struct priv_cmd_entry **entry);
static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e);
static void recycle_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_commit_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
			u64 gpfifo_base, u32 gpfifo_entries);

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);

static int channel_gk20a_alloc_inst(struct gk20a *g,
			struct channel_gk20a *ch);
static void channel_gk20a_free_inst(struct gk20a *g,
			struct channel_gk20a *ch);

static int channel_gk20a_update_runlist(struct channel_gk20a *c,
					bool add);

static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
{
	struct channel_gk20a *ch = NULL;
	int chid;

	mutex_lock(&f->ch_inuse_mutex);
	for (chid = 0; chid < f->num_channels; chid++) {
		if (!f->channel[chid].in_use) {
			f->channel[chid].in_use = true;
			ch = &f->channel[chid];
			break;
		}
	}
	mutex_unlock(&f->ch_inuse_mutex);

	return ch;
}

static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
{
	mutex_lock(&f->ch_inuse_mutex);
	f->channel[c->hw_chid].in_use = false;
	mutex_unlock(&f->ch_inuse_mutex);
}

int channel_gk20a_commit_va(struct channel_gk20a *c)
{
	u64 addr;
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
	addr_lo = u64_lo32(addr >> 12);
	addr_hi = u64_hi32(addr);

	nvhost_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
		(u64)addr, addr_lo, addr_hi);
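
	/* The page directory base is split across two RAMIN words: the low
	 * word carries the 4 KiB-aligned base (hence the >> 12 above) plus
	 * the aperture/volatile fields, the high word the upper address
	 * bits. */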
	mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
		ram_in_page_dir_base_target_vid_mem_f() |
		ram_in_page_dir_base_vol_true_f() |
		ram_in_page_dir_base_lo_f(addr_lo));

	mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
		ram_in_page_dir_base_hi_f(addr_hi));

	mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
		u64_lo32(c->vm->va_limit) | 0xFFF);

	mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
		ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static int channel_gk20a_commit_userd(struct channel_gk20a *c)
{
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
	addr_hi = u64_hi32(c->userd_iova);

	nvhost_dbg_info("channel %d : set ramfc userd 0x%16llx",
		c->hw_chid, c->userd_iova);

	mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_addr_f(addr_lo));

	mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_hi_addr_f(addr_hi));

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
				u32 timeslice_timeout)
{
	void *inst_ptr;
	int shift = 3;
	int value = timeslice_timeout;

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	/* disable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_clr_true_f());

	/* preempt the channel */
	WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));

	/* flush GPU cache */
	gk20a_mm_l2_flush(c->g, true);

	/* value field is 8 bits long */
	while (value >= 1 << 8) {
		value >>= 1;
		shift++;
	}

	/* timeslice register is only 18 bits long */
	if ((value << shift) >= 1 << 18) {
		pr_err("Requested timeslice value is clamped to 18 bits\n");
		value = 255;
		shift = 10;
	}

	/* set new timeslice */
	mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
		value | (shift << 12) |
		fifo_eng_timeslice_enable_true_f());
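
	/* Timeslice encoding (as written above): the 8-bit mantissa sits in
	 * the low bits and the timescale shift at bit 12; the effective
	 * timeslice is roughly value << shift. E.g. the priority table in
	 * gk20a_channel_set_priority() below passes 64/128/255 with the
	 * initial shift of 3, giving 64 << 3 = 512us and so on. */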

	/* enable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_set_true_f());

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
			u64 gpfifo_base, u32 gpfifo_entries)
{
	void *inst_ptr;

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	memset(inst_ptr, 0, ram_fc_size_val_v());

	mem_wr32(inst_ptr, ram_fc_gp_base_w(),
		pbdma_gp_base_offset_f(
		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));

	mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
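
	/* limit2 holds log2 of the gpfifo entry count, so the count must be
	 * a power of two; gk20a_alloc_channel_gpfifo() guarantees this with
	 * roundup_pow_of_two(), which also lets the put pointer wrap with a
	 * simple & (entry_num - 1). */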
	mem_wr32(inst_ptr, ram_fc_signature_w(),
		pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());

	mem_wr32(inst_ptr, ram_fc_formats_w(),
		pbdma_formats_gp_fermi0_f() |
		pbdma_formats_pb_fermi1_f() |
		pbdma_formats_mp_fermi0_f());

	mem_wr32(inst_ptr, ram_fc_pb_header_w(),
		pbdma_pb_header_priv_user_f() |
		pbdma_pb_header_method_zero_f() |
		pbdma_pb_header_subchannel_zero_f() |
		pbdma_pb_header_level_main_f() |
		pbdma_pb_header_first_true_f() |
		pbdma_pb_header_type_inc_f());

	mem_wr32(inst_ptr, ram_fc_subdevice_w(),
		pbdma_subdevice_id_f(1) |
		pbdma_subdevice_status_active_f() |
		pbdma_subdevice_channel_dma_enable_f());

	mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());

	mem_wr32(inst_ptr, ram_fc_acquire_w(),
		pbdma_acquire_retry_man_2_f() |
		pbdma_acquire_retry_exp_2_f() |
		pbdma_acquire_timeout_exp_max_f() |
		pbdma_acquire_timeout_man_max_f() |
		pbdma_acquire_timeout_en_disable_f());

	mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
		fifo_eng_timeslice_timeout_128_f() |
		fifo_eng_timeslice_timescale_3_f() |
		fifo_eng_timeslice_enable_true_f());

	mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
		fifo_pb_timeslice_timeout_16_f() |
		fifo_pb_timeslice_timescale_0_f() |
		fifo_pb_timeslice_enable_true_f());

	mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static int channel_gk20a_setup_userd(struct channel_gk20a *c)
{
	BUG_ON(!c->userd_cpu_va);

	mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *engine_info =
		f->engine_info + ENGINE_GR_GK20A;

	u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
		>> ram_in_base_shift_v();
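
	/* ccsr_channel_inst_ptr takes the instance block base in units of
	 * 1 << ram_in_base_shift_v() bytes (4 KiB on gk20a), so only the
	 * aligned upper address bits are programmed below. */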
	nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
		ch_gk20a->hw_chid, inst_ptr);

	ch_gk20a->bound = true;

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_runlist_f(~0)) |
		 ccsr_channel_runlist_f(engine_info->runlist_id));

	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
		ccsr_channel_inst_ptr_f(inst_ptr) |
		ccsr_channel_inst_target_vid_mem_f() |
		ccsr_channel_inst_bind_true_f());

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_enable_set_f(~0)) |
		 ccsr_channel_enable_set_true_f());
}

static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);

	if (ch_gk20a->bound)
		gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
			ccsr_channel_inst_ptr_f(0) |
			ccsr_channel_inst_bind_false_f());

	ch_gk20a->bound = false;
}

static int channel_gk20a_alloc_inst(struct gk20a *g,
			struct channel_gk20a *ch)
{
	struct device *d = dev_from_gk20a(g);

	ch->inst_block.size = ram_in_alloc_size_v();
	ch->inst_block.cpuva = dma_alloc_coherent(d,
					ch->inst_block.size,
					&ch->inst_block.iova,
					GFP_KERNEL);
	if (!ch->inst_block.cpuva) {
		nvhost_err(d, "%s: memory allocation failed\n", __func__);
		goto clean_up;
	}

	ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
					ch->inst_block.iova);
	if (!ch->inst_block.cpu_pa) {
		nvhost_err(d, "%s: failed to get physical address\n", __func__);
		goto clean_up;
	}

	nvhost_dbg_info("channel %d inst block physical addr: 0x%16llx",
		ch->hw_chid, ch->inst_block.cpu_pa);

	nvhost_dbg_fn("done");
	return 0;

clean_up:
	nvhost_err(d, "fail");
	channel_gk20a_free_inst(g, ch);
	return -ENOMEM;
}

static void channel_gk20a_free_inst(struct gk20a *g,
			struct channel_gk20a *ch)
{
	struct device *d = dev_from_gk20a(g);

	if (ch->inst_block.cpuva)
		dma_free_coherent(d, ch->inst_block.size,
			ch->inst_block.cpuva, ch->inst_block.iova);
	ch->inst_block.cpuva = NULL;
	ch->inst_block.iova = 0;
	memset(&ch->inst_block, 0, sizeof(struct inst_desc));
}

static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
{
	return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
}

void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
{
	struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
	struct nvhost_master *host = host_from_gk20a_channel(ch);

	/* ensure no fences are pending */
	nvhost_syncpt_set_min_eq_max(&host->syncpt,
			ch->hw_chid + pdata->syncpt_base);

	/* disable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g,
			ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_clr_true_f());
}

static int gk20a_wait_channel_idle(struct channel_gk20a *ch)
{
	bool channel_idle = false;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));

	do {
		mutex_lock(&ch->jobs_lock);
		channel_idle = list_empty(&ch->jobs);
		mutex_unlock(&ch->jobs_lock);
		if (channel_idle)
			break;

		usleep_range(1000, 3000);
	} while (time_before(jiffies, end_jiffies));

	if (!channel_idle) {
		nvhost_err(dev_from_gk20a(ch->g), "channel jobs not freed");
		return -EBUSY;
	}

	return 0;
}

void gk20a_disable_channel(struct channel_gk20a *ch,
			   bool finish,
			   unsigned long finish_timeout)
{
	if (finish) {
		int err = gk20a_channel_finish(ch, finish_timeout);
		WARN_ON(err);
	}

	/* disable the channel from hw and increment syncpoints */
	gk20a_disable_channel_no_update(ch);

	gk20a_wait_channel_idle(ch);

	/* preempt the channel */
	gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);

	/* remove channel from runlist */
	channel_gk20a_update_runlist(ch, false);
}

#if defined(CONFIG_GK20A_CYCLE_STATS)

static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
{
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
	/* disable existing cyclestats buffer */
	mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
	if (ch->cyclestate.cyclestate_buffer_handler) {
		nvhost_memmgr_munmap(ch->cyclestate.cyclestate_buffer_handler,
				ch->cyclestate.cyclestate_buffer);
		nvhost_memmgr_put(memmgr,
				ch->cyclestate.cyclestate_buffer_handler);
		ch->cyclestate.cyclestate_buffer_handler = NULL;
		ch->cyclestate.cyclestate_buffer = NULL;
		ch->cyclestate.cyclestate_buffer_size = 0;
	}
	mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
}

int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
			struct nvhost_cycle_stats_args *args)
{
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
	struct mem_handle *handle_ref;
	void *virtual_address;
	u64 cyclestate_buffer_size;
	struct platform_device *dev = ch->ch->dev;

	if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {

		/* set up new cyclestats buffer */
		handle_ref = nvhost_memmgr_get(memmgr,
				args->nvmap_handle, dev);
		if (IS_ERR(handle_ref))
			return PTR_ERR(handle_ref);
		virtual_address = nvhost_memmgr_mmap(handle_ref);
		if (!virtual_address)
			return -ENOMEM;

		nvhost_memmgr_get_param(memmgr, handle_ref,
				NVMAP_HANDLE_PARAM_SIZE,
				&cyclestate_buffer_size);

		ch->cyclestate.cyclestate_buffer_handler = handle_ref;
		ch->cyclestate.cyclestate_buffer = virtual_address;
		ch->cyclestate.cyclestate_buffer_size = cyclestate_buffer_size;
		return 0;

	} else if (!args->nvmap_handle &&
			ch->cyclestate.cyclestate_buffer_handler) {
		gk20a_free_cycle_stats_buffer(ch);
		return 0;

	} else if (!args->nvmap_handle &&
			!ch->cyclestate.cyclestate_buffer_handler) {
		/* no request from GL */
		return 0;

	} else {
		pr_err("channel already has cyclestats buffer\n");
		return -EINVAL;
	}
}
#endif

int gk20a_init_error_notifier(struct nvhost_hwctx *ctx,
		u32 memhandle, u64 offset) {
	struct channel_gk20a *ch = ctx->priv;
	struct platform_device *dev = ch->ch->dev;
	void *va;

	struct mem_mgr *memmgr;
	struct mem_handle *handle_ref;

	if (!memhandle) {
		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
		return -EINVAL;
	}

	memmgr = gk20a_channel_mem_mgr(ch);
	handle_ref = nvhost_memmgr_get(memmgr, memhandle, dev);

	if (ctx->error_notifier_ref)
		gk20a_free_error_notifiers(ctx);

	if (IS_ERR(handle_ref)) {
		pr_err("Invalid handle: %u\n", memhandle);
		return -EINVAL;
	}

	va = nvhost_memmgr_mmap(handle_ref);
	if (!va) {
		nvhost_memmgr_put(memmgr, handle_ref);
		pr_err("Cannot map notifier handle\n");
		return -ENOMEM;
	}

	/* set hwctx notifiers pointer */
	ctx->error_notifier_ref = handle_ref;
	ctx->error_notifier = va + offset;
	ctx->error_notifier_va = va;
	return 0;
}

void gk20a_set_error_notifier(struct nvhost_hwctx *ctx, __u32 error)
{
	if (ctx->error_notifier_ref) {
		struct timespec time_data;
		u64 nsec;
		getnstimeofday(&time_data);
		nsec = ((u64)time_data.tv_sec) * 1000000000u +
				(u64)time_data.tv_nsec;
		ctx->error_notifier->time_stamp.nanoseconds[0] =
				(u32)nsec;
		ctx->error_notifier->time_stamp.nanoseconds[1] =
				(u32)(nsec >> 32);
		ctx->error_notifier->info32 = error;
		ctx->error_notifier->status = 0xffff;
		nvhost_err(&ctx->channel->dev->dev,
			"error notifier set to %d\n", error);
	}
}

void gk20a_free_error_notifiers(struct nvhost_hwctx *ctx)
{
	if (ctx->error_notifier_ref) {
		struct channel_gk20a *ch = ctx->priv;
		struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
		nvhost_memmgr_munmap(ctx->error_notifier_ref,
				ctx->error_notifier_va);
		nvhost_memmgr_put(memmgr, ctx->error_notifier_ref);
		ctx->error_notifier_ref = 0;
	}
}

void gk20a_free_channel(struct nvhost_hwctx *ctx, bool finish)
{
	struct channel_gk20a *ch = ctx->priv;
	struct gk20a *g = ch->g;
	struct device *d = dev_from_gk20a(g);
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	struct vm_gk20a *ch_vm = ch->vm;
	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
	struct dbg_session_gk20a *dbg_s;

	/* if engine reset was deferred, perform it now */
	mutex_lock(&f->deferred_reset_mutex);
	if (g->fifo.deferred_reset_pending) {
		nvhost_dbg(dbg_intr | dbg_gpu_dbg, "engine reset was"
			" deferred, running now");
		fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
		g->fifo.mmu_fault_engines = 0;
		g->fifo.deferred_reset_pending = false;
	}
	mutex_unlock(&f->deferred_reset_mutex);

	if (!ch->bound)
		return;

	if (!gk20a_channel_as_bound(ch))
		goto unbind;

	nvhost_dbg_info("freeing bound channel context, timeout=%ld",
			timeout);

	gk20a_disable_channel(ch, finish && !ch->hwctx->has_timedout, timeout);

	gk20a_free_error_notifiers(ctx);

	/* release channel ctx */
	gk20a_free_channel_ctx(ch);

	gk20a_gr_flush_channel_tlb(gr);

	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));

	/* free gpfifo */
	if (ch->gpfifo.gpu_va)
		gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
			ch->gpfifo.size, mem_flag_none);
	if (ch->gpfifo.cpu_va)
		dma_free_coherent(d, ch->gpfifo.size,
			ch->gpfifo.cpu_va, ch->gpfifo.iova);
	ch->gpfifo.cpu_va = NULL;
	ch->gpfifo.iova = 0;

	gk20a_mm_l2_invalidate(ch->g);

	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));

#if defined(CONFIG_GK20A_CYCLE_STATS)
	gk20a_free_cycle_stats_buffer(ch);
#endif

	channel_gk20a_free_priv_cmdbuf(ch);

	/* release hwctx binding to the as_share */
	nvhost_as_release_share(ch_vm->as_share, ctx);

unbind:
	channel_gk20a_unbind(ch);
	channel_gk20a_free_inst(g, ch);

	/* unlink all debug sessions */
	mutex_lock(&ch->dbg_s_lock);

	list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
		dbg_s->ch = NULL;
		list_del_init(&dbg_s->dbg_s_list_node);
	}

	mutex_unlock(&ch->dbg_s_lock);

	release_used_channel(f, ch);
}

struct nvhost_hwctx *gk20a_open_channel(struct nvhost_channel *ch,
			struct nvhost_hwctx *ctx)
{
	struct gk20a *g = get_gk20a(ch->dev);
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch_gk20a;

	ch_gk20a = acquire_unused_channel(f);
	if (ch_gk20a == NULL) {
		/* TBD: we want to make this virtualizable */
		nvhost_err(dev_from_gk20a(g), "out of hw chids");
		return NULL;
	}

	ctx->priv = ch_gk20a;
	ch_gk20a->g = g;
	/* note the ch here is the same for *EVERY* gk20a channel */
	ch_gk20a->ch = ch;
	/* but there's one hwctx per gk20a channel */
	ch_gk20a->hwctx = ctx;

	if (channel_gk20a_alloc_inst(g, ch_gk20a)) {
		ch_gk20a->in_use = false;
		ctx->priv = NULL;
		nvhost_err(dev_from_gk20a(g),
			"failed to open gk20a channel, out of inst mem");
		return NULL;
	}
	channel_gk20a_bind(ch_gk20a);
	ch_gk20a->pid = current->pid;

	/* reset timeout counter and update timestamp */
	ch_gk20a->timeout_accumulated_ms = 0;
	ch_gk20a->timeout_gpfifo_get = 0;
	/* set gr host default timeout */
	ch_gk20a->hwctx->timeout_ms_max = gk20a_get_gr_idle_timeout(g);

	/* The channel is *not* runnable at this point. It still needs to have
	 * an address space bound and allocate a gpfifo and grctx. */

	init_waitqueue_head(&ch_gk20a->notifier_wq);
	init_waitqueue_head(&ch_gk20a->semaphore_wq);
	init_waitqueue_head(&ch_gk20a->submit_wq);

	return ctx;
}

/* move to debug_gk20a.c ... */
static void dump_gpfifo(struct channel_gk20a *c)
{
	void *inst_ptr;
	u32 chid = c->hw_chid;

	inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
	if (!inst_ptr)
		return;

	nvhost_dbg_info("ramfc for channel %d:\n"
		"ramfc: gp_base 0x%08x, gp_base_hi 0x%08x, "
		"gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
		"pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
		"pb_get 0x%08x, pb_get_hi 0x%08x, "
		"pb_put 0x%08x, pb_put_hi 0x%08x\n"
		"userd: gp_put 0x%08x, gp_get 0x%08x, "
		"get 0x%08x, get_hi 0x%08x, "
		"put 0x%08x, put_hi 0x%08x\n"
		"pbdma: status 0x%08x, channel 0x%08x, userd 0x%08x, "
		"gp_base 0x%08x, gp_base_hi 0x%08x, "
		"gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
		"pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
		"get 0x%08x, get_hi 0x%08x, put 0x%08x, put_hi 0x%08x\n"
		"channel: ccsr_channel 0x%08x",
		c->hw_chid,
		mem_rd32(inst_ptr, ram_fc_gp_base_w()),
		mem_rd32(inst_ptr, ram_fc_gp_base_hi_w()),
		mem_rd32(inst_ptr, ram_fc_gp_fetch_w()),
		mem_rd32(inst_ptr, ram_fc_gp_get_w()),
		mem_rd32(inst_ptr, ram_fc_gp_put_w()),
		mem_rd32(inst_ptr, ram_fc_pb_fetch_w()),
		mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()),
		mem_rd32(inst_ptr, ram_fc_pb_get_w()),
		mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()),
		mem_rd32(inst_ptr, ram_fc_pb_put_w()),
		mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_gp_put_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_gp_get_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_get_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_get_hi_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_put_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_put_hi_w()),
		gk20a_readl(c->g, pbdma_status_r(0)),
		gk20a_readl(c->g, pbdma_channel_r(0)),
		gk20a_readl(c->g, pbdma_userd_r(0)),
		gk20a_readl(c->g, pbdma_gp_base_r(0)),
		gk20a_readl(c->g, pbdma_gp_base_hi_r(0)),
		gk20a_readl(c->g, pbdma_gp_fetch_r(0)),
		gk20a_readl(c->g, pbdma_gp_get_r(0)),
		gk20a_readl(c->g, pbdma_gp_put_r(0)),
		gk20a_readl(c->g, pbdma_pb_fetch_r(0)),
		gk20a_readl(c->g, pbdma_pb_fetch_hi_r(0)),
		gk20a_readl(c->g, pbdma_get_r(0)),
		gk20a_readl(c->g, pbdma_get_hi_r(0)),
		gk20a_readl(c->g, pbdma_put_r(0)),
		gk20a_readl(c->g, pbdma_put_hi_r(0)),
		gk20a_readl(c->g, ccsr_channel_r(chid)));

	nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
	gk20a_mm_l2_invalidate(c->g);
}

/* allocate private cmd buffer.
   used for inserting commands before/after user submitted buffers. */
static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	u32 i = 0, size;
	int err = 0;
	struct sg_table *sgt;

	/* Kernel can insert gpfifos before and after user gpfifos.
	   Before user gpfifos, kernel inserts fence_wait, which takes
	   syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
	   After user gpfifos, kernel inserts fence_get, which takes
	   wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
	   = 6 dwords.
	   Worst case, if the kernel adds both of them for every user gpfifo,
	   the max size of the priv_cmdbuf is:
	   (gpfifo entry number) * (2 / 3) * (4 + 6) * 4 bytes */
	size = roundup_pow_of_two(
		c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
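
	/* Worked example: with an (already tripled) 1024-entry gpfifo this
	 * is 1024 * 2 * 10 * 4 / 3 = 27306 bytes, rounded up to 32768. */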

	q->mem.base_cpuva = dma_alloc_coherent(d, size,
					&q->mem.base_iova,
					GFP_KERNEL);
	if (!q->mem.base_cpuva) {
		nvhost_err(d, "%s: memory allocation failed\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	q->mem.size = size;

	err = gk20a_get_sgtable(d, &sgt,
			q->mem.base_cpuva, q->mem.base_iova, size);
	if (err) {
		nvhost_err(d, "%s: failed to create sg table\n", __func__);
		goto clean_up;
	}

	memset(q->mem.base_cpuva, 0, size);

	q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
					size,
					0, /* flags */
					mem_flag_none);
	if (!q->base_gpuva) {
		nvhost_err(d, "ch %d : failed to map gpu va "
			"for priv cmd buffer", c->hw_chid);
		err = -ENOMEM;
		goto clean_up_sgt;
	}

	q->size = q->mem.size / sizeof(u32);

	INIT_LIST_HEAD(&q->head);
	INIT_LIST_HEAD(&q->free);

	/* pre-alloc 25% of priv cmdbuf entries and put them on free list */
	for (i = 0; i < q->size / 4; i++) {
		e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
		if (!e) {
			nvhost_err(d, "ch %d: fail to pre-alloc cmd entry",
				c->hw_chid);
			err = -ENOMEM;
			goto clean_up_sgt;
		}
		e->pre_alloc = true;
		list_add(&e->list, &q->free);
	}

	gk20a_free_sgtable(&sgt);

	return 0;

clean_up_sgt:
	gk20a_free_sgtable(&sgt);
clean_up:
	channel_gk20a_free_priv_cmdbuf(c);
	return err;
}

static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	struct list_head *pos, *tmp, *head;

	if (q->size == 0)
		return;

	if (q->base_gpuva)
		gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
				q->mem.size, mem_flag_none);
	if (q->mem.base_cpuva)
		dma_free_coherent(d, q->mem.size,
			q->mem.base_cpuva, q->mem.base_iova);
	q->mem.base_cpuva = NULL;
	q->mem.base_iova = 0;

	/* free used list */
	head = &q->head;
	list_for_each_safe(pos, tmp, head) {
		e = container_of(pos, struct priv_cmd_entry, list);
		free_priv_cmdbuf(c, e);
	}

	/* free pre-allocated entries on the free list */
	head = &q->free;
	list_for_each_safe(pos, tmp, head) {
		e = container_of(pos, struct priv_cmd_entry, list);
		e->pre_alloc = false;
		free_priv_cmdbuf(c, e);
	}

	memset(q, 0, sizeof(struct priv_cmd_queue));
}

/* allocate a cmd buffer with given size. size is number of u32 entries */
static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
			     struct priv_cmd_entry **entry)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	struct list_head *node;
	u32 free_count;
	u32 size = orig_size;
	bool no_retry = false;

	nvhost_dbg_fn("size %d", orig_size);

	*entry = NULL;

	/* if free space at the end is less than requested, increase the size
	 * to make the real allocated space start from the beginning. */
	if (q->put + size > q->size)
		size = orig_size + (q->size - q->put);

	nvhost_dbg_info("ch %d: priv cmd queue get:put %d:%d",
			c->hw_chid, q->get, q->put);

TRY_AGAIN:
	free_count = (q->size - (q->put - q->get) - 1) % q->size;
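
	/* Classic ring arithmetic: one slot is kept unused so a full queue
	 * is distinguishable from an empty one. E.g. q->size = 16, get = 2,
	 * put = 10 gives (16 - 8 - 1) % 16 = 7 words still free. */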

	if (size > free_count) {
		if (!no_retry) {
			recycle_priv_cmdbuf(c);
			no_retry = true;
			goto TRY_AGAIN;
		} else
			return -EAGAIN;
	}

	if (unlikely(list_empty(&q->free))) {

		nvhost_dbg_info("ch %d: run out of pre-alloc entries",
			c->hw_chid);

		e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
		if (!e) {
			nvhost_err(dev_from_gk20a(c->g),
				"ch %d: fail to allocate priv cmd entry",
				c->hw_chid);
			return -ENOMEM;
		}
	} else {
		node = q->free.next;
		list_del(node);
		e = container_of(node, struct priv_cmd_entry, list);
	}

	e->size = orig_size;
	e->gp_get = c->gpfifo.get;
	e->gp_put = c->gpfifo.put;
	e->gp_wrap = c->gpfifo.wrap;

	/* if we have increased size to skip free space at the end, set put
	   to beginning of cmd buffer (0) + size */
	if (size != orig_size) {
		e->ptr = q->mem.base_cpuva;
		e->gva = q->base_gpuva;
		q->put = orig_size;
	} else {
		e->ptr = q->mem.base_cpuva + q->put;
		e->gva = q->base_gpuva + q->put * sizeof(u32);
		q->put = (q->put + orig_size) & (q->size - 1);
	}

	/* we already handled q->put + size > q->size so BUG_ON this */
	BUG_ON(q->put > q->size);

	/* add new entry to head since we free from head */
	list_add(&e->list, &q->head);

	*entry = e;

	nvhost_dbg_fn("done");

	return 0;
}

/* Don't call this to free an explicit cmd entry.
 * It doesn't update priv_cmd_queue get/put */
static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;

	if (!e)
		return;

	list_del(&e->list);

	if (unlikely(!e->pre_alloc))
		kfree(e);
	else {
		memset(e, 0, sizeof(struct priv_cmd_entry));
		e->pre_alloc = true;
		list_add(&e->list, &q->free);
	}
}

/* free entries if they're no longer being used */
static void recycle_priv_cmdbuf(struct channel_gk20a *c)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e, *tmp;
	struct list_head *head = &q->head;
	bool wrap_around, found = false;

	/* Find the most recent free entry. Free it and everything before it */
	list_for_each_entry(e, head, list) {

		nvhost_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
			"curr get:put:wrap %d:%d:%d",
			c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
			c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);

		wrap_around = (c->gpfifo.wrap != e->gp_wrap);
		if (e->gp_get < e->gp_put) {
			if (c->gpfifo.get >= e->gp_put ||
			    wrap_around) {
				found = true;
				break;
			} else
				e->gp_get = c->gpfifo.get;
		} else if (e->gp_get > e->gp_put) {
			if (wrap_around &&
			    c->gpfifo.get >= e->gp_put) {
				found = true;
				break;
			} else
				e->gp_get = c->gpfifo.get;
		}
	}

	if (found)
		q->get = (e->ptr - q->mem.base_cpuva) + e->size;
	else {
		nvhost_dbg_info("no free entry recycled");
		return;
	}

	list_for_each_entry_safe_continue(e, tmp, head, list) {
		free_priv_cmdbuf(c, e);
	}

	nvhost_dbg_fn("done");
}

int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
			       struct nvhost_alloc_gpfifo_args *args)
{
	struct gk20a *g = c->g;
	struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
	struct device *d = dev_from_gk20a(g);
	struct vm_gk20a *ch_vm;
	u32 gpfifo_size;
	int err = 0;
	struct sg_table *sgt;

	/* The kernel can insert one extra gpfifo entry before user submitted
	   gpfifos and another one after, for internal usage. Triple the
	   requested size. */
	gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
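
	/* E.g. a request for 100 entries becomes 300, rounded up to 512; the
	 * power-of-two size is what lets the submit paths wrap the put
	 * pointer with & (entry_num - 1). */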

	if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
		c->vpr = true;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		nvhost_err(d,
			"not bound to an address space at time of gpfifo"
			" allocation. Attempting to create and bind to"
			" one...");
		return -EINVAL;
	}
	ch_vm = c->vm;

	c->cmds_pending = false;

	c->last_submit_fence.valid = false;
	c->last_submit_fence.syncpt_value = 0;
	c->last_submit_fence.syncpt_id = c->hw_chid + pdata->syncpt_base;

	c->ramfc.offset = 0;
	c->ramfc.size = ram_in_ramfc_s() / 8;

	if (c->gpfifo.cpu_va) {
		nvhost_err(d, "channel %d : "
			"gpfifo already allocated", c->hw_chid);
		return -EEXIST;
	}

	c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
	c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
						c->gpfifo.size,
						&c->gpfifo.iova,
						GFP_KERNEL);
	if (!c->gpfifo.cpu_va) {
		nvhost_err(d, "%s: memory allocation failed\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	c->gpfifo.entry_num = gpfifo_size;

	c->gpfifo.get = c->gpfifo.put = 0;

	err = gk20a_get_sgtable(d, &sgt,
			c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
	if (err) {
		nvhost_err(d, "%s: failed to allocate sg table\n", __func__);
		goto clean_up;
	}

	c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
					&sgt,
					c->gpfifo.size,
					0, /* flags */
					mem_flag_none);
	if (!c->gpfifo.gpu_va) {
		nvhost_err(d, "channel %d : failed to map"
			" gpu_va for gpfifo", c->hw_chid);
		err = -ENOMEM;
		goto clean_up_sgt;
	}

	nvhost_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
		c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);

	channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);

	channel_gk20a_setup_userd(c);
	channel_gk20a_commit_userd(c);

	gk20a_mm_l2_invalidate(c->g);

	/* TBD: setup engine contexts */

	err = channel_gk20a_alloc_priv_cmdbuf(c);
	if (err)
		goto clean_up_unmap;

	err = channel_gk20a_update_runlist(c, true);
	if (err)
		goto clean_up_unmap;

	gk20a_free_sgtable(&sgt);

	nvhost_dbg_fn("done");
	return 0;

clean_up_unmap:
	gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
		c->gpfifo.size, mem_flag_none);
clean_up_sgt:
	gk20a_free_sgtable(&sgt);
clean_up:
	dma_free_coherent(d, c->gpfifo.size,
		c->gpfifo.cpu_va, c->gpfifo.iova);
	c->gpfifo.cpu_va = NULL;
	c->gpfifo.iova = 0;
	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
	nvhost_err(d, "fail");
	return err;
}

static inline int wfi_cmd_size(void)
{
	return 2;
}

void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
{
	/* wfi */
	cmd->ptr[(*i)++] = 0x2001001E;
	/* handle, ignored */
	cmd->ptr[(*i)++] = 0x00000000;
}

static inline bool check_gp_put(struct gk20a *g,
				struct channel_gk20a *c)
{
	u32 put;
	/* gp_put changed unexpectedly since last update? */
	put = gk20a_bar1_readl(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w());
	if (c->gpfifo.put != put) {
		/* TBD: BUG_ON/teardown on this */
		nvhost_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
			"since last update");
		c->gpfifo.put = put;
		return false; /* surprise! */
	}
	return true; /* checked out ok */
}

/* Update with this periodically to determine how the gpfifo is draining. */
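/* The hardware GP_GET readback only moves forward modulo the ring size, so a
 * value smaller than the cached copy means the fifo wrapped; the software
 * wrap flag is toggled to keep priv cmd entry recycling in sync. */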
static inline u32 update_gp_get(struct gk20a *g,
				struct channel_gk20a *c)
{
	u32 new_get = gk20a_bar1_readl(g,
		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
	if (new_get < c->gpfifo.get)
		c->gpfifo.wrap = !c->gpfifo.wrap;
	c->gpfifo.get = new_get;
	return new_get;
}

static inline u32 gp_free_count(struct channel_gk20a *c)
{
	return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
		c->gpfifo.entry_num;
}
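
/* Free count is in gpfifo entries, with the usual one-slot reserve; the
 * submit path below checks it against num_entries plus two extra entries
 * (one syncpoint wait, one syncpoint increment). */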

bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
		u32 timeout_delta_ms)
{
	u32 gpfifo_get = update_gp_get(ch->g, ch);
	/* Count consecutive timeout ISRs */
	if (gpfifo_get == ch->timeout_gpfifo_get) {
		/* we didn't advance since previous channel timeout check */
		ch->timeout_accumulated_ms += timeout_delta_ms;
	} else {
		/* first timeout ISR encountered */
		ch->timeout_accumulated_ms = timeout_delta_ms;
	}

	ch->timeout_gpfifo_get = gpfifo_get;

	return ch->g->timeouts_enabled &&
		ch->timeout_accumulated_ms > ch->hwctx->timeout_ms_max;
}

/* Issue a syncpoint increment *preceded* by a wait-for-idle
 * command.  All commands on the channel will have been
 * consumed at the time the fence syncpoint increment occurs.
 */
int gk20a_channel_submit_wfi_fence(struct gk20a *g,
				struct channel_gk20a *c,
				struct nvhost_syncpt *sp,
				struct nvhost_fence *fence)
{
	struct priv_cmd_entry *cmd = NULL;
	int cmd_size, j = 0;
	u32 free_count;
	int err;

	if (c->hwctx->has_timedout)
		return -ETIMEDOUT;

	cmd_size = 4 + wfi_cmd_size();

	update_gp_get(g, c);
	free_count = gp_free_count(c);
	if (unlikely(!free_count)) {
		nvhost_err(dev_from_gk20a(g),
			"not enough gpfifo space");
		return -EAGAIN;
	}

	err = alloc_priv_cmdbuf(c, cmd_size, &cmd);
	if (unlikely(err)) {
		nvhost_err(dev_from_gk20a(g),
			"not enough priv cmd buffer space");
		return err;
	}

	fence->value = nvhost_syncpt_incr_max(sp, fence->syncpt_id, 1);

	c->last_submit_fence.valid = true;
	c->last_submit_fence.syncpt_value = fence->value;
	c->last_submit_fence.syncpt_id = fence->syncpt_id;
	c->last_submit_fence.wfi = true;

	trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);

	add_wfi_cmd(cmd, &j);

	/* syncpoint_a */
	cmd->ptr[j++] = 0x2001001C;
	/* payload, ignored */
	cmd->ptr[j++] = 0;
	/* syncpoint_b */
	cmd->ptr[j++] = 0x2001001D;
	/* syncpt_id, incr */
	cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;

	c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
	c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
		pbdma_gp_entry1_length_f(cmd->size);

	c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);

	/* save gp_put */
	cmd->gp_put = c->gpfifo.put;

	gk20a_bar1_writel(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
		c->gpfifo.put);

	nvhost_dbg_info("post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	return 0;
}

static u32 get_gp_free_count(struct channel_gk20a *c)
{
	update_gp_get(c->g, c);
	return gp_free_count(c);
}

static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
{
	void *mem = NULL;
	unsigned int words;
	u64 offset;
	struct mem_handle *r = NULL;

	if (nvhost_debug_trace_cmdbuf) {
		u64 gpu_va = (u64)g->entry0 |
			(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
		struct mem_mgr *memmgr = NULL;
		int err;

		words = pbdma_gp_entry1_length_v(g->entry1);
		err = gk20a_vm_find_buffer(c->vm, gpu_va, &memmgr, &r,
					&offset);
		if (!err)
			mem = nvhost_memmgr_mmap(r);
	}

	if (mem) {
		u32 i;
		/*
		 * Write in batches of 128 as there seems to be a limit
		 * of how much you can output to ftrace at once.
		 */
		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
			trace_nvhost_cdma_push_gather(
				c->ch->dev->name,
				0,
				min(words - i, TRACE_MAX_LENGTH),
				offset + i * sizeof(u32),
				mem);
		}
		nvhost_memmgr_munmap(r, mem);
	}
}

static int gk20a_channel_add_job(struct channel_gk20a *c,
				 struct nvhost_fence *fence)
{
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job = NULL;
	struct mapped_buffer_node **mapped_buffers = NULL;
	int err = 0, num_mapped_buffers;

	/* job needs reference to this vm */
	gk20a_vm_get(vm);

	err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
	if (err) {
		gk20a_vm_put(vm);
		return err;
	}

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job) {
		gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
		gk20a_vm_put(vm);
		return -ENOMEM;
	}

	job->num_mapped_buffers = num_mapped_buffers;
	job->mapped_buffers = mapped_buffers;
	job->fence = *fence;

	mutex_lock(&c->jobs_lock);
	list_add_tail(&job->list, &c->jobs);
	mutex_unlock(&c->jobs_lock);

	return 0;
}

void gk20a_channel_update(struct channel_gk20a *c)
{
	struct gk20a *g = c->g;
	struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job, *n;

	mutex_lock(&c->jobs_lock);
	list_for_each_entry_safe(job, n, &c->jobs, list) {
		bool completed = nvhost_syncpt_is_expired(sp,
			job->fence.syncpt_id, job->fence.value);
		if (!completed)
			break;

		gk20a_vm_put_buffers(vm, job->mapped_buffers,
				job->num_mapped_buffers);

		/* job is done. release its reference to vm */
		gk20a_vm_put(vm);

		list_del_init(&job->list);
		kfree(job);
		nvhost_module_idle(g->dev);
	}
	mutex_unlock(&c->jobs_lock);
}

#ifdef CONFIG_DEBUG_FS
static void gk20a_sync_debugfs(struct gk20a *g)
{
	u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
	spin_lock(&g->debugfs_lock);
	if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) {
		u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
		if (g->mm.ltc_enabled_debug)
			/* bypass disabled (normal caching ops) */
			reg &= ~reg_f;
		else
			/* bypass enabled (no caching) */
			reg |= reg_f;

		gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
		g->mm.ltc_enabled = g->mm.ltc_enabled_debug;
	}
	spin_unlock(&g->debugfs_lock);
}
#endif
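
/* Builds the 4-dword fence_wait sequence described in the priv cmdbuf sizing
 * comment above: syncpoint_a carries the threshold payload, syncpoint_b the
 * syncpoint id plus the wait/switch_en operation bits (0x10). */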
void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
{
	/* syncpoint_a */
	ptr[0] = 0x2001001C;
	/* payload */
	ptr[1] = thresh;
	/* syncpoint_b */
	ptr[2] = 0x2001001D;
	/* syncpt_id, switch_en, wait */
	ptr[3] = (id << 8) | 0x10;
}

int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
				struct nvhost_gpfifo *gpfifo,
				u32 num_entries,
				struct nvhost_fence *fence,
				u32 flags)
{
	struct gk20a *g = c->g;
	struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
	struct device *d = dev_from_gk20a(g);
	struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
	u32 i, incr_id = ~0, wait_id = ~0, wait_value = 0;
	int err = 0;
	int incr_cmd_size;
	bool wfi_cmd;
	int num_wait_cmds = 0;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct sync_fence *sync_fence = NULL;
	/* we might need two extra gpfifo entries - one for syncpoint
	 * wait and one for syncpoint increment */
	const int extra_entries = 2;

	if (c->hwctx->has_timedout)
		return -ETIMEDOUT;

	if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
		      NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
	    !fence)
		return -EINVAL;

#ifdef CONFIG_DEBUG_FS
	/* update debug settings */
	gk20a_sync_debugfs(g);
#endif

	nvhost_dbg_info("channel %d", c->hw_chid);

	nvhost_module_busy(g->dev);
	trace_nvhost_channel_submit_gpfifo(c->ch->dev->name,
					c->hw_chid,
					num_entries,
					flags,
					fence->syncpt_id, fence->value,
					c->hw_chid + pdata->syncpt_base);

	update_gp_get(g, c);

	nvhost_dbg_info("pre-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	/* If the caller has requested a fence "get" then we need to be
	 * sure the fence represents work completion. In that case
	 * issue a wait-for-idle before the syncpoint increment.
	 */
	wfi_cmd = !!(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
		&& c->obj_class != KEPLER_C;

	/* Invalidate tlb if it's dirty...
	 * TBD: this should be done in the cmd stream, not with PRIs.
	 * We don't know what context is currently running...
	 * Note also: there can be more than one context associated with the
	 * address space (vm). */
	gk20a_mm_tlb_invalidate(c->vm);

	/* Make sure we have enough space for gpfifo entries. If not,
	 * wait for signals from completed submits */
	if (gp_free_count(c) < num_entries + extra_entries) {
		err = wait_event_interruptible(c->submit_wq,
			get_gp_free_count(c) >= num_entries + extra_entries ||
			c->hwctx->has_timedout);

		if (c->hwctx->has_timedout) {
			err = -ETIMEDOUT;
			goto clean_up;
		}

		if (err) {
			nvhost_err(d, "not enough gpfifo space");
			err = -EAGAIN;
			goto clean_up;
		}
	}

	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE
			&& flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
		sync_fence = nvhost_sync_fdget(fence->syncpt_id);
		if (!sync_fence) {
			nvhost_err(d, "invalid fence fd");
			err = -EINVAL;
			goto clean_up;
		}
		num_wait_cmds = nvhost_sync_num_pts(sync_fence);
	}
	/*
	 * optionally insert syncpt wait in the beginning of gpfifo submission
	 * when user requested and the wait hasn't expired.
	 * validate that the id makes sense, elide if not
	 * the only reason this isn't being unceremoniously killed is to
	 * keep running some tests which trigger this condition
	 */
	else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
		if (fence->syncpt_id >= nvhost_syncpt_nb_pts(sp))
			dev_warn(d,
				"invalid wait id in gpfifo submit, elided");
		if (!nvhost_syncpt_is_expired(sp,
				fence->syncpt_id, fence->value))
			num_wait_cmds = 1;
	}

	if (num_wait_cmds) {
		alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
		if (wait_cmd == NULL) {
			nvhost_err(d, "not enough priv cmd buffer space");
			err = -EAGAIN;
			goto clean_up;
		}
	}

	/* always insert syncpt increment at end of gpfifo submission
	   to keep track of method completion for idle railgating */
	/* TODO: we need to find a way to get rid of these wfi on every
	 * submission...
	 */
	incr_cmd_size = 4;
	if (wfi_cmd)
		incr_cmd_size += wfi_cmd_size();
	alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
	if (incr_cmd == NULL) {
		nvhost_err(d, "not enough priv cmd buffer space");
		err = -EAGAIN;
		goto clean_up;
	}

	if (num_wait_cmds) {
		if (sync_fence) {
			struct sync_pt *pos;
			struct nvhost_sync_pt *pt;
			i = 0;

			list_for_each_entry(pos, &sync_fence->pt_list_head,
					pt_list) {
				pt = to_nvhost_sync_pt(pos);

				wait_id = nvhost_sync_pt_id(pt);
				wait_value = nvhost_sync_pt_thresh(pt);

				add_wait_cmd(&wait_cmd->ptr[i * 4],
						wait_id, wait_value);
				i++;
			}
			sync_fence_put(sync_fence);
			sync_fence = NULL;
		} else {
			wait_id = fence->syncpt_id;
			wait_value = fence->value;
			add_wait_cmd(&wait_cmd->ptr[0],
					wait_id, wait_value);
		}

		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
			u64_lo32(wait_cmd->gva);
		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
			u64_hi32(wait_cmd->gva) |
			pbdma_gp_entry1_length_f(wait_cmd->size);
		trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		wait_cmd->gp_put = c->gpfifo.put;
	}

	for (i = 0; i < num_entries; i++) {
		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
			gpfifo[i].entry0; /* cmd buf va low 32 */
		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
			gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
		trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);
	}

	if (incr_cmd) {
		int j = 0;

		incr_id = c->hw_chid + pdata->syncpt_base;
		fence->syncpt_id = incr_id;
		fence->value = nvhost_syncpt_incr_max(sp, incr_id, 1);

		c->last_submit_fence.valid = true;
		c->last_submit_fence.syncpt_value = fence->value;
		c->last_submit_fence.syncpt_id = fence->syncpt_id;
		c->last_submit_fence.wfi = wfi_cmd;

		trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);
		if (c->obj_class == KEPLER_C) {
			/* setobject KEPLER_C */
			incr_cmd->ptr[j++] = 0x20010000;
			incr_cmd->ptr[j++] = KEPLER_C;
			/* syncpt incr */
			incr_cmd->ptr[j++] = 0x200100B2;
			incr_cmd->ptr[j++] = fence->syncpt_id | (0x1 << 20)
				| (0x1 << 16);
		} else {
			if (wfi_cmd)
				add_wfi_cmd(incr_cmd, &j);
			/* syncpoint_a */
			incr_cmd->ptr[j++] = 0x2001001C;
			/* payload, ignored */
			incr_cmd->ptr[j++] = 0;
			/* syncpoint_b */
			incr_cmd->ptr[j++] = 0x2001001D;
			/* syncpt_id, incr */
			incr_cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;
		}

		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
			u64_lo32(incr_cmd->gva);
		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
			u64_hi32(incr_cmd->gva) |
			pbdma_gp_entry1_length_f(incr_cmd->size);
		trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		incr_cmd->gp_put = c->gpfifo.put;
	}

	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
		struct nvhost_ctrl_sync_fence_info pts;

		pts.id = fence->syncpt_id;
		pts.thresh = fence->value;

		fence->syncpt_id = 0;
		fence->value = 0;
		err = nvhost_sync_create_fence(sp, &pts, 1, "fence",
				&fence->syncpt_id);
		if (err)
			goto clean_up;
	}

	/* Invalidate tlb if it's dirty...
	 * TBD: this should be done in the cmd stream, not with PRIs.
	 * We don't know what context is currently running...
	 * Note also: there can be more than one context associated with the
	 * address space (vm). */
	gk20a_mm_tlb_invalidate(c->vm);

	trace_nvhost_channel_submitted_gpfifo(c->ch->dev->name,
					c->hw_chid,
					num_entries,
					flags,
					wait_id, wait_value,
					fence->syncpt_id, fence->value);

	/* TODO! Check for errors... */
	gk20a_channel_add_job(c, fence);

	c->cmds_pending = true;
	gk20a_bar1_writel(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
		c->gpfifo.put);

	nvhost_dbg_info("post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	nvhost_dbg_fn("done");
	return 0;

clean_up:
	if (sync_fence)
		sync_fence_put(sync_fence);
	nvhost_err(d, "fail");
	free_priv_cmdbuf(c, wait_cmd);
	free_priv_cmdbuf(c, incr_cmd);
	nvhost_module_idle(g->dev);
	return err;
}

void gk20a_remove_channel_support(struct channel_gk20a *c)
{
}

int gk20a_init_channel_support(struct gk20a *g, u32 chid)
{
	struct channel_gk20a *c = g->fifo.channel+chid;
	c->g = g;
	c->in_use = false;
	c->hw_chid = chid;
	c->bound = false;
	c->remove_support = gk20a_remove_channel_support;
	mutex_init(&c->jobs_lock);
	INIT_LIST_HEAD(&c->jobs);
#if defined(CONFIG_GK20A_CYCLE_STATS)
	mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
#endif
	INIT_LIST_HEAD(&c->dbg_s_list);
	mutex_init(&c->dbg_s_lock);

	return 0;
}

int gk20a_channel_init(struct nvhost_channel *ch,
		       struct nvhost_master *host, int index)
{
	return 0;
}

int gk20a_channel_alloc_obj(struct nvhost_channel *channel,
			u32 class_num,
			u32 *obj_id,
			u32 vaspace_share)
{
	return 0;
}

int gk20a_channel_free_obj(struct nvhost_channel *channel, u32 obj_id)
{
	return 0;
}

int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
{
	struct nvhost_syncpt *sp = syncpt_from_gk20a(ch->g);
	struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
	struct nvhost_fence fence;
	int err = 0;

	if (!ch->cmds_pending)
		return 0;

	/* Do not wait for a timedout channel */
	if (ch->hwctx && ch->hwctx->has_timedout)
		return -ETIMEDOUT;

	if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
		nvhost_dbg_fn("issuing wfi, incr to finish the channel");
		fence.syncpt_id = ch->hw_chid + pdata->syncpt_base;
		err = gk20a_channel_submit_wfi_fence(ch->g, ch,
				sp, &fence);
	}
	if (err)
		return err;

	BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));

	nvhost_dbg_fn("waiting for channel to finish syncpt:%d val:%d",
		ch->last_submit_fence.syncpt_id,
		ch->last_submit_fence.syncpt_value);

	err = nvhost_syncpt_wait_timeout(sp,
			ch->last_submit_fence.syncpt_id,
			ch->last_submit_fence.syncpt_value,
			timeout, &fence.value, NULL, false);
	if (err)
		dev_warn(dev_from_gk20a(ch->g),
			"timed out waiting for gk20a channel to finish");
	else
		ch->cmds_pending = false;

	return err;
}

static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
					ulong id, u32 offset,
					u32 payload, long timeout)
{
	struct platform_device *pdev = ch->ch->dev;
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
	struct mem_handle *handle_ref;
	void *data;
	u32 *semaphore;
	int ret = 0;
	long remain;

	/* do not wait if channel has timed out */
	if (ch->hwctx->has_timedout)
		return -ETIMEDOUT;

	handle_ref = nvhost_memmgr_get(memmgr, id, pdev);
	if (IS_ERR(handle_ref)) {
		nvhost_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
			id);
		return -EINVAL;
	}

	data = nvhost_memmgr_kmap(handle_ref, offset >> PAGE_SHIFT);
	if (!data) {
		nvhost_err(&pdev->dev, "failed to map notifier memory");
		ret = -EINVAL;
		goto cleanup_put;
	}

	semaphore = data + (offset & ~PAGE_MASK);

	remain = wait_event_interruptible_timeout(
			ch->semaphore_wq,
			*semaphore == payload || ch->hwctx->has_timedout,
			timeout);

	if (remain == 0 && *semaphore != payload)
		ret = -ETIMEDOUT;
	else if (remain < 0)
		ret = remain;

	nvhost_memmgr_kunmap(handle_ref, offset >> PAGE_SHIFT, data);
cleanup_put:
	nvhost_memmgr_put(memmgr, handle_ref);
	return ret;
}

int gk20a_channel_wait(struct channel_gk20a *ch,
		       struct nvhost_wait_args *args)
{
	struct device *d = dev_from_gk20a(ch->g);
	struct platform_device *dev = ch->ch->dev;
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
	struct mem_handle *handle_ref;
	struct notification *notif;
	struct timespec tv;
	u64 jiffies;
	ulong id;
	u32 offset;
	unsigned long timeout;
	int remain, ret = 0;

	if (ch->hwctx->has_timedout)
		return -ETIMEDOUT;

	if (args->timeout == NVHOST_NO_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT;
	else
		timeout = (u32)msecs_to_jiffies(args->timeout);

	switch (args->type) {
	case NVHOST_WAIT_TYPE_NOTIFIER:
		id = args->condition.notifier.nvmap_handle;
		offset = args->condition.notifier.offset;

		handle_ref = nvhost_memmgr_get(memmgr, id, dev);
		if (IS_ERR(handle_ref)) {
			nvhost_err(d, "invalid notifier nvmap handle 0x%lx",
				id);
			return -EINVAL;
		}

		notif = nvhost_memmgr_mmap(handle_ref);
		if (!notif) {
			nvhost_err(d, "failed to map notifier memory");
			return -ENOMEM;
		}

		notif = (struct notification *)((uintptr_t)notif + offset);

		/* user should set status pending before
		 * calling this ioctl */
		remain = wait_event_interruptible_timeout(
				ch->notifier_wq,
				notif->status == 0 || ch->hwctx->has_timedout,
				timeout);

		if (remain == 0 && notif->status != 0) {
			ret = -ETIMEDOUT;
			goto notif_clean_up;
		} else if (remain < 0) {
			ret = -EINTR;
			goto notif_clean_up;
		}

		/* TBD: fill in correct information */
		jiffies = get_jiffies_64();
		jiffies_to_timespec(jiffies, &tv);
		notif->timestamp.nanoseconds[0] = tv.tv_nsec;
		notif->timestamp.nanoseconds[1] = tv.tv_sec;
		notif->info32 = 0xDEADBEEF; /* should be object name */
		notif->info16 = ch->hw_chid; /* should be method offset */

notif_clean_up:
		nvhost_memmgr_munmap(handle_ref, notif);
		return ret;

	case NVHOST_WAIT_TYPE_SEMAPHORE:
		ret = gk20a_channel_wait_semaphore(ch,
				args->condition.semaphore.nvmap_handle,
				args->condition.semaphore.offset,
				args->condition.semaphore.payload,
				timeout);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

int gk20a_channel_set_priority(struct channel_gk20a *ch,
		u32 priority)
{
	u32 timeslice_timeout;
	/* set priority of graphics channel */
	switch (priority) {
	case NVHOST_PRIORITY_LOW:
		/* 64 << 3 = 512us */
		timeslice_timeout = 64;
		break;
	case NVHOST_PRIORITY_MEDIUM:
		/* 128 << 3 = 1024us */
		timeslice_timeout = 128;
		break;
	case NVHOST_PRIORITY_HIGH:
		/* 255 << 3 = 2048us */
		timeslice_timeout = 255;
		break;
	default:
		pr_err("Unsupported priority");
		return -EINVAL;
	}
	channel_gk20a_set_schedule_params(ch,
			timeslice_timeout);

	return 0;
}

int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
			    struct nvhost_zcull_bind_args *args)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;

	return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
				args->gpu_va, args->mode);
}

/* in this context the "channel" is the host1x channel which
 * maps to *all* gk20a channels */
int gk20a_channel_suspend(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;
	struct nvhost_fence fence;
	struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
	struct device *d = dev_from_gk20a(g);
	struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
	int err;

	/* idle the engine by submitting WFI on non-KEPLER_C channel */
	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *c = &f->channel[chid];
		if (c->in_use && c->obj_class != KEPLER_C) {
			fence.syncpt_id = chid + pdata->syncpt_base;
			err = gk20a_channel_submit_wfi_fence(g,
					c, sp, &fence);
			if (err) {
				nvhost_err(d, "cannot idle channel %d\n",
					chid);
				return err;
			}

			nvhost_syncpt_wait_timeout(sp,
					fence.syncpt_id, fence.value,
					500000,
					NULL, NULL,
					false);
		}
	}

	for (chid = 0; chid < f->num_channels; chid++) {
		if (f->channel[chid].in_use) {

			nvhost_dbg_info("suspend channel %d", chid);
			/* disable channel */
			gk20a_writel(g, ccsr_channel_r(chid),
				gk20a_readl(g, ccsr_channel_r(chid)) |
				ccsr_channel_enable_clr_true_f());
			/* preempt the channel */
			gk20a_fifo_preempt_channel(g, chid);

			channels_in_use = true;
		}
	}

	if (channels_in_use) {
		gk20a_fifo_update_runlist(g, 0, ~0, false, true);

		for (chid = 0; chid < f->num_channels; chid++) {
			if (f->channel[chid].in_use)
				channel_gk20a_unbind(&f->channel[chid]);
		}
	}

	nvhost_dbg_fn("done");
	return 0;
}

/* in this context the "channel" is the host1x channel which
 * maps to *all* gk20a channels */
int gk20a_channel_resume(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;

	for (chid = 0; chid < f->num_channels; chid++) {
		if (f->channel[chid].in_use) {
			nvhost_dbg_info("resume channel %d", chid);
			channel_gk20a_bind(&f->channel[chid]);
			channels_in_use = true;
		}
	}

	if (channels_in_use)
		gk20a_fifo_update_runlist(g, 0, ~0, true, true);

	nvhost_dbg_fn("done");
	return 0;
}

void gk20a_channel_semaphore_wakeup(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *c = g->fifo.channel+chid;
		if (c->in_use)
			wake_up_interruptible_all(&c->semaphore_wq);
	}
}