/*
 * drivers/video/tegra/host/gk20a/channel_gk20a.c
 *
 * GK20A Graphics channel
 *
 * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/highmem.h> /* needed for nvmap.h */
#include <trace/events/nvhost.h>
#include <linux/scatterlist.h>

#include "nvhost_as.h"
#include "nvhost_sync.h"

#include "dbg_gpu_gk20a.h"

#include "hw_ram_gk20a.h"
#include "hw_fifo_gk20a.h"
#include "hw_pbdma_gk20a.h"
#include "hw_ccsr_gk20a.h"
#include "hw_ltc_gk20a.h"
#include "chip_support.h"

#define NVMAP_HANDLE_PARAM_SIZE 1
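/* NVMAP_HANDLE_PARAM_SIZE is the nvmap parameter id handed to
 * nvhost_memmgr_get_param() (see gk20a_channel_cycle_stats() below) to query
 * a handle's size in bytes. */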

static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);

static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
			     struct priv_cmd_entry **entry);
static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e);
static void recycle_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_commit_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
			u64 gpfifo_base, u32 gpfifo_entries);

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);

static int channel_gk20a_alloc_inst(struct gk20a *g,
			struct channel_gk20a *ch);
static void channel_gk20a_free_inst(struct gk20a *g,
			struct channel_gk20a *ch);

static int channel_gk20a_update_runlist(struct channel_gk20a *c,
					bool add);

static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
{
	struct channel_gk20a *ch = NULL;
	int chid;

	mutex_lock(&f->ch_inuse_mutex);
	for (chid = 0; chid < f->num_channels; chid++) {
		if (!f->channel[chid].in_use) {
			f->channel[chid].in_use = true;
			ch = &f->channel[chid];
			break;
		}
	}
	mutex_unlock(&f->ch_inuse_mutex);

	return ch;
}

static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
{
	mutex_lock(&f->ch_inuse_mutex);
	f->channel[c->hw_chid].in_use = false;
	mutex_unlock(&f->ch_inuse_mutex);
}

int channel_gk20a_commit_va(struct channel_gk20a *c)
{
	u64 addr;
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
	addr_lo = u64_lo32(addr >> 12);
	addr_hi = u64_hi32(addr);

	nvhost_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
		(u64)addr, addr_lo, addr_hi);
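
	/* The page directory base is split across two RAMIN words: the low
	 * word carries the 4 KiB-aligned base (hence the >> 12 above) plus
	 * the aperture/volatile fields, the high word the upper address
	 * bits. */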
	mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
		ram_in_page_dir_base_target_vid_mem_f() |
		ram_in_page_dir_base_vol_true_f() |
		ram_in_page_dir_base_lo_f(addr_lo));

	mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
		ram_in_page_dir_base_hi_f(addr_hi));

	mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
		u64_lo32(c->vm->va_limit) | 0xFFF);

	mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
		ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static int channel_gk20a_commit_userd(struct channel_gk20a *c)
{
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
	addr_hi = u64_hi32(c->userd_iova);

	nvhost_dbg_info("channel %d : set ramfc userd 0x%16llx",
		c->hw_chid, c->userd_iova);

	mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_addr_f(addr_lo));

	mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_hi_addr_f(addr_hi));

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
				u32 timeslice_timeout)
{
	void *inst_ptr;
	int shift = 3;
	int value = timeslice_timeout;

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	/* disable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_clr_true_f());

	/* preempt the channel */
	WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));

	/* flush GPU cache */
	gk20a_mm_l2_flush(c->g, true);

	/* value field is 8 bits long */
	while (value >= 1 << 8) {
		value >>= 1;
		shift++;
	}

	/* timeslice register is only 18 bits long */
	if ((value << shift) >= 1 << 18) {
		pr_err("Requested timeslice value is clamped to 18 bits\n");
		value = 255;
		shift = 10;
	}

	/* set new timeslice */
	mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
		value | (shift << 12) |
		fifo_eng_timeslice_enable_true_f());
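
	/* Timeslice encoding (as written above): the 8-bit mantissa sits in
	 * the low bits and the timescale shift at bit 12; the effective
	 * timeslice is roughly value << shift. E.g. the priority table in
	 * gk20a_channel_set_priority() below passes 64/128/255 with the
	 * initial shift of 3, giving 64 << 3 = 512us and so on. */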

	/* enable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_set_true_f());

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
			u64 gpfifo_base, u32 gpfifo_entries)
{
	void *inst_ptr;

	inst_ptr = c->inst_block.cpuva;
	if (!inst_ptr)
		return -ENOMEM;

	memset(inst_ptr, 0, ram_fc_size_val_v());

	mem_wr32(inst_ptr, ram_fc_gp_base_w(),
		pbdma_gp_base_offset_f(
		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));

	mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
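
	/* limit2 holds log2 of the gpfifo entry count, so the count must be
	 * a power of two; gk20a_alloc_channel_gpfifo() guarantees this with
	 * roundup_pow_of_two(), which also lets the put pointer wrap with a
	 * simple & (entry_num - 1). */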
	mem_wr32(inst_ptr, ram_fc_signature_w(),
		pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());

	mem_wr32(inst_ptr, ram_fc_formats_w(),
		pbdma_formats_gp_fermi0_f() |
		pbdma_formats_pb_fermi1_f() |
		pbdma_formats_mp_fermi0_f());

	mem_wr32(inst_ptr, ram_fc_pb_header_w(),
		pbdma_pb_header_priv_user_f() |
		pbdma_pb_header_method_zero_f() |
		pbdma_pb_header_subchannel_zero_f() |
		pbdma_pb_header_level_main_f() |
		pbdma_pb_header_first_true_f() |
		pbdma_pb_header_type_inc_f());

	mem_wr32(inst_ptr, ram_fc_subdevice_w(),
		pbdma_subdevice_id_f(1) |
		pbdma_subdevice_status_active_f() |
		pbdma_subdevice_channel_dma_enable_f());

	mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());

	mem_wr32(inst_ptr, ram_fc_acquire_w(),
		pbdma_acquire_retry_man_2_f() |
		pbdma_acquire_retry_exp_2_f() |
		pbdma_acquire_timeout_exp_max_f() |
		pbdma_acquire_timeout_man_max_f() |
		pbdma_acquire_timeout_en_disable_f());

	mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
		fifo_eng_timeslice_timeout_128_f() |
		fifo_eng_timeslice_timescale_3_f() |
		fifo_eng_timeslice_enable_true_f());

	mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
		fifo_pb_timeslice_timeout_16_f() |
		fifo_pb_timeslice_timescale_0_f() |
		fifo_pb_timeslice_enable_true_f());

	mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static int channel_gk20a_setup_userd(struct channel_gk20a *c)
{
	BUG_ON(!c->userd_cpu_va);

	mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
	mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);

	gk20a_mm_l2_invalidate(c->g);

	return 0;
}

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *engine_info =
		f->engine_info + ENGINE_GR_GK20A;

	u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
		>> ram_in_base_shift_v();
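
	/* ccsr_channel_inst_ptr takes the instance block base in units of
	 * 1 << ram_in_base_shift_v() bytes (4 KiB on gk20a), so only the
	 * aligned upper address bits are programmed below. */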
	nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
		ch_gk20a->hw_chid, inst_ptr);

	ch_gk20a->bound = true;

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_runlist_f(~0)) |
		 ccsr_channel_runlist_f(engine_info->runlist_id));

	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
		ccsr_channel_inst_ptr_f(inst_ptr) |
		ccsr_channel_inst_target_vid_mem_f() |
		ccsr_channel_inst_bind_true_f());

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_enable_set_f(~0)) |
		 ccsr_channel_enable_set_true_f());
}

static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);

	if (ch_gk20a->bound)
		gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
			ccsr_channel_inst_ptr_f(0) |
			ccsr_channel_inst_bind_false_f());

	ch_gk20a->bound = false;
}

static int channel_gk20a_alloc_inst(struct gk20a *g,
			struct channel_gk20a *ch)
{
	struct device *d = dev_from_gk20a(g);

	ch->inst_block.size = ram_in_alloc_size_v();
	ch->inst_block.cpuva = dma_alloc_coherent(d,
					ch->inst_block.size,
					&ch->inst_block.iova,
					GFP_KERNEL);
	if (!ch->inst_block.cpuva) {
		nvhost_err(d, "%s: memory allocation failed\n", __func__);
		goto clean_up;
	}

	ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
					ch->inst_block.iova);
	if (!ch->inst_block.cpu_pa) {
		nvhost_err(d, "%s: failed to get physical address\n", __func__);
		goto clean_up;
	}

	nvhost_dbg_info("channel %d inst block physical addr: 0x%16llx",
		ch->hw_chid, ch->inst_block.cpu_pa);

	nvhost_dbg_fn("done");
	return 0;

clean_up:
	nvhost_err(d, "fail");
	channel_gk20a_free_inst(g, ch);
	return -ENOMEM;
}

static void channel_gk20a_free_inst(struct gk20a *g,
			struct channel_gk20a *ch)
{
	struct device *d = dev_from_gk20a(g);

	if (ch->inst_block.cpuva)
		dma_free_coherent(d, ch->inst_block.size,
			ch->inst_block.cpuva, ch->inst_block.iova);
	ch->inst_block.cpuva = NULL;
	ch->inst_block.iova = 0;
	memset(&ch->inst_block, 0, sizeof(struct inst_desc));
}

static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
{
	return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
}

void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
{
	struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
	struct nvhost_master *host = host_from_gk20a_channel(ch);

	/* ensure no fences are pending */
	nvhost_syncpt_set_min_eq_max(&host->syncpt,
			ch->hw_chid + pdata->syncpt_base);

	/* disable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g,
			ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_clr_true_f());
}

static int gk20a_wait_channel_idle(struct channel_gk20a *ch)
{
	bool channel_idle = false;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));

	do {
		mutex_lock(&ch->jobs_lock);
		channel_idle = list_empty(&ch->jobs);
		mutex_unlock(&ch->jobs_lock);
		if (channel_idle)
			break;

		usleep_range(1000, 3000);
	} while (time_before(jiffies, end_jiffies));

	if (!channel_idle) {
		nvhost_err(dev_from_gk20a(ch->g), "channel jobs not freed");
		return -EBUSY;
	}

	return 0;
}

void gk20a_disable_channel(struct channel_gk20a *ch,
			   bool finish,
			   unsigned long finish_timeout)
{
	if (finish) {
		int err = gk20a_channel_finish(ch, finish_timeout);
		WARN_ON(err);
	}

	/* disable the channel from hw and increment syncpoints */
	gk20a_disable_channel_no_update(ch);

	gk20a_wait_channel_idle(ch);

	/* preempt the channel */
	gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);

	/* remove channel from runlist */
	channel_gk20a_update_runlist(ch, false);
}

#if defined(CONFIG_GK20A_CYCLE_STATS)

static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
{
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
	/* disable existing cyclestats buffer */
	mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
	if (ch->cyclestate.cyclestate_buffer_handler) {
		nvhost_memmgr_munmap(ch->cyclestate.cyclestate_buffer_handler,
				ch->cyclestate.cyclestate_buffer);
		nvhost_memmgr_put(memmgr,
				ch->cyclestate.cyclestate_buffer_handler);
		ch->cyclestate.cyclestate_buffer_handler = NULL;
		ch->cyclestate.cyclestate_buffer = NULL;
		ch->cyclestate.cyclestate_buffer_size = 0;
	}
	mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
}

int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
			struct nvhost_cycle_stats_args *args)
{
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
	struct mem_handle *handle_ref;
	void *virtual_address;
	u64 cyclestate_buffer_size;
	struct platform_device *dev = ch->ch->dev;

	if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {

		/* set up new cyclestats buffer */
		handle_ref = nvhost_memmgr_get(memmgr,
				args->nvmap_handle, dev);
		if (IS_ERR(handle_ref))
			return PTR_ERR(handle_ref);
		virtual_address = nvhost_memmgr_mmap(handle_ref);
		if (!virtual_address)
			return -ENOMEM;

		nvhost_memmgr_get_param(memmgr, handle_ref,
				NVMAP_HANDLE_PARAM_SIZE,
				&cyclestate_buffer_size);

		ch->cyclestate.cyclestate_buffer_handler = handle_ref;
		ch->cyclestate.cyclestate_buffer = virtual_address;
		ch->cyclestate.cyclestate_buffer_size = cyclestate_buffer_size;
		return 0;

	} else if (!args->nvmap_handle &&
			ch->cyclestate.cyclestate_buffer_handler) {
		gk20a_free_cycle_stats_buffer(ch);
		return 0;

	} else if (!args->nvmap_handle &&
			!ch->cyclestate.cyclestate_buffer_handler) {
		/* no request from GL */
		return 0;

	} else {
		pr_err("channel already has cyclestats buffer\n");
		return -EINVAL;
	}
}
#endif

int gk20a_init_error_notifier(struct nvhost_hwctx *ctx,
		u32 memhandle, u64 offset) {
	struct channel_gk20a *ch = ctx->priv;
	struct platform_device *dev = ch->ch->dev;
	void *va;

	struct mem_mgr *memmgr;
	struct mem_handle *handle_ref;

	if (!memhandle) {
		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
		return -EINVAL;
	}

	memmgr = gk20a_channel_mem_mgr(ch);
	handle_ref = nvhost_memmgr_get(memmgr, memhandle, dev);

	if (ctx->error_notifier_ref)
		gk20a_free_error_notifiers(ctx);

	if (IS_ERR(handle_ref)) {
		pr_err("Invalid handle: %u\n", memhandle);
		return -EINVAL;
	}

	va = nvhost_memmgr_mmap(handle_ref);
	if (!va) {
		nvhost_memmgr_put(memmgr, handle_ref);
		pr_err("Cannot map notifier handle\n");
		return -ENOMEM;
	}

	/* set hwctx notifiers pointer */
	ctx->error_notifier_ref = handle_ref;
	ctx->error_notifier = va + offset;
	ctx->error_notifier_va = va;
	return 0;
}

void gk20a_set_error_notifier(struct nvhost_hwctx *ctx, __u32 error)
{
	if (ctx->error_notifier_ref) {
		struct timespec time_data;
		u64 nsec;
		getnstimeofday(&time_data);
		nsec = ((u64)time_data.tv_sec) * 1000000000u +
				(u64)time_data.tv_nsec;
		ctx->error_notifier->time_stamp.nanoseconds[0] =
				(u32)nsec;
		ctx->error_notifier->time_stamp.nanoseconds[1] =
				(u32)(nsec >> 32);
		ctx->error_notifier->info32 = error;
		ctx->error_notifier->status = 0xffff;
		nvhost_err(&ctx->channel->dev->dev,
			"error notifier set to %d\n", error);
	}
}

void gk20a_free_error_notifiers(struct nvhost_hwctx *ctx)
{
	if (ctx->error_notifier_ref) {
		struct channel_gk20a *ch = ctx->priv;
		struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
		nvhost_memmgr_munmap(ctx->error_notifier_ref,
				ctx->error_notifier_va);
		nvhost_memmgr_put(memmgr, ctx->error_notifier_ref);
		ctx->error_notifier_ref = 0;
	}
}

void gk20a_free_channel(struct nvhost_hwctx *ctx, bool finish)
{
	struct channel_gk20a *ch = ctx->priv;
	struct gk20a *g = ch->g;
	struct device *d = dev_from_gk20a(g);
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	struct vm_gk20a *ch_vm = ch->vm;
	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
	struct dbg_session_gk20a *dbg_s;

	/* if engine reset was deferred, perform it now */
	mutex_lock(&f->deferred_reset_mutex);
	if (g->fifo.deferred_reset_pending) {
		nvhost_dbg(dbg_intr | dbg_gpu_dbg, "engine reset was"
			" deferred, running now");
		fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
		g->fifo.mmu_fault_engines = 0;
		g->fifo.deferred_reset_pending = false;
	}
	mutex_unlock(&f->deferred_reset_mutex);

	if (!ch->bound)
		return;

	if (!gk20a_channel_as_bound(ch))
		goto unbind;

	nvhost_dbg_info("freeing bound channel context, timeout=%ld",
			timeout);

	gk20a_disable_channel(ch, finish && !ch->hwctx->has_timedout, timeout);

	gk20a_free_error_notifiers(ctx);

	/* release channel ctx */
	gk20a_free_channel_ctx(ch);

	gk20a_gr_flush_channel_tlb(gr);

	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));

	/* free gpfifo */
	if (ch->gpfifo.gpu_va)
		gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
			ch->gpfifo.size, mem_flag_none);
	if (ch->gpfifo.cpu_va)
		dma_free_coherent(d, ch->gpfifo.size,
			ch->gpfifo.cpu_va, ch->gpfifo.iova);
	ch->gpfifo.cpu_va = NULL;
	ch->gpfifo.iova = 0;

	gk20a_mm_l2_invalidate(ch->g);

	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));

#if defined(CONFIG_GK20A_CYCLE_STATS)
	gk20a_free_cycle_stats_buffer(ch);
#endif

	channel_gk20a_free_priv_cmdbuf(ch);

	/* release hwctx binding to the as_share */
	nvhost_as_release_share(ch_vm->as_share, ctx);

unbind:
	channel_gk20a_unbind(ch);
	channel_gk20a_free_inst(g, ch);

	/* unlink all debug sessions */
	mutex_lock(&ch->dbg_s_lock);

	list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
		dbg_s->ch = NULL;
		list_del_init(&dbg_s->dbg_s_list_node);
	}

	mutex_unlock(&ch->dbg_s_lock);

	release_used_channel(f, ch);
}

struct nvhost_hwctx *gk20a_open_channel(struct nvhost_channel *ch,
			struct nvhost_hwctx *ctx)
{
	struct gk20a *g = get_gk20a(ch->dev);
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch_gk20a;

	ch_gk20a = acquire_unused_channel(f);
	if (ch_gk20a == NULL) {
		/* TBD: we want to make this virtualizable */
		nvhost_err(dev_from_gk20a(g), "out of hw chids");
		return NULL;
	}

	ctx->priv = ch_gk20a;
	ch_gk20a->g = g;
	/* note the ch here is the same for *EVERY* gk20a channel */
	ch_gk20a->ch = ch;
	/* but there's one hwctx per gk20a channel */
	ch_gk20a->hwctx = ctx;

	if (channel_gk20a_alloc_inst(g, ch_gk20a)) {
		ch_gk20a->in_use = false;
		ctx->priv = NULL;
		nvhost_err(dev_from_gk20a(g),
			"failed to open gk20a channel, out of inst mem");
		return NULL;
	}
	channel_gk20a_bind(ch_gk20a);
	ch_gk20a->pid = current->pid;

	/* reset timeout counter and update timestamp */
	ch_gk20a->timeout_accumulated_ms = 0;
	ch_gk20a->timeout_gpfifo_get = 0;
	/* set gr host default timeout */
	ch_gk20a->hwctx->timeout_ms_max = gk20a_get_gr_idle_timeout(g);

	/* The channel is *not* runnable at this point. It still needs to have
	 * an address space bound and allocate a gpfifo and grctx. */

	init_waitqueue_head(&ch_gk20a->notifier_wq);
	init_waitqueue_head(&ch_gk20a->semaphore_wq);
	init_waitqueue_head(&ch_gk20a->submit_wq);

	return ctx;
}

/* move to debug_gk20a.c ... */
static void dump_gpfifo(struct channel_gk20a *c)
{
	void *inst_ptr;
	u32 chid = c->hw_chid;

	inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
	if (!inst_ptr)
		return;

	nvhost_dbg_info("ramfc for channel %d:\n"
		"ramfc: gp_base 0x%08x, gp_base_hi 0x%08x, "
		"gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
		"pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
		"pb_get 0x%08x, pb_get_hi 0x%08x, "
		"pb_put 0x%08x, pb_put_hi 0x%08x\n"
		"userd: gp_put 0x%08x, gp_get 0x%08x, "
		"get 0x%08x, get_hi 0x%08x, "
		"put 0x%08x, put_hi 0x%08x\n"
		"pbdma: status 0x%08x, channel 0x%08x, userd 0x%08x, "
		"gp_base 0x%08x, gp_base_hi 0x%08x, "
		"gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
		"pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
		"get 0x%08x, get_hi 0x%08x, put 0x%08x, put_hi 0x%08x\n"
		"channel: ccsr_channel 0x%08x",
		c->hw_chid,
		mem_rd32(inst_ptr, ram_fc_gp_base_w()),
		mem_rd32(inst_ptr, ram_fc_gp_base_hi_w()),
		mem_rd32(inst_ptr, ram_fc_gp_fetch_w()),
		mem_rd32(inst_ptr, ram_fc_gp_get_w()),
		mem_rd32(inst_ptr, ram_fc_gp_put_w()),
		mem_rd32(inst_ptr, ram_fc_pb_fetch_w()),
		mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()),
		mem_rd32(inst_ptr, ram_fc_pb_get_w()),
		mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()),
		mem_rd32(inst_ptr, ram_fc_pb_put_w()),
		mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_gp_put_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_gp_get_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_get_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_get_hi_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_put_w()),
		mem_rd32(c->userd_cpu_va, ram_userd_put_hi_w()),
		gk20a_readl(c->g, pbdma_status_r(0)),
		gk20a_readl(c->g, pbdma_channel_r(0)),
		gk20a_readl(c->g, pbdma_userd_r(0)),
		gk20a_readl(c->g, pbdma_gp_base_r(0)),
		gk20a_readl(c->g, pbdma_gp_base_hi_r(0)),
		gk20a_readl(c->g, pbdma_gp_fetch_r(0)),
		gk20a_readl(c->g, pbdma_gp_get_r(0)),
		gk20a_readl(c->g, pbdma_gp_put_r(0)),
		gk20a_readl(c->g, pbdma_pb_fetch_r(0)),
		gk20a_readl(c->g, pbdma_pb_fetch_hi_r(0)),
		gk20a_readl(c->g, pbdma_get_r(0)),
		gk20a_readl(c->g, pbdma_get_hi_r(0)),
		gk20a_readl(c->g, pbdma_put_r(0)),
		gk20a_readl(c->g, pbdma_put_hi_r(0)),
		gk20a_readl(c->g, ccsr_channel_r(chid)));

	nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
	gk20a_mm_l2_invalidate(c->g);
}

/* allocate private cmd buffer.
   used for inserting commands before/after user submitted buffers. */
static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	u32 i = 0, size;
	int err = 0;
	struct sg_table *sgt;

	/* Kernel can insert gpfifos before and after user gpfifos.
	   Before user gpfifos, kernel inserts fence_wait, which takes
	   syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
	   After user gpfifos, kernel inserts fence_get, which takes
	   wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
	   = 6 dwords.
	   Worst case, if the kernel adds both of them for every user gpfifo,
	   the max size of the priv_cmdbuf is:
	   (gpfifo entry number) * (2 / 3) * (4 + 6) * 4 bytes */
	size = roundup_pow_of_two(
		c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
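
	/* Worked example: with an (already tripled) 1024-entry gpfifo this
	 * is 1024 * 2 * 10 * 4 / 3 = 27306 bytes, rounded up to 32768. */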

	q->mem.base_cpuva = dma_alloc_coherent(d, size,
					&q->mem.base_iova,
					GFP_KERNEL);
	if (!q->mem.base_cpuva) {
		nvhost_err(d, "%s: memory allocation failed\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	q->mem.size = size;

	err = gk20a_get_sgtable(d, &sgt,
			q->mem.base_cpuva, q->mem.base_iova, size);
	if (err) {
		nvhost_err(d, "%s: failed to create sg table\n", __func__);
		goto clean_up;
	}

	memset(q->mem.base_cpuva, 0, size);

	q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
					size,
					0, /* flags */
					mem_flag_none);
	if (!q->base_gpuva) {
		nvhost_err(d, "ch %d : failed to map gpu va "
			"for priv cmd buffer", c->hw_chid);
		err = -ENOMEM;
		goto clean_up_sgt;
	}

	q->size = q->mem.size / sizeof(u32);

	INIT_LIST_HEAD(&q->head);
	INIT_LIST_HEAD(&q->free);

	/* pre-alloc 25% of priv cmdbuf entries and put them on free list */
	for (i = 0; i < q->size / 4; i++) {
		e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
		if (!e) {
			nvhost_err(d, "ch %d: fail to pre-alloc cmd entry",
				c->hw_chid);
			err = -ENOMEM;
			goto clean_up_sgt;
		}
		e->pre_alloc = true;
		list_add(&e->list, &q->free);
	}

	gk20a_free_sgtable(&sgt);

	return 0;

clean_up_sgt:
	gk20a_free_sgtable(&sgt);
clean_up:
	channel_gk20a_free_priv_cmdbuf(c);
	return err;
}

static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	struct list_head *pos, *tmp, *head;

	if (q->size == 0)
		return;

	if (q->base_gpuva)
		gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
				q->mem.size, mem_flag_none);
	if (q->mem.base_cpuva)
		dma_free_coherent(d, q->mem.size,
			q->mem.base_cpuva, q->mem.base_iova);
	q->mem.base_cpuva = NULL;
	q->mem.base_iova = 0;

	/* free used list */
	head = &q->head;
	list_for_each_safe(pos, tmp, head) {
		e = container_of(pos, struct priv_cmd_entry, list);
		free_priv_cmdbuf(c, e);
	}

	/* free pre-allocated entries on the free list */
	head = &q->free;
	list_for_each_safe(pos, tmp, head) {
		e = container_of(pos, struct priv_cmd_entry, list);
		e->pre_alloc = false;
		free_priv_cmdbuf(c, e);
	}

	memset(q, 0, sizeof(struct priv_cmd_queue));
}

/* allocate a cmd buffer with given size. size is number of u32 entries */
static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
			     struct priv_cmd_entry **entry)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	struct list_head *node;
	u32 free_count;
	u32 size = orig_size;
	bool no_retry = false;

	nvhost_dbg_fn("size %d", orig_size);

	*entry = NULL;

	/* if free space at the end is less than requested, increase the size
	 * to make the real allocated space start from the beginning. */
	if (q->put + size > q->size)
		size = orig_size + (q->size - q->put);

	nvhost_dbg_info("ch %d: priv cmd queue get:put %d:%d",
			c->hw_chid, q->get, q->put);

TRY_AGAIN:
	free_count = (q->size - (q->put - q->get) - 1) % q->size;
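
	/* Classic ring arithmetic: one slot is kept unused so a full queue
	 * is distinguishable from an empty one. E.g. q->size = 16, get = 2,
	 * put = 10 gives (16 - 8 - 1) % 16 = 7 words still free. */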

	if (size > free_count) {
		if (!no_retry) {
			recycle_priv_cmdbuf(c);
			no_retry = true;
			goto TRY_AGAIN;
		} else
			return -EAGAIN;
	}

	if (unlikely(list_empty(&q->free))) {

		nvhost_dbg_info("ch %d: run out of pre-alloc entries",
			c->hw_chid);

		e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
		if (!e) {
			nvhost_err(dev_from_gk20a(c->g),
				"ch %d: fail to allocate priv cmd entry",
				c->hw_chid);
			return -ENOMEM;
		}
	} else {
		node = q->free.next;
		list_del(node);
		e = container_of(node, struct priv_cmd_entry, list);
	}

	e->size = orig_size;
	e->gp_get = c->gpfifo.get;
	e->gp_put = c->gpfifo.put;
	e->gp_wrap = c->gpfifo.wrap;

	/* if we have increased size to skip free space at the end, set put
	   to beginning of cmd buffer (0) + size */
	if (size != orig_size) {
		e->ptr = q->mem.base_cpuva;
		e->gva = q->base_gpuva;
		q->put = orig_size;
	} else {
		e->ptr = q->mem.base_cpuva + q->put;
		e->gva = q->base_gpuva + q->put * sizeof(u32);
		q->put = (q->put + orig_size) & (q->size - 1);
	}

	/* we already handled q->put + size > q->size so BUG_ON this */
	BUG_ON(q->put > q->size);

	/* add new entry to head since we free from head */
	list_add(&e->list, &q->head);

	*entry = e;

	nvhost_dbg_fn("done");

	return 0;
}

/* Don't call this to free an explicit cmd entry.
 * It doesn't update priv_cmd_queue get/put */
static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;

	if (!e)
		return;

	list_del(&e->list);

	if (unlikely(!e->pre_alloc))
		kfree(e);
	else {
		memset(e, 0, sizeof(struct priv_cmd_entry));
		e->pre_alloc = true;
		list_add(&e->list, &q->free);
	}
}

/* free entries if they're no longer being used */
static void recycle_priv_cmdbuf(struct channel_gk20a *c)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e, *tmp;
	struct list_head *head = &q->head;
	bool wrap_around, found = false;

	/* Find the most recent free entry. Free it and everything before it */
	list_for_each_entry(e, head, list) {

		nvhost_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
			"curr get:put:wrap %d:%d:%d",
			c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
			c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);

		wrap_around = (c->gpfifo.wrap != e->gp_wrap);
		if (e->gp_get < e->gp_put) {
			if (c->gpfifo.get >= e->gp_put ||
			    wrap_around) {
				found = true;
				break;
			} else
				e->gp_get = c->gpfifo.get;
		} else if (e->gp_get > e->gp_put) {
			if (wrap_around &&
			    c->gpfifo.get >= e->gp_put) {
				found = true;
				break;
			} else
				e->gp_get = c->gpfifo.get;
		}
	}

	if (found)
		q->get = (e->ptr - q->mem.base_cpuva) + e->size;
	else {
		nvhost_dbg_info("no free entry recycled");
		return;
	}

	list_for_each_entry_safe_continue(e, tmp, head, list) {
		free_priv_cmdbuf(c, e);
	}

	nvhost_dbg_fn("done");
}

int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
			       struct nvhost_alloc_gpfifo_args *args)
{
	struct gk20a *g = c->g;
	struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
	struct device *d = dev_from_gk20a(g);
	struct vm_gk20a *ch_vm;
	u32 gpfifo_size;
	int err = 0;
	struct sg_table *sgt;

	/* The kernel can insert one extra gpfifo entry before user submitted
	   gpfifos and another one after, for internal usage. Triple the
	   requested size. */
	gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
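
	/* E.g. a request for 100 entries becomes 300, rounded up to 512; the
	 * power-of-two size is what lets the submit paths wrap the put
	 * pointer with & (entry_num - 1). */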

	if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
		c->vpr = true;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		nvhost_err(d,
			"not bound to an address space at time of gpfifo"
			" allocation. Attempting to create and bind to"
			" one...");
		return -EINVAL;
	}
	ch_vm = c->vm;

	c->cmds_pending = false;

	c->last_submit_fence.valid = false;
	c->last_submit_fence.syncpt_value = 0;
	c->last_submit_fence.syncpt_id = c->hw_chid + pdata->syncpt_base;

	c->ramfc.offset = 0;
	c->ramfc.size = ram_in_ramfc_s() / 8;

	if (c->gpfifo.cpu_va) {
		nvhost_err(d, "channel %d : "
			"gpfifo already allocated", c->hw_chid);
		return -EEXIST;
	}

	c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
	c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
						c->gpfifo.size,
						&c->gpfifo.iova,
						GFP_KERNEL);
	if (!c->gpfifo.cpu_va) {
		nvhost_err(d, "%s: memory allocation failed\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	c->gpfifo.entry_num = gpfifo_size;

	c->gpfifo.get = c->gpfifo.put = 0;

	err = gk20a_get_sgtable(d, &sgt,
			c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
	if (err) {
		nvhost_err(d, "%s: failed to allocate sg table\n", __func__);
		goto clean_up;
	}

	c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
					&sgt,
					c->gpfifo.size,
					0, /* flags */
					mem_flag_none);
	if (!c->gpfifo.gpu_va) {
		nvhost_err(d, "channel %d : failed to map"
			" gpu_va for gpfifo", c->hw_chid);
		err = -ENOMEM;
		goto clean_up_sgt;
	}

	nvhost_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
		c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);

	channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);

	channel_gk20a_setup_userd(c);
	channel_gk20a_commit_userd(c);

	gk20a_mm_l2_invalidate(c->g);

	/* TBD: setup engine contexts */

	err = channel_gk20a_alloc_priv_cmdbuf(c);
	if (err)
		goto clean_up_unmap;

	err = channel_gk20a_update_runlist(c, true);
	if (err)
		goto clean_up_unmap;

	gk20a_free_sgtable(&sgt);

	nvhost_dbg_fn("done");
	return 0;

clean_up_unmap:
	gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
		c->gpfifo.size, mem_flag_none);
clean_up_sgt:
	gk20a_free_sgtable(&sgt);
clean_up:
	dma_free_coherent(d, c->gpfifo.size,
		c->gpfifo.cpu_va, c->gpfifo.iova);
	c->gpfifo.cpu_va = NULL;
	c->gpfifo.iova = 0;
	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
	nvhost_err(d, "fail");
	return err;
}

static inline int wfi_cmd_size(void)
{
	return 2;
}

void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
{
	/* wfi */
	cmd->ptr[(*i)++] = 0x2001001E;
	/* handle, ignored */
	cmd->ptr[(*i)++] = 0x00000000;
}

static inline bool check_gp_put(struct gk20a *g,
				struct channel_gk20a *c)
{
	u32 put;
	/* gp_put changed unexpectedly since last update? */
	put = gk20a_bar1_readl(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w());
	if (c->gpfifo.put != put) {
		/* TBD: BUG_ON/teardown on this */
		nvhost_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
			"since last update");
		c->gpfifo.put = put;
		return false; /* surprise! */
	}
	return true; /* checked out ok */
}

/* Update with this periodically to determine how the gpfifo is draining. */
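/* The hardware GP_GET readback only moves forward modulo the ring size, so a
 * value smaller than the cached copy means the fifo wrapped; the software
 * wrap flag is toggled to keep priv cmd entry recycling in sync. */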
static inline u32 update_gp_get(struct gk20a *g,
				struct channel_gk20a *c)
{
	u32 new_get = gk20a_bar1_readl(g,
		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
	if (new_get < c->gpfifo.get)
		c->gpfifo.wrap = !c->gpfifo.wrap;
	c->gpfifo.get = new_get;
	return new_get;
}

static inline u32 gp_free_count(struct channel_gk20a *c)
{
	return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
		c->gpfifo.entry_num;
}
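
/* Free count is in gpfifo entries, with the usual one-slot reserve; the
 * submit path below checks it against num_entries plus two extra entries
 * (one syncpoint wait, one syncpoint increment). */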

bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
		u32 timeout_delta_ms)
{
	u32 gpfifo_get = update_gp_get(ch->g, ch);
	/* Count consecutive timeout ISRs */
	if (gpfifo_get == ch->timeout_gpfifo_get) {
		/* we didn't advance since previous channel timeout check */
		ch->timeout_accumulated_ms += timeout_delta_ms;
	} else {
		/* first timeout ISR encountered */
		ch->timeout_accumulated_ms = timeout_delta_ms;
	}

	ch->timeout_gpfifo_get = gpfifo_get;

	return ch->g->timeouts_enabled &&
		ch->timeout_accumulated_ms > ch->hwctx->timeout_ms_max;
}

/* Issue a syncpoint increment *preceded* by a wait-for-idle
 * command.  All commands on the channel will have been
 * consumed at the time the fence syncpoint increment occurs.
 */
int gk20a_channel_submit_wfi_fence(struct gk20a *g,
				struct channel_gk20a *c,
				struct nvhost_syncpt *sp,
				struct nvhost_fence *fence)
{
	struct priv_cmd_entry *cmd = NULL;
	int cmd_size, j = 0;
	u32 free_count;
	int err;

	if (c->hwctx->has_timedout)
		return -ETIMEDOUT;

	cmd_size = 4 + wfi_cmd_size();

	update_gp_get(g, c);
	free_count = gp_free_count(c);
	if (unlikely(!free_count)) {
		nvhost_err(dev_from_gk20a(g),
			"not enough gpfifo space");
		return -EAGAIN;
	}

	err = alloc_priv_cmdbuf(c, cmd_size, &cmd);
	if (unlikely(err)) {
		nvhost_err(dev_from_gk20a(g),
			"not enough priv cmd buffer space");
		return err;
	}

	fence->value = nvhost_syncpt_incr_max(sp, fence->syncpt_id, 1);

	c->last_submit_fence.valid = true;
	c->last_submit_fence.syncpt_value = fence->value;
	c->last_submit_fence.syncpt_id = fence->syncpt_id;
	c->last_submit_fence.wfi = true;

	trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);

	add_wfi_cmd(cmd, &j);

	/* syncpoint_a */
	cmd->ptr[j++] = 0x2001001C;
	/* payload, ignored */
	cmd->ptr[j++] = 0;
	/* syncpoint_b */
	cmd->ptr[j++] = 0x2001001D;
	/* syncpt_id, incr */
	cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;

	c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
	c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
		pbdma_gp_entry1_length_f(cmd->size);

	c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);

	/* save gp_put */
	cmd->gp_put = c->gpfifo.put;

	gk20a_bar1_writel(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
		c->gpfifo.put);

	nvhost_dbg_info("post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	return 0;
}

static u32 get_gp_free_count(struct channel_gk20a *c)
{
	update_gp_get(c->g, c);
	return gp_free_count(c);
}

static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
{
	void *mem = NULL;
	unsigned int words;
	u64 offset;
	struct mem_handle *r = NULL;

	if (nvhost_debug_trace_cmdbuf) {
		u64 gpu_va = (u64)g->entry0 |
			(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
		struct mem_mgr *memmgr = NULL;
		int err;

		words = pbdma_gp_entry1_length_v(g->entry1);
		err = gk20a_vm_find_buffer(c->vm, gpu_va, &memmgr, &r,
					&offset);
		if (!err)
			mem = nvhost_memmgr_mmap(r);
	}

	if (mem) {
		u32 i;
		/*
		 * Write in batches of 128 as there seems to be a limit
		 * of how much you can output to ftrace at once.
		 */
		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
			trace_nvhost_cdma_push_gather(
				c->ch->dev->name,
				0,
				min(words - i, TRACE_MAX_LENGTH),
				offset + i * sizeof(u32),
				mem);
		}
		nvhost_memmgr_munmap(r, mem);
	}
}

static int gk20a_channel_add_job(struct channel_gk20a *c,
				 struct nvhost_fence *fence)
{
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job = NULL;
	struct mapped_buffer_node **mapped_buffers = NULL;
	int err = 0, num_mapped_buffers;

	/* job needs reference to this vm */
	gk20a_vm_get(vm);

	err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
	if (err) {
		gk20a_vm_put(vm);
		return err;
	}

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job) {
		gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
		gk20a_vm_put(vm);
		return -ENOMEM;
	}

	job->num_mapped_buffers = num_mapped_buffers;
	job->mapped_buffers = mapped_buffers;
	job->fence = *fence;

	mutex_lock(&c->jobs_lock);
	list_add_tail(&job->list, &c->jobs);
	mutex_unlock(&c->jobs_lock);

	return 0;
}

void gk20a_channel_update(struct channel_gk20a *c)
{
	struct gk20a *g = c->g;
	struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job, *n;

	mutex_lock(&c->jobs_lock);
	list_for_each_entry_safe(job, n, &c->jobs, list) {
		bool completed = nvhost_syncpt_is_expired(sp,
			job->fence.syncpt_id, job->fence.value);
		if (!completed)
			break;

		gk20a_vm_put_buffers(vm, job->mapped_buffers,
				job->num_mapped_buffers);

		/* job is done. release its reference to vm */
		gk20a_vm_put(vm);

		list_del_init(&job->list);
		kfree(job);
		nvhost_module_idle(g->dev);
	}
	mutex_unlock(&c->jobs_lock);
}

#ifdef CONFIG_DEBUG_FS
static void gk20a_sync_debugfs(struct gk20a *g)
{
	u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
	spin_lock(&g->debugfs_lock);
	if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) {
		u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
		if (g->mm.ltc_enabled_debug)
			/* bypass disabled (normal caching ops) */
			reg &= ~reg_f;
		else
			/* bypass enabled (no caching) */
			reg |= reg_f;

		gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
		g->mm.ltc_enabled = g->mm.ltc_enabled_debug;
	}
	spin_unlock(&g->debugfs_lock);
}
#endif
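
/* Builds the 4-dword fence_wait sequence described in the priv cmdbuf sizing
 * comment above: syncpoint_a carries the threshold payload, syncpoint_b the
 * syncpoint id plus the wait/switch_en operation bits (0x10). */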
void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
{
	/* syncpoint_a */
	ptr[0] = 0x2001001C;
	/* payload */
	ptr[1] = thresh;
	/* syncpoint_b */
	ptr[2] = 0x2001001D;
	/* syncpt_id, switch_en, wait */
	ptr[3] = (id << 8) | 0x10;
}

int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
				struct nvhost_gpfifo *gpfifo,
				u32 num_entries,
				struct nvhost_fence *fence,
				u32 flags)
{
	struct gk20a *g = c->g;
	struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
	struct device *d = dev_from_gk20a(g);
	struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
	u32 i, incr_id = ~0, wait_id = ~0, wait_value = 0;
	int err = 0;
	int incr_cmd_size;
	bool wfi_cmd;
	int num_wait_cmds = 0;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct sync_fence *sync_fence = NULL;
	/* we might need two extra gpfifo entries - one for syncpoint
	 * wait and one for syncpoint increment */
	const int extra_entries = 2;

	if (c->hwctx->has_timedout)
		return -ETIMEDOUT;

	if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
		      NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
	    !fence)
		return -EINVAL;

#ifdef CONFIG_DEBUG_FS
	/* update debug settings */
	gk20a_sync_debugfs(g);
#endif

	nvhost_dbg_info("channel %d", c->hw_chid);

	nvhost_module_busy(g->dev);
	trace_nvhost_channel_submit_gpfifo(c->ch->dev->name,
					c->hw_chid,
					num_entries,
					flags,
					fence->syncpt_id, fence->value,
					c->hw_chid + pdata->syncpt_base);

	update_gp_get(g, c);

	nvhost_dbg_info("pre-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	/* If the caller has requested a fence "get" then we need to be
	 * sure the fence represents work completion. In that case
	 * issue a wait-for-idle before the syncpoint increment.
	 */
	wfi_cmd = !!(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
		&& c->obj_class != KEPLER_C;

	/* Invalidate tlb if it's dirty...
	 * TBD: this should be done in the cmd stream, not with PRIs.
	 * We don't know what context is currently running...
	 * Note also: there can be more than one context associated with the
	 * address space (vm). */
	gk20a_mm_tlb_invalidate(c->vm);

	/* Make sure we have enough space for gpfifo entries. If not,
	 * wait for signals from completed submits */
	if (gp_free_count(c) < num_entries + extra_entries) {
		err = wait_event_interruptible(c->submit_wq,
			get_gp_free_count(c) >= num_entries + extra_entries ||
			c->hwctx->has_timedout);

		if (c->hwctx->has_timedout) {
			err = -ETIMEDOUT;
			goto clean_up;
		}

		if (err) {
			nvhost_err(d, "not enough gpfifo space");
			err = -EAGAIN;
			goto clean_up;
		}
	}

	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE
			&& flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
		sync_fence = nvhost_sync_fdget(fence->syncpt_id);
		if (!sync_fence) {
			nvhost_err(d, "invalid fence fd");
			err = -EINVAL;
			goto clean_up;
		}
		num_wait_cmds = nvhost_sync_num_pts(sync_fence);
	}
	/*
	 * optionally insert syncpt wait in the beginning of gpfifo submission
	 * when user requested and the wait hasn't expired.
	 * validate that the id makes sense, elide if not
	 * the only reason this isn't being unceremoniously killed is to
	 * keep running some tests which trigger this condition
	 */
	else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
		if (fence->syncpt_id >= nvhost_syncpt_nb_pts(sp))
			dev_warn(d,
				"invalid wait id in gpfifo submit, elided");
		if (!nvhost_syncpt_is_expired(sp,
				fence->syncpt_id, fence->value))
			num_wait_cmds = 1;
	}

	if (num_wait_cmds) {
		alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
		if (wait_cmd == NULL) {
			nvhost_err(d, "not enough priv cmd buffer space");
			err = -EAGAIN;
			goto clean_up;
		}
	}

	/* always insert syncpt increment at end of gpfifo submission
	   to keep track of method completion for idle railgating */
	/* TODO: we need to find a way to get rid of these wfi on every
	 * submission...
	 */
	incr_cmd_size = 4;
	if (wfi_cmd)
		incr_cmd_size += wfi_cmd_size();
	alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
	if (incr_cmd == NULL) {
		nvhost_err(d, "not enough priv cmd buffer space");
		err = -EAGAIN;
		goto clean_up;
	}

	if (num_wait_cmds) {
		if (sync_fence) {
			struct sync_pt *pos;
			struct nvhost_sync_pt *pt;
			i = 0;

			list_for_each_entry(pos, &sync_fence->pt_list_head,
					pt_list) {
				pt = to_nvhost_sync_pt(pos);

				wait_id = nvhost_sync_pt_id(pt);
				wait_value = nvhost_sync_pt_thresh(pt);

				add_wait_cmd(&wait_cmd->ptr[i * 4],
						wait_id, wait_value);
				i++;
			}
			sync_fence_put(sync_fence);
			sync_fence = NULL;
		} else {
			wait_id = fence->syncpt_id;
			wait_value = fence->value;
			add_wait_cmd(&wait_cmd->ptr[0],
					wait_id, wait_value);
		}

		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
			u64_lo32(wait_cmd->gva);
		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
			u64_hi32(wait_cmd->gva) |
			pbdma_gp_entry1_length_f(wait_cmd->size);
		trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		wait_cmd->gp_put = c->gpfifo.put;
	}

	for (i = 0; i < num_entries; i++) {
		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
			gpfifo[i].entry0; /* cmd buf va low 32 */
		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
			gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
		trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);
	}

	if (incr_cmd) {
		int j = 0;

		incr_id = c->hw_chid + pdata->syncpt_base;
		fence->syncpt_id = incr_id;
		fence->value = nvhost_syncpt_incr_max(sp, incr_id, 1);

		c->last_submit_fence.valid = true;
		c->last_submit_fence.syncpt_value = fence->value;
		c->last_submit_fence.syncpt_id = fence->syncpt_id;
		c->last_submit_fence.wfi = wfi_cmd;

		trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);
		if (c->obj_class == KEPLER_C) {
			/* setobject KEPLER_C */
			incr_cmd->ptr[j++] = 0x20010000;
			incr_cmd->ptr[j++] = KEPLER_C;
			/* syncpt incr */
			incr_cmd->ptr[j++] = 0x200100B2;
			incr_cmd->ptr[j++] = fence->syncpt_id | (0x1 << 20)
				| (0x1 << 16);
		} else {
			if (wfi_cmd)
				add_wfi_cmd(incr_cmd, &j);
			/* syncpoint_a */
			incr_cmd->ptr[j++] = 0x2001001C;
			/* payload, ignored */
			incr_cmd->ptr[j++] = 0;
			/* syncpoint_b */
			incr_cmd->ptr[j++] = 0x2001001D;
			/* syncpt_id, incr */
			incr_cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;
		}

		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
			u64_lo32(incr_cmd->gva);
		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
			u64_hi32(incr_cmd->gva) |
			pbdma_gp_entry1_length_f(incr_cmd->size);
		trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		incr_cmd->gp_put = c->gpfifo.put;
	}

	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
		struct nvhost_ctrl_sync_fence_info pts;

		pts.id = fence->syncpt_id;
		pts.thresh = fence->value;

		fence->syncpt_id = 0;
		fence->value = 0;
		err = nvhost_sync_create_fence(sp, &pts, 1, "fence",
				&fence->syncpt_id);
		if (err)
			goto clean_up;
	}

	/* Invalidate tlb if it's dirty...
	 * TBD: this should be done in the cmd stream, not with PRIs.
	 * We don't know what context is currently running...
	 * Note also: there can be more than one context associated with the
	 * address space (vm). */
	gk20a_mm_tlb_invalidate(c->vm);

	trace_nvhost_channel_submitted_gpfifo(c->ch->dev->name,
					c->hw_chid,
					num_entries,
					flags,
					wait_id, wait_value,
					fence->syncpt_id, fence->value);

	/* TODO! Check for errors... */
	gk20a_channel_add_job(c, fence);

	c->cmds_pending = true;
	gk20a_bar1_writel(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
		c->gpfifo.put);

	nvhost_dbg_info("post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	nvhost_dbg_fn("done");
	return 0;

clean_up:
	if (sync_fence)
		sync_fence_put(sync_fence);
	nvhost_err(d, "fail");
	free_priv_cmdbuf(c, wait_cmd);
	free_priv_cmdbuf(c, incr_cmd);
	nvhost_module_idle(g->dev);
	return err;
}

void gk20a_remove_channel_support(struct channel_gk20a *c)
{
}

int gk20a_init_channel_support(struct gk20a *g, u32 chid)
{
	struct channel_gk20a *c = g->fifo.channel+chid;
	c->g = g;
	c->in_use = false;
	c->hw_chid = chid;
	c->bound = false;
	c->remove_support = gk20a_remove_channel_support;
	mutex_init(&c->jobs_lock);
	INIT_LIST_HEAD(&c->jobs);
#if defined(CONFIG_GK20A_CYCLE_STATS)
	mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
#endif
	INIT_LIST_HEAD(&c->dbg_s_list);
	mutex_init(&c->dbg_s_lock);

	return 0;
}

int gk20a_channel_init(struct nvhost_channel *ch,
		       struct nvhost_master *host, int index)
{
	return 0;
}

int gk20a_channel_alloc_obj(struct nvhost_channel *channel,
			u32 class_num,
			u32 *obj_id,
			u32 vaspace_share)
{
	return 0;
}

int gk20a_channel_free_obj(struct nvhost_channel *channel, u32 obj_id)
{
	return 0;
}

int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
{
	struct nvhost_syncpt *sp = syncpt_from_gk20a(ch->g);
	struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
	struct nvhost_fence fence;
	int err = 0;

	if (!ch->cmds_pending)
		return 0;

	/* Do not wait for a timedout channel */
	if (ch->hwctx && ch->hwctx->has_timedout)
		return -ETIMEDOUT;

	if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
		nvhost_dbg_fn("issuing wfi, incr to finish the channel");
		fence.syncpt_id = ch->hw_chid + pdata->syncpt_base;
		err = gk20a_channel_submit_wfi_fence(ch->g, ch,
				sp, &fence);
	}
	if (err)
		return err;

	BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));

	nvhost_dbg_fn("waiting for channel to finish syncpt:%d val:%d",
		ch->last_submit_fence.syncpt_id,
		ch->last_submit_fence.syncpt_value);

	err = nvhost_syncpt_wait_timeout(sp,
			ch->last_submit_fence.syncpt_id,
			ch->last_submit_fence.syncpt_value,
			timeout, &fence.value, NULL, false);
	if (err)
		dev_warn(dev_from_gk20a(ch->g),
			"timed out waiting for gk20a channel to finish");
	else
		ch->cmds_pending = false;

	return err;
}

static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
					ulong id, u32 offset,
					u32 payload, long timeout)
{
	struct platform_device *pdev = ch->ch->dev;
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
	struct mem_handle *handle_ref;
	void *data;
	u32 *semaphore;
	int ret = 0;
	long remain;

	/* do not wait if channel has timed out */
	if (ch->hwctx->has_timedout)
		return -ETIMEDOUT;

	handle_ref = nvhost_memmgr_get(memmgr, id, pdev);
	if (IS_ERR(handle_ref)) {
		nvhost_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
			id);
		return -EINVAL;
	}

	data = nvhost_memmgr_kmap(handle_ref, offset >> PAGE_SHIFT);
	if (!data) {
		nvhost_err(&pdev->dev, "failed to map notifier memory");
		ret = -EINVAL;
		goto cleanup_put;
	}

	semaphore = data + (offset & ~PAGE_MASK);

	remain = wait_event_interruptible_timeout(
			ch->semaphore_wq,
			*semaphore == payload || ch->hwctx->has_timedout,
			timeout);

	if (remain == 0 && *semaphore != payload)
		ret = -ETIMEDOUT;
	else if (remain < 0)
		ret = remain;

	nvhost_memmgr_kunmap(handle_ref, offset >> PAGE_SHIFT, data);
cleanup_put:
	nvhost_memmgr_put(memmgr, handle_ref);
	return ret;
}

int gk20a_channel_wait(struct channel_gk20a *ch,
		       struct nvhost_wait_args *args)
{
	struct device *d = dev_from_gk20a(ch->g);
	struct platform_device *dev = ch->ch->dev;
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
	struct mem_handle *handle_ref;
	struct notification *notif;
	struct timespec tv;
	u64 jiffies;
	ulong id;
	u32 offset;
	unsigned long timeout;
	int remain, ret = 0;

	if (ch->hwctx->has_timedout)
		return -ETIMEDOUT;

	if (args->timeout == NVHOST_NO_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT;
	else
		timeout = (u32)msecs_to_jiffies(args->timeout);

	switch (args->type) {
	case NVHOST_WAIT_TYPE_NOTIFIER:
		id = args->condition.notifier.nvmap_handle;
		offset = args->condition.notifier.offset;

		handle_ref = nvhost_memmgr_get(memmgr, id, dev);
		if (IS_ERR(handle_ref)) {
			nvhost_err(d, "invalid notifier nvmap handle 0x%lx",
				id);
			return -EINVAL;
		}

		notif = nvhost_memmgr_mmap(handle_ref);
		if (!notif) {
			nvhost_err(d, "failed to map notifier memory");
			return -ENOMEM;
		}

		notif = (struct notification *)((uintptr_t)notif + offset);

		/* user should set status pending before
		 * calling this ioctl */
		remain = wait_event_interruptible_timeout(
				ch->notifier_wq,
				notif->status == 0 || ch->hwctx->has_timedout,
				timeout);

		if (remain == 0 && notif->status != 0) {
			ret = -ETIMEDOUT;
			goto notif_clean_up;
		} else if (remain < 0) {
			ret = -EINTR;
			goto notif_clean_up;
		}

		/* TBD: fill in correct information */
		jiffies = get_jiffies_64();
		jiffies_to_timespec(jiffies, &tv);
		notif->timestamp.nanoseconds[0] = tv.tv_nsec;
		notif->timestamp.nanoseconds[1] = tv.tv_sec;
		notif->info32 = 0xDEADBEEF; /* should be object name */
		notif->info16 = ch->hw_chid; /* should be method offset */

notif_clean_up:
		nvhost_memmgr_munmap(handle_ref, notif);
		return ret;

	case NVHOST_WAIT_TYPE_SEMAPHORE:
		ret = gk20a_channel_wait_semaphore(ch,
				args->condition.semaphore.nvmap_handle,
				args->condition.semaphore.offset,
				args->condition.semaphore.payload,
				timeout);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

int gk20a_channel_set_priority(struct channel_gk20a *ch,
		u32 priority)
{
	u32 timeslice_timeout;
	/* set priority of graphics channel */
	switch (priority) {
	case NVHOST_PRIORITY_LOW:
		/* 64 << 3 = 512us */
		timeslice_timeout = 64;
		break;
	case NVHOST_PRIORITY_MEDIUM:
		/* 128 << 3 = 1024us */
		timeslice_timeout = 128;
		break;
	case NVHOST_PRIORITY_HIGH:
		/* 255 << 3 = 2048us */
		timeslice_timeout = 255;
		break;
	default:
		pr_err("Unsupported priority");
		return -EINVAL;
	}
	channel_gk20a_set_schedule_params(ch,
			timeslice_timeout);

	return 0;
}

int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
			    struct nvhost_zcull_bind_args *args)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;

	return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
				args->gpu_va, args->mode);
}

/* in this context the "channel" is the host1x channel which
 * maps to *all* gk20a channels */
int gk20a_channel_suspend(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;
	struct nvhost_fence fence;
	struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
	struct device *d = dev_from_gk20a(g);
	struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
	int err;

	/* idle the engine by submitting WFI on non-KEPLER_C channel */
	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *c = &f->channel[chid];
		if (c->in_use && c->obj_class != KEPLER_C) {
			fence.syncpt_id = chid + pdata->syncpt_base;
			err = gk20a_channel_submit_wfi_fence(g,
					c, sp, &fence);
			if (err) {
				nvhost_err(d, "cannot idle channel %d\n",
					chid);
				return err;
			}

			nvhost_syncpt_wait_timeout(sp,
					fence.syncpt_id, fence.value,
					500000,
					NULL, NULL,
					false);
		}
	}

	for (chid = 0; chid < f->num_channels; chid++) {
		if (f->channel[chid].in_use) {

			nvhost_dbg_info("suspend channel %d", chid);
			/* disable channel */
			gk20a_writel(g, ccsr_channel_r(chid),
				gk20a_readl(g, ccsr_channel_r(chid)) |
				ccsr_channel_enable_clr_true_f());
			/* preempt the channel */
			gk20a_fifo_preempt_channel(g, chid);

			channels_in_use = true;
		}
	}

	if (channels_in_use) {
		gk20a_fifo_update_runlist(g, 0, ~0, false, true);

		for (chid = 0; chid < f->num_channels; chid++) {
			if (f->channel[chid].in_use)
				channel_gk20a_unbind(&f->channel[chid]);
		}
	}

	nvhost_dbg_fn("done");
	return 0;
}

/* in this context the "channel" is the host1x channel which
 * maps to *all* gk20a channels */
int gk20a_channel_resume(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;

	for (chid = 0; chid < f->num_channels; chid++) {
		if (f->channel[chid].in_use) {
			nvhost_dbg_info("resume channel %d", chid);
			channel_gk20a_bind(&f->channel[chid]);
			channels_in_use = true;
		}
	}

	if (channels_in_use)
		gk20a_fifo_update_runlist(g, 0, ~0, true, true);

	nvhost_dbg_fn("done");
	return 0;
}

void gk20a_channel_semaphore_wakeup(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *c = g->fifo.channel+chid;
		if (c->in_use)
			wake_up_interruptible_all(&c->semaphore_wq);
	}
}