2 * drivers/video/tegra/host/gk20a/channel_gk20a.c
4 * GK20A Graphics channel
6 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
22 #include <linux/nvhost.h>
23 #include <linux/list.h>
24 #include <linux/delay.h>
25 #include <linux/highmem.h> /* need for nvmap.h*/
26 #include <trace/events/gk20a.h>
27 #include <linux/scatterlist.h>
28 #include <linux/file.h>
29 #include <linux/anon_inodes.h>
30 #include <linux/dma-buf.h>
32 #include "debug_gk20a.h"
35 #include "dbg_gpu_gk20a.h"
37 #include "hw_ram_gk20a.h"
38 #include "hw_fifo_gk20a.h"
39 #include "hw_pbdma_gk20a.h"
40 #include "hw_ccsr_gk20a.h"
41 #include "hw_ltc_gk20a.h"
43 #define NVMAP_HANDLE_PARAM_SIZE 1
45 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
46 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
48 static void free_priv_cmdbuf(struct channel_gk20a *c,
49 struct priv_cmd_entry *e);
50 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
52 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
53 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
55 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
56 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
57 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
58 u64 gpfifo_base, u32 gpfifo_entries);
60 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
61 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
63 static int channel_gk20a_alloc_inst(struct gk20a *g,
64 struct channel_gk20a *ch);
65 static void channel_gk20a_free_inst(struct gk20a *g,
66 struct channel_gk20a *ch);
68 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
70 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
/*
 * Claim the first fifo channel whose in_use flag is clear, under
 * ch_inuse_mutex, and return it (ch stays NULL when none is free).
 * NOTE(review): interior lines (loop break/return/closing braces) are
 * missing from this extract — confirm against the full file.
 */
72 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
74 struct channel_gk20a *ch = NULL;
77 mutex_lock(&f->ch_inuse_mutex);
78 for (chid = 0; chid < f->num_channels; chid++) {
79 if (!f->channel[chid].in_use) {
80 f->channel[chid].in_use = true;
81 ch = &f->channel[chid];
85 mutex_unlock(&f->ch_inuse_mutex);
/* Return channel c to the free pool by clearing its in_use flag
 * under the same ch_inuse_mutex used by acquire_unused_channel(). */
90 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
92 mutex_lock(&f->ch_inuse_mutex);
93 f->channel[c->hw_chid].in_use = false;
94 mutex_unlock(&f->ch_inuse_mutex);
/*
 * Program the channel's instance block with its VM page-directory base
 * (lo/hi words, video memory target, volatile) and the VA limit.
 * The PDE physical address is taken from the first SG entry of the
 * page-directory table.
 */
97 int channel_gk20a_commit_va(struct channel_gk20a *c)
106 inst_ptr = c->inst_block.cpuva;
110 addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
/* Hardware takes the low word pre-shifted by 12 (4K page alignment). */
111 addr_lo = u64_lo32(addr >> 12);
112 addr_hi = u64_hi32(addr);
114 gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
115 (u64)addr, addr_lo, addr_hi);
117 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
118 ram_in_page_dir_base_target_vid_mem_f() |
119 ram_in_page_dir_base_vol_true_f() |
120 ram_in_page_dir_base_lo_f(addr_lo));
122 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
123 ram_in_page_dir_base_hi_f(addr_hi));
/* VA limit low word is rounded up to the end of the last 4K page. */
125 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
126 u64_lo32(c->vm->va_limit) | 0xFFF);
128 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
129 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
/*
 * Write the channel's USERD IOVA (lo/hi, video-memory target) into the
 * RAMFC area of the instance block so the PBDMA can find it.
 */
134 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
142 inst_ptr = c->inst_block.cpuva;
/* Low word is pre-shifted by the hardware's userd base alignment. */
146 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
147 addr_hi = u64_hi32(c->userd_iova);
149 gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
150 c->hw_chid, (u64)c->userd_iova);
152 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
153 pbdma_userd_target_vid_mem_f() |
154 pbdma_userd_addr_f(addr_lo));
156 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
157 pbdma_userd_target_vid_mem_f() |
158 pbdma_userd_hi_addr_f(addr_hi));
/*
 * Update the channel's engine timeslice.  The channel is disabled and
 * preempted first, the requested timeout is scaled into an 8-bit
 * mantissa plus exponent (clamped to the 18-bit register field), the
 * new value is written into RAMFC, and the channel is re-enabled.
 * NOTE(review): the mantissa-scaling loop body and the clamp assignment
 * are missing from this extract.
 */
163 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
164 u32 timeslice_timeout)
168 int value = timeslice_timeout;
170 inst_ptr = c->inst_block.cpuva;
174 /* disable channel */
175 gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
176 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
177 ccsr_channel_enable_clr_true_f());
179 /* preempt the channel */
180 WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
182 /* value field is 8 bits long */
183 while (value >= 1 << 8) {
188 /* time slice register is only 18bits long */
189 if ((value << shift) >= 1<<19) {
190 pr_err("Requested timeslice value is clamped to 18 bits\n");
195 /* set new timeslice */
196 gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
197 value | (shift << 12) |
198 fifo_eng_timeslice_enable_true_f());
/* re-enable the channel now that the new timeslice is committed */
201 gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
202 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
203 ccsr_channel_enable_set_true_f());
/*
 * Initialize the channel's RAMFC (fifo context) inside the instance
 * block: zero it, then program the GPFIFO base/limit, signature,
 * pushbuffer formats and header defaults, subdevice, acquire retry and
 * timeout parameters, engine and PB timeslices, and the channel id.
 */
208 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
209 u64 gpfifo_base, u32 gpfifo_entries)
215 inst_ptr = c->inst_block.cpuva;
219 memset(inst_ptr, 0, ram_fc_size_val_v());
/* GPFIFO base address, lo word pre-shifted by the reserved bits;
 * limit2 encodes log2 of the entry count (entries must be a power
 * of two). */
221 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
222 pbdma_gp_base_offset_f(
223 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
225 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
226 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
227 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
229 gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
230 pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
232 gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
233 pbdma_formats_gp_fermi0_f() |
234 pbdma_formats_pb_fermi1_f() |
235 pbdma_formats_mp_fermi0_f());
237 gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
238 pbdma_pb_header_priv_user_f() |
239 pbdma_pb_header_method_zero_f() |
240 pbdma_pb_header_subchannel_zero_f() |
241 pbdma_pb_header_level_main_f() |
242 pbdma_pb_header_first_true_f() |
243 pbdma_pb_header_type_inc_f());
245 gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
246 pbdma_subdevice_id_f(1) |
247 pbdma_subdevice_status_active_f() |
248 pbdma_subdevice_channel_dma_enable_f());
250 gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
/* acquire timeout disabled by default; retry/timeout fields still
 * programmed so enabling later only needs the enable bit */
252 gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
253 pbdma_acquire_retry_man_2_f() |
254 pbdma_acquire_retry_exp_2_f() |
255 pbdma_acquire_timeout_exp_max_f() |
256 pbdma_acquire_timeout_man_max_f() |
257 pbdma_acquire_timeout_en_disable_f());
259 gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
260 fifo_eng_timeslice_timeout_128_f() |
261 fifo_eng_timeslice_timescale_3_f() |
262 fifo_eng_timeslice_enable_true_f());
264 gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
265 fifo_pb_timeslice_timeout_16_f() |
266 fifo_pb_timeslice_timescale_0_f() |
267 fifo_pb_timeslice_enable_true_f());
269 gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
/* Zero all USERD words (put/get pointers, ref counts, gpfifo
 * top-level get) through the CPU mapping; requires userd_cpu_va. */
274 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
276 BUG_ON(!c->userd_cpu_va);
280 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
281 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
282 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
283 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
284 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
285 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
286 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
287 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
288 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
289 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
/*
 * Bind the channel to hardware: select the GR engine's runlist in the
 * CCSR channel register, bind the instance block pointer (vid mem
 * target), and finally set the channel enable bit.
 */
294 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
296 struct gk20a *g = ch_gk20a->g;
297 struct fifo_gk20a *f = &g->fifo;
298 struct fifo_engine_info_gk20a *engine_info =
299 f->engine_info + ENGINE_GR_GK20A;
/* inst pointer register takes the physical address shifted down by
 * the instance-block alignment */
301 u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
302 >> ram_in_base_shift_v();
304 gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
305 ch_gk20a->hw_chid, inst_ptr);
307 ch_gk20a->bound = true;
/* read-modify-write: replace only the runlist field */
309 gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
310 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
311 ~ccsr_channel_runlist_f(~0)) |
312 ccsr_channel_runlist_f(engine_info->runlist_id));
314 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
315 ccsr_channel_inst_ptr_f(inst_ptr) |
316 ccsr_channel_inst_target_vid_mem_f() |
317 ccsr_channel_inst_bind_true_f());
319 gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
320 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
321 ~ccsr_channel_enable_set_f(~0)) |
322 ccsr_channel_enable_set_true_f());
/*
 * Unbind the channel from hardware: clear the instance pointer binding,
 * mark the channel unbound, and optionally tear down the sync object
 * early when aggressive destruction is enabled.
 */
325 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
327 struct gk20a *g = ch_gk20a->g;
332 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
333 ccsr_channel_inst_ptr_f(0) |
334 ccsr_channel_inst_bind_false_f());
336 ch_gk20a->bound = false;
/*
 * if we are aggressive then we can destroy the syncpt
 * resource at this point
 * if not, then it will be destroyed at channel_free()
 */
343 if (ch_gk20a->sync && ch_gk20a->sync->syncpt_aggressive_destroy) {
344 ch_gk20a->sync->destroy(ch_gk20a->sync);
345 ch_gk20a->sync = NULL;
/*
 * Allocate the channel's instance block via dma_alloc_coherent and
 * resolve its physical address from the IOVA.  On any failure the
 * partially-initialized block is released through
 * channel_gk20a_free_inst().
 */
349 static int channel_gk20a_alloc_inst(struct gk20a *g,
350 struct channel_gk20a *ch)
352 struct device *d = dev_from_gk20a(g);
358 ch->inst_block.size = ram_in_alloc_size_v();
359 ch->inst_block.cpuva = dma_alloc_coherent(d,
363 if (!ch->inst_block.cpuva) {
364 gk20a_err(d, "%s: memory allocation failed\n", __func__);
369 ch->inst_block.iova = iova;
370 ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
371 ch->inst_block.iova);
372 if (!ch->inst_block.cpu_pa) {
373 gk20a_err(d, "%s: failed to get physical address\n", __func__);
378 gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
379 ch->hw_chid, (u64)ch->inst_block.cpu_pa);
381 gk20a_dbg_fn("done");
/* error path: free whatever was allocated above */
385 gk20a_err(d, "fail");
386 channel_gk20a_free_inst(g, ch);
/* Free the instance block's coherent DMA memory (if allocated) and
 * reset the whole descriptor so a later free is a no-op. */
390 static void channel_gk20a_free_inst(struct gk20a *g,
391 struct channel_gk20a *ch)
393 struct device *d = dev_from_gk20a(g);
395 if (ch->inst_block.cpuva)
396 dma_free_coherent(d, ch->inst_block.size,
397 ch->inst_block.cpuva, ch->inst_block.iova);
398 ch->inst_block.cpuva = NULL;
399 ch->inst_block.iova = 0;
400 memset(&ch->inst_block, 0, sizeof(struct inst_desc));
/* Add (or remove) this channel on runlist 0, waiting for the update
 * to complete (last argument). */
403 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
405 return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
/*
 * Disable the channel in hardware without touching the runlist.
 * Pending fences are released first by forcing the sync min value up
 * to max.
 */
408 void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
410 /* ensure no fences are pending */
412 ch->sync->set_min_eq_max(ch->sync);
414 /* disable channel */
415 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
417 ccsr_channel_r(ch->hw_chid)) |
418 ccsr_channel_enable_clr_true_f());
/* Set the enable bit in the channel's CCSR register. */
421 static void channel_gk20a_enable(struct channel_gk20a *ch)
424 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
425 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
426 ccsr_channel_enable_set_true_f());
/* Clear the enable bit in the channel's CCSR register. */
429 static void channel_gk20a_disable(struct channel_gk20a *ch)
431 /* disable channel */
432 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
434 ccsr_channel_r(ch->hw_chid)) |
435 ccsr_channel_enable_clr_true_f());
/*
 * Poll (1-3 ms sleeps) until the channel's job list is empty or the GR
 * idle timeout expires; on non-silicon platforms the loop does not
 * time out.  Logs an error when jobs remain.
 */
438 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
440 bool channel_idle = false;
441 unsigned long end_jiffies = jiffies +
442 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
/* jobs list is protected by jobs_lock; empty list == idle */
445 mutex_lock(&ch->jobs_lock);
446 channel_idle = list_empty(&ch->jobs);
447 mutex_unlock(&ch->jobs_lock);
451 usleep_range(1000, 3000);
452 } while (time_before(jiffies, end_jiffies)
453 || !tegra_platform_is_silicon());
456 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
/*
 * Full channel shutdown sequence: optionally finish outstanding work,
 * disable the channel (releasing pending fences), wait for jobs to
 * drain, preempt, and remove the channel from the runlist.
 */
464 void gk20a_disable_channel(struct channel_gk20a *ch,
466 unsigned long finish_timeout)
469 int err = gk20a_channel_finish(ch, finish_timeout);
473 /* disable the channel from hw and increment syncpoints */
474 gk20a_disable_channel_no_update(ch);
476 gk20a_wait_channel_idle(ch);
478 /* preempt the channel */
479 gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
481 /* remove channel from runlist */
482 channel_gk20a_update_runlist(ch, false);
485 #if defined(CONFIG_GK20A_CYCLE_STATS)
/*
 * Release the cyclestats dma-buf mapping and reference (if any) and
 * reset the bookkeeping fields, all under the cyclestate buffer mutex.
 */
487 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
489 /* disable existing cyclestats buffer */
490 mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
491 if (ch->cyclestate.cyclestate_buffer_handler) {
492 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
493 ch->cyclestate.cyclestate_buffer);
494 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
495 ch->cyclestate.cyclestate_buffer_handler = NULL;
496 ch->cyclestate.cyclestate_buffer = NULL;
497 ch->cyclestate.cyclestate_buffer_size = 0;
499 mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
/*
 * Handle the cyclestats ioctl: a non-zero handle installs a new
 * buffer (dma_buf_get + vmap), a zero handle frees the current one,
 * zero handle with no buffer is a no-op, and installing over an
 * existing buffer is rejected.
 */
502 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
503 struct nvhost_cycle_stats_args *args)
505 struct dma_buf *dmabuf;
506 void *virtual_address;
508 if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
510 /* set up new cyclestats buffer */
511 dmabuf = dma_buf_get(args->nvmap_handle);
513 return PTR_ERR(dmabuf);
514 virtual_address = dma_buf_vmap(dmabuf);
515 if (!virtual_address)
518 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
519 ch->cyclestate.cyclestate_buffer = virtual_address;
520 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
523 } else if (!args->nvmap_handle &&
524 ch->cyclestate.cyclestate_buffer_handler) {
525 gk20a_free_cycle_stats_buffer(ch);
528 } else if (!args->nvmap_handle &&
529 !ch->cyclestate.cyclestate_buffer_handler) {
530 /* no request from GL */
/* remaining case: handle given but a buffer already installed */
534 pr_err("channel already has cyclestats buffer\n");
/*
 * Install the userspace error-notifier buffer: validate the handle and
 * offset range, free any previous notifier, vmap the dma-buf, record
 * the mapping, and zero the notification record.
 */
540 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
541 struct nvhost_set_error_notifier *args)
543 struct device *dev = dev_from_gk20a(ch->g);
544 struct dma_buf *dmabuf;
546 u64 end = args->offset + sizeof(struct nvhost_notification);
549 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
553 dmabuf = dma_buf_get(args->mem);
/* replace any previously installed notifier */
555 if (ch->error_notifier_ref)
556 gk20a_free_error_notifiers(ch);
558 if (IS_ERR(dmabuf)) {
559 pr_err("Invalid handle: %d\n", args->mem);
/* notification record must fit inside the buffer; the second check
 * rejects offset overflow wrapping end below the record size */
563 if (end > dmabuf->size || end < sizeof(struct nvhost_notification)) {
565 gk20a_err(dev, "gk20a_init_error_notifier: invalid offset\n");
570 va = dma_buf_vmap(dmabuf);
573 pr_err("Cannot map notifier handle\n");
577 /* set channel notifiers pointer */
578 ch->error_notifier_ref = dmabuf;
579 ch->error_notifier = va + args->offset;
580 ch->error_notifier_va = va;
581 memset(ch->error_notifier, 0, sizeof(struct nvhost_notification));
/*
 * Report an error to userspace through the installed notifier (if
 * any): timestamp in nanoseconds, error code in info32, and status
 * 0xffff to flag the record as valid.  Also logs the error.
 */
585 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
587 if (ch->error_notifier_ref) {
588 struct timespec time_data;
590 getnstimeofday(&time_data);
591 nsec = ((u64)time_data.tv_sec) * 1000000000u +
592 (u64)time_data.tv_nsec;
593 ch->error_notifier->time_stamp.nanoseconds[0] =
595 ch->error_notifier->time_stamp.nanoseconds[1] =
597 ch->error_notifier->info32 = error;
598 ch->error_notifier->status = 0xffff;
599 gk20a_err(dev_from_gk20a(ch->g),
600 "error notifier set to %d\n", error);
/* Drop the error-notifier dma-buf mapping and reference, then clear
 * the channel's notifier pointers. */
604 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
606 if (ch->error_notifier_ref) {
607 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
608 dma_buf_put(ch->error_notifier_ref);
609 ch->error_notifier_ref = 0;
610 ch->error_notifier = 0;
611 ch->error_notifier_va = 0;
/*
 * Tear down a channel completely: run any deferred engine reset,
 * disable the channel (optionally finishing work first), free the
 * error notifier, graphics context, gpfifo, cyclestats buffer, private
 * command buffer, sync object and instance block, release the address
 * space binding, unlink debugger sessions, and finally return the
 * channel to the free pool.
 */
615 void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
617 struct gk20a *g = ch->g;
618 struct device *d = dev_from_gk20a(g);
619 struct fifo_gk20a *f = &g->fifo;
620 struct gr_gk20a *gr = &g->gr;
621 struct vm_gk20a *ch_vm = ch->vm;
622 unsigned long timeout = gk20a_get_gr_idle_timeout(g);
623 struct dbg_session_gk20a *dbg_s;
627 /* if engine reset was deferred, perform it now */
628 mutex_lock(&f->deferred_reset_mutex);
629 if (g->fifo.deferred_reset_pending) {
630 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
631 " deferred, running now");
632 fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
633 g->fifo.mmu_fault_engines = 0;
634 g->fifo.deferred_reset_pending = false;
636 mutex_unlock(&f->deferred_reset_mutex);
/* channels never bound to an address space need no context teardown */
641 if (!gk20a_channel_as_bound(ch))
644 gk20a_dbg_info("freeing bound channel context, timeout=%ld",
/* skip the "finish" wait when the channel has already timed out */
647 gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
649 gk20a_free_error_notifiers(ch);
651 /* release channel ctx */
652 gk20a_free_channel_ctx(ch);
654 gk20a_gr_flush_channel_tlb(gr);
656 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
/* free gpfifo: GPU mapping first, then the coherent backing memory */
659 if (ch->gpfifo.gpu_va)
660 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
661 ch->gpfifo.size, gk20a_mem_flag_none);
662 if (ch->gpfifo.cpu_va)
663 dma_free_coherent(d, ch->gpfifo.size,
664 ch->gpfifo.cpu_va, ch->gpfifo.iova);
665 ch->gpfifo.cpu_va = NULL;
668 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
670 #if defined(CONFIG_GK20A_CYCLE_STATS)
671 gk20a_free_cycle_stats_buffer(ch);
674 channel_gk20a_free_priv_cmdbuf(ch);
677 ch->sync->destroy(ch->sync);
681 /* release channel binding to the as_share */
682 gk20a_as_release_share(ch_vm->as_share);
685 channel_gk20a_unbind(ch);
686 channel_gk20a_free_inst(g, ch);
692 /* unlink all debug sessions */
693 mutex_lock(&ch->dbg_s_lock);
695 list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
697 list_del_init(&dbg_s->dbg_s_list_node);
700 mutex_unlock(&ch->dbg_s_lock);
/* make the channel id available again */
703 release_used_channel(f, ch);
/*
 * File-release handler: power on the GPU, free the channel (finishing
 * outstanding work), power back down, and detach the file's private
 * data.
 */
706 int gk20a_channel_release(struct inode *inode, struct file *filp)
708 struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
709 struct gk20a *g = ch->g;
712 trace_gk20a_channel_release(dev_name(&g->dev->dev));
714 err = gk20a_busy(ch->g->dev);
716 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
720 gk20a_free_channel(ch, true);
721 gk20a_idle(ch->g->dev);
724 filp->private_data = NULL;
/*
 * Allocate a fresh channel: grab an unused hw channel id, allocate its
 * instance block, bind it via the per-chip fifo op, and initialize the
 * timeout state and wait queues.  The returned channel is not yet
 * runnable — it still needs an address space, gpfifo and grctx.
 */
728 static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
730 struct fifo_gk20a *f = &g->fifo;
731 struct channel_gk20a *ch;
733 ch = acquire_unused_channel(f);
735 /* TBD: we want to make this virtualizable */
736 gk20a_err(dev_from_gk20a(g), "out of hw chids");
742 if (channel_gk20a_alloc_inst(g, ch)) {
744 gk20a_err(dev_from_gk20a(g),
745 "failed to open gk20a channel, out of inst mem");
749 g->ops.fifo.bind_channel(ch);
750 ch->pid = current->pid;
752 /* reset timeout counter and update timestamp */
753 ch->timeout_accumulated_ms = 0;
754 ch->timeout_gpfifo_get = 0;
755 /* set gr host default timeout */
756 ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
757 ch->timeout_debug_dump = true;
758 ch->has_timedout = false;
761 /* The channel is *not* runnable at this point. It still needs to have
762 * an address space bound and allocate a gpfifo and grctx. */
764 init_waitqueue_head(&ch->notifier_wq);
765 init_waitqueue_head(&ch->semaphore_wq);
766 init_waitqueue_head(&ch->submit_wq);
/*
 * Common open path: take a client reference, power on the GPU, create
 * a new channel, and stash it in the file's private data.
 */
771 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
774 struct channel_gk20a *ch;
776 trace_gk20a_channel_open(dev_name(&g->dev->dev));
778 err = gk20a_get_client(g);
780 gk20a_err(dev_from_gk20a(g),
781 "failed to get client ref");
785 err = gk20a_busy(g->dev);
788 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
791 ch = gk20a_open_new_channel(g);
795 gk20a_err(dev_from_gk20a(g),
800 filp->private_data = ch;
/* Character-device open entry point: recover the gk20a instance from
 * the cdev and delegate to __gk20a_channel_open(). */
804 int gk20a_channel_open(struct inode *inode, struct file *filp)
806 struct gk20a *g = container_of(inode->i_cdev,
807 struct gk20a, channel.cdev);
808 return __gk20a_channel_open(g, filp);
/* allocate private cmd buffer.
   used for inserting commands before/after user submitted buffers. */
813 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
815 struct device *d = dev_from_gk20a(c->g);
816 struct vm_gk20a *ch_vm = c->vm;
817 struct priv_cmd_queue *q = &c->priv_cmd_q;
818 struct priv_cmd_entry *e;
821 struct sg_table *sgt;
824 /* Kernel can insert gpfifos before and after user gpfifos.
825 Before user gpfifos, kernel inserts fence_wait, which takes
826 syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
827 After user gpfifos, kernel inserts fence_get, which takes
828 wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
830 Worst case if kernel adds both of them for every user gpfifo,
831 max size of priv_cmdbuf is :
832 (gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
833 size = roundup_pow_of_two(
834 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
/* coherent backing allocation for the queue memory */
836 q->mem.base_cpuva = dma_alloc_coherent(d, size,
839 if (!q->mem.base_cpuva) {
840 gk20a_err(d, "%s: memory allocation failed\n", __func__);
845 q->mem.base_iova = iova;
/* build an sg table so the buffer can be mapped into the channel VM */
848 err = gk20a_get_sgtable(d, &sgt,
849 q->mem.base_cpuva, q->mem.base_iova, size);
851 gk20a_err(d, "%s: failed to create sg table\n", __func__);
855 memset(q->mem.base_cpuva, 0, size);
857 q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
860 gk20a_mem_flag_none);
861 if (!q->base_gpuva) {
862 gk20a_err(d, "ch %d : failed to map gpu va"
863 "for priv cmd buffer", c->hw_chid);
/* queue size is tracked in u32 words, not bytes */
868 q->size = q->mem.size / sizeof (u32);
870 INIT_LIST_HEAD(&q->head);
871 INIT_LIST_HEAD(&q->free);
873 /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
874 for (i = 0; i < q->size / 4; i++) {
875 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
877 gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
883 list_add(&e->list, &q->free);
/* sg table no longer needed once mapping exists */
886 gk20a_free_sgtable(&sgt);
/* error path: release sg table and any partial allocations */
891 gk20a_free_sgtable(&sgt);
893 channel_gk20a_free_priv_cmdbuf(c);
/*
 * Release the private command queue: unmap the GPU VA, free the
 * coherent memory, free every entry on both the in-use and free lists
 * (clearing pre_alloc on free-list entries so free_priv_cmdbuf()
 * actually kfrees them), then zero the queue structure.
 */
897 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
899 struct device *d = dev_from_gk20a(c->g);
900 struct vm_gk20a *ch_vm = c->vm;
901 struct priv_cmd_queue *q = &c->priv_cmd_q;
902 struct priv_cmd_entry *e;
903 struct list_head *pos, *tmp, *head;
909 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
910 q->mem.size, gk20a_mem_flag_none);
911 if (q->mem.base_cpuva)
912 dma_free_coherent(d, q->mem.size,
913 q->mem.base_cpuva, q->mem.base_iova);
914 q->mem.base_cpuva = NULL;
915 q->mem.base_iova = 0;
/* drain the in-use list */
919 list_for_each_safe(pos, tmp, head) {
920 e = container_of(pos, struct priv_cmd_entry, list);
921 free_priv_cmdbuf(c, e);
/* drain the free list; pre_alloc cleared so entries are kfreed */
926 list_for_each_safe(pos, tmp, head) {
927 e = container_of(pos, struct priv_cmd_entry, list);
928 e->pre_alloc = false;
929 free_priv_cmdbuf(c, e);
932 memset(q, 0, sizeof(struct priv_cmd_queue));
/* allocate a cmd buffer with given size. size is number of u32 entries */
936 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
937 struct priv_cmd_entry **entry)
939 struct priv_cmd_queue *q = &c->priv_cmd_q;
940 struct priv_cmd_entry *e;
941 struct list_head *node;
943 u32 size = orig_size;
944 bool no_retry = false;
946 gk20a_dbg_fn("size %d", orig_size);
950 /* if free space in the end is less than requested, increase the size
951 * to make the real allocated space start from beginning. */
952 if (q->put + size > q->size)
953 size = orig_size + (q->size - q->put);
955 gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
956 c->hw_chid, q->get, q->put);
/* circular-buffer free space (one slot kept unused) */
959 free_count = (q->size - (q->put - q->get) - 1) % q->size;
/* not enough room: try reclaiming consumed entries once */
961 if (size > free_count) {
963 recycle_priv_cmdbuf(c);
/* take an entry from the pre-allocated free list, falling back to
 * kzalloc when the list is exhausted */
970 if (unlikely(list_empty(&q->free))) {
972 gk20a_dbg_info("ch %d: run out of pre-alloc entries",
975 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
977 gk20a_err(dev_from_gk20a(c->g),
978 "ch %d: fail to allocate priv cmd entry",
985 e = container_of(node, struct priv_cmd_entry, list);
/* record current gpfifo position so recycling can tell when this
 * entry has been consumed */
989 e->gp_get = c->gpfifo.get;
990 e->gp_put = c->gpfifo.put;
991 e->gp_wrap = c->gpfifo.wrap;
993 /* if we have increased size to skip free space in the end, set put
994 to beginning of cmd buffer (0) + size */
995 if (size != orig_size) {
996 e->ptr = q->mem.base_cpuva;
997 e->gva = q->base_gpuva;
1000 e->ptr = q->mem.base_cpuva + q->put;
1001 e->gva = q->base_gpuva + q->put * sizeof(u32);
1002 q->put = (q->put + orig_size) & (q->size - 1);
1005 /* we already handled q->put + size > q->size so BUG_ON this */
1006 BUG_ON(q->put > q->size);
1008 /* add new entry to head since we free from head */
1009 list_add(&e->list, &q->head);
1013 gk20a_dbg_fn("done");
/* Don't call this to free an explicit cmd entry.
 * It doesn't update priv_cmd_queue get/put.
 * Pre-allocated entries are reset and returned to the free list;
 * dynamically allocated ones are kfreed (on a path not visible in
 * this extract). */
1020 static void free_priv_cmdbuf(struct channel_gk20a *c,
1021 struct priv_cmd_entry *e)
1023 struct priv_cmd_queue *q = &c->priv_cmd_q;
1030 if (unlikely(!e->pre_alloc))
1033 memset(e, 0, sizeof(struct priv_cmd_entry));
1034 e->pre_alloc = true;
1035 list_add(&e->list, &q->free);
/* free entries if they're no longer being used */
/*
 * Walk the in-use list (newest first), find the most recent entry the
 * GPU has already consumed — judged by comparing the current gpfifo
 * get/wrap against the entry's recorded gp_get/gp_put/gp_wrap — then
 * advance the queue's get pointer past it and free it and everything
 * older.
 */
1040 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1042 struct priv_cmd_queue *q = &c->priv_cmd_q;
1043 struct priv_cmd_entry *e, *tmp;
1044 struct list_head *head = &q->head;
1045 bool wrap_around, found = false;
1049 /* Find the most recent free entry. Free it and everything before it */
1050 list_for_each_entry(e, head, list) {
1052 gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1053 "curr get:put:wrap %d:%d:%d",
1054 c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1055 c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1057 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
/* two cases: entry's span does or does not wrap the gpfifo */
1058 if (e->gp_get < e->gp_put) {
1059 if (c->gpfifo.get >= e->gp_put ||
1064 e->gp_get = c->gpfifo.get;
1065 } else if (e->gp_get > e->gp_put) {
1067 c->gpfifo.get >= e->gp_put) {
1071 e->gp_get = c->gpfifo.get;
/* reclaim queue space up to the end of the found entry */
1076 q->get = (e->ptr - q->mem.base_cpuva) + e->size;
1078 gk20a_dbg_info("no free entry recycled");
/* free the found entry and all older ones */
1082 list_for_each_entry_safe_continue(e, tmp, head, list) {
1083 free_priv_cmdbuf(c, e);
1086 gk20a_dbg_fn("done");
/*
 * Allocate and wire up the channel's GPFIFO: size it (tripled for
 * kernel-inserted entries, rounded to a power of two), allocate
 * coherent memory, map it into the channel VM, program RAMFC and
 * USERD, allocate the private command buffer, and add the channel to
 * the runlist.  Errors unwind in reverse order.
 */
1090 static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1091 struct nvhost_alloc_gpfifo_args *args)
1093 struct gk20a *g = c->g;
1094 struct device *d = dev_from_gk20a(g);
1095 struct vm_gk20a *ch_vm;
1098 struct sg_table *sgt;
1101 /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
1102 and another one after, for internal usage. Triple the requested size. */
1103 gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
1105 if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1108 /* an address space needs to have been bound at this point. */
1109 if (!gk20a_channel_as_bound(c)) {
1111 "not bound to an address space at time of gpfifo"
1112 " allocation. Attempting to create and bind to"
1118 c->cmds_pending = false;
1119 c->last_submit_fence.valid = false;
1121 c->ramfc.offset = 0;
1122 c->ramfc.size = ram_in_ramfc_s() / 8;
/* only one gpfifo per channel */
1124 if (c->gpfifo.cpu_va) {
1125 gk20a_err(d, "channel %d :"
1126 "gpfifo already allocated", c->hw_chid);
1130 c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1131 c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1135 if (!c->gpfifo.cpu_va) {
1136 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1141 c->gpfifo.iova = iova;
1142 c->gpfifo.entry_num = gpfifo_size;
1144 c->gpfifo.get = c->gpfifo.put = 0;
1146 err = gk20a_get_sgtable(d, &sgt,
1147 c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1149 gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
1153 c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1157 gk20a_mem_flag_none);
1158 if (!c->gpfifo.gpu_va) {
1159 gk20a_err(d, "channel %d : failed to map"
1160 " gpu_va for gpfifo", c->hw_chid);
1165 gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1166 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1168 channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1170 channel_gk20a_setup_userd(c);
1171 channel_gk20a_commit_userd(c);
1173 /* TBD: setup engine contexts */
1175 err = channel_gk20a_alloc_priv_cmdbuf(c);
1177 goto clean_up_unmap;
1179 err = channel_gk20a_update_runlist(c, true);
1181 goto clean_up_unmap;
1183 gk20a_free_sgtable(&sgt);
1185 gk20a_dbg_fn("done");
/* error unwinding: unmap, drop sg table, free memory, reset desc */
1189 gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1190 c->gpfifo.size, gk20a_mem_flag_none);
1192 gk20a_free_sgtable(&sgt);
1194 dma_free_coherent(d, c->gpfifo.size,
1195 c->gpfifo.cpu_va, c->gpfifo.iova);
1196 c->gpfifo.cpu_va = NULL;
1198 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1199 gk20a_err(d, "fail");
/* Size of a WFI command in u32 words; body not visible in this
 * extract — presumably returns the two words add_wfi_cmd() writes. */
1203 static inline int wfi_cmd_size(void)
/* Append a two-word wait-for-idle command (opcode then an ignored
 * handle word) at index *i of cmd->ptr, advancing *i. */
1207 void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
1210 cmd->ptr[(*i)++] = 0x2001001E;
1211 /* handle, ignored */
1212 cmd->ptr[(*i)++] = 0x00000000;
/*
 * Sanity check: read the hardware gp_put from USERD over BAR1 and
 * compare it with the driver's cached value.  On mismatch, resync the
 * cached value and return false; true when they agree.
 */
1215 static inline bool check_gp_put(struct gk20a *g,
1216 struct channel_gk20a *c)
1219 /* gp_put changed unexpectedly since last update? */
1220 put = gk20a_bar1_readl(g,
1221 c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1222 if (c->gpfifo.put != put) {
1223 /*TBD: BUG_ON/teardown on this*/
1224 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1225 "since last update");
1226 c->gpfifo.put = put;
1227 return false; /* surprise! */
1229 return true; /* checked out ok */
/* Update with this periodically to determine how the gpfifo is draining. */
/* Reads gp_get from USERD over BAR1, toggles the wrap flag when the
 * value went backwards (fifo wrapped), and caches the new get. */
1233 static inline u32 update_gp_get(struct gk20a *g,
1234 struct channel_gk20a *c)
1236 u32 new_get = gk20a_bar1_readl(g,
1237 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1238 if (new_get < c->gpfifo.get)
1239 c->gpfifo.wrap = !c->gpfifo.wrap;
1240 c->gpfifo.get = new_get;
/* Free gpfifo entries based on the cached get/put; one slot is always
 * kept unused to distinguish full from empty. */
1244 static inline u32 gp_free_count(struct channel_gk20a *c)
1246 return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1247 c->gpfifo.entry_num;
/*
 * Accumulate timeout time across consecutive timeout ISRs: if gp_get
 * has not advanced since the last check, add the delta to the
 * accumulator, otherwise restart it.  Returns true when timeouts are
 * enabled and the accumulated time exceeds the channel maximum.
 */
1250 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1251 u32 timeout_delta_ms)
1253 u32 gpfifo_get = update_gp_get(ch->g, ch);
1254 /* Count consequent timeout isr */
1255 if (gpfifo_get == ch->timeout_gpfifo_get) {
1256 /* we didn't advance since previous channel timeout check */
1257 ch->timeout_accumulated_ms += timeout_delta_ms;
1259 /* first timeout isr encountered */
1260 ch->timeout_accumulated_ms = timeout_delta_ms;
1263 ch->timeout_gpfifo_get = gpfifo_get;
1265 return ch->g->timeouts_enabled &&
1266 ch->timeout_accumulated_ms > ch->timeout_ms_max;
/* Issue a syncpoint increment *preceded* by a wait-for-idle
 * command. All commands on the channel will have been
 * consumed at the time the fence syncpoint increment occurs.
 */
1274 static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
1276 struct priv_cmd_entry *cmd = NULL;
1277 struct gk20a *g = c->g;
/* refuse submission on a timed-out channel */
1281 if (c->has_timedout)
/* lazily create the sync object on first use */
1285 c->sync = gk20a_channel_sync_create(c);
1290 update_gp_get(g, c);
1291 free_count = gp_free_count(c);
1292 if (unlikely(!free_count)) {
1293 gk20a_err(dev_from_gk20a(g),
1294 "not enough gpfifo space");
/* build the WFI + syncpoint-increment command */
1298 err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence);
1302 WARN_ON(!c->last_submit_fence.wfi);
/* write the gpfifo entry (address lo/hi plus length) and advance put
 * with power-of-two wraparound */
1304 c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1305 c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1306 pbdma_gp_entry1_length_f(cmd->size);
1308 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1311 cmd->gp_put = c->gpfifo.put;
/* kick the hardware by writing gp_put to USERD over BAR1 */
1313 gk20a_bar1_writel(g,
1314 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1317 gk20a_dbg_info("post-submit put %d, get %d, size %d",
1318 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
/* Like gp_free_count() but refreshes gp_get from hardware first. */
1323 static u32 get_gp_free_count(struct channel_gk20a *c)
1325 update_gp_get(c->g, c);
1326 return gp_free_count(c);
/*
 * When cmdbuf tracing is enabled, resolve the gpfifo entry's GPU VA
 * back to its dma-buf, vmap it, and emit the pushbuffer contents to
 * ftrace in 128-word batches.
 */
1329 static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1334 struct dma_buf *dmabuf = NULL;
1336 if (gk20a_debug_trace_cmdbuf) {
/* reassemble the 64-bit GPU VA from the two gpfifo entry words */
1337 u64 gpu_va = (u64)g->entry0 |
1338 (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1341 words = pbdma_gp_entry1_length_v(g->entry1);
1342 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1344 mem = dma_buf_vmap(dmabuf);
/*
 * Write in batches of 128 as there seems to be a limit
 * of how much you can output to ftrace at once.
 */
1353 for (i = 0; i < words; i += 128U) {
1354 trace_gk20a_push_cmdbuf(
1357 min(words - i, 128U),
1358 offset + i * sizeof(u32),
1361 dma_buf_vunmap(dmabuf, mem);
/*
 * Record a submitted job on the channel's job list so that
 * gk20a_channel_update() can release its buffer references once the
 * fence expires.  Takes a reference on every buffer currently mapped
 * in the channel's VM.
 * NOTE(review): error-return lines are elided in this excerpt.
 */
1365 static int gk20a_channel_add_job(struct channel_gk20a *c,
1366 struct gk20a_channel_fence *fence)
1368 struct vm_gk20a *vm = c->vm;
1369 struct channel_gk20a_job *job = NULL;
1370 struct mapped_buffer_node **mapped_buffers = NULL;
1371 int err = 0, num_mapped_buffers;
1373 /* job needs reference to this vm */
1376 err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1382 job = kzalloc(sizeof(*job), GFP_KERNEL);
/* Allocation failed: drop the buffer refs taken above. */
1384 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
/* The job owns the buffer list and a copy of the completion fence. */
1389 job->num_mapped_buffers = num_mapped_buffers;
1390 job->mapped_buffers = mapped_buffers;
1391 job->fence = *fence;
/* jobs_lock protects the per-channel job list. */
1393 mutex_lock(&c->jobs_lock);
1394 list_add_tail(&job->list, &c->jobs);
1395 mutex_unlock(&c->jobs_lock);
/*
 * Completion handler for the channel: wake submit waiters, then walk the
 * job list and, for every job whose fence has expired, release its
 * mapped-buffer references, unlink it and drop the busy reference taken
 * at submit time.
 * NOTE(review): some lines (e.g. the job kfree) are elided in this excerpt.
 */
1401 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1402 struct vm_gk20a *vm = c->vm;
1403 struct channel_gk20a_job *job, *n;
/* Wake anyone blocked in gk20a_submit_channel_gpfifo waiting for space. */
1405 wake_up(&c->submit_wq);
1407 mutex_lock(&c->jobs_lock);
1408 list_for_each_entry_safe(job, n, &c->jobs, list) {
/* Treat a missing sync object as "completed" (and warn) so jobs
 * are not leaked. */
1409 bool completed = WARN_ON(!c->sync) ||
1410 c->sync->is_expired(c->sync, &job->fence);
1414 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1415 job->num_mapped_buffers);
1417 /* job is done. release its reference to vm */
1420 list_del_init(&job->list);
/* Balance the gk20a_busy() taken when the job was submitted. */
1422 gk20a_idle(c->g->dev);
1424 mutex_unlock(&c->jobs_lock);
/*
 * Core submit path: copy user gpfifo entries into the channel's gpfifo
 * ring, optionally bracketed by a pre-fence wait command and a
 * post-fence increment command, then kick the hardware by writing the
 * new PUT pointer through USERD.
 *
 * NOTE(review): many lines (returns, braces, some arguments) are elided
 * in this excerpt; comments describe only the visible statements.
 */
1427 static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1428 struct nvhost_gpfifo *gpfifo,
1430 struct nvhost_fence *fence,
1433 struct gk20a *g = c->g;
1434 struct device *d = dev_from_gk20a(g);
1437 struct priv_cmd_entry *wait_cmd = NULL;
1438 struct priv_cmd_entry *incr_cmd = NULL;
1439 /* we might need two extra gpfifo entries - one for pre fence
1440 * and one for post fence. */
1441 const int extra_entries = 2;
1442 bool need_wfi = !(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
/* Reject work on a timed-out channel. */
1444 if (c->has_timedout)
/* Fence flags require a valid fence argument (check partially elided). */
1447 if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1448 NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
/* Lazily create the sync object on first use. */
1453 c->sync = gk20a_channel_sync_create(c);
1458 #ifdef CONFIG_DEBUG_FS
1459 /* update debug settings */
1460 if (g->ops.ltc.sync_debugfs)
1461 g->ops.ltc.sync_debugfs(g);
1464 gk20a_dbg_info("channel %d", c->hw_chid);
1466 /* gk20a_channel_update releases this ref. */
1467 err = gk20a_busy(g->dev);
1469 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1473 trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1477 fence->syncpt_id, fence->value);
1479 update_gp_get(g, c);
1481 gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1482 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1484 /* Invalidate tlb if it's dirty... */
1485 /* TBD: this should be done in the cmd stream, not with PRIs. */
1486 /* We don't know what context is currently running... */
1487 /* Note also: there can be more than one context associated with the */
1488 /* address space (vm). */
1489 gk20a_mm_tlb_invalidate(c->vm);
1491 /* Make sure we have enough space for gpfifo entries. If not,
1492 * wait for signals from completed submits */
1493 if (gp_free_count(c) < num_entries + extra_entries) {
1494 err = wait_event_interruptible(c->submit_wq,
1495 get_gp_free_count(c) >= num_entries + extra_entries ||
1499 if (c->has_timedout) {
1505 gk20a_err(d, "not enough gpfifo space");
1511 * optionally insert syncpt wait in the beginning of gpfifo submission
1512 * when user requested and the wait hasn't expired.
1513 * validate that the id makes sense, elide if not
1514 * the only reason this isn't being unceremoniously killed is to
1515 * keep running some tests which trigger this condition
1517 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
/* SYNC_FENCE selects an fd-based wait, otherwise raw syncpt id/value. */
1518 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
1519 err = c->sync->wait_fd(c->sync, fence->syncpt_id,
1522 err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
1523 fence->value, &wait_cmd);
1529 /* always insert syncpt increment at end of gpfifo submission
1530 to keep track of method completion for idle railgating */
1531 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
1532 flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
1533 err = c->sync->incr_user_fd(c->sync, &incr_cmd,
1534 &c->last_submit_fence,
1537 else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1538 err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
1539 &c->last_submit_fence,
/* No user-visible fence requested: plain increment for housekeeping. */
1544 err = c->sync->incr(c->sync, &incr_cmd,
1545 &c->last_submit_fence);
/* Push the pre-fence wait command (if any) into the gpfifo. */
1550 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1551 u64_lo32(wait_cmd->gva);
1552 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1553 u64_hi32(wait_cmd->gva) |
1554 pbdma_gp_entry1_length_f(wait_cmd->size);
1555 trace_gk20a_push_cmdbuf(c->g->dev->name,
1556 0, wait_cmd->size, 0, wait_cmd->ptr);
/* entry_num is assumed to be a power of two (mask wrap). */
1558 c->gpfifo.put = (c->gpfifo.put + 1) &
1559 (c->gpfifo.entry_num - 1);
1562 wait_cmd->gp_put = c->gpfifo.put;
/* Copy the caller's gpfifo entries into the ring. */
1565 for (i = 0; i < num_entries; i++) {
1566 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1567 gpfifo[i].entry0; /* cmd buf va low 32 */
1568 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1569 gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1570 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1571 c->gpfifo.put = (c->gpfifo.put + 1) &
1572 (c->gpfifo.entry_num - 1);
/* Push the post-fence increment command. */
1576 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1577 u64_lo32(incr_cmd->gva);
1578 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1579 u64_hi32(incr_cmd->gva) |
1580 pbdma_gp_entry1_length_f(incr_cmd->size);
1581 trace_gk20a_push_cmdbuf(c->g->dev->name,
1582 0, incr_cmd->size, 0, incr_cmd->ptr);
1584 c->gpfifo.put = (c->gpfifo.put + 1) &
1585 (c->gpfifo.entry_num - 1);
1588 incr_cmd->gp_put = c->gpfifo.put;
1591 trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1595 fence->syncpt_id, fence->value);
1597 /* TODO! Check for errors... */
/* Track the job so gk20a_channel_update can release buffers and the
 * busy ref once the fence expires. */
1598 gk20a_channel_add_job(c, &c->last_submit_fence);
1600 c->cmds_pending = true;
/* Kick hardware: publish the new PUT pointer via USERD over BAR1. */
1601 gk20a_bar1_writel(g,
1602 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1605 gk20a_dbg_info("post-submit put %d, get %d, size %d",
1606 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1608 gk20a_dbg_fn("done");
/* Error path: release any priv cmdbufs built for this submit. */
1612 gk20a_err(d, "fail");
1613 free_priv_cmdbuf(c, wait_cmd);
1614 free_priv_cmdbuf(c, incr_cmd);
/*
 * Tear down per-channel support state set up by gk20a_init_channel_support.
 * NOTE(review): the body is elided in this excerpt.
 */
1619 void gk20a_remove_channel_support(struct channel_gk20a *c)
/*
 * One-time initialisation of channel `chid` in the fifo's channel array:
 * install the teardown hook and initialise the job list, its lock, the
 * optional cyclestats mutex, and the debugger-session list/lock.
 * NOTE(review): some lines are elided in this excerpt.
 */
1624 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1626 struct channel_gk20a *c = g->fifo.channel+chid;
1631 c->remove_support = gk20a_remove_channel_support;
1632 mutex_init(&c->jobs_lock);
1633 INIT_LIST_HEAD(&c->jobs);
1634 #if defined(CONFIG_GK20A_CYCLE_STATS)
1635 mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1637 INIT_LIST_HEAD(&c->dbg_s_list);
1638 mutex_init(&c->dbg_s_lock);
/*
 * Wait (up to `timeout`) for all pending commands on the channel to
 * complete.  If the last submit fence was not a wfi fence, first submit
 * a wfi+increment so completion of the fence implies the channel is idle.
 * NOTE(review): return statements are elided in this excerpt.
 */
1643 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
/* Nothing outstanding: nothing to wait for. */
1647 if (!ch->cmds_pending)
1650 /* Do not wait for a timedout channel */
1651 if (ch->has_timedout)
1654 if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
1655 gk20a_dbg_fn("issuing wfi, incr to finish the channel")
1656 err = gk20a_channel_submit_wfi(ch);
1661 BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
1663 gk20a_dbg_fn("waiting for channel to finish thresh:%d",
1664 ch->last_submit_fence.thresh);
/* Block on the CPU until the fence signals or the timeout expires. */
1666 err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, timeout);
1668 dev_warn(dev_from_gk20a(ch->g),
1669 "timed out waiting for gk20a channel to finish");
1671 ch->cmds_pending = false;
/*
 * Sleep until the 32-bit semaphore word at `offset` inside the dma-buf
 * identified by `id` equals `payload`, the channel times out, or
 * `timeout` jiffies elapse.  The word is accessed via a kmap of the
 * containing page.
 * NOTE(review): return statements are elided in this excerpt.
 */
1676 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
1677 ulong id, u32 offset,
1678 u32 payload, long timeout)
1680 struct platform_device *pdev = ch->g->dev;
1681 struct dma_buf *dmabuf;
1687 /* do not wait if channel has timed out */
1688 if (ch->has_timedout)
/* Resolve the user-supplied handle to a dma-buf. */
1691 dmabuf = dma_buf_get(id);
1692 if (IS_ERR(dmabuf)) {
1693 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
/* Map only the page containing the semaphore word. */
1698 data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
1700 gk20a_err(&pdev->dev, "failed to map notifier memory");
1705 semaphore = data + (offset & ~PAGE_MASK);
1707 remain = wait_event_interruptible_timeout(
1709 *semaphore == payload || ch->has_timedout,
/* remain == 0 means timeout; recheck the payload to distinguish a
 * late wakeup from a genuine timeout. remain < 0 means interrupted. */
1712 if (remain == 0 && *semaphore != payload)
1714 else if (remain < 0)
1717 dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
1719 dma_buf_put(dmabuf);
/*
 * NVHOST_IOCTL_CHANNEL_WAIT backend: wait either on a notifier object
 * (a `struct notification` inside a dma-buf, signalled by status == 0)
 * or on a semaphore word (delegated to gk20a_channel_wait_semaphore).
 * On notifier completion, stamps a timestamp and debug info back into
 * the notification.
 * NOTE(review): several lines (returns, error assignments, braces) are
 * elided in this excerpt.
 */
1723 static int gk20a_channel_wait(struct channel_gk20a *ch,
1724 struct nvhost_wait_args *args)
1726 struct device *d = dev_from_gk20a(ch->g);
1727 struct dma_buf *dmabuf;
1728 struct notification *notif;
1733 unsigned long timeout;
1734 int remain, ret = 0;
/* A timed-out channel never completes new waits. */
1739 if (ch->has_timedout)
1742 if (args->timeout == NVHOST_NO_TIMEOUT)
1743 timeout = MAX_SCHEDULE_TIMEOUT;
1745 timeout = (u32)msecs_to_jiffies(args->timeout);
1747 switch (args->type) {
1748 case NVHOST_WAIT_TYPE_NOTIFIER:
1749 id = args->condition.notifier.nvmap_handle;
1750 offset = args->condition.notifier.offset;
1751 end = offset + sizeof(struct notification);
1753 dmabuf = dma_buf_get(id);
1754 if (IS_ERR(dmabuf)) {
1755 gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
/* Bounds check: `end < sizeof(...)` also catches offset overflow. */
1760 if (end > dmabuf->size || end < sizeof(struct notification)) {
1761 dma_buf_put(dmabuf);
1762 gk20a_err(d, "invalid notifier offset\n");
1766 notif = dma_buf_vmap(dmabuf);
1768 gk20a_err(d, "failed to map notifier memory");
1772 notif = (struct notification *)((uintptr_t)notif + offset);
1774 /* user should set status pending before
1775 * calling this ioctl */
1776 remain = wait_event_interruptible_timeout(
1778 notif->status == 0 || ch->has_timedout,
1781 if (remain == 0 && notif->status != 0) {
1783 goto notif_clean_up;
1784 } else if (remain < 0) {
1786 goto notif_clean_up;
1789 /* TBD: fill in correct information */
1790 jiffies = get_jiffies_64();
1791 jiffies_to_timespec(jiffies, &tv);
1792 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1793 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1794 notif->info32 = 0xDEADBEEF; /* should be object name */
1795 notif->info16 = ch->hw_chid; /* should be method offset */
1798 dma_buf_vunmap(dmabuf, notif);
1801 case NVHOST_WAIT_TYPE_SEMAPHORE:
1802 ret = gk20a_channel_wait_semaphore(ch,
1803 args->condition.semaphore.nvmap_handle,
1804 args->condition.semaphore.offset,
1805 args->condition.semaphore.payload,
/*
 * Map an NVHOST priority level to a hardware timeslice value and apply
 * it to the channel's scheduling parameters.
 * NOTE(review): the switch statement line and break/return lines are
 * elided in this excerpt.
 */
1818 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
1821 u32 timeslice_timeout;
1822 /* set priority of graphics channel */
1824 case NVHOST_PRIORITY_LOW:
1825 /* 64 << 3 = 512us */
1826 timeslice_timeout = 64;
1828 case NVHOST_PRIORITY_MEDIUM:
1829 /* 128 << 3 = 1024us */
1830 timeslice_timeout = 128;
1832 case NVHOST_PRIORITY_HIGH:
1833 /* 255 << 3 = 2048us */
1834 timeslice_timeout = 255;
/* Unknown priority: report and (presumably) bail — return elided. */
1837 pr_err("Unsupported priority");
1840 channel_gk20a_set_schedule_params(ch,
/*
 * NVHOST_IOCTL_CHANNEL_ZCULL_BIND backend: bind the channel's context
 * to the zcull region described by args (GPU VA + mode).
 */
1845 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
1846 struct nvhost_zcull_bind_args *args)
1848 struct gk20a *g = ch->g;
1849 struct gr_gk20a *gr = &g->gr;
1853 return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
1854 args->gpu_va, args->mode);
1857 /* in this context the "channel" is the host1x channel which
1858 * maps to *all* gk20a channels */
/*
 * Suspend path: wait for the engines to idle, then for every in-use
 * channel disable it in CCSR and preempt it; finally remove all
 * channels from the runlist and unbind them.
 * NOTE(review): some lines (error checks, returns) are elided in this
 * excerpt.
 */
1859 int gk20a_channel_suspend(struct gk20a *g)
1861 struct fifo_gk20a *f = &g->fifo;
1863 bool channels_in_use = false;
1868 /* wait for engine idle */
1869 err = gk20a_fifo_wait_engine_idle(g);
1873 for (chid = 0; chid < f->num_channels; chid++) {
1874 if (f->channel[chid].in_use) {
1876 gk20a_dbg_info("suspend channel %d", chid);
1877 /* disable channel */
1878 gk20a_writel(g, ccsr_channel_r(chid),
1879 gk20a_readl(g, ccsr_channel_r(chid)) |
1880 ccsr_channel_enable_clr_true_f());
1881 /* preempt the channel */
1882 gk20a_fifo_preempt_channel(g, chid);
1884 channels_in_use = true;
1888 if (channels_in_use) {
/* Remove all channels (~0 mask) from runlist 0 without adding back. */
1889 gk20a_fifo_update_runlist(g, 0, ~0, false, true);
1891 for (chid = 0; chid < f->num_channels; chid++) {
1892 if (f->channel[chid].in_use)
1893 channel_gk20a_unbind(&f->channel[chid]);
1897 gk20a_dbg_fn("done");
1901 /* in this context the "channel" is the host1x channel which
1902 * maps to *all* gk20a channels */
/*
 * Resume path: re-bind every in-use channel to hardware, then restore
 * them all (~0 mask) to runlist 0.  Mirrors gk20a_channel_suspend.
 */
1903 int gk20a_channel_resume(struct gk20a *g)
1905 struct fifo_gk20a *f = &g->fifo;
1907 bool channels_in_use = false;
1911 for (chid = 0; chid < f->num_channels; chid++) {
1912 if (f->channel[chid].in_use) {
1913 gk20a_dbg_info("resume channel %d", chid);
1914 g->ops.fifo.bind_channel(&f->channel[chid]);
1915 channels_in_use = true;
1919 if (channels_in_use)
1920 gk20a_fifo_update_runlist(g, 0, ~0, true, true);
1922 gk20a_dbg_fn("done");
/*
 * Semaphore interrupt handler helper: wake every waiter on every
 * channel's semaphore wait queue so pending semaphore waits re-evaluate
 * their condition.
 */
1926 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
1928 struct fifo_gk20a *f = &g->fifo;
1933 for (chid = 0; chid < f->num_channels; chid++) {
1934 struct channel_gk20a *c = g->fifo.channel+chid;
1936 wake_up_interruptible_all(&c->semaphore_wq);
/*
 * ioctl wrapper for submit: copy the user-space gpfifo entry array into
 * a kernel buffer, then hand it to gk20a_submit_channel_gpfifo.
 * NOTE(review): error returns and the kfree of `gpfifo` are elided in
 * this excerpt.  `size` is computed as num_entries * entry size —
 * overflow checking, if any, is not visible here.
 */
1940 static int gk20a_ioctl_channel_submit_gpfifo(
1941 struct channel_gk20a *ch,
1942 struct nvhost_submit_gpfifo_args *args)
1950 if (ch->has_timedout)
1953 size = args->num_entries * sizeof(struct nvhost_gpfifo);
1955 gpfifo = kzalloc(size, GFP_KERNEL);
1959 if (copy_from_user(gpfifo,
1960 (void __user *)(uintptr_t)args->gpfifo, size)) {
1965 ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
1966 &args->fence, args->flags);
/*
 * Populate the fifo channel-related entries of the HAL ops table with
 * the gk20a implementations.
 */
1973 void gk20a_init_channel(struct gpu_ops *gops)
1975 gops->fifo.bind_channel = channel_gk20a_bind;
1976 gops->fifo.disable_channel = channel_gk20a_disable;
1977 gops->fifo.enable_channel = channel_gk20a_enable;
1980 long gk20a_channel_ioctl(struct file *filp,
1981 unsigned int cmd, unsigned long arg)
1983 struct channel_gk20a *ch = filp->private_data;
1984 struct platform_device *dev = ch->g->dev;
1985 u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE];
1988 if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) ||
1989 (_IOC_NR(cmd) == 0) ||
1990 (_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) ||
1991 (_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE))
1994 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1995 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2000 case NVHOST_IOCTL_CHANNEL_OPEN:
2006 err = get_unused_fd_flags(O_RDWR);
2011 name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
2012 dev_name(&dev->dev), fd);
2019 file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
2022 err = PTR_ERR(file);
2026 fd_install(fd, file);
2028 err = __gk20a_channel_open(ch->g, file);
2035 ((struct nvhost_channel_open_args *)buf)->channel_fd = fd;
2038 case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD:
2040 case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2041 err = gk20a_busy(dev);
2044 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2048 err = gk20a_alloc_obj_ctx(ch,
2049 (struct nvhost_alloc_obj_ctx_args *)buf);
2052 case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX:
2053 err = gk20a_busy(dev);
2056 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2060 err = gk20a_free_obj_ctx(ch,
2061 (struct nvhost_free_obj_ctx_args *)buf);
2064 case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO:
2065 err = gk20a_busy(dev);
2068 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2072 err = gk20a_alloc_channel_gpfifo(ch,
2073 (struct nvhost_alloc_gpfifo_args *)buf);
2076 case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2077 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2078 (struct nvhost_submit_gpfifo_args *)buf);
2080 case NVHOST_IOCTL_CHANNEL_WAIT:
2081 err = gk20a_busy(dev);
2084 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2088 err = gk20a_channel_wait(ch,
2089 (struct nvhost_wait_args *)buf);
2092 case NVHOST_IOCTL_CHANNEL_ZCULL_BIND:
2093 err = gk20a_busy(dev);
2096 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2100 err = gk20a_channel_zcull_bind(ch,
2101 (struct nvhost_zcull_bind_args *)buf);
2104 case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2105 err = gk20a_busy(dev);
2108 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2112 err = gk20a_init_error_notifier(ch,
2113 (struct nvhost_set_error_notifier *)buf);
2116 #ifdef CONFIG_GK20A_CYCLE_STATS
2117 case NVHOST_IOCTL_CHANNEL_CYCLE_STATS:
2118 err = gk20a_busy(dev);
2121 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2125 err = gk20a_channel_cycle_stats(ch,
2126 (struct nvhost_cycle_stats_args *)buf);
2130 case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
2133 (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2134 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2135 timeout, ch->hw_chid);
2136 ch->timeout_ms_max = timeout;
2139 case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2142 (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2143 bool timeout_debug_dump = !((u32)
2144 ((struct nvhost_set_timeout_ex_args *)buf)->flags &
2145 (1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP));
2146 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2147 timeout, ch->hw_chid);
2148 ch->timeout_ms_max = timeout;
2149 ch->timeout_debug_dump = timeout_debug_dump;
2152 case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
2153 ((struct nvhost_get_param_args *)buf)->value =
2156 case NVHOST_IOCTL_CHANNEL_SET_PRIORITY:
2157 err = gk20a_busy(dev);
2160 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2164 gk20a_channel_set_priority(ch,
2165 ((struct nvhost_set_priority_args *)buf)->priority);
2169 dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2174 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2175 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));