/*
 * GK20A Graphics channel
 *
 * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/nvhost.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/highmem.h> /* needed for nvmap.h */
#include <trace/events/gk20a.h>
#include <linux/scatterlist.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>

#include "debug_gk20a.h"

#include "gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "fence_gk20a.h"
#include "semaphore_gk20a.h"

#include "hw_ram_gk20a.h"
#include "hw_fifo_gk20a.h"
#include "hw_pbdma_gk20a.h"
#include "hw_ccsr_gk20a.h"
#include "hw_ltc_gk20a.h"

#define NVMAP_HANDLE_PARAM_SIZE 1

#define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT	64	/* channels */
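
/*
 * Note on the limit above: allocate_channel() sets
 * platform->aggressive_sync_destroy once more than this many channels are
 * in use, and free_channel() clears it again when usage drops below the
 * limit.  While the flag is set, per-channel sync resources are torn down
 * eagerly in channel_gk20a_unbind() instead of lingering until
 * channel_free(), trading sync re-creation cost for a smaller footprint
 * under heavy channel load.
 */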

static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);

static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e);

static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_commit_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_userd(struct channel_gk20a *c);

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);

static int channel_gk20a_update_runlist(struct channel_gk20a *c,
					bool add);

static void gk20a_free_error_notifiers(struct channel_gk20a *ch);

/* allocate GPU channel */
static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
{
	struct channel_gk20a *ch = NULL;
	struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);

	mutex_lock(&f->free_chs_mutex);
	if (!list_empty(&f->free_chs)) {
		ch = list_first_entry(&f->free_chs, struct channel_gk20a,
				free_chs);
		list_del(&ch->free_chs);
		WARN_ON(atomic_read(&ch->ref_count));
		WARN_ON(ch->referenceable);
		f->used_channels++;
	}
	mutex_unlock(&f->free_chs_mutex);

	if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
		platform->aggressive_sync_destroy = true;

	return ch;
}

static void free_channel(struct fifo_gk20a *f,
		struct channel_gk20a *ch)
{
	struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);

	trace_gk20a_release_used_channel(ch->hw_chid);
	/* refcount is zero here and channel is in a freed/dead state */
	mutex_lock(&f->free_chs_mutex);
	/* add to head to increase visibility of timing-related bugs */
	list_add(&ch->free_chs, &f->free_chs);
	f->used_channels--;
	mutex_unlock(&f->free_chs_mutex);

	if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
		platform->aggressive_sync_destroy = false;
}

int channel_gk20a_commit_va(struct channel_gk20a *c)
{
	gk20a_dbg_fn("");

	if (!c->inst_block.cpu_va)
		return -ENOMEM;

	gk20a_init_inst_block(&c->inst_block, c->vm,
			c->vm->gmmu_page_sizes[gmmu_page_size_big]);

	return 0;
}

static int channel_gk20a_commit_userd(struct channel_gk20a *c)
{
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	gk20a_dbg_fn("");

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
	addr_hi = u64_hi32(c->userd_iova);

	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
		c->hw_chid, (u64)c->userd_iova);

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_addr_f(addr_lo));

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_hi_addr_f(addr_hi));

	return 0;
}

static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
				u32 timeslice_timeout)
{
	void *inst_ptr;
	int shift = 3;
	int value = timeslice_timeout;

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	/* disable channel */
	c->g->ops.fifo.disable_channel(c);

	/* preempt the channel */
	WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));

	/* value field is 8 bits long */
	while (value >= 1 << 8) {
		value >>= 1;
		shift++;
	}

	/* time slice register is only 18 bits long */
	if ((value << shift) >= 1 << 19) {
		pr_err("Requested timeslice value is clamped to 18 bits\n");
		value = 255;
		shift = 10;
	}

	/* set new timeslice */
	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
		value | (shift << 12) |
		fifo_runlist_timeslice_enable_true_f());

	/* enable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_set_true_f());

	return 0;
}

int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
			u64 gpfifo_base, u32 gpfifo_entries)
{
	void *inst_ptr;

	gk20a_dbg_fn("");

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	memset(inst_ptr, 0, ram_fc_size_val_v());

	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
		pbdma_gp_base_offset_f(
		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));

	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));

	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
		c->g->ops.fifo.get_pbdma_signature(c->g));

	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
		pbdma_formats_gp_fermi0_f() |
		pbdma_formats_pb_fermi1_f() |
		pbdma_formats_mp_fermi0_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
		pbdma_pb_header_priv_user_f() |
		pbdma_pb_header_method_zero_f() |
		pbdma_pb_header_subchannel_zero_f() |
		pbdma_pb_header_level_main_f() |
		pbdma_pb_header_first_true_f() |
		pbdma_pb_header_type_inc_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
		pbdma_subdevice_id_f(1) |
		pbdma_subdevice_status_active_f() |
		pbdma_subdevice_channel_dma_enable_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
		pbdma_acquire_retry_man_2_f() |
		pbdma_acquire_retry_exp_2_f() |
		pbdma_acquire_timeout_exp_max_f() |
		pbdma_acquire_timeout_man_max_f() |
		pbdma_acquire_timeout_en_disable_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
		fifo_runlist_timeslice_timeout_128_f() |
		fifo_runlist_timeslice_timescale_3_f() |
		fifo_runlist_timeslice_enable_true_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
		fifo_pb_timeslice_timeout_16_f() |
		fifo_pb_timeslice_timescale_0_f() |
		fifo_pb_timeslice_enable_true_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));

	return channel_gk20a_commit_userd(c);
}

static int channel_gk20a_setup_userd(struct channel_gk20a *c)
{
	BUG_ON(!c->userd_cpu_va);

	gk20a_dbg_fn("");

	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);

	return 0;
}
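
/*
 * USERD is the per-channel control block through which the gpfifo GET/PUT
 * pointers are exposed; the fields zeroed above include the GP_GET/GP_PUT
 * pointers that the rest of this file accesses through BAR1 (see
 * update_gp_get() and the gk20a_bar1_writel() of gp_put at the end of
 * submit), so a freshly set up channel starts out with an empty gpfifo.
 */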

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = ch_gk20a->g;
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *engine_info =
		f->engine_info + ENGINE_GR_GK20A;

	u32 inst_ptr = sg_phys(ch_gk20a->inst_block.sgt->sgl)
		>> ram_in_base_shift_v();

	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
		ch_gk20a->hw_chid, inst_ptr);

	ch_gk20a->bound = true;

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_runlist_f(~0)) |
		 ccsr_channel_runlist_f(engine_info->runlist_id));

	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
		ccsr_channel_inst_ptr_f(inst_ptr) |
		ccsr_channel_inst_target_vid_mem_f() |
		ccsr_channel_inst_bind_true_f());

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_enable_set_f(~0)) |
		 ccsr_channel_enable_set_true_f());
}

void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = ch_gk20a->g;
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);

	gk20a_dbg_fn("");

	if (ch_gk20a->bound)
		gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
			ccsr_channel_inst_ptr_f(0) |
			ccsr_channel_inst_bind_false_f());

	ch_gk20a->bound = false;

	/*
	 * if we are aggressive then we can destroy the syncpt
	 * resource at this point
	 * if not, then it will be destroyed at channel_free()
	 */
	if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
		ch_gk20a->sync->destroy(ch_gk20a->sync);
		ch_gk20a->sync = NULL;
	}
}

int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
{
	int err;

	gk20a_dbg_fn("");

	err = gk20a_alloc_inst_block(g, &ch->inst_block);
	if (err)
		return err;

	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
		ch->hw_chid, (u64)sg_phys(ch->inst_block.sgt->sgl));

	gk20a_dbg_fn("done");
	return 0;
}

void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
{
	gk20a_free_inst_block(g, &ch->inst_block);
}

static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
{
	return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
}

void channel_gk20a_enable(struct channel_gk20a *ch)
{
	/* enable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_set_true_f());
}

void channel_gk20a_disable(struct channel_gk20a *ch)
{
	/* disable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g,
			ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_clr_true_f());
}

void gk20a_channel_abort(struct channel_gk20a *ch)
{
	struct channel_gk20a_job *job, *n;
	bool released_job_semaphore = false;

	gk20a_dbg_fn("");

	/* make sure new kickoffs are prevented */
	ch->has_timedout = true;

	ch->g->ops.fifo.disable_channel(ch);

	/* ensure no fences are pending */
	mutex_lock(&ch->submit_lock);
	if (ch->sync)
		ch->sync->set_min_eq_max(ch->sync);
	mutex_unlock(&ch->submit_lock);

	/* release all job semaphores (applies only to jobs that use
	   semaphore synchronization) */
	mutex_lock(&ch->jobs_lock);
	list_for_each_entry_safe(job, n, &ch->jobs, list) {
		if (job->post_fence->semaphore) {
			gk20a_semaphore_release(job->post_fence->semaphore);
			released_job_semaphore = true;
		}
	}
	mutex_unlock(&ch->jobs_lock);

	if (released_job_semaphore) {
		wake_up_interruptible_all(&ch->semaphore_wq);
		gk20a_channel_update(ch, 0);
	}
}

int gk20a_wait_channel_idle(struct channel_gk20a *ch)
{
	bool channel_idle = false;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));

	do {
		mutex_lock(&ch->jobs_lock);
		channel_idle = list_empty(&ch->jobs);
		mutex_unlock(&ch->jobs_lock);
		if (channel_idle)
			break;

		usleep_range(1000, 3000);
	} while (time_before(jiffies, end_jiffies)
			|| !tegra_platform_is_silicon());

	if (!channel_idle) {
		gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
				ch->hw_chid);
		return -EBUSY;
	}

	return 0;
}

void gk20a_disable_channel(struct channel_gk20a *ch,
			   bool finish,
			   unsigned long finish_timeout)
{
	if (finish) {
		int err = gk20a_channel_finish(ch, finish_timeout);
		WARN_ON(err);
	}

	/* disable the channel from hw and increment syncpoints */
	gk20a_channel_abort(ch);

	gk20a_wait_channel_idle(ch);

	/* preempt the channel */
	ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);

	/* remove channel from runlist */
	channel_gk20a_update_runlist(ch, false);
}

#if defined(CONFIG_GK20A_CYCLE_STATS)

static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
{
	/* disable existing cyclestats buffer */
	mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
	if (ch->cyclestate.cyclestate_buffer_handler) {
		dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
				ch->cyclestate.cyclestate_buffer);
		dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
		ch->cyclestate.cyclestate_buffer_handler = NULL;
		ch->cyclestate.cyclestate_buffer = NULL;
		ch->cyclestate.cyclestate_buffer_size = 0;
	}
	mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
}

static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
		       struct nvgpu_cycle_stats_args *args)
{
	struct dma_buf *dmabuf;
	void *virtual_address;

	if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {

		/* set up new cyclestats buffer */
		dmabuf = dma_buf_get(args->dmabuf_fd);
		if (IS_ERR(dmabuf))
			return PTR_ERR(dmabuf);
		virtual_address = dma_buf_vmap(dmabuf);
		if (!virtual_address)
			return -ENOMEM;

		ch->cyclestate.cyclestate_buffer_handler = dmabuf;
		ch->cyclestate.cyclestate_buffer = virtual_address;
		ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
		return 0;

	} else if (!args->dmabuf_fd &&
			ch->cyclestate.cyclestate_buffer_handler) {
		gk20a_free_cycle_stats_buffer(ch);
		return 0;

	} else if (!args->dmabuf_fd &&
			!ch->cyclestate.cyclestate_buffer_handler) {
		/* no request from GL */
		return 0;

	} else {
		pr_err("channel already has cyclestats buffer\n");
		return -EBUSY;
	}
}
#endif

static int gk20a_init_error_notifier(struct channel_gk20a *ch,
		struct nvgpu_set_error_notifier *args)
{
	void *va;
	struct dma_buf *dmabuf;

	if (!args->mem) {
		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
		return -EINVAL;
	}

	dmabuf = dma_buf_get(args->mem);

	if (ch->error_notifier_ref)
		gk20a_free_error_notifiers(ch);

	if (IS_ERR(dmabuf)) {
		pr_err("Invalid handle: %d\n", args->mem);
		return -EINVAL;
	}

	va = dma_buf_vmap(dmabuf);
	if (!va) {
		pr_err("Cannot map notifier handle\n");
		return -ENOMEM;
	}

	/* set channel notifiers pointer */
	ch->error_notifier_ref = dmabuf;
	ch->error_notifier = va + args->offset;
	ch->error_notifier_va = va;
	memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
	return 0;
}

void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
{
	if (ch->error_notifier_ref) {
		struct timespec time_data;
		u64 nsec;

		getnstimeofday(&time_data);
		nsec = ((u64)time_data.tv_sec) * 1000000000u +
				(u64)time_data.tv_nsec;
		ch->error_notifier->time_stamp.nanoseconds[0] =
				(u32)nsec;
		ch->error_notifier->time_stamp.nanoseconds[1] =
				(u32)(nsec >> 32);
		ch->error_notifier->info32 = error;
		ch->error_notifier->status = 0xffff;

		gk20a_err(dev_from_gk20a(ch->g),
		    "error notifier set to %d for ch %d", error, ch->hw_chid);
	}
}

static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
{
	if (ch->error_notifier_ref) {
		dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
		dma_buf_put(ch->error_notifier_ref);
		ch->error_notifier_ref = NULL;
		ch->error_notifier = NULL;
		ch->error_notifier_va = NULL;
	}
}

/* Returns delta of cyclic integers a and b. If a is ahead of b, delta
 * is positive */
static int cyclic_delta(int a, int b)
{
	return a - b;
}
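
/*
 * Example of the wraparound behaviour (illustrative): with 32-bit two's
 * complement arithmetic, cyclic_delta(INT_MIN, INT_MAX) == 1, so a counter
 * that has just wrapped still compares as "ahead" of its pre-wrap value.
 * This is why the IRQ waiters below test cyclic_delta(threshold, handled)
 * <= 0 rather than comparing the raw counter values, which would misorder
 * across the wrap.
 */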

static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
{
	int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
	int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);

	/* wait until all stalling irqs are handled */
	wait_event(g->sw_irq_stall_last_handled_wq,
		   cyclic_delta(stall_irq_threshold,
				atomic_read(&g->sw_irq_stall_last_handled))
		   <= 0);

	/* wait until all non-stalling irqs are handled */
	wait_event(g->sw_irq_nonstall_last_handled_wq,
		   cyclic_delta(nonstall_irq_threshold,
				atomic_read(&g->sw_irq_nonstall_last_handled))
		   <= 0);
}

static void gk20a_wait_until_counter_is_N(
	struct channel_gk20a *ch, atomic_t *counter, int wait_value,
	wait_queue_head_t *wq, const char *caller, const char *counter_name)
{
	while (true) {
		if (wait_event_timeout(
			    *wq,
			    atomic_read(counter) == wait_value,
			    msecs_to_jiffies(5000)) > 0)
			break;

		gk20a_warn(dev_from_gk20a(ch->g),
			   "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
			   caller, ch->hw_chid, counter_name,
			   atomic_read(counter), wait_value);
	}
}

/* call ONLY when no references to the channel exist: after the last put */
static void gk20a_free_channel(struct channel_gk20a *ch)
{
	struct gk20a *g = ch->g;
	struct device *d = dev_from_gk20a(g);
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	struct vm_gk20a *ch_vm = ch->vm;
	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
	struct dbg_session_gk20a *dbg_s;

	gk20a_dbg_fn("");

	WARN_ON(ch->g == NULL);

	trace_gk20a_free_channel(ch->hw_chid);

	/* prevent new kickoffs */
	ch->has_timedout = true;
	wmb();

	/* wait until there's only our ref to the channel */
	gk20a_wait_until_counter_is_N(
		ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
		__func__, "references");

	/* wait until all pending interrupts for recently completed
	 * jobs are handled */
	gk20a_wait_for_deferred_interrupts(g);

	/* prevent new refs */
	spin_lock(&ch->ref_obtain_lock);
	if (!ch->referenceable) {
		spin_unlock(&ch->ref_obtain_lock);
		gk20a_err(dev_from_gk20a(ch->g),
			  "Extra %s() called to channel %u",
			  __func__, ch->hw_chid);
		return;
	}
	ch->referenceable = false;
	spin_unlock(&ch->ref_obtain_lock);

	/* matches with the initial reference in gk20a_open_new_channel() */
	atomic_dec(&ch->ref_count);

	/* wait until no more refs to the channel */
	gk20a_wait_until_counter_is_N(
		ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
		__func__, "references");

	/* if engine reset was deferred, perform it now */
	mutex_lock(&f->deferred_reset_mutex);
	if (g->fifo.deferred_reset_pending) {
		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
			   " deferred, running now");
		gk20a_fifo_reset_engine(g, g->fifo.deferred_fault_engines);
		g->fifo.deferred_fault_engines = 0;
		g->fifo.deferred_reset_pending = false;
	}
	mutex_unlock(&f->deferred_reset_mutex);

	if (!ch->bound)
		goto release;

	if (!gk20a_channel_as_bound(ch))
		goto unbind;

	gk20a_dbg_info("freeing bound channel context, timeout=%ld",
			timeout);

	gk20a_disable_channel(ch, !ch->has_timedout, timeout);

	gk20a_free_error_notifiers(ch);

	/* release channel ctx */
	g->ops.gr.free_channel_ctx(ch);

	gk20a_gr_flush_channel_tlb(gr);

	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));

	/* free gpfifo */
	if (ch->gpfifo.gpu_va)
		gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
			ch->gpfifo.size, gk20a_mem_flag_none);
	if (ch->gpfifo.cpu_va)
		dma_free_coherent(d, ch->gpfifo.size,
			ch->gpfifo.cpu_va, ch->gpfifo.iova);
	ch->gpfifo.cpu_va = NULL;
	ch->gpfifo.iova = 0;

	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));

#if defined(CONFIG_GK20A_CYCLE_STATS)
	gk20a_free_cycle_stats_buffer(ch);
#endif

	channel_gk20a_free_priv_cmdbuf(ch);

	/* sync must be destroyed before releasing channel vm */
	if (ch->sync) {
		ch->sync->destroy(ch->sync);
		ch->sync = NULL;
	}

	/* release channel binding to the as_share */
	if (ch_vm->as_share)
		gk20a_as_release_share(ch_vm->as_share);

	spin_lock(&ch->update_fn_lock);
	ch->update_fn = NULL;
	ch->update_fn_data = NULL;
	spin_unlock(&ch->update_fn_lock);
	cancel_work_sync(&ch->update_fn_work);

unbind:
	/* make sure we don't have deferred interrupts pending that
	 * could still touch the channel */
	gk20a_wait_for_deferred_interrupts(g);

	if (gk20a_is_channel_marked_as_tsg(ch))
		gk20a_tsg_unbind_channel(ch);

	g->ops.fifo.unbind_channel(ch);
	g->ops.fifo.free_inst(g, ch);

release:
	mutex_lock(&ch->submit_lock);
	gk20a_fence_put(ch->last_submit.pre_fence);
	gk20a_fence_put(ch->last_submit.post_fence);
	ch->last_submit.pre_fence = NULL;
	ch->last_submit.post_fence = NULL;
	mutex_unlock(&ch->submit_lock);

	/* unlink all debug sessions */
	mutex_lock(&ch->dbg_s_lock);

	list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
		dbg_s->ch = NULL;
		list_del_init(&dbg_s->dbg_s_list_node);
	}

	mutex_unlock(&ch->dbg_s_lock);

	/* make sure we catch accesses of unopened channels in case
	 * there's non-refcounted channel pointers hanging around */
	ch->g = NULL;
	wmb();

	/* ALWAYS last */
	free_channel(f, ch);
}

/* Try to get a reference to the channel. Return nonzero on success. If fails,
 * the channel is dead or being freed elsewhere and you must not touch it.
 *
 * Always when a channel_gk20a pointer is seen and about to be used, a
 * reference must be held to it - either by you or the caller, which should be
 * documented well or otherwise clearly seen. This usually boils down to the
 * file from ioctls directly, or an explicit get in exception handlers when the
 * channel is found by a hw_chid.
 *
 * Most global functions in this file require a reference to be held by the
 * caller.
 */
struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
					 const char *caller)
{
	struct channel_gk20a *ret;

	spin_lock(&ch->ref_obtain_lock);

	if (likely(ch->referenceable)) {
		atomic_inc(&ch->ref_count);
		ret = ch;
	} else
		ret = NULL;

	spin_unlock(&ch->ref_obtain_lock);

	if (ret)
		trace_gk20a_channel_get(ch->hw_chid, caller);

	return ret;
}

void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
{
	trace_gk20a_channel_put(ch->hw_chid, caller);
	atomic_dec(&ch->ref_count);
	wake_up_all(&ch->ref_count_dec_wq);

	/* More puts than gets. Channel is probably going to get
	 * stuck. */
	WARN_ON(atomic_read(&ch->ref_count) < 0);

	/* Also, more puts than gets. ref_count can go to 0 only if
	 * the channel is closing. Channel is probably going to get
	 * stuck. */
	WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
}
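
/*
 * Illustrative usage of the get/put discipline (assuming the usual
 * gk20a_channel_get()/gk20a_channel_put() wrapper macros that pass __func__
 * to the underscored variants above):
 *
 *	ch = gk20a_channel_get(ch);
 *	if (!ch)
 *		return;	(channel is dead or closing: do not touch it)
 *	...operate on the channel...
 *	gk20a_channel_put(ch);
 */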

void gk20a_channel_close(struct channel_gk20a *ch)
{
	gk20a_free_channel(ch);
}

int gk20a_channel_release(struct inode *inode, struct file *filp)
{
	struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
	struct gk20a *g = ch ? ch->g : NULL;
	int err;

	if (!ch)
		return 0;

	trace_gk20a_channel_release(dev_name(&g->dev->dev));

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
			ch->hw_chid);
		return err;
	}
	gk20a_channel_close(ch);
	gk20a_idle(g->dev);

	filp->private_data = NULL;
	return 0;
}

static void gk20a_channel_update_runcb_fn(struct work_struct *work)
{
	struct channel_gk20a *ch =
		container_of(work, struct channel_gk20a, update_fn_work);
	void (*update_fn)(struct channel_gk20a *, void *);
	void *update_fn_data;

	spin_lock(&ch->update_fn_lock);
	update_fn = ch->update_fn;
	update_fn_data = ch->update_fn_data;
	spin_unlock(&ch->update_fn_lock);

	if (update_fn)
		update_fn(ch, update_fn_data);
}

struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
		void (*update_fn)(struct channel_gk20a *, void *),
		void *update_fn_data)
{
	struct channel_gk20a *ch = gk20a_open_new_channel(g);

	if (ch) {
		spin_lock(&ch->update_fn_lock);
		ch->update_fn = update_fn;
		ch->update_fn_data = update_fn_data;
		spin_unlock(&ch->update_fn_lock);
	}

	return ch;
}

struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch;

	gk20a_dbg_fn("");

	ch = allocate_channel(f);
	if (ch == NULL) {
		/* TBD: we want to make this virtualizable */
		gk20a_err(dev_from_gk20a(g), "out of hw chids");
		return NULL;
	}

	trace_gk20a_open_new_channel(ch->hw_chid);

	BUG_ON(ch->g);
	ch->g = g;

	if (g->ops.fifo.alloc_inst(g, ch)) {
		ch->g = NULL;
		free_channel(f, ch);
		gk20a_err(dev_from_gk20a(g),
			  "failed to open gk20a channel, out of inst mem");
		return NULL;
	}

	/* now the channel is in a limbo out of the free list but not marked as
	 * alive and used (i.e. get-able) yet */

	ch->pid = current->pid;

	/* By default, channel is regular (non-TSG) channel */
	ch->tsgid = NVGPU_INVALID_TSG_ID;

	/* reset timeout counter and update timestamp */
	ch->timeout_accumulated_ms = 0;
	ch->timeout_gpfifo_get = 0;
	/* set gr host default timeout */
	ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
	ch->timeout_debug_dump = true;
	ch->has_timedout = false;

	/* The channel is *not* runnable at this point. It still needs to have
	 * an address space bound and allocate a gpfifo and grctx. */

	init_waitqueue_head(&ch->notifier_wq);
	init_waitqueue_head(&ch->semaphore_wq);
	init_waitqueue_head(&ch->submit_wq);

	mutex_init(&ch->poll_events.lock);
	ch->poll_events.events_enabled = false;
	ch->poll_events.num_pending_events = 0;

	ch->update_fn = NULL;
	ch->update_fn_data = NULL;
	spin_lock_init(&ch->update_fn_lock);
	INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);

	/* Mark the channel alive, get-able, with 1 initial use
	 * references. The initial reference will be decreased in
	 * gk20a_free_channel() */
	ch->referenceable = true;
	atomic_set(&ch->ref_count, 1);
	wmb();

	return ch;
}

static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
{
	int err;
	struct channel_gk20a *ch;

	trace_gk20a_channel_open(dev_name(&g->dev->dev));

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
		return err;
	}
	ch = gk20a_open_new_channel(g);
	gk20a_idle(g->dev);
	if (!ch) {
		gk20a_err(dev_from_gk20a(g),
			"failed to allocate channel");
		return -ENOMEM;
	}

	filp->private_data = ch;
	return 0;
}

int gk20a_channel_open(struct inode *inode, struct file *filp)
{
	struct gk20a *g = container_of(inode->i_cdev,
			struct gk20a, channel.cdev);
	int ret;

	gk20a_dbg_fn("start");
	ret = __gk20a_channel_open(g, filp);
	gk20a_dbg_fn("end");
	return ret;
}

int gk20a_channel_open_ioctl(struct gk20a *g,
		struct nvgpu_channel_open_args *args)
{
	int err;
	int fd;
	struct file *file;
	char *name;

	err = get_unused_fd_flags(O_RDWR);
	if (err < 0)
		return err;
	fd = err;

	name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
			dev_name(&g->dev->dev), fd);
	if (!name) {
		err = -ENOMEM;
		goto clean_up;
	}

	file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
	kfree(name);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto clean_up;
	}

	err = __gk20a_channel_open(g, file);
	if (err)
		goto clean_up_file;

	fd_install(fd, file);
	args->channel_fd = fd;
	return 0;

clean_up_file:
	fput(file);
clean_up:
	put_unused_fd(fd);
	return err;
}

/* allocate private cmd buffer.
   used for inserting commands before/after user submitted buffers. */
static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	u32 size;
	dma_addr_t iova;
	struct sg_table *sgt;
	int err = 0;

	/* Kernel can insert gpfifos before and after user gpfifos.
	   Before user gpfifos, kernel inserts fence_wait, which takes
	   syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
	   After user gpfifos, kernel inserts fence_get, which takes
	   wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
	   = 6 dwords.
	   Worst case, if kernel adds both of them for every user gpfifo,
	   the max size of the priv_cmdbuf is:
	   (gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes) */
	size = roundup_pow_of_two(
		c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
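
	/*
	 * Illustrative sizing example (values made up): with
	 * c->gpfifo.entry_num == 1024, the formula above gives
	 * 1024 * 2 * 10 * 4 / 3 = 27306 bytes, which
	 * roundup_pow_of_two() turns into a 32 KiB queue.
	 */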

	q->mem.base_cpuva = dma_alloc_coherent(d, size,
					&iova,
					GFP_KERNEL);
	if (!q->mem.base_cpuva) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	q->mem.base_iova = iova;
	q->mem.size = size;

	err = gk20a_get_sgtable(d, &sgt,
			q->mem.base_cpuva, q->mem.base_iova, size);
	if (err) {
		gk20a_err(d, "%s: failed to create sg table\n", __func__);
		goto clean_up;
	}

	memset(q->mem.base_cpuva, 0, size);

	q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
					size,
					0, /* flags */
					gk20a_mem_flag_none);
	if (!q->base_gpuva) {
		gk20a_err(d, "ch %d : failed to map gpu va "
			   "for priv cmd buffer", c->hw_chid);
		err = -ENOMEM;
		goto clean_up_sgt;
	}

	q->size = q->mem.size / sizeof(u32);

	gk20a_free_sgtable(&sgt);

	return 0;

clean_up_sgt:
	gk20a_free_sgtable(&sgt);
clean_up:
	channel_gk20a_free_priv_cmdbuf(c);
	return err;
}

static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;

	if (q->size == 0)
		return;

	if (q->base_gpuva)
		gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
				q->mem.size, gk20a_mem_flag_none);
	if (q->mem.base_cpuva)
		dma_free_coherent(d, q->mem.size,
			q->mem.base_cpuva, q->mem.base_iova);
	q->mem.base_cpuva = NULL;
	q->mem.base_iova = 0;

	memset(q, 0, sizeof(struct priv_cmd_queue));
}

/* allocate a cmd buffer with given size. size is number of u32 entries */
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
			     struct priv_cmd_entry **entry)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	u32 free_count;
	u32 size = orig_size;

	gk20a_dbg_fn("size %d", orig_size);

	*entry = NULL;

	/* if free space in the end is less than requested, increase the size
	 * to make the real allocated space start from beginning. */
	if (q->put + size > q->size)
		size = orig_size + (q->size - q->put);

	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
			c->hw_chid, q->get, q->put);

	free_count = (q->size - (q->put - q->get) - 1) % q->size;

	if (size > free_count)
		return -EAGAIN;

	e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
	if (!e) {
		gk20a_err(dev_from_gk20a(c->g),
			"ch %d: fail to allocate priv cmd entry",
			c->hw_chid);
		return -ENOMEM;
	}

	e->size = orig_size;
	e->gp_get = c->gpfifo.get;
	e->gp_put = c->gpfifo.put;
	e->gp_wrap = c->gpfifo.wrap;

	/* if we have increased size to skip free space in the end, set put
	   to beginning of cmd buffer (0) + size */
	if (size != orig_size) {
		e->ptr = q->mem.base_cpuva;
		e->gva = q->base_gpuva;
		q->put = orig_size;
	} else {
		e->ptr = q->mem.base_cpuva + q->put;
		e->gva = q->base_gpuva + q->put * sizeof(u32);
		q->put = (q->put + orig_size) & (q->size - 1);
	}

	/* we already handled q->put + size > q->size so BUG_ON this */
	BUG_ON(q->put > q->size);

	*entry = e;

	gk20a_dbg_fn("done");

	return 0;
}

/* Don't call this to free an explicit cmd entry.
 * It doesn't update priv_cmd_queue get/put */
static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e)
{
	kfree(e);
}

int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
			       struct nvgpu_alloc_gpfifo_args *args)
{
	struct gk20a *g = c->g;
	struct device *d = dev_from_gk20a(g);
	struct vm_gk20a *ch_vm;
	u32 gpfifo_size;
	int err = 0;
	struct sg_table *sgt;
	dma_addr_t iova;

	/* Kernel can insert one extra gpfifo entry before user submitted gpfifos
	   and another one after, for internal usage. Triple the requested size. */
	gpfifo_size = roundup_pow_of_two(args->num_entries * 3);

	if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
		c->vpr = true;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(d,
			    "not bound to an address space at time of gpfifo"
			    " allocation.");
		return -EINVAL;
	}
	ch_vm = c->vm;

	c->cmds_pending = false;
	mutex_lock(&c->submit_lock);
	gk20a_fence_put(c->last_submit.pre_fence);
	gk20a_fence_put(c->last_submit.post_fence);
	c->last_submit.pre_fence = NULL;
	c->last_submit.post_fence = NULL;
	mutex_unlock(&c->submit_lock);

	c->ramfc.offset = 0;
	c->ramfc.size = ram_in_ramfc_s() / 8;

	if (c->gpfifo.cpu_va) {
		gk20a_err(d, "channel %d : "
			   "gpfifo already allocated", c->hw_chid);
		return -EEXIST;
	}

	c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
	c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
						c->gpfifo.size,
						&iova,
						GFP_KERNEL);
	if (!c->gpfifo.cpu_va) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	c->gpfifo.iova = iova;
	c->gpfifo.entry_num = gpfifo_size;

	c->gpfifo.get = c->gpfifo.put = 0;

	err = gk20a_get_sgtable(d, &sgt,
			c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
	if (err) {
		gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
		goto clean_up;
	}

	c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
					&sgt,
					c->gpfifo.size,
					0, /* flags */
					gk20a_mem_flag_none);
	if (!c->gpfifo.gpu_va) {
		gk20a_err(d, "channel %d : failed to map"
			   " gpu_va for gpfifo", c->hw_chid);
		err = -ENOMEM;
		goto clean_up_sgt;
	}

	gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
		c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);

	channel_gk20a_setup_userd(c);

	err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
	if (err)
		goto clean_up_unmap;

	/* TBD: setup engine contexts */

	err = channel_gk20a_alloc_priv_cmdbuf(c);
	if (err)
		goto clean_up_unmap;

	err = channel_gk20a_update_runlist(c, true);
	if (err)
		goto clean_up_unmap;

	g->ops.fifo.bind_channel(c);

	gk20a_free_sgtable(&sgt);

	gk20a_dbg_fn("done");
	return 0;

clean_up_unmap:
	gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
		c->gpfifo.size, gk20a_mem_flag_none);
clean_up_sgt:
	gk20a_free_sgtable(&sgt);
clean_up:
	dma_free_coherent(d, c->gpfifo.size,
		c->gpfifo.cpu_va, c->gpfifo.iova);
	c->gpfifo.cpu_va = NULL;
	c->gpfifo.iova = 0;
	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
	gk20a_err(d, "fail");
	return err;
}

static inline bool check_gp_put(struct gk20a *g,
				struct channel_gk20a *c)
{
	u32 put;
	/* gp_put changed unexpectedly since last update? */
	put = gk20a_bar1_readl(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w());
	if (c->gpfifo.put != put) {
		/* TBD: BUG_ON/teardown on this */
		gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
			   "since last update");
		c->gpfifo.put = put;
		return false; /* surprise! */
	}
	return true; /* checked out ok */
}

/* Update with this periodically to determine how the gpfifo is draining. */
static inline u32 update_gp_get(struct gk20a *g,
				struct channel_gk20a *c)
{
	u32 new_get = gk20a_bar1_readl(g,
		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
	if (new_get < c->gpfifo.get)
		c->gpfifo.wrap = !c->gpfifo.wrap;
	c->gpfifo.get = new_get;
	return new_get;
}

static inline u32 gp_free_count(struct channel_gk20a *c)
{
	return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
		c->gpfifo.entry_num;
}
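
/*
 * Worked example of the free-count math above (values made up): with
 * entry_num = 512, put = 10 and get = 500, put - get wraps to -490, so
 * (512 - (-490) - 1) % 512 = 489 free slots: 22 entries are in flight and
 * one slot is always kept empty so that a full ring can be told apart from
 * an empty one.  update_gp_get() keeps this honest by re-reading the
 * hardware GET pointer and toggling gpfifo.wrap whenever GET wraps past
 * the end of the ring.
 */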

bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
		u32 timeout_delta_ms)
{
	u32 gpfifo_get = update_gp_get(ch->g, ch);
	/* Count consecutive timeout ISRs */
	if (gpfifo_get == ch->timeout_gpfifo_get) {
		/* we didn't advance since previous channel timeout check */
		ch->timeout_accumulated_ms += timeout_delta_ms;
	} else {
		/* first timeout isr encountered */
		ch->timeout_accumulated_ms = timeout_delta_ms;
	}

	ch->timeout_gpfifo_get = gpfifo_get;

	return ch->g->timeouts_enabled &&
		ch->timeout_accumulated_ms > ch->timeout_ms_max;
}

static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch)
{
	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);

	if (ch->g->timeouts_enabled && ch->g->ch_wdt_enabled &&
			platform->ch_wdt_timeout_ms)
		return platform->ch_wdt_timeout_ms;

	return (u32)MAX_SCHEDULE_TIMEOUT;
}

static u32 get_gp_free_count(struct channel_gk20a *c)
{
	update_gp_get(c->g, c);
	return gp_free_count(c);
}

static void trace_write_pushbuffer(struct channel_gk20a *c,
				   struct nvgpu_gpfifo *g)
{
	void *mem = NULL;
	unsigned int words;
	u64 offset;
	struct dma_buf *dmabuf = NULL;

	if (gk20a_debug_trace_cmdbuf) {
		u64 gpu_va = (u64)g->entry0 |
			(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
		int err;

		words = pbdma_gp_entry1_length_v(g->entry1);
		err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
		if (!err)
			mem = dma_buf_vmap(dmabuf);
	}

	if (mem) {
		u32 i;
		/*
		 * Write in batches of 128 as there seems to be a limit
		 * of how much you can output to ftrace at once.
		 */
		for (i = 0; i < words; i += 128U) {
			trace_gk20a_push_cmdbuf(
				c->g->dev->name,
				0,
				min(words - i, 128U),
				offset + i * sizeof(u32),
				mem);
		}
		dma_buf_vunmap(dmabuf, mem);
	}
}

static void trace_write_pushbuffer_range(struct channel_gk20a *c,
					 struct nvgpu_gpfifo *g,
					 struct nvgpu_submit_gpfifo_args *args,
					 int offset,
					 int count)
{
	u32 size;
	int i;
	struct nvgpu_gpfifo *gp;
	bool gpfifo_allocated = false;

	if (!gk20a_debug_trace_cmdbuf)
		return;

	if (!g && !args)
		return;

	if (!g) {
		size = args->num_entries * sizeof(struct nvgpu_gpfifo);

		g = nvgpu_alloc(size, false);
		if (!g)
			return;

		if (copy_from_user(g,
			(void __user *)(uintptr_t)args->gpfifo, size)) {
			nvgpu_free(g);
			return;
		}
		gpfifo_allocated = true;
	}

	gp = g + offset;
	for (i = 0; i < count; i++, gp++)
		trace_write_pushbuffer(c, gp);

	if (gpfifo_allocated)
		nvgpu_free(g);
}

static void gk20a_channel_timeout_start(struct channel_gk20a *ch,
		struct channel_gk20a_job *job)
{
	mutex_lock(&ch->timeout.lock);

	if (ch->timeout.initialized) {
		mutex_unlock(&ch->timeout.lock);
		return;
	}

	ch->timeout.job = job;
	ch->timeout.initialized = true;
	schedule_delayed_work(&ch->timeout.wq,
		msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch)));

	mutex_unlock(&ch->timeout.lock);
}

static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
{
	mutex_lock(&ch->timeout.lock);
	if (!ch->timeout.initialized) {
		mutex_unlock(&ch->timeout.lock);
		return;
	}
	mutex_unlock(&ch->timeout.lock);

	cancel_delayed_work_sync(&ch->timeout.wq);

	mutex_lock(&ch->timeout.lock);
	ch->timeout.initialized = false;
	mutex_unlock(&ch->timeout.lock);
}
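
/*
 * Watchdog lifecycle, as wired up in this file: gk20a_channel_add_job()
 * calls gk20a_channel_timeout_start() when a job is submitted,
 * gk20a_channel_update() stops or re-arms it as jobs complete, and if the
 * delayed work ever fires, gk20a_channel_timeout_handler() below inspects
 * the stuck job and either aborts the channel/TSG or triggers engine
 * recovery.
 */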

void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
{
	u32 chid;
	struct fifo_gk20a *f = &g->fifo;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = &f->channel[chid];

		if (gk20a_channel_get(ch)) {
			mutex_lock(&ch->timeout.lock);
			if (!ch->timeout.initialized) {
				mutex_unlock(&ch->timeout.lock);
				gk20a_channel_put(ch);
				continue;
			}
			mutex_unlock(&ch->timeout.lock);

			cancel_delayed_work_sync(&ch->timeout.wq);
			if (!ch->has_timedout)
				schedule_delayed_work(&ch->timeout.wq,
					msecs_to_jiffies(
					gk20a_get_channel_watchdog_timeout(ch)));

			gk20a_channel_put(ch);
		}
	}
}

static void gk20a_channel_timeout_handler(struct work_struct *work)
{
	struct channel_gk20a_job *job;
	struct gk20a *g;
	struct channel_gk20a *ch;
	struct channel_gk20a *failing_ch;
	u32 engine_id;
	int id = -1;
	bool is_tsg = false;

	ch = container_of(to_delayed_work(work), struct channel_gk20a,
			timeout.wq);
	ch = gk20a_channel_get(ch);
	if (!ch)
		return;

	g = ch->g;

	/* Need global lock since multiple channels can timeout at a time */
	mutex_lock(&g->ch_wdt_lock);

	/* Get timed out job and reset the timer */
	mutex_lock(&ch->timeout.lock);
	job = ch->timeout.job;
	ch->timeout.initialized = false;
	mutex_unlock(&ch->timeout.lock);

	if (gk20a_fifo_disable_all_engine_activity(g, true))
		goto fail_unlock;

	if (gk20a_fence_is_expired(job->post_fence))
		goto fail_enable_engine_activity;

	gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out\n",
		ch->hw_chid);

	/* Get failing engine data */
	engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);

	if (engine_id >= g->fifo.max_engines) {
		/* If no failing engine, abort the channels */
		if (gk20a_is_channel_marked_as_tsg(ch)) {
			struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];

			gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
			gk20a_fifo_abort_tsg(g, ch->tsgid);
		} else {
			gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
			gk20a_channel_abort(ch);
		}
	} else {
		/* If failing engine, trigger recovery */
		failing_ch = gk20a_channel_get(&g->fifo.channel[id]);
		if (!failing_ch)
			goto fail_enable_engine_activity;

		if (failing_ch->hw_chid != ch->hw_chid)
			gk20a_channel_timeout_start(ch, job);

		gk20a_fifo_recover(g, BIT(engine_id),
			failing_ch->hw_chid, is_tsg,
			failing_ch->timeout_debug_dump);

		gk20a_channel_put(failing_ch);
	}

fail_enable_engine_activity:
	gk20a_fifo_enable_all_engine_activity(g);
fail_unlock:
	mutex_unlock(&g->ch_wdt_lock);
	gk20a_channel_put(ch);
}

static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	u32 cmd_entry_start;
	struct device *d = dev_from_gk20a(c->g);

	if (!e)
		return 0;

	cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.base_cpuva);
	if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
		gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);

	q->get = (e->ptr - (u32 *)q->mem.base_cpuva) + e->size;
	free_priv_cmdbuf(c, e);

	return 0;
}

static int gk20a_channel_add_job(struct channel_gk20a *c,
				 struct gk20a_fence *pre_fence,
				 struct gk20a_fence *post_fence,
				 struct priv_cmd_entry *wait_cmd,
				 struct priv_cmd_entry *incr_cmd,
				 bool skip_buffer_refcounting)
{
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job = NULL;
	struct mapped_buffer_node **mapped_buffers = NULL;
	int err = 0, num_mapped_buffers = 0;

	/* job needs reference to this vm (released in channel_update) */
	gk20a_vm_get(vm);

	if (!skip_buffer_refcounting) {
		err = gk20a_vm_get_buffers(vm, &mapped_buffers,
					&num_mapped_buffers);
		if (err) {
			gk20a_vm_put(vm);
			return err;
		}
	}

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job) {
		gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
		gk20a_vm_put(vm);
		return -ENOMEM;
	}

	/* put() is done in gk20a_channel_update() when the job is done */
	c = gk20a_channel_get(c);

	if (c) {
		job->num_mapped_buffers = num_mapped_buffers;
		job->mapped_buffers = mapped_buffers;
		job->pre_fence = gk20a_fence_get(pre_fence);
		job->post_fence = gk20a_fence_get(post_fence);
		job->wait_cmd = wait_cmd;
		job->incr_cmd = incr_cmd;

		gk20a_channel_timeout_start(c, job);

		mutex_lock(&c->jobs_lock);
		list_add_tail(&job->list, &c->jobs);
		mutex_unlock(&c->jobs_lock);
	} else {
		return -ETIMEDOUT;
	}

	return 0;
}

void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
{
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job, *n;
	struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);

	trace_gk20a_channel_update(c->hw_chid);

	wake_up(&c->submit_wq);

	mutex_lock(&c->submit_lock);
	mutex_lock(&c->jobs_lock);
	list_for_each_entry_safe(job, n, &c->jobs, list) {
		struct gk20a *g = c->g;

		bool completed = gk20a_fence_is_expired(job->post_fence);
		if (!completed) {
			gk20a_channel_timeout_start(c, job);
			break;
		}

		gk20a_channel_timeout_stop(c);

		if (c->sync)
			c->sync->signal_timeline(c->sync);

		if (job->num_mapped_buffers)
			gk20a_vm_put_buffers(vm, job->mapped_buffers,
				job->num_mapped_buffers);

		/* Close the fences (this will unref the semaphores and release
		 * them to the pool). */
		gk20a_fence_put(job->pre_fence);
		gk20a_fence_put(job->post_fence);

		/* Free the private command buffers (wait_cmd first and
		 * then incr_cmd i.e. order of allocation) */
		gk20a_free_priv_cmdbuf(c, job->wait_cmd);
		gk20a_free_priv_cmdbuf(c, job->incr_cmd);

		/* job is done. release its vm reference (taken in add_job) */
		gk20a_vm_put(vm);
		/* another bookkeeping taken in add_job. caller must hold a ref
		 * so this wouldn't get freed here. */
		gk20a_channel_put(c);

		list_del_init(&job->list);
		kfree(job);
		gk20a_idle(g->dev);
	}

	/*
	 * If job list is empty then channel is idle and we can free
	 * the syncpt here (given aggressive_destroy flag is set)
	 * Note: check if last submit is complete before destroying
	 * the sync resource
	 */
	if (list_empty(&c->jobs)) {
		if (c->sync && platform->aggressive_sync_destroy &&
		    gk20a_fence_is_expired(c->last_submit.post_fence)) {
			c->sync->destroy(c->sync);
			c->sync = NULL;
		}
	}
	mutex_unlock(&c->jobs_lock);
	mutex_unlock(&c->submit_lock);

	if (c->update_fn)
		schedule_work(&c->update_fn_work);
}
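
/*
 * Summary of the per-job bookkeeping reclaimed above: every job holds a vm
 * reference and a channel reference (both taken in gk20a_channel_add_job()),
 * references to its pre/post fences, and its wait_cmd/incr_cmd private
 * command entries; all of them are released here, strictly in submission
 * order, once the post fence has expired.
 */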

int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
				struct nvgpu_gpfifo *gpfifo,
				struct nvgpu_submit_gpfifo_args *args,
				u32 num_entries,
				u32 flags,
				struct nvgpu_fence *fence,
				struct gk20a_fence **fence_out)
{
	struct gk20a *g = c->g;
	struct device *d = dev_from_gk20a(g);
	int err = 0;
	int start, end;
	int wait_fence_fd = -1;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_fence *pre_fence = NULL;
	struct gk20a_fence *post_fence = NULL;
	/* we might need two extra gpfifo entries - one for pre fence
	 * and one for post fence. */
	const int extra_entries = 2;
	bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);

	if (c->has_timedout)
		return -ETIMEDOUT;

	/* fifo not large enough for request. Return error immediately */
	if (c->gpfifo.entry_num < num_entries) {
		gk20a_err(d, "not enough gpfifo space allocated");
		return -ENOMEM;
	}

	if (!gpfifo && !args)
		return -EINVAL;

	if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
		      NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
	    !fence)
		return -EINVAL;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(d,
			    "not bound to an address space at time of gpfifo"
			    " submission.");
		return -EINVAL;
	}

#ifdef CONFIG_DEBUG_FS
	/* update debug settings */
	if (g->ops.ltc.sync_debugfs)
		g->ops.ltc.sync_debugfs(g);
#endif

	gk20a_dbg_info("channel %d", c->hw_chid);

	/* gk20a_channel_update releases this ref. */
	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(d, "failed to power on gk20a to submit gpfifo");
		return err;
	}

	trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
					  c->hw_chid,
					  num_entries,
					  flags,
					  fence ? fence->id : 0,
					  fence ? fence->value : 0);

	update_gp_get(g, c);

	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	/* Make sure we have enough space for gpfifo entries. If not,
	 * wait for signals from completed submits */
	if (gp_free_count(c) < num_entries + extra_entries) {
		/* we can get here via locked ioctl and other paths too */
		int locked_path = mutex_is_locked(&c->ioctl_lock);
		if (locked_path)
			mutex_unlock(&c->ioctl_lock);

		trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
		err = wait_event_interruptible(c->submit_wq,
			get_gp_free_count(c) >= num_entries + extra_entries ||
			c->has_timedout);
		trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);

		if (locked_path)
			mutex_lock(&c->ioctl_lock);
	}

	if (c->has_timedout) {
		err = -ETIMEDOUT;
		goto clean_up;
	}

	if (err) {
		gk20a_err(d, "timeout waiting for gpfifo space");
		err = -EAGAIN;
		goto clean_up;
	}

	mutex_lock(&c->submit_lock);

	if (!c->sync) {
		c->sync = gk20a_channel_sync_create(c);
		if (!c->sync) {
			err = -ENOMEM;
			mutex_unlock(&c->submit_lock);
			goto clean_up;
		}
	}

	/*
	 * optionally insert syncpt wait in the beginning of gpfifo submission
	 * when user requested and the wait hasn't expired.
	 * validate that the id makes sense, elide if not
	 * the only reason this isn't being unceremoniously killed is to
	 * keep running some tests which trigger this condition
	 */
	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
		if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
			wait_fence_fd = fence->id;
			err = c->sync->wait_fd(c->sync, wait_fence_fd,
					&wait_cmd, &pre_fence);
		} else {
			err = c->sync->wait_syncpt(c->sync, fence->id,
					fence->value, &wait_cmd, &pre_fence);
		}
	}
	if (err) {
		mutex_unlock(&c->submit_lock);
		goto clean_up;
	}

	/* always insert syncpt increment at end of gpfifo submission
	   to keep track of method completion for idle railgating */
	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
		err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
					 &post_fence, need_wfi);
	else
		err = c->sync->incr(c->sync, &incr_cmd,
				    &post_fence);
	if (err) {
		mutex_unlock(&c->submit_lock);
		goto clean_up;
	}

	if (wait_cmd) {
		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
			u64_lo32(wait_cmd->gva);
		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
			u64_hi32(wait_cmd->gva) |
			pbdma_gp_entry1_length_f(wait_cmd->size);
		trace_gk20a_push_cmdbuf(c->g->dev->name,
			0, wait_cmd->size, 0, wait_cmd->ptr);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		wait_cmd->gp_put = c->gpfifo.put;
	}

	/*
	 * Copy source gpfifo entries into the gpfifo ring buffer,
	 * potentially splitting into two memcpies to handle the
	 * ring buffer wrap-around case.
	 */
	start = c->gpfifo.put;
	end = start + num_entries;

	if (gpfifo) {
		if (end > c->gpfifo.entry_num) {
			int length0 = c->gpfifo.entry_num - start;
			int length1 = num_entries - length0;

			memcpy(c->gpfifo.cpu_va + start, gpfifo,
			       length0 * sizeof(*gpfifo));

			memcpy(c->gpfifo.cpu_va, gpfifo + length0,
			       length1 * sizeof(*gpfifo));

			trace_write_pushbuffer_range(c, gpfifo, NULL,
					0, length0);
			trace_write_pushbuffer_range(c, gpfifo, NULL,
					length0, length1);
		} else {
			memcpy(c->gpfifo.cpu_va + start, gpfifo,
			       num_entries * sizeof(*gpfifo));

			trace_write_pushbuffer_range(c, gpfifo, NULL,
					0, num_entries);
		}
	} else {
		struct nvgpu_gpfifo __user *user_gpfifo =
			(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
		if (end > c->gpfifo.entry_num) {
			int length0 = c->gpfifo.entry_num - start;
			int length1 = num_entries - length0;

			err = copy_from_user(c->gpfifo.cpu_va + start,
				user_gpfifo,
				length0 * sizeof(*user_gpfifo));
			if (err) {
				mutex_unlock(&c->submit_lock);
				goto clean_up;
			}

			err = copy_from_user(c->gpfifo.cpu_va,
				user_gpfifo + length0,
				length1 * sizeof(*user_gpfifo));
			if (err) {
				mutex_unlock(&c->submit_lock);
				goto clean_up;
			}

			trace_write_pushbuffer_range(c, NULL, args,
					0, length0);
			trace_write_pushbuffer_range(c, NULL, args,
					length0, length1);
		} else {
			err = copy_from_user(c->gpfifo.cpu_va + start,
				user_gpfifo,
				num_entries * sizeof(*user_gpfifo));
			if (err) {
				mutex_unlock(&c->submit_lock);
				goto clean_up;
			}

			trace_write_pushbuffer_range(c, NULL, args,
					0, num_entries);
		}
	}

	c->gpfifo.put = (c->gpfifo.put + num_entries) &
		(c->gpfifo.entry_num - 1);

	if (incr_cmd) {
		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
			u64_lo32(incr_cmd->gva);
		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
			u64_hi32(incr_cmd->gva) |
			pbdma_gp_entry1_length_f(incr_cmd->size);
		trace_gk20a_push_cmdbuf(c->g->dev->name,
			0, incr_cmd->size, 0, incr_cmd->ptr);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		incr_cmd->gp_put = c->gpfifo.put;
	}

	gk20a_fence_put(c->last_submit.pre_fence);
	gk20a_fence_put(c->last_submit.post_fence);
	c->last_submit.pre_fence = pre_fence;
	c->last_submit.post_fence = post_fence;
	if (fence_out)
		*fence_out = gk20a_fence_get(post_fence);

	/* TODO! Check for errors... */
	gk20a_channel_add_job(c, pre_fence, post_fence,
				wait_cmd, incr_cmd,
				skip_buffer_refcounting);

	c->cmds_pending = true;
	gk20a_bar1_writel(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
		c->gpfifo.put);

	mutex_unlock(&c->submit_lock);

	trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
					     c->hw_chid,
					     num_entries,
					     flags,
					     post_fence->syncpt_id,
					     post_fence->syncpt_value);

	gk20a_dbg_info("post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	gk20a_dbg_fn("done");
	return err;

clean_up:
	gk20a_err(d, "fail");
	free_priv_cmdbuf(c, wait_cmd);
	free_priv_cmdbuf(c, incr_cmd);
	gk20a_fence_put(pre_fence);
	gk20a_fence_put(post_fence);
	gk20a_idle(g->dev);
	return err;
}

int gk20a_init_channel_support(struct gk20a *g, u32 chid)
{
	struct channel_gk20a *c = g->fifo.channel+chid;
	c->g = g;
	c->hw_chid = chid;
	c->bound = false;
	spin_lock_init(&c->ref_obtain_lock);
	atomic_set(&c->ref_count, 0);
	c->referenceable = false;
	init_waitqueue_head(&c->ref_count_dec_wq);
	mutex_init(&c->ioctl_lock);
	mutex_init(&c->jobs_lock);
	mutex_init(&c->submit_lock);
	mutex_init(&c->timeout.lock);
	INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
	INIT_LIST_HEAD(&c->jobs);
#if defined(CONFIG_GK20A_CYCLE_STATS)
	mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
#endif
	INIT_LIST_HEAD(&c->dbg_s_list);
	mutex_init(&c->dbg_s_lock);
	list_add(&c->free_chs, &g->fifo.free_chs);

	return 0;
}

int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
{
	int err = 0;
	struct gk20a_fence *fence = ch->last_submit.post_fence;

	if (!ch->cmds_pending)
		return 0;

	/* Do not wait for a timedout channel */
	if (ch->has_timedout)
		return -ETIMEDOUT;

	gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
		      fence->syncpt_value, fence->semaphore);

	err = gk20a_fence_wait(fence, timeout);
	if (err == -ETIMEDOUT)
		dev_warn(dev_from_gk20a(ch->g),
			 "timed out waiting for gk20a channel to finish");
	else if (!err)
		ch->cmds_pending = false;

	return err;
}

static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
					ulong id, u32 offset,
					u32 payload, long timeout)
{
	struct platform_device *pdev = ch->g->dev;
	struct dma_buf *dmabuf;
	void *data;
	u32 *semaphore;
	int ret = 0;
	long remain;

	/* do not wait if channel has timed out */
	if (ch->has_timedout)
		return -ETIMEDOUT;

	dmabuf = dma_buf_get(id);
	if (IS_ERR(dmabuf)) {
		gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
			   id);
		return -EINVAL;
	}

	data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
	if (!data) {
		gk20a_err(&pdev->dev, "failed to map notifier memory");
		ret = -EINVAL;
		goto cleanup_put;
	}

	semaphore = data + (offset & ~PAGE_MASK);

	remain = wait_event_interruptible_timeout(
			ch->semaphore_wq,
			*semaphore == payload || ch->has_timedout,
			timeout);

	if (remain == 0 && *semaphore != payload)
		ret = -ETIMEDOUT;
	else if (remain < 0)
		ret = remain;

	dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
cleanup_put:
	dma_buf_put(dmabuf);
	return ret;
}

static int gk20a_channel_wait(struct channel_gk20a *ch,
			      struct nvgpu_wait_args *args)
{
	struct device *d = dev_from_gk20a(ch->g);
	struct dma_buf *dmabuf;
	struct notification *notif;
	struct timespec tv;
	u64 jiffies;
	ulong id;
	u32 offset;
	unsigned long timeout;
	int remain, ret = 0;

	gk20a_dbg_fn("");

	if (ch->has_timedout)
		return -ETIMEDOUT;

	if (args->timeout == NVGPU_NO_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT;
	else
		timeout = (u32)msecs_to_jiffies(args->timeout);

	switch (args->type) {
	case NVGPU_WAIT_TYPE_NOTIFIER:
		id = args->condition.notifier.dmabuf_fd;
		offset = args->condition.notifier.offset;

		dmabuf = dma_buf_get(id);
		if (IS_ERR(dmabuf)) {
			gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
				   id);
			return -EINVAL;
		}

		notif = dma_buf_vmap(dmabuf);
		if (!notif) {
			gk20a_err(d, "failed to map notifier memory");
			return -ENOMEM;
		}

		notif = (struct notification *)((uintptr_t)notif + offset);

		/* user should set status pending before
		 * calling this ioctl */
		remain = wait_event_interruptible_timeout(
				ch->notifier_wq,
				notif->status == 0 || ch->has_timedout,
				timeout);

		if (remain == 0 && notif->status != 0) {
			ret = -ETIMEDOUT;
			goto notif_clean_up;
		} else if (remain < 0) {
			ret = -EINTR;
			goto notif_clean_up;
		}

		/* TBD: fill in correct information */
		jiffies = get_jiffies_64();
		jiffies_to_timespec(jiffies, &tv);
		notif->timestamp.nanoseconds[0] = tv.tv_nsec;
		notif->timestamp.nanoseconds[1] = tv.tv_sec;
		notif->info32 = 0xDEADBEEF; /* should be object name */
		notif->info16 = ch->hw_chid; /* should be method offset */

notif_clean_up:
		dma_buf_vunmap(dmabuf, notif);
		return ret;

	case NVGPU_WAIT_TYPE_SEMAPHORE:
		ret = gk20a_channel_wait_semaphore(ch,
				args->condition.semaphore.dmabuf_fd,
				args->condition.semaphore.offset,
				args->condition.semaphore.payload,
				timeout);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/* poll events for semaphores */

static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
{
	gk20a_dbg_fn("");

	mutex_lock(&ev->lock);

	ev->events_enabled = true;
	ev->num_pending_events = 0;

	mutex_unlock(&ev->lock);
}

static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
{
	gk20a_dbg_fn("");

	mutex_lock(&ev->lock);

	ev->events_enabled = false;
	ev->num_pending_events = 0;

	mutex_unlock(&ev->lock);
}

static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
{
	gk20a_dbg_fn("");

	mutex_lock(&ev->lock);

	if (ev->events_enabled &&
			ev->num_pending_events > 0)
		ev->num_pending_events--;

	mutex_unlock(&ev->lock);
}

static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
			  struct nvgpu_channel_events_ctrl_args *args)
{
	int ret = 0;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
			"channel events ctrl cmd %d", args->cmd);

	switch (args->cmd) {
	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
		gk20a_channel_events_enable(&ch->poll_events);
		break;

	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
		gk20a_channel_events_disable(&ch->poll_events);
		break;

	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
		gk20a_channel_events_clear(&ch->poll_events);
		break;

	default:
		gk20a_err(dev_from_gk20a(ch->g),
			   "unrecognized channel events ctrl cmd: 0x%x",
			   args->cmd);
		ret = -EINVAL;
		break;
	}

	return ret;
}

void gk20a_channel_event(struct channel_gk20a *ch)
{
	mutex_lock(&ch->poll_events.lock);

	if (ch->poll_events.events_enabled) {
		gk20a_dbg_info("posting event on channel id %d",
				ch->hw_chid);
		gk20a_dbg_info("%d channel events pending",
				ch->poll_events.num_pending_events);

		ch->poll_events.num_pending_events++;
		/* not waking up here, caller does that */
	}

	mutex_unlock(&ch->poll_events.lock);
}

unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
{
	unsigned int mask = 0;
	struct channel_gk20a *ch = filep->private_data;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");

	poll_wait(filep, &ch->semaphore_wq, wait);

	mutex_lock(&ch->poll_events.lock);

	if (ch->poll_events.events_enabled &&
			ch->poll_events.num_pending_events > 0) {
		gk20a_dbg_info("found pending event on channel id %d",
				ch->hw_chid);
		gk20a_dbg_info("%d channel events pending",
				ch->poll_events.num_pending_events);
		mask = (POLLPRI | POLLIN);
	}

	mutex_unlock(&ch->poll_events.lock);

	return mask;
}
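
/*
 * Event flow for the poll interface above: userspace enables events via
 * NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE, gk20a_channel_event()
 * increments num_pending_events when something of interest happens (the
 * caller wakes semaphore_wq separately), poll() reports POLLPRI|POLLIN
 * while events are pending, and userspace acknowledges each one with the
 * ..._CMD_CLEAR command, which decrements the count.
 */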

static int gk20a_channel_set_priority(struct channel_gk20a *ch,
		u32 priority)
{
	u32 timeslice_timeout;
	/* set priority of graphics channel */
	switch (priority) {
	case NVGPU_PRIORITY_LOW:
		/* 64 << 3 = 512us */
		timeslice_timeout = 64;
		break;
	case NVGPU_PRIORITY_MEDIUM:
		/* 128 << 3 = 1024us */
		timeslice_timeout = 128;
		break;
	case NVGPU_PRIORITY_HIGH:
		/* 255 << 3 = 2048us */
		timeslice_timeout = 255;
		break;
	default:
		pr_err("Unsupported priority");
		return -EINVAL;
	}
	channel_gk20a_set_schedule_params(ch,
			timeslice_timeout);

	return 0;
}

static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
			    struct nvgpu_zcull_bind_args *args)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;

	gk20a_dbg_fn("");

	return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
				args->gpu_va, args->mode);
}
/* in this context the "channel" is the host1x channel which
 * maps to *all* gk20a channels */
int gk20a_channel_suspend(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;
	int err;

	gk20a_dbg_fn("");

	/* wait for engine idle */
	err = g->ops.fifo.wait_engine_idle(g);
	if (err)
		return err;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = &f->channel[chid];
		if (gk20a_channel_get(ch)) {
			gk20a_dbg_info("suspend channel %d", chid);
			/* disable channel */
			g->ops.fifo.disable_channel(ch);
			/* preempt the channel */
			g->ops.fifo.preempt_channel(g, chid);
			/* wait for channel update notifiers */
			if (ch->update_fn &&
					work_pending(&ch->update_fn_work))
				flush_work(&ch->update_fn_work);

			channels_in_use = true;

			gk20a_channel_put(ch);
		}
	}

	if (channels_in_use) {
		g->ops.fifo.update_runlist(g, 0, ~0, false, true);

		for (chid = 0; chid < f->num_channels; chid++) {
			if (gk20a_channel_get(&f->channel[chid])) {
				g->ops.fifo.unbind_channel(&f->channel[chid]);
				gk20a_channel_put(&f->channel[chid]);
			}
		}
	}

	gk20a_dbg_fn("done");
	return 0;
}

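/*
 * Suspend order matters: each channel is disabled and preempted off the
 * engine before its pending update work is flushed, then one runlist
 * update drops all channels at once and each channel is unbound from the
 * hardware. gk20a_channel_resume() below performs the reverse.
 */
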
int gk20a_channel_resume(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;

	gk20a_dbg_fn("");

	for (chid = 0; chid < f->num_channels; chid++) {
		if (gk20a_channel_get(&f->channel[chid])) {
			gk20a_dbg_info("resume channel %d", chid);
			g->ops.fifo.bind_channel(&f->channel[chid]);
			channels_in_use = true;
			gk20a_channel_put(&f->channel[chid]);
		}
	}

	if (channels_in_use)
		g->ops.fifo.update_runlist(g, 0, ~0, true, true);

	gk20a_dbg_fn("done");
	return 0;
}

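/*
 * Both paths pass ~0 as the channel id to update_runlist, which the fifo
 * code treats as "apply to all channels" (add == false on suspend, true
 * on resume), with wait_for_finish set so the runlist change has taken
 * effect before returning.
 */
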
void gk20a_channel_semaphore_wakeup(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;

	gk20a_dbg_fn("");

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *c = g->fifo.channel+chid;
		if (gk20a_channel_get(c)) {
			wake_up_interruptible_all(&c->semaphore_wq);
			gk20a_channel_update(c, 0);
			gk20a_channel_put(c);
		}
	}
}

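/*
 * Wakeup half of the event/semaphore protocol: broadcast to every live
 * channel's semaphore_wq, which unblocks gk20a_channel_poll() sleepers
 * and pending semaphore waits, then let each channel reap completed jobs
 * via gk20a_channel_update(). Channels that cannot be referenced
 * (i.e. ones being freed) are simply skipped.
 */
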
static int gk20a_ioctl_channel_submit_gpfifo(
	struct channel_gk20a *ch,
	struct nvgpu_submit_gpfifo_args *args)
{
	struct gk20a_fence *fence_out;
	int ret = 0;

	gk20a_dbg_fn("");

	if (ch->has_timedout)
		return -ETIMEDOUT;

	ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
					  args->flags, &args->fence,
					  &fence_out);
	if (ret)
		goto clean_up;

	/* Convert fence_out to something we can pass back to user space. */
	if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
		if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
			int fd = gk20a_fence_install_fd(fence_out);
			if (fd < 0)
				ret = fd;
			else
				args->fence.id = fd;
		} else {
			args->fence.id = fence_out->syncpt_id;
			args->fence.value = fence_out->syncpt_value;
		}
	}
	gk20a_fence_put(fence_out);

clean_up:
	return ret;
}

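/*
 * Two fence flavors can be returned to user space here: with
 * FLAGS_SYNC_FENCE set, the post-fence is wrapped in a sync-fence fd
 * (args->fence.id); otherwise the raw syncpoint id/value pair is
 * reported. In both cases the local reference taken on fence_out is
 * dropped before returning.
 */
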
void gk20a_init_channel(struct gpu_ops *gops)
{
	gops->fifo.bind_channel = channel_gk20a_bind;
	gops->fifo.unbind_channel = channel_gk20a_unbind;
	gops->fifo.disable_channel = channel_gk20a_disable;
	gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
	gops->fifo.free_inst = channel_gk20a_free_inst;
	gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
}

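/*
 * gk20a_init_channel() fills in the fifo portion of the per-chip function
 * table; other chips install their own implementations over the same
 * gpu_ops slots, which is why the code in this file reaches channel
 * operations through g->ops.fifo rather than calling the gk20a functions
 * directly.
 *
 * The ioctl handler below follows one fixed sequence for every command:
 * validate the _IOC type/number/size, copy the argument struct in for
 * _IOC_WRITE commands, take a channel reference (failing with -ETIMEDOUT
 * if the channel is dying), dispatch under ch->ioctl_lock, then copy the
 * struct back out for _IOC_READ commands and drop the reference.
 */
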
long gk20a_channel_ioctl(struct file *filp,
	unsigned int cmd, unsigned long arg)
{
	struct channel_gk20a *ch = filp->private_data;
	struct platform_device *dev = ch->g->dev;
	u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
	int err = 0;

	gk20a_dbg_fn("start %d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
	    (_IOC_NR(cmd) == 0) ||
	    (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
	    (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
		return -EINVAL;

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	/* take a ref or return timeout if channel refs can't be taken */
	ch = gk20a_channel_get(ch);
	if (!ch)
		return -ETIMEDOUT;

	/* protect our sanity for threaded userspace - most of the channel is
	 * not thread safe */
	mutex_lock(&ch->ioctl_lock);

	/* this ioctl call keeps a ref to the file which keeps a ref to the
	 * channel */
	switch (cmd) {
	case NVGPU_IOCTL_CHANNEL_OPEN:
		err = gk20a_channel_open_ioctl(ch->g,
			(struct nvgpu_channel_open_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.gr.alloc_obj_ctx(ch,
				(struct nvgpu_alloc_obj_ctx_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.gr.free_obj_ctx(ch,
				(struct nvgpu_free_obj_ctx_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_alloc_channel_gpfifo(ch,
				(struct nvgpu_alloc_gpfifo_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
		err = gk20a_ioctl_channel_submit_gpfifo(ch,
				(struct nvgpu_submit_gpfifo_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_WAIT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		/* waiting is thread-safe, not dropping this mutex could
		 * deadlock in certain conditions */
		mutex_unlock(&ch->ioctl_lock);

		err = gk20a_channel_wait(ch,
				(struct nvgpu_wait_args *)buf);

		mutex_lock(&ch->ioctl_lock);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_zcull_bind(ch,
				(struct nvgpu_zcull_bind_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_init_error_notifier(ch,
				(struct nvgpu_set_error_notifier *)buf);
		gk20a_idle(dev);
		break;
#ifdef CONFIG_GK20A_CYCLE_STATS
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_cycle_stats(ch,
				(struct nvgpu_cycle_stats_args *)buf);
		gk20a_idle(dev);
		break;
#endif
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			  timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		break;
	}
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_ex_args *)buf)->timeout;
		bool timeout_debug_dump = !((u32)
			((struct nvgpu_set_timeout_ex_args *)buf)->flags &
			(1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			  timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		ch->timeout_debug_dump = timeout_debug_dump;
		break;
	}
	case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
		((struct nvgpu_get_param_args *)buf)->value =
			ch->has_timedout;
		break;
	case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		gk20a_channel_set_priority(ch,
			((struct nvgpu_set_priority_args *)buf)->priority);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ENABLE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		/* enable channel */
		gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
			gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
			ccsr_channel_enable_set_true_f());
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_DISABLE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		/* disable channel */
		gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
			gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
			ccsr_channel_enable_clr_true_f());
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_PREEMPT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_fifo_preempt(ch->g, ch);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.fifo.force_reset_ch(ch, true);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
		err = gk20a_channel_events_ctrl(ch,
			(struct nvgpu_channel_events_ctrl_args *)buf);
		break;
	default:
		dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));

	mutex_unlock(&ch->ioctl_lock);

	gk20a_channel_put(ch);

	gk20a_dbg_fn("end");

	return err;
}
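
/*
 * A minimal sketch of driving this handler from user space, assuming an
 * open channel fd (illustration only; the _IOC checks at the top of
 * gk20a_channel_ioctl() are what validate the command and argument size):
 *
 *	struct nvgpu_set_timeout_args args = { .timeout = 3000 };
 *	if (ioctl(fd, NVGPU_IOCTL_CHANNEL_SET_TIMEOUT, &args) < 0)
 *		perror("NVGPU_IOCTL_CHANNEL_SET_TIMEOUT");
 */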