/*
 * GK20A Graphics channel
 *
 * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/nvhost.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/highmem.h> /* need for nvmap.h */
#include <trace/events/gk20a.h>
#include <linux/scatterlist.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>

#include "debug_gk20a.h"

#include "gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "fence_gk20a.h"
#include "semaphore_gk20a.h"

#include "hw_ram_gk20a.h"
#include "hw_fifo_gk20a.h"
#include "hw_pbdma_gk20a.h"
#include "hw_ccsr_gk20a.h"
#include "hw_ltc_gk20a.h"

#define NVMAP_HANDLE_PARAM_SIZE 1

#define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT	64	/* channels */

static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);

static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e);

static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_commit_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_userd(struct channel_gk20a *c);

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);

static int channel_gk20a_update_runlist(struct channel_gk20a *c,
					bool add);
static void gk20a_free_error_notifiers(struct channel_gk20a *ch);

static void gk20a_channel_clean_up_jobs(struct work_struct *work);

/* allocate GPU channel */
static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
{
	struct channel_gk20a *ch = NULL;
	struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);

	mutex_lock(&f->free_chs_mutex);
	if (!list_empty(&f->free_chs)) {
		ch = list_first_entry(&f->free_chs, struct channel_gk20a,
				free_chs);
		list_del(&ch->free_chs);
		WARN_ON(atomic_read(&ch->ref_count));
		WARN_ON(ch->referenceable);
		f->used_channels++;
	}
	mutex_unlock(&f->free_chs_mutex);

	if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
		platform->aggressive_sync_destroy = true;

	return ch;
}

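/* Return an unused channel to the fifo's free list (the counterpart of
 * allocate_channel()) and relax the sync-destruction policy once enough
 * channels are free again. */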
static void free_channel(struct fifo_gk20a *f,
		struct channel_gk20a *ch)
{
	struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);

	trace_gk20a_release_used_channel(ch->hw_chid);
	/* refcount is zero here and channel is in a freed/dead state */
	mutex_lock(&f->free_chs_mutex);
	/* add to head to increase visibility of timing-related bugs */
	list_add(&ch->free_chs, &f->free_chs);
	f->used_channels--;
	mutex_unlock(&f->free_chs_mutex);

	if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
		platform->aggressive_sync_destroy = false;
}

int channel_gk20a_commit_va(struct channel_gk20a *c)
{
	gk20a_dbg_fn("");

	if (!c->inst_block.cpu_va)
		return -ENOMEM;

	gk20a_init_inst_block(&c->inst_block, c->vm,
			c->vm->gmmu_page_sizes[gmmu_page_size_big]);

	return 0;
}

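/* Write the USERD base address (lo/hi words) into this channel's RAMFC
 * inside the instance block, so the PBDMA can locate the get/put area. */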
static int channel_gk20a_commit_userd(struct channel_gk20a *c)
{
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	gk20a_dbg_fn("");

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
	addr_hi = u64_hi32(c->userd_iova);

	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
		c->hw_chid, (u64)c->userd_iova);

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_addr_f(addr_lo));

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_hi_addr_f(addr_hi));

	return 0;
}

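/* Program a new runlist timeslice for the channel. The value is encoded
 * as an 8-bit mantissa ("value") and an exponent ("shift", written at
 * bit 12), so the effective timeout is roughly value << shift. The
 * channel is disabled and preempted while its RAMFC is rewritten, then
 * re-enabled. */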
static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
				u32 timeslice_timeout)
{
	void *inst_ptr;
	int shift = 3;
	int value = timeslice_timeout;

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	/* disable channel */
	c->g->ops.fifo.disable_channel(c);

	/* preempt the channel */
	WARN_ON(gk20a_fifo_preempt(c->g, c));

	/* value field is 8 bits long */
	while (value >= 1 << 8) {
		value >>= 1;
		shift++;
	}

	/* time slice register is only 18 bits long */
	if ((value << shift) >= 1 << 19) {
		pr_err("Requested timeslice value is clamped to 18 bits\n");
		value = 255;
		shift = 10;
	}

	/* set new timeslice */
	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
		value | (shift << 12) |
		fifo_runlist_timeslice_enable_true_f());

	/* enable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_set_true_f());

	return 0;
}

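/* Initialize the channel's RAMFC: GPFIFO base and size, PBDMA signature
 * and formats, acquire and timeslice defaults, and finally the USERD
 * address via channel_gk20a_commit_userd(). */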
int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
			u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
{
	void *inst_ptr;

	gk20a_dbg_fn("");

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	memset(inst_ptr, 0, ram_fc_size_val_v());

	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
		pbdma_gp_base_offset_f(
		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));

	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));

	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
		c->g->ops.fifo.get_pbdma_signature(c->g));

	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
		pbdma_formats_gp_fermi0_f() |
		pbdma_formats_pb_fermi1_f() |
		pbdma_formats_mp_fermi0_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
		pbdma_pb_header_priv_user_f() |
		pbdma_pb_header_method_zero_f() |
		pbdma_pb_header_subchannel_zero_f() |
		pbdma_pb_header_level_main_f() |
		pbdma_pb_header_first_true_f() |
		pbdma_pb_header_type_inc_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
		pbdma_subdevice_id_f(1) |
		pbdma_subdevice_status_active_f() |
		pbdma_subdevice_channel_dma_enable_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
		pbdma_acquire_retry_man_2_f() |
		pbdma_acquire_retry_exp_2_f() |
		pbdma_acquire_timeout_exp_max_f() |
		pbdma_acquire_timeout_man_max_f() |
		pbdma_acquire_timeout_en_disable_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
		fifo_runlist_timeslice_timeout_128_f() |
		fifo_runlist_timeslice_timescale_3_f() |
		fifo_runlist_timeslice_enable_true_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
		fifo_pb_timeslice_timeout_16_f() |
		fifo_pb_timeslice_timescale_0_f() |
		fifo_pb_timeslice_enable_true_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));

	return channel_gk20a_commit_userd(c);
}

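/* Clear the channel's USERD area so that GP get/put and the related
 * pointers all start from zero (an empty GPFIFO). */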
static int channel_gk20a_setup_userd(struct channel_gk20a *c)
{
	BUG_ON(!c->userd_cpu_va);

	gk20a_dbg_fn("");

	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);

	return 0;
}

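/* Bind the hardware channel: select its runlist, point CCSR at the
 * instance block in video memory, and set the enable bit. */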
static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = ch_gk20a->g;
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *engine_info =
		f->engine_info + ENGINE_GR_GK20A;

	u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block)
		>> ram_in_base_shift_v();

	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
		ch_gk20a->hw_chid, inst_ptr);

	ch_gk20a->bound = true;

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_runlist_f(~0)) |
		 ccsr_channel_runlist_f(engine_info->runlist_id));

	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
		ccsr_channel_inst_ptr_f(inst_ptr) |
		ccsr_channel_inst_target_vid_mem_f() |
		ccsr_channel_inst_bind_true_f());

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_enable_set_f(~0)) |
		 ccsr_channel_enable_set_true_f());
}

void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = ch_gk20a->g;

	gk20a_dbg_fn("");

	if (ch_gk20a->bound)
		gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
			ccsr_channel_inst_ptr_f(0) |
			ccsr_channel_inst_bind_false_f());

	ch_gk20a->bound = false;
}

int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
{
	int err;

	gk20a_dbg_fn("");

	err = gk20a_alloc_inst_block(g, &ch->inst_block);
	if (err)
		return err;

	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
		ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block));

	gk20a_dbg_fn("done");
	return 0;
}

void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
{
	gk20a_free_inst_block(g, &ch->inst_block);
}

static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
{
	return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
}

void channel_gk20a_enable(struct channel_gk20a *ch)
{
	/* enable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_set_true_f());
}

void channel_gk20a_disable(struct channel_gk20a *ch)
{
	/* disable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g,
			ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_clr_true_f());
}

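/* Abort a channel: block further submits, disable and optionally preempt
 * the hardware channel, force pending fences to complete, and release
 * any outstanding job semaphores so that waiters can make progress. */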
void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
{
	struct channel_gk20a_job *job, *n;
	bool released_job_semaphore = false;

	gk20a_dbg_fn("");

	/* make sure new kickoffs are prevented */
	ch->has_timedout = true;

	ch->g->ops.fifo.disable_channel(ch);

	if (channel_preempt)
		gk20a_fifo_preempt(ch->g, ch);

	gk20a_channel_cancel_job_clean_up(ch, true);

	/* ensure no fences are pending */
	mutex_lock(&ch->sync_lock);
	if (ch->sync)
		ch->sync->set_min_eq_max(ch->sync);
	mutex_unlock(&ch->sync_lock);

	/* release all job semaphores (applies only to jobs that use
	   semaphore synchronization) */
	spin_lock(&ch->jobs_lock);
	list_for_each_entry_safe(job, n, &ch->jobs, list) {
		if (job->post_fence->semaphore) {
			gk20a_semaphore_release(job->post_fence->semaphore);
			released_job_semaphore = true;
		}
	}
	spin_unlock(&ch->jobs_lock);

	if (released_job_semaphore)
		wake_up_interruptible_all(&ch->semaphore_wq);

	gk20a_channel_update(ch, 0);
}

int gk20a_wait_channel_idle(struct channel_gk20a *ch)
{
	bool channel_idle = false;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));

	do {
		spin_lock(&ch->jobs_lock);
		channel_idle = list_empty(&ch->jobs);
		spin_unlock(&ch->jobs_lock);
		if (channel_idle)
			break;

		usleep_range(1000, 3000);
	} while (time_before(jiffies, end_jiffies)
			|| !tegra_platform_is_silicon());

	if (!channel_idle) {
		gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
				ch->hw_chid);
		return -EBUSY;
	}

	return 0;
}

void gk20a_disable_channel(struct channel_gk20a *ch)
{
	gk20a_channel_abort(ch, true);
	channel_gk20a_update_runlist(ch, false);
}

#if defined(CONFIG_GK20A_CYCLE_STATS)

static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
{
	/* disable existing cyclestats buffer */
	mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
	if (ch->cyclestate.cyclestate_buffer_handler) {
		dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
				ch->cyclestate.cyclestate_buffer);
		dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
		ch->cyclestate.cyclestate_buffer_handler = NULL;
		ch->cyclestate.cyclestate_buffer = NULL;
		ch->cyclestate.cyclestate_buffer_size = 0;
	}
	mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
}

static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
		       struct nvgpu_cycle_stats_args *args)
{
	struct dma_buf *dmabuf;
	void *virtual_address;

	/* is it allowed to handle calls for current GPU? */
	if (0 == (ch->g->gpu_characteristics.flags &
			NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
		return -ENOSYS;

	if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {

		/* set up new cyclestats buffer */
		dmabuf = dma_buf_get(args->dmabuf_fd);
		if (IS_ERR(dmabuf))
			return PTR_ERR(dmabuf);
		virtual_address = dma_buf_vmap(dmabuf);
		if (!virtual_address)
			return -ENOMEM;

		ch->cyclestate.cyclestate_buffer_handler = dmabuf;
		ch->cyclestate.cyclestate_buffer = virtual_address;
		ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
		return 0;

	} else if (!args->dmabuf_fd &&
			ch->cyclestate.cyclestate_buffer_handler) {
		gk20a_free_cycle_stats_buffer(ch);
		return 0;

	} else if (!args->dmabuf_fd &&
			!ch->cyclestate.cyclestate_buffer_handler) {
		/* no request from GL */
		return 0;

	} else {
		pr_err("channel already has cyclestats buffer\n");
		return -EBUSY;
	}
}

static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client)
		ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
	else
		ret = -EBADF;
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
				u32 dmabuf_fd,
				u32 perfmon_id_count,
				u32 *perfmon_id_start)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client)
		ret = -EEXIST;
	else
		ret = gr_gk20a_css_attach(ch->g,
				dmabuf_fd,
				perfmon_id_count,
				perfmon_id_start,
				&ch->cs_client);
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client) {
		ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
		ch->cs_client = NULL;
	} else {
		ret = 0;
	}
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
			struct nvgpu_cycle_stats_snapshot_args *args)
{
	int ret;

	/* is it allowed to handle calls for current GPU? */
	if (0 == (ch->g->gpu_characteristics.flags &
			NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT))
		return -ENOSYS;

	if (!args->dmabuf_fd)
		return -EINVAL;

	/* handle the command (most frequent cases first) */
	switch (args->cmd) {
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
		ret = gk20a_flush_cycle_stats_snapshot(ch);
		args->extra = 0;
		break;

	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
		ret = gk20a_attach_cycle_stats_snapshot(ch,
						args->dmabuf_fd,
						args->extra,
						&args->extra);
		break;

	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
		ret = gk20a_free_cycle_stats_snapshot(ch);
		args->extra = 0;
		break;

	default:
		pr_err("cyclestats: unknown command %u\n", args->cmd);
		ret = -EINVAL;
		break;
	}

	return ret;
}
#endif

static int gk20a_init_error_notifier(struct channel_gk20a *ch,
		struct nvgpu_set_error_notifier *args)
{
	struct device *dev = dev_from_gk20a(ch->g);
	struct dma_buf *dmabuf;
	void *va;
	u64 end = args->offset + sizeof(struct nvgpu_notification);

	if (!args->mem) {
		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
		return -EINVAL;
	}

	dmabuf = dma_buf_get(args->mem);

	if (ch->error_notifier_ref)
		gk20a_free_error_notifiers(ch);

	if (IS_ERR(dmabuf)) {
		pr_err("Invalid handle: %d\n", args->mem);
		return -EINVAL;
	}

	if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) {
		dma_buf_put(dmabuf);
		gk20a_err(dev, "gk20a_init_error_notifier: invalid offset\n");
		return -EINVAL;
	}

	/* map the notifier memory */
	va = dma_buf_vmap(dmabuf);
	if (!va) {
		dma_buf_put(dmabuf);
		pr_err("Cannot map notifier handle\n");
		return -ENOMEM;
	}

	/* set channel notifiers pointer */
	ch->error_notifier_ref = dmabuf;
	ch->error_notifier = va + args->offset;
	ch->error_notifier_va = va;
	memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));

	return 0;
}

void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
{
	if (ch->error_notifier_ref) {
		struct timespec time_data;
		u64 nsec;

		getnstimeofday(&time_data);
		nsec = ((u64)time_data.tv_sec) * 1000000000u +
				(u64)time_data.tv_nsec;
		ch->error_notifier->time_stamp.nanoseconds[0] =
				(u32)nsec;
		ch->error_notifier->time_stamp.nanoseconds[1] =
				(u32)(nsec >> 32);
		ch->error_notifier->info32 = error;
		ch->error_notifier->status = 0xffff;

		gk20a_err(dev_from_gk20a(ch->g),
			"error notifier set to %d for ch %d", error, ch->hw_chid);
	}
}

static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
{
	if (ch->error_notifier_ref) {
		dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
		dma_buf_put(ch->error_notifier_ref);
		ch->error_notifier_ref = NULL;
		ch->error_notifier = NULL;
		ch->error_notifier_va = NULL;
	}
}

/* Returns delta of cyclic integers a and b. If a is ahead of b, delta
 * is positive */
static int cyclic_delta(int a, int b)
{
	return a - b;
}

static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
{
	int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
	int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);

	/* wait until all stalling irqs are handled */
	wait_event(g->sw_irq_stall_last_handled_wq,
		cyclic_delta(stall_irq_threshold,
			atomic_read(&g->sw_irq_stall_last_handled))
		<= 0);

	/* wait until all non-stalling irqs are handled */
	wait_event(g->sw_irq_nonstall_last_handled_wq,
		cyclic_delta(nonstall_irq_threshold,
			atomic_read(&g->sw_irq_nonstall_last_handled))
		<= 0);
}

static void gk20a_wait_until_counter_is_N(
	struct channel_gk20a *ch, atomic_t *counter, int wait_value,
	wait_queue_head_t *wq, const char *caller, const char *counter_name)
{
	while (true) {
		if (wait_event_timeout(
			    *wq,
			    atomic_read(counter) == wait_value,
			    msecs_to_jiffies(5000)) > 0)
			break;

		gk20a_warn(dev_from_gk20a(ch->g),
			"%s: channel %d, still waiting, %s left: %d, waiting for: %d",
			caller, ch->hw_chid, counter_name,
			atomic_read(counter), wait_value);
	}
}

/* call ONLY when no references to the channel exist: after the last put */
static void gk20a_free_channel(struct channel_gk20a *ch)
{
	struct gk20a *g = ch->g;
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	struct vm_gk20a *ch_vm = ch->vm;
	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
	struct dbg_session_gk20a *dbg_s;
	bool was_reset;

	gk20a_dbg_fn("");

	WARN_ON(ch->g == NULL);

	trace_gk20a_free_channel(ch->hw_chid);

	/* abort channel and remove from runlist */
	gk20a_disable_channel(ch);

	/* wait until there's only our ref to the channel */
	gk20a_wait_until_counter_is_N(
		ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
		__func__, "references");

	/* wait until all pending interrupts for recently completed
	 * jobs are handled */
	gk20a_wait_for_deferred_interrupts(g);

	/* prevent new refs */
	spin_lock(&ch->ref_obtain_lock);
	if (!ch->referenceable) {
		spin_unlock(&ch->ref_obtain_lock);
		gk20a_err(dev_from_gk20a(ch->g),
			"Extra %s() called to channel %u",
			__func__, ch->hw_chid);
		return;
	}
	ch->referenceable = false;
	spin_unlock(&ch->ref_obtain_lock);

	/* matches with the initial reference in gk20a_open_new_channel() */
	atomic_dec(&ch->ref_count);

	/* wait until no more refs to the channel */
	gk20a_wait_until_counter_is_N(
		ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
		__func__, "references");

	/* if engine reset was deferred, perform it now */
	mutex_lock(&f->deferred_reset_mutex);
	if (g->fifo.deferred_reset_pending) {
		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
			" deferred, running now");
		was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
		mutex_lock(&g->fifo.gr_reset_mutex);
		/* if lock is already taken, a reset is taking place
		   so no need to repeat */
		if (!was_reset)
			gk20a_fifo_reset_engine(g,
				g->fifo.deferred_fault_engines);
		mutex_unlock(&g->fifo.gr_reset_mutex);
		g->fifo.deferred_fault_engines = 0;
		g->fifo.deferred_reset_pending = false;
	}
	mutex_unlock(&f->deferred_reset_mutex);

	if (!ch->bound)
		goto release;

	if (!gk20a_channel_as_bound(ch))
		goto unbind;

	gk20a_dbg_info("freeing bound channel context, timeout=%ld",
			timeout);

	gk20a_free_error_notifiers(ch);

	/* release channel ctx */
	g->ops.gr.free_channel_ctx(ch);

	gk20a_gr_flush_channel_tlb(gr);

	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));

	gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);

	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));

#if defined(CONFIG_GK20A_CYCLE_STATS)
	gk20a_free_cycle_stats_buffer(ch);
	gk20a_free_cycle_stats_snapshot(ch);
#endif

	channel_gk20a_free_priv_cmdbuf(ch);

	/* sync must be destroyed before releasing channel vm */
	mutex_lock(&ch->sync_lock);
	if (ch->sync) {
		gk20a_channel_sync_destroy(ch->sync);
		ch->sync = NULL;
	}
	mutex_unlock(&ch->sync_lock);

	/* release channel binding to the as_share */
	if (ch_vm->as_share)
		gk20a_as_release_share(ch_vm->as_share);
	else
		gk20a_vm_put(ch_vm);

	spin_lock(&ch->update_fn_lock);
	ch->update_fn = NULL;
	ch->update_fn_data = NULL;
	spin_unlock(&ch->update_fn_lock);
	cancel_work_sync(&ch->update_fn_work);

	/* make sure we don't have deferred interrupts pending that
	 * could still touch the channel */
	gk20a_wait_for_deferred_interrupts(g);

unbind:
	if (gk20a_is_channel_marked_as_tsg(ch))
		gk20a_tsg_unbind_channel(ch);

	g->ops.fifo.unbind_channel(ch);
	g->ops.fifo.free_inst(g, ch);

	ch->vpr = false;
	ch->vm = NULL;

	mutex_lock(&ch->last_submit.fence_lock);
	gk20a_fence_put(ch->last_submit.pre_fence);
	gk20a_fence_put(ch->last_submit.post_fence);
	ch->last_submit.pre_fence = NULL;
	ch->last_submit.post_fence = NULL;
	mutex_unlock(&ch->last_submit.fence_lock);

	/* unlink all debug sessions */
	mutex_lock(&ch->dbg_s_lock);

	list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
		dbg_s->ch = NULL;
		list_del_init(&dbg_s->dbg_s_list_node);
	}

	mutex_unlock(&ch->dbg_s_lock);

release:
	/* make sure we catch accesses of unopened channels in case
	 * there's non-refcounted channel pointers hanging around */
	ch->g = NULL;
	wmb();

	/* ALWAYS last */
	free_channel(f, ch);
}

/* Try to get a reference to the channel. Return nonzero on success. If it
 * fails, the channel is dead or being freed elsewhere and you must not
 * touch it.
 *
 * Always when a channel_gk20a pointer is seen and about to be used, a
 * reference must be held to it - either by you or the caller, which should be
 * documented well or otherwise clearly seen. This usually boils down to the
 * file from ioctls directly, or an explicit get in exception handlers when the
 * channel is found by a hw_chid.
 *
 * Most global functions in this file require a reference to be held by the
 * caller.
 */
struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
					 const char *caller)
{
	struct channel_gk20a *ret;

	spin_lock(&ch->ref_obtain_lock);

	if (likely(ch->referenceable)) {
		atomic_inc(&ch->ref_count);
		ret = ch;
	} else
		ret = NULL;

	spin_unlock(&ch->ref_obtain_lock);

	if (ret)
		trace_gk20a_channel_get(ch->hw_chid, caller);

	return ret;
}

void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
{
	trace_gk20a_channel_put(ch->hw_chid, caller);
	atomic_dec(&ch->ref_count);
	wake_up_all(&ch->ref_count_dec_wq);

	/* More puts than gets. Channel is probably going to get
	 * stuck. */
	WARN_ON(atomic_read(&ch->ref_count) < 0);

	/* Also, more puts than gets. ref_count can go to 0 only if
	 * the channel is closing. Channel is probably going to get
	 * stuck. */
	WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
}

void gk20a_channel_close(struct channel_gk20a *ch)
{
	gk20a_free_channel(ch);
}

int gk20a_channel_release(struct inode *inode, struct file *filp)
{
	struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
	struct gk20a *g = ch ? ch->g : NULL;
	int err;

	if (!ch)
		return 0;

	trace_gk20a_channel_release(dev_name(&g->dev->dev));

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
			ch->hw_chid);
		return err;
	}
	gk20a_channel_close(ch);
	gk20a_idle(g->dev);

	filp->private_data = NULL;
	return 0;
}

static void gk20a_channel_update_runcb_fn(struct work_struct *work)
{
	struct channel_gk20a *ch =
		container_of(work, struct channel_gk20a, update_fn_work);
	void (*update_fn)(struct channel_gk20a *, void *);
	void *update_fn_data;

	spin_lock(&ch->update_fn_lock);
	update_fn = ch->update_fn;
	update_fn_data = ch->update_fn_data;
	spin_unlock(&ch->update_fn_lock);

	if (update_fn)
		update_fn(ch, update_fn_data);
}

struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
		void (*update_fn)(struct channel_gk20a *, void *),
		void *update_fn_data)
{
	struct channel_gk20a *ch = gk20a_open_new_channel(g);

	if (ch) {
		spin_lock(&ch->update_fn_lock);
		ch->update_fn = update_fn;
		ch->update_fn_data = update_fn_data;
		spin_unlock(&ch->update_fn_lock);
	}

	return ch;
}

struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch;

	gk20a_dbg_fn("");

	ch = allocate_channel(f);
	if (ch == NULL) {
		/* TBD: we want to make this virtualizable */
		gk20a_err(dev_from_gk20a(g), "out of hw chids");
		return NULL;
	}

	trace_gk20a_open_new_channel(ch->hw_chid);

	BUG_ON(ch->g);
	ch->g = g;

	if (g->ops.fifo.alloc_inst(g, ch)) {
		ch->g = NULL;
		free_channel(f, ch);
		gk20a_err(dev_from_gk20a(g),
			"failed to open gk20a channel, out of inst mem");
		return NULL;
	}

	/* now the channel is in a limbo out of the free list but not marked as
	 * alive and used (i.e. get-able) yet */

	ch->pid = current->pid;

	/* By default, channel is regular (non-TSG) channel */
	ch->tsgid = NVGPU_INVALID_TSG_ID;

	/* reset timeout counter and update timestamp */
	ch->timeout_accumulated_ms = 0;
	ch->timeout_gpfifo_get = 0;
	/* set gr host default timeout */
	ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
	ch->timeout_debug_dump = true;
	ch->has_timedout = false;

	ch->clean_up.scheduled = false;

	/* The channel is *not* runnable at this point. It still needs to have
	 * an address space bound and allocate a gpfifo and grctx. */

	init_waitqueue_head(&ch->notifier_wq);
	init_waitqueue_head(&ch->semaphore_wq);
	init_waitqueue_head(&ch->submit_wq);

	mutex_init(&ch->poll_events.lock);
	ch->poll_events.events_enabled = false;
	ch->poll_events.num_pending_events = 0;

	ch->update_fn = NULL;
	ch->update_fn_data = NULL;
	spin_lock_init(&ch->update_fn_lock);
	INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);

	/* Mark the channel alive, get-able, with 1 initial use
	 * reference. The initial reference will be decreased in
	 * gk20a_free_channel() */
	ch->referenceable = true;
	atomic_set(&ch->ref_count, 1);
	wmb();

	return ch;
}

static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
{
	int err;
	struct channel_gk20a *ch;

	trace_gk20a_channel_open(dev_name(&g->dev->dev));

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
		return err;
	}
	ch = gk20a_open_new_channel(g);
	gk20a_idle(g->dev);
	if (!ch) {
		gk20a_err(dev_from_gk20a(g),
			"failed to get a channel");
		return -ENOMEM;
	}

	filp->private_data = ch;
	return 0;
}

int gk20a_channel_open(struct inode *inode, struct file *filp)
{
	struct gk20a *g = container_of(inode->i_cdev,
			struct gk20a, channel.cdev);
	int ret;

	gk20a_dbg_fn("start");
	ret = __gk20a_channel_open(g, filp);

	gk20a_dbg_fn("end");
	return ret;
}

int gk20a_channel_open_ioctl(struct gk20a *g,
		struct nvgpu_channel_open_args *args)
{
	int err;
	int fd;
	struct file *file;
	char *name;

	err = get_unused_fd_flags(O_RDWR);
	if (err < 0)
		return err;
	fd = err;

	name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
			dev_name(&g->dev->dev), fd);
	if (!name) {
		err = -ENOMEM;
		goto clean_up;
	}

	file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
	kfree(name);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto clean_up;
	}

	err = __gk20a_channel_open(g, file);
	if (err)
		goto clean_up_file;

	fd_install(fd, file);
	args->channel_fd = fd;
	return 0;

clean_up_file:
	fput(file);
clean_up:
	put_unused_fd(fd);
	return err;
}

/* allocate private cmd buffer.
   used for inserting commands before/after user submitted buffers. */
static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	u32 size;
	int err = 0;

	/* Kernel can insert gpfifos before and after user gpfifos.
	   Before user gpfifos, kernel inserts fence_wait, which takes
	   syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
	   After user gpfifos, kernel inserts fence_get, which takes
	   wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
	   = 6 dwords.
	   Worst case if kernel adds both of them for every user gpfifo,
	   max size of priv_cmdbuf is :
	   (gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
	size = roundup_pow_of_two(
		c->gpfifo.entry_num * 2 * 12 * sizeof(u32) / 3);

	err = gk20a_gmmu_alloc_map(ch_vm, size, &q->mem);
	if (err) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		goto clean_up;
	}

	q->size = q->mem.size / sizeof(u32);

	return 0;

clean_up:
	channel_gk20a_free_priv_cmdbuf(c);
	return err;
}

static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
{
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;

	if (q->size == 0)
		return;

	gk20a_gmmu_unmap_free(ch_vm, &q->mem);

	memset(q, 0, sizeof(struct priv_cmd_queue));
}

/* allocate a cmd buffer with given size. size is number of u32 entries */
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
			     struct priv_cmd_entry **entry)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	u32 free_count;
	u32 size = orig_size;

	gk20a_dbg_fn("size %d", orig_size);

	*entry = NULL;

	/* if free space in the end is less than requested, increase the size
	 * to make the real allocated space start from beginning. */
	if (q->put + size > q->size)
		size = orig_size + (q->size - q->put);

	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
			c->hw_chid, q->get, q->put);

	free_count = (q->size - (q->put - q->get) - 1) % q->size;

	if (size > free_count)
		return -EAGAIN;

	e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
	if (!e) {
		gk20a_err(dev_from_gk20a(c->g),
			"ch %d: fail to allocate priv cmd entry",
			c->hw_chid);
		return -ENOMEM;
	}

	e->size = orig_size;
	e->gp_get = c->gpfifo.get;
	e->gp_put = c->gpfifo.put;
	e->gp_wrap = c->gpfifo.wrap;

	/* if we have increased size to skip free space in the end, set put
	   to beginning of cmd buffer (0) + size */
	if (size != orig_size) {
		e->ptr = (u32 *)q->mem.cpu_va;
		e->gva = q->mem.gpu_va;
		q->put = orig_size;
	} else {
		e->ptr = (u32 *)q->mem.cpu_va + q->put;
		e->gva = q->mem.gpu_va + q->put * sizeof(u32);
		q->put = (q->put + orig_size) & (q->size - 1);
	}

	/* we already handled q->put + size > q->size so BUG_ON this */
	BUG_ON(q->put > q->size);

	*entry = e;

	gk20a_dbg_fn("done");

	return 0;
}

/* Don't call this to free an explicit cmd entry.
 * It doesn't update priv_cmd_queue get/put */
static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e)
{
	kfree(e);
}

int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
			       struct nvgpu_alloc_gpfifo_args *args)
{
	struct gk20a *g = c->g;
	struct device *d = dev_from_gk20a(g);
	struct vm_gk20a *ch_vm;
	u32 gpfifo_size;
	int err = 0;

	/* Kernel can insert one extra gpfifo entry before user submitted gpfifos
	   and another one after, for internal usage. Triple the requested size. */
	gpfifo_size = roundup_pow_of_two(args->num_entries * 3);

	if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
		c->vpr = true;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(d,
			"not bound to an address space at time of gpfifo"
			" allocation");
		return -EINVAL;
	}
	ch_vm = c->vm;

	c->cmds_pending = false;
	mutex_lock(&c->last_submit.fence_lock);
	gk20a_fence_put(c->last_submit.pre_fence);
	gk20a_fence_put(c->last_submit.post_fence);
	c->last_submit.pre_fence = NULL;
	c->last_submit.post_fence = NULL;
	mutex_unlock(&c->last_submit.fence_lock);

	c->ramfc.offset = 0;
	c->ramfc.size = ram_in_ramfc_s() / 8;

	if (c->gpfifo.mem.cpu_va) {
		gk20a_err(d, "channel %d :"
			"gpfifo already allocated", c->hw_chid);
		return -EEXIST;
	}

	err = gk20a_gmmu_alloc_map(ch_vm, gpfifo_size * sizeof(struct gpfifo),
			&c->gpfifo.mem);
	if (err) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		goto clean_up;
	}

	c->gpfifo.entry_num = gpfifo_size;
	c->gpfifo.get = c->gpfifo.put = 0;

	gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
		c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);

	channel_gk20a_setup_userd(c);

	err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
					c->gpfifo.entry_num, args->flags);
	if (err)
		goto clean_up_unmap;

	/* TBD: setup engine contexts */

	err = channel_gk20a_alloc_priv_cmdbuf(c);
	if (err)
		goto clean_up_unmap;

	err = channel_gk20a_update_runlist(c, true);
	if (err)
		goto clean_up_unmap;

	g->ops.fifo.bind_channel(c);

	gk20a_dbg_fn("done");
	return 0;

clean_up_unmap:
	gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
clean_up:
	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
	gk20a_err(d, "fail");
	return err;
}

/* Update with this periodically to determine how the gpfifo is draining. */
static inline u32 update_gp_get(struct gk20a *g,
		struct channel_gk20a *c)
{
	u32 new_get = gk20a_bar1_readl(g,
		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());

	if (new_get < c->gpfifo.get)
		c->gpfifo.wrap = !c->gpfifo.wrap;
	c->gpfifo.get = new_get;
	return new_get;
}

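/* Number of free GPFIFO entries; one slot is deliberately kept unused so
 * that put == get always means "empty" rather than "full". */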
static inline u32 gp_free_count(struct channel_gk20a *c)
{
	return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
		c->gpfifo.entry_num;
}

bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
		u32 timeout_delta_ms)
{
	u32 gpfifo_get = update_gp_get(ch->g, ch);

	/* count consecutive timeout isrs */
	if (gpfifo_get == ch->timeout_gpfifo_get) {
		/* we didn't advance since previous channel timeout check */
		ch->timeout_accumulated_ms += timeout_delta_ms;
	} else {
		/* first timeout isr encountered */
		ch->timeout_accumulated_ms = timeout_delta_ms;
	}

	ch->timeout_gpfifo_get = gpfifo_get;

	return ch->g->timeouts_enabled &&
		ch->timeout_accumulated_ms > ch->timeout_ms_max;
}

static u32 get_gp_free_count(struct channel_gk20a *c)
{
	update_gp_get(c->g, c);
	return gp_free_count(c);
}

static void trace_write_pushbuffer(struct channel_gk20a *c,
				   struct nvgpu_gpfifo *g)
{
	void *mem = NULL;
	unsigned int words;
	u64 offset;
	struct dma_buf *dmabuf = NULL;

	if (gk20a_debug_trace_cmdbuf) {
		u64 gpu_va = (u64)g->entry0 |
			(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
		int err;

		words = pbdma_gp_entry1_length_v(g->entry1);
		err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
		if (!err)
			mem = dma_buf_vmap(dmabuf);
	}

	if (mem) {
		u32 i;
		/*
		 * Write in batches of 128 as there seems to be a limit
		 * of how much you can output to ftrace at once.
		 */
		for (i = 0; i < words; i += 128U) {
			trace_gk20a_push_cmdbuf(
				c->g->dev->name,
				0,
				min(words - i, 128U),
				offset + i * sizeof(u32),
				mem);
		}
		dma_buf_vunmap(dmabuf, mem);
	}
}

static void trace_write_pushbuffer_range(struct channel_gk20a *c,
					 struct nvgpu_gpfifo *g,
					 struct nvgpu_submit_gpfifo_args *args,
					 int offset,
					 int count)
{
	u32 size;
	int i;
	struct nvgpu_gpfifo *gp;
	bool gpfifo_allocated = false;

	if (!gk20a_debug_trace_cmdbuf)
		return;

	if (!g && !args)
		return;

	if (!g) {
		size = args->num_entries * sizeof(struct nvgpu_gpfifo);
		if (!size)
			return;

		g = nvgpu_alloc(size, false);
		if (!g)
			return;

		if (copy_from_user(g,
				(void __user *)(uintptr_t)args->gpfifo, size)) {
			nvgpu_free(g);
			return;
		}
		gpfifo_allocated = true;
	}

	gp = g + offset;
	for (i = 0; i < count; i++, gp++)
		trace_write_pushbuffer(c, gp);

	if (gpfifo_allocated)
		nvgpu_free(g);
}

static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	u32 cmd_entry_start;
	struct device *d = dev_from_gk20a(c->g);

	if (!e)
		return 0;

	cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
	if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
		gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);

	q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
	free_priv_cmdbuf(c, e);

	return 0;
}

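/* Kick the per-channel delayed work that reaps completed jobs, unless it
 * is already scheduled. */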
static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
{
	mutex_lock(&c->clean_up.lock);

	if (c->clean_up.scheduled) {
		mutex_unlock(&c->clean_up.lock);
		return;
	}

	c->clean_up.scheduled = true;
	schedule_delayed_work(&c->clean_up.wq, 1);

	mutex_unlock(&c->clean_up.lock);
}

void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
				bool wait_for_completion)
{
	if (wait_for_completion)
		cancel_delayed_work_sync(&c->clean_up.wq);

	mutex_lock(&c->clean_up.lock);
	c->clean_up.scheduled = false;
	mutex_unlock(&c->clean_up.lock);
}

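/* Book-keep a submitted job: take vm and channel references, record its
 * fences and private command entries, and queue it for later cleanup. */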
static int gk20a_channel_add_job(struct channel_gk20a *c,
				 struct gk20a_fence *pre_fence,
				 struct gk20a_fence *post_fence,
				 struct priv_cmd_entry *wait_cmd,
				 struct priv_cmd_entry *incr_cmd,
				 bool skip_buffer_refcounting)
{
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job = NULL;
	struct mapped_buffer_node **mapped_buffers = NULL;
	int err = 0, num_mapped_buffers = 0;

	/* job needs reference to this vm (released in channel_update) */
	gk20a_vm_get(vm);

	if (!skip_buffer_refcounting) {
		err = gk20a_vm_get_buffers(vm, &mapped_buffers,
					&num_mapped_buffers);
		if (err) {
			gk20a_vm_put(vm);
			return err;
		}
	}

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job) {
		gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
		gk20a_vm_put(vm);
		return -ENOMEM;
	}

	/* put() is done in gk20a_channel_update() when the job is done */
	c = gk20a_channel_get(c);

	if (c) {
		job->num_mapped_buffers = num_mapped_buffers;
		job->mapped_buffers = mapped_buffers;
		job->pre_fence = gk20a_fence_get(pre_fence);
		job->post_fence = gk20a_fence_get(post_fence);
		job->wait_cmd = wait_cmd;
		job->incr_cmd = incr_cmd;

		spin_lock(&c->jobs_lock);
		list_add_tail(&job->list, &c->jobs);
		spin_unlock(&c->jobs_lock);
	} else {
		kfree(job);
		gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
		gk20a_vm_put(vm);
		return -ETIMEDOUT;
	}

	return 0;
}

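/* Worker that walks the job list in submission order; for each job whose
 * post-fence has expired it releases the job's buffers, fences and
 * private command entries, then drops the references taken in add_job. */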
static void gk20a_channel_clean_up_jobs(struct work_struct *work)
{
	struct channel_gk20a *c = container_of(to_delayed_work(work),
			struct channel_gk20a, clean_up.wq);
	struct vm_gk20a *vm;
	struct channel_gk20a_job *job;
	struct gk20a_platform *platform;
	struct gk20a *g;

	c = gk20a_channel_get(c);
	if (!c)
		return;

	if (!c->g->power_on) { /* shutdown case */
		gk20a_channel_put(c);
		return;
	}

	vm = c->vm;
	g = c->g;
	platform = gk20a_get_platform(g->dev);

	gk20a_channel_cancel_job_clean_up(c, false);

	while (1) {
		bool completed;

		spin_lock(&c->jobs_lock);
		if (list_empty(&c->jobs)) {
			spin_unlock(&c->jobs_lock);
			break;
		}
		job = list_first_entry(&c->jobs,
				struct channel_gk20a_job, list);
		spin_unlock(&c->jobs_lock);

		completed = gk20a_fence_is_expired(job->post_fence);
		if (!completed)
			break;

		mutex_lock(&c->sync_lock);
		if (c->sync) {
			c->sync->signal_timeline(c->sync);
			if (atomic_dec_and_test(&c->sync->refcount) &&
					platform->aggressive_sync_destroy) {
				gk20a_channel_sync_destroy(c->sync);
				c->sync = NULL;
			}
		}
		mutex_unlock(&c->sync_lock);

		if (job->num_mapped_buffers)
			gk20a_vm_put_buffers(vm, job->mapped_buffers,
				job->num_mapped_buffers);

		/* Close the fences (this will unref the semaphores and release
		 * them to the pool). */
		gk20a_fence_put(job->pre_fence);
		gk20a_fence_put(job->post_fence);

		/* Free the private command buffers (wait_cmd first and
		 * then incr_cmd i.e. order of allocation) */
		gk20a_free_priv_cmdbuf(c, job->wait_cmd);
		gk20a_free_priv_cmdbuf(c, job->incr_cmd);

		/* job is done. release its vm reference (taken in add_job) */
		gk20a_vm_put(vm);
		/* another bookkeeping taken in add_job. caller must hold a ref
		 * so this wouldn't get freed here. */
		gk20a_channel_put(c);

		spin_lock(&c->jobs_lock);
		list_del_init(&job->list);
		spin_unlock(&c->jobs_lock);

		kfree(job);

		gk20a_idle(g->dev);
	}

	if (c->update_fn)
		schedule_work(&c->update_fn_work);

	gk20a_channel_put(c);
}

void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
{
	c = gk20a_channel_get(c);
	if (!c)
		return;

	if (!c->g->power_on) { /* shutdown case */
		gk20a_channel_put(c);
		return;
	}

	update_gp_get(c->g, c);
	wake_up(&c->submit_wq);

	trace_gk20a_channel_update(c->hw_chid);
	gk20a_channel_schedule_job_clean_up(c);

	gk20a_channel_put(c);
}

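/*
 * Main submit path: optionally prepends a wait command and appends an
 * increment command around the user's GPFIFO entries, copies the entries
 * into the ring (handling wrap-around), books the job for cleanup and
 * finally kicks the PBDMA by writing GP_PUT in USERD.
 */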
int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
				struct nvgpu_gpfifo *gpfifo,
				struct nvgpu_submit_gpfifo_args *args,
				u32 num_entries,
				u32 flags,
				struct nvgpu_fence *fence,
				struct gk20a_fence **fence_out,
				bool force_need_sync_fence)
{
	struct gk20a *g = c->g;
	struct device *d = dev_from_gk20a(g);
	int err = 0;
	int start, end;
	int wait_fence_fd = -1;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_fence *pre_fence = NULL;
	struct gk20a_fence *post_fence = NULL;
	/* we might need two extra gpfifo entries - one for pre fence
	 * and one for post fence. */
	const int extra_entries = 2;
	bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
	bool need_sync_fence = false;

	/*
	 * If user wants to allocate sync_fence_fd always, then respect that;
	 * otherwise, allocate sync_fence_fd based on user flags only
	 */
	if (force_need_sync_fence)
		need_sync_fence = true;

	if (c->has_timedout)
		return -ETIMEDOUT;

	/* fifo not large enough for request. Return error immediately.
	 * Kernel can insert gpfifo entries before and after user gpfifos.
	 * So, add extra_entries in user request. Also, HW with fifo size N
	 * can accept only N-1 entries and so the below condition */
	if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
		gk20a_err(d, "not enough gpfifo space allocated");
		return -ENOMEM;
	}

	if (!gpfifo && !args)
		return -EINVAL;

	if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
		      NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
	    !fence)
		return -EINVAL;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(d,
			"not bound to an address space at time of gpfifo"
			" submission");
		return -EINVAL;
	}

#ifdef CONFIG_DEBUG_FS
	/* update debug settings */
	if (g->ops.ltc.sync_debugfs)
		g->ops.ltc.sync_debugfs(g);
#endif

	gk20a_dbg_info("channel %d", c->hw_chid);

	/* gk20a_channel_update releases this ref. */
	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(d, "failed to host gk20a to submit gpfifo");
		return err;
	}

	trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
					  c->hw_chid,
					  num_entries,
					  flags,
					  fence ? fence->id : 0,
					  fence ? fence->value : 0);

	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	/* Make sure we have enough space for gpfifo entries. If not,
	 * wait for signals from completed submits */
	if (gp_free_count(c) < num_entries + extra_entries) {
		/* we can get here via locked ioctl and other paths too */
		int locked_path = mutex_is_locked(&c->ioctl_lock);
		if (locked_path)
			mutex_unlock(&c->ioctl_lock);

		trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
		err = wait_event_interruptible(c->submit_wq,
			get_gp_free_count(c) >= num_entries + extra_entries ||
			c->has_timedout);
		trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);

		if (locked_path)
			mutex_lock(&c->ioctl_lock);
	}

	if (c->has_timedout) {
		err = -ETIMEDOUT;
		goto clean_up;
	}

	if (err) {
		err = -ENOSPC;
		goto clean_up;
	}

	mutex_lock(&c->sync_lock);
	if (!c->sync) {
		c->sync = gk20a_channel_sync_create(c);
		if (!c->sync) {
			err = -ENOMEM;
			mutex_unlock(&c->sync_lock);
			goto clean_up;
		}
		if (g->ops.fifo.resetup_ramfc)
			err = g->ops.fifo.resetup_ramfc(c);
		if (err) {
			mutex_unlock(&c->sync_lock);
			goto clean_up;
		}
	}
	atomic_inc(&c->sync->refcount);
	mutex_unlock(&c->sync_lock);

	/*
	 * optionally insert syncpt wait in the beginning of gpfifo submission
	 * when user requested and the wait hasn't expired.
	 * validate that the id makes sense, elide if not
	 * the only reason this isn't being unceremoniously killed is to
	 * keep running some tests which trigger this condition
	 */
	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
		if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
			wait_fence_fd = fence->id;
			err = c->sync->wait_fd(c->sync, wait_fence_fd,
					&wait_cmd, &pre_fence);
		} else {
			err = c->sync->wait_syncpt(c->sync, fence->id,
					fence->value, &wait_cmd, &pre_fence);
		}
	}
	if (err)
		goto clean_up;

	if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
	    (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
		need_sync_fence = true;

	/* always insert syncpt increment at end of gpfifo submission
	   to keep track of method completion for idle railgating */
	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
		err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
				&post_fence, need_wfi, need_sync_fence);
	else
		err = c->sync->incr(c->sync, &incr_cmd,
				&post_fence, need_sync_fence);
	if (err)
		goto clean_up;

	if (wait_cmd) {
		((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
			u64_lo32(wait_cmd->gva);
		((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
			u64_hi32(wait_cmd->gva) |
			pbdma_gp_entry1_length_f(wait_cmd->size);
		trace_gk20a_push_cmdbuf(c->g->dev->name,
			0, wait_cmd->size, 0, wait_cmd->ptr);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		wait_cmd->gp_put = c->gpfifo.put;
	}

	/*
	 * Copy source gpfifo entries into the gpfifo ring buffer,
	 * potentially splitting into two memcpies to handle the
	 * ring buffer wrap-around case.
	 */
	start = c->gpfifo.put;
	end = start + num_entries;

	if (gpfifo) {
		if (end > c->gpfifo.entry_num) {
			int length0 = c->gpfifo.entry_num - start;
			int length1 = num_entries - length0;

			memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
				gpfifo,
				length0 * sizeof(*gpfifo));

			memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va,
				gpfifo + length0,
				length1 * sizeof(*gpfifo));

			trace_write_pushbuffer_range(c, gpfifo, NULL,
					0, length0);
			trace_write_pushbuffer_range(c, gpfifo, NULL,
					length0, length1);
		} else {
			memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
				gpfifo,
				num_entries * sizeof(*gpfifo));

			trace_write_pushbuffer_range(c, gpfifo, NULL,
					0, num_entries);
		}
	} else {
		struct nvgpu_gpfifo __user *user_gpfifo =
			(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
		if (end > c->gpfifo.entry_num) {
			int length0 = c->gpfifo.entry_num - start;
			int length1 = num_entries - length0;

			err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
				user_gpfifo,
				length0 * sizeof(*user_gpfifo));
			if (err)
				goto clean_up;

			err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va,
				user_gpfifo + length0,
				length1 * sizeof(*user_gpfifo));
			if (err)
				goto clean_up;

			trace_write_pushbuffer_range(c, NULL, args,
					0, length0);
			trace_write_pushbuffer_range(c, NULL, args,
					length0, length1);
		} else {
			err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
				user_gpfifo,
				num_entries * sizeof(*user_gpfifo));
			if (err)
				goto clean_up;

			trace_write_pushbuffer_range(c, NULL, args,
					0, num_entries);
		}
	}

	c->gpfifo.put = (c->gpfifo.put + num_entries) &
		(c->gpfifo.entry_num - 1);

	if (incr_cmd) {
		((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
			u64_lo32(incr_cmd->gva);
		((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
			u64_hi32(incr_cmd->gva) |
			pbdma_gp_entry1_length_f(incr_cmd->size);
		trace_gk20a_push_cmdbuf(c->g->dev->name,
			0, incr_cmd->size, 0, incr_cmd->ptr);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		incr_cmd->gp_put = c->gpfifo.put;
	}

	mutex_lock(&c->last_submit.fence_lock);
	gk20a_fence_put(c->last_submit.pre_fence);
	gk20a_fence_put(c->last_submit.post_fence);
	c->last_submit.pre_fence = pre_fence;
	c->last_submit.post_fence = post_fence;
	if (fence_out)
		*fence_out = gk20a_fence_get(post_fence);
	mutex_unlock(&c->last_submit.fence_lock);

	/* TODO! Check for errors... */
	gk20a_channel_add_job(c, pre_fence, post_fence,
				wait_cmd, incr_cmd,
				skip_buffer_refcounting);

	c->cmds_pending = true;
	gk20a_bar1_writel(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
		c->gpfifo.put);

	trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
					     c->hw_chid,
					     num_entries,
					     flags,
					     post_fence->syncpt_id,
					     post_fence->syncpt_value);

	gk20a_dbg_info("post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	gk20a_dbg_fn("done");
	return err;

clean_up:
	gk20a_dbg_fn("fail");
	free_priv_cmdbuf(c, wait_cmd);
	free_priv_cmdbuf(c, incr_cmd);
	gk20a_fence_put(pre_fence);
	gk20a_fence_put(post_fence);
	gk20a_idle(g->dev);
	return err;
}

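/* One-time software init of a channel struct at probe time: locks, lists,
 * work items, and placement on the fifo's free list. */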
int gk20a_init_channel_support(struct gk20a *g, u32 chid)
{
	struct channel_gk20a *c = g->fifo.channel+chid;

	c->g = NULL;
	c->hw_chid = chid;
	c->bound = false;
	spin_lock_init(&c->ref_obtain_lock);
	atomic_set(&c->ref_count, 0);
	c->referenceable = false;
	init_waitqueue_head(&c->ref_count_dec_wq);
	mutex_init(&c->ioctl_lock);
	spin_lock_init(&c->jobs_lock);
	mutex_init(&c->last_submit.fence_lock);
	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
	mutex_init(&c->clean_up.lock);
	mutex_init(&c->sync_lock);
	INIT_LIST_HEAD(&c->jobs);
#if defined(CONFIG_GK20A_CYCLE_STATS)
	mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
	mutex_init(&c->cs_client_mutex);
#endif
	INIT_LIST_HEAD(&c->dbg_s_list);
	mutex_init(&c->dbg_s_lock);
	list_add(&c->free_chs, &g->fifo.free_chs);

	return 0;
}

int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
{
	int err = 0;
	struct gk20a_fence *fence;

	if (!ch->cmds_pending)
		return 0;

	mutex_lock(&ch->last_submit.fence_lock);
	fence = ch->last_submit.post_fence;
	if (!fence) {
		mutex_unlock(&ch->last_submit.fence_lock);
		return -EINVAL;
	}
	mutex_unlock(&ch->last_submit.fence_lock);

	/* Do not wait for a timedout channel */
	if (ch->has_timedout)
		return -ETIMEDOUT;

	gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
		fence->syncpt_value, fence->semaphore);

	err = gk20a_fence_wait(fence, timeout);
	if (err < 0)
		dev_warn(dev_from_gk20a(ch->g),
			"timed out waiting for gk20a channel to finish");
	else
		ch->cmds_pending = false;

	return err;
}

static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
					ulong id, u32 offset,
					u32 payload, long timeout)
{
	struct platform_device *pdev = ch->g->dev;
	struct dma_buf *dmabuf;
	void *data;
	u32 *semaphore;
	int ret = 0;
	long remain;

	/* do not wait if channel has timed out */
	if (ch->has_timedout)
		return -ETIMEDOUT;

	dmabuf = dma_buf_get(id);
	if (IS_ERR(dmabuf)) {
		gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
			id);
		return -EINVAL;
	}

	data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
	if (!data) {
		gk20a_err(&pdev->dev, "failed to map notifier memory");
		ret = -EINVAL;
		goto cleanup_put;
	}

	semaphore = data + (offset & ~PAGE_MASK);

	remain = wait_event_interruptible_timeout(
			ch->semaphore_wq,
			*semaphore == payload || ch->has_timedout,
			timeout);

	if (remain == 0 && *semaphore != payload)
		ret = -ETIMEDOUT;
	else if (remain < 0)
		ret = remain;

	dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
cleanup_put:
	dma_buf_put(dmabuf);
	return ret;
}

static int gk20a_channel_wait(struct channel_gk20a *ch,
			      struct nvgpu_wait_args *args)
{
	struct device *d = dev_from_gk20a(ch->g);
	struct dma_buf *dmabuf;
	struct notification *notif;
	struct timespec tv;
	u64 jiffies;
	ulong id;
	u32 offset;
	u64 end;
	unsigned long timeout;
	int remain, ret = 0;

	gk20a_dbg_fn("");

	if (ch->has_timedout)
		return -ETIMEDOUT;

	if (args->timeout == NVGPU_NO_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT;
	else
		timeout = (u32)msecs_to_jiffies(args->timeout);

	switch (args->type) {
	case NVGPU_WAIT_TYPE_NOTIFIER:
		id = args->condition.notifier.dmabuf_fd;
		offset = args->condition.notifier.offset;
		end = offset + sizeof(struct notification);

		dmabuf = dma_buf_get(id);
		if (IS_ERR(dmabuf)) {
			gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
				id);
			return -EINVAL;
		}

		if (end > dmabuf->size || end < sizeof(struct notification)) {
			dma_buf_put(dmabuf);
			gk20a_err(d, "invalid notifier offset\n");
			return -EINVAL;
		}

		notif = dma_buf_vmap(dmabuf);
		if (!notif) {
			gk20a_err(d, "failed to map notifier memory");
			return -ENOMEM;
		}

		notif = (struct notification *)((uintptr_t)notif + offset);

		/* user should set status pending before
		 * calling this ioctl */
		remain = wait_event_interruptible_timeout(
				ch->notifier_wq,
				notif->status == 0 || ch->has_timedout,
				timeout);

		if (remain == 0 && notif->status != 0) {
			ret = -ETIMEDOUT;
			goto notif_clean_up;
		} else if (remain < 0) {
			ret = remain;
			goto notif_clean_up;
		}

		/* TBD: fill in correct information */
		jiffies = get_jiffies_64();
		jiffies_to_timespec(jiffies, &tv);
		notif->timestamp.nanoseconds[0] = tv.tv_nsec;
		notif->timestamp.nanoseconds[1] = tv.tv_sec;
		notif->info32 = 0xDEADBEEF; /* should be object name */
		notif->info16 = ch->hw_chid; /* should be method offset */

notif_clean_up:
		dma_buf_vunmap(dmabuf, notif);
		return ret;

	case NVGPU_WAIT_TYPE_SEMAPHORE:
		ret = gk20a_channel_wait_semaphore(ch,
				args->condition.semaphore.dmabuf_fd,
				args->condition.semaphore.offset,
				args->condition.semaphore.payload,
				timeout);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/* poll events for semaphores */

static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
{
	gk20a_dbg_fn("");

	mutex_lock(&ev->lock);

	ev->events_enabled = true;
	ev->num_pending_events = 0;

	mutex_unlock(&ev->lock);
}

static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
{
	gk20a_dbg_fn("");

	mutex_lock(&ev->lock);

	ev->events_enabled = false;
	ev->num_pending_events = 0;

	mutex_unlock(&ev->lock);
}

static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
{
	gk20a_dbg_fn("");

	mutex_lock(&ev->lock);

	if (ev->events_enabled &&
			ev->num_pending_events > 0)
		ev->num_pending_events--;

	mutex_unlock(&ev->lock);
}

static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
			  struct nvgpu_channel_events_ctrl_args *args)
{
	int ret = 0;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
			"channel events ctrl cmd %d", args->cmd);

	switch (args->cmd) {
	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
		gk20a_channel_events_enable(&ch->poll_events);
		break;

	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
		gk20a_channel_events_disable(&ch->poll_events);
		break;

	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
		gk20a_channel_events_clear(&ch->poll_events);
		break;

	default:
		gk20a_err(dev_from_gk20a(ch->g),
			"unrecognized channel events ctrl cmd: 0x%x",
			args->cmd);
		ret = -EINVAL;
		break;
	}

	return ret;
}

void gk20a_channel_event(struct channel_gk20a *ch)
{
	mutex_lock(&ch->poll_events.lock);

	if (ch->poll_events.events_enabled) {
		gk20a_dbg_info("posting event on channel id %d",
				ch->hw_chid);
		gk20a_dbg_info("%d channel events pending",
				ch->poll_events.num_pending_events);

		ch->poll_events.num_pending_events++;
		/* not waking up here, caller does that */
	}

	mutex_unlock(&ch->poll_events.lock);
}

unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
{
	unsigned int mask = 0;
	struct channel_gk20a *ch = filep->private_data;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");

	poll_wait(filep, &ch->semaphore_wq, wait);

	mutex_lock(&ch->poll_events.lock);

	if (ch->poll_events.events_enabled &&
			ch->poll_events.num_pending_events > 0) {
		gk20a_dbg_info("found pending event on channel id %d",
				ch->hw_chid);
		gk20a_dbg_info("%d channel events pending",
				ch->poll_events.num_pending_events);
		mask = (POLLPRI | POLLIN);
	}

	mutex_unlock(&ch->poll_events.lock);

	return mask;
}

static int gk20a_channel_set_priority(struct channel_gk20a *ch,
		u32 priority)
{
	u32 timeslice_timeout;

	/* set priority of graphics channel */
	switch (priority) {
	case NVGPU_PRIORITY_LOW:
		/* 64 << 3 = 512us */
		timeslice_timeout = 64;
		break;
	case NVGPU_PRIORITY_MEDIUM:
		/* 128 << 3 = 1024us */
		timeslice_timeout = 128;
		break;
	case NVGPU_PRIORITY_HIGH:
		/* 255 << 3 = 2040us */
		timeslice_timeout = 255;
		break;
	default:
		pr_err("Unsupported priority");
		return -EINVAL;
	}

	return channel_gk20a_set_schedule_params(ch,
			timeslice_timeout);
}

static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
			struct nvgpu_zcull_bind_args *args)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;

	gk20a_dbg_fn("");

	return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
				args->gpu_va, args->mode);
}

/* in this context the "channel" is the host1x channel which
 * maps to *all* gk20a channels */
int gk20a_channel_suspend(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;
	int err;

	gk20a_dbg_fn("");

	/* wait for engine idle */
	err = g->ops.fifo.wait_engine_idle(g);
	if (err)
		return err;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = &f->channel[chid];
		if (gk20a_channel_get(ch)) {
			gk20a_dbg_info("suspend channel %d", chid);
			/* disable channel */
			g->ops.fifo.disable_channel(ch);
			/* preempt the channel */
			gk20a_fifo_preempt(ch->g, ch);
			gk20a_channel_cancel_job_clean_up(ch, true);
			/* wait for channel update notifiers */
			if (ch->update_fn)
				cancel_work_sync(&ch->update_fn_work);

			channels_in_use = true;

			gk20a_channel_put(ch);
		}
	}

	if (channels_in_use) {
		g->ops.fifo.update_runlist(g, 0, ~0, false, true);

		for (chid = 0; chid < f->num_channels; chid++) {
			if (gk20a_channel_get(&f->channel[chid])) {
				g->ops.fifo.unbind_channel(&f->channel[chid]);
				gk20a_channel_put(&f->channel[chid]);
			}
		}
	}

	gk20a_dbg_fn("done");
	return 0;
}

int gk20a_channel_resume(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;

	gk20a_dbg_fn("");

	for (chid = 0; chid < f->num_channels; chid++) {
		if (gk20a_channel_get(&f->channel[chid])) {
			gk20a_dbg_info("resume channel %d", chid);
			g->ops.fifo.bind_channel(&f->channel[chid]);
			channels_in_use = true;
			gk20a_channel_put(&f->channel[chid]);
		}
	}

	if (channels_in_use)
		g->ops.fifo.update_runlist(g, 0, ~0, true, true);

	gk20a_dbg_fn("done");
	return 0;
}

void gk20a_channel_semaphore_wakeup(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;

	gk20a_dbg_fn("");

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *c = g->fifo.channel+chid;
		if (gk20a_channel_get(c)) {
			gk20a_channel_event(c);
			wake_up_interruptible_all(&c->semaphore_wq);
			gk20a_channel_update(c, 0);
			gk20a_channel_put(c);
		}
	}
}

static int gk20a_ioctl_channel_submit_gpfifo(
	struct channel_gk20a *ch,
	struct nvgpu_submit_gpfifo_args *args)
{
	struct gk20a_fence *fence_out;
	int ret = 0;

	gk20a_dbg_fn("");

	if (ch->has_timedout)
		return -ETIMEDOUT;

	ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
					  args->flags, &args->fence,
					  &fence_out, false);

	if (ret)
		goto clean_up;

	/* Convert fence_out to something we can pass back to user space. */
	if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
		if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
			int fd = gk20a_fence_install_fd(fence_out);
			if (fd < 0)
				ret = fd;
			else
				args->fence.id = fd;
		} else {
			args->fence.id = fence_out->syncpt_id;
			args->fence.value = fence_out->syncpt_value;
		}
	}
	gk20a_fence_put(fence_out);

clean_up:
	return ret;
}

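/* Install the gk20a-native channel operations into the per-chip
 * operation table. */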
void gk20a_init_channel(struct gpu_ops *gops)
{
	gops->fifo.bind_channel = channel_gk20a_bind;
	gops->fifo.unbind_channel = channel_gk20a_unbind;
	gops->fifo.disable_channel = channel_gk20a_disable;
	gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
	gops->fifo.free_inst = channel_gk20a_free_inst;
	gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
}

long gk20a_channel_ioctl(struct file *filp,
	unsigned int cmd, unsigned long arg)
{
	struct channel_gk20a *ch = filp->private_data;
	struct platform_device *dev = ch->g->dev;
	u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0};
	int err = 0;

	gk20a_dbg_fn("start %d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
		return -EINVAL;

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	/* take a ref or return timeout if channel refs can't be taken */
	ch = gk20a_channel_get(ch);
	if (!ch)
		return -ETIMEDOUT;

	/* protect our sanity for threaded userspace - most of the channel is
	 * not thread safe */
	mutex_lock(&ch->ioctl_lock);

	/* this ioctl call keeps a ref to the file which keeps a ref to the
	 * channel */

	switch (cmd) {
	case NVGPU_IOCTL_CHANNEL_OPEN:
		err = gk20a_channel_open_ioctl(ch->g,
			(struct nvgpu_channel_open_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.gr.alloc_obj_ctx(ch,
				(struct nvgpu_alloc_obj_ctx_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.gr.free_obj_ctx(ch,
				(struct nvgpu_free_obj_ctx_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_alloc_channel_gpfifo(ch,
				(struct nvgpu_alloc_gpfifo_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
		err = gk20a_ioctl_channel_submit_gpfifo(ch,
				(struct nvgpu_submit_gpfifo_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_WAIT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}

		/* waiting is thread-safe, not dropping this mutex could
		 * deadlock in certain conditions */
		mutex_unlock(&ch->ioctl_lock);

		err = gk20a_channel_wait(ch,
				(struct nvgpu_wait_args *)buf);

		mutex_lock(&ch->ioctl_lock);

		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_zcull_bind(ch,
				(struct nvgpu_zcull_bind_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_init_error_notifier(ch,
				(struct nvgpu_set_error_notifier *)buf);
		gk20a_idle(dev);
		break;
#ifdef CONFIG_GK20A_CYCLE_STATS
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_cycle_stats(ch,
				(struct nvgpu_cycle_stats_args *)buf);
		gk20a_idle(dev);
		break;
#endif
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		break;
	}
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
		bool timeout_debug_dump = !((u32)
			((struct nvgpu_set_timeout_ex_args *)buf)->flags &
			(1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		ch->timeout_debug_dump = timeout_debug_dump;
		break;
	}
	case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
		((struct nvgpu_get_param_args *)buf)->value =
			ch->has_timedout;
		break;
	case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		gk20a_channel_set_priority(ch,
			((struct nvgpu_set_priority_args *)buf)->priority);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ENABLE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		/* enable channel */
		gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
			gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
			ccsr_channel_enable_set_true_f());
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_DISABLE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		/* disable channel */
		gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
			gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
			ccsr_channel_enable_clr_true_f());
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_PREEMPT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_fifo_preempt(ch->g, ch);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_fifo_force_reset_ch(ch, true);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
		err = gk20a_channel_events_ctrl(ch,
			(struct nvgpu_channel_events_ctrl_args *)buf);
		break;
#ifdef CONFIG_GK20A_CYCLE_STATS
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_cycle_stats_snapshot(ch,
				(struct nvgpu_cycle_stats_snapshot_args *)buf);
		gk20a_idle(dev);
		break;
#endif
	default:
		dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));

	mutex_unlock(&ch->ioctl_lock);

	gk20a_channel_put(ch);

	gk20a_dbg_fn("end");

	return err;
}