/*
 * GK20A Graphics channel
 *
 * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <linux/nvhost.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/highmem.h> /* needed for nvmap.h */
#include <trace/events/gk20a.h>
#include <linux/scatterlist.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>
#include "debug_gk20a.h"

#include "dbg_gpu_gk20a.h"
#include "fence_gk20a.h"
#include "semaphore_gk20a.h"

#include "hw_ram_gk20a.h"
#include "hw_fifo_gk20a.h"
#include "hw_pbdma_gk20a.h"
#include "hw_ccsr_gk20a.h"
#include "hw_ltc_gk20a.h"
#define NVMAP_HANDLE_PARAM_SIZE 1

#define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT	64	/* channels */
static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);

static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e);

static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_commit_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_userd(struct channel_gk20a *c);

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);

static int channel_gk20a_update_runlist(struct channel_gk20a *c,
					bool add);
static void gk20a_free_error_notifiers(struct channel_gk20a *ch);

static void gk20a_channel_clean_up_jobs(struct work_struct *work);
/* allocate GPU channel */
static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
{
	struct channel_gk20a *ch = NULL;
	struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);

	mutex_lock(&f->free_chs_mutex);
	if (!list_empty(&f->free_chs)) {
		ch = list_first_entry(&f->free_chs, struct channel_gk20a,
				      free_chs);
		list_del(&ch->free_chs);
		WARN_ON(atomic_read(&ch->ref_count));
		WARN_ON(ch->referenceable);
		f->used_channels++;
	}
	mutex_unlock(&f->free_chs_mutex);

	if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
		platform->aggressive_sync_destroy = true;

	return ch;
}
static void free_channel(struct fifo_gk20a *f,
		struct channel_gk20a *ch)
{
	struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);

	trace_gk20a_release_used_channel(ch->hw_chid);
	/* refcount is zero here and channel is in a freed/dead state */
	mutex_lock(&f->free_chs_mutex);
	/* add to head to increase visibility of timing-related bugs */
	list_add(&ch->free_chs, &f->free_chs);
	f->used_channels--;
	mutex_unlock(&f->free_chs_mutex);

	if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
		platform->aggressive_sync_destroy = false;
}
int channel_gk20a_commit_va(struct channel_gk20a *c)
{
	if (!c->inst_block.cpu_va)
		return -ENOMEM;

	gk20a_init_inst_block(&c->inst_block, c->vm,
			c->vm->gmmu_page_sizes[gmmu_page_size_big]);

	return 0;
}
static int channel_gk20a_commit_userd(struct channel_gk20a *c)
{
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr;

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
	addr_hi = u64_hi32(c->userd_iova);

	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
		c->hw_chid, (u64)c->userd_iova);

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_addr_f(addr_lo));

	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
		pbdma_userd_target_vid_mem_f() |
		pbdma_userd_hi_addr_f(addr_hi));

	return 0;
}
static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
				u32 timeslice_timeout)
{
	void *inst_ptr;
	int shift = 3;
	int value = timeslice_timeout;

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	/* disable channel */
	c->g->ops.fifo.disable_channel(c);

	/* preempt the channel */
	WARN_ON(gk20a_fifo_preempt(c->g, c));

	/* the value field is 8 bits long: normalize into value + shift */
	while (value >= 1 << 8) {
		value >>= 1;
		shift++;
	}

	/* the time slice register is only 18 bits long */
	if ((value << shift) >= 1 << 19) {
		pr_err("Requested timeslice value is clamped to 18 bits\n");
		value = 255;
		shift = 10;
	}
	/* set new timeslice */
	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
		value | (shift << 12) |
		fifo_runlist_timeslice_enable_true_f());

	/* re-enable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_set_true_f());

	return 0;
}
int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
			u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
{
	void *inst_ptr;

	inst_ptr = c->inst_block.cpu_va;
	if (!inst_ptr)
		return -ENOMEM;

	memset(inst_ptr, 0, ram_fc_size_val_v());

	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
		pbdma_gp_base_offset_f(
		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));

	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
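	/*
	 * Note (added for clarity): LIMIT2 stores log2 of the ring size,
	 * so the GPFIFO entry count must be a power of two; e.g. 1024
	 * entries are encoded as limit2 = 10 (illustrative example).
	 */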
	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
		c->g->ops.fifo.get_pbdma_signature(c->g));

	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
		pbdma_formats_gp_fermi0_f() |
		pbdma_formats_pb_fermi1_f() |
		pbdma_formats_mp_fermi0_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
		pbdma_pb_header_priv_user_f() |
		pbdma_pb_header_method_zero_f() |
		pbdma_pb_header_subchannel_zero_f() |
		pbdma_pb_header_level_main_f() |
		pbdma_pb_header_first_true_f() |
		pbdma_pb_header_type_inc_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
		pbdma_subdevice_id_f(1) |
		pbdma_subdevice_status_active_f() |
		pbdma_subdevice_channel_dma_enable_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
		pbdma_acquire_retry_man_2_f() |
		pbdma_acquire_retry_exp_2_f() |
		pbdma_acquire_timeout_exp_max_f() |
		pbdma_acquire_timeout_man_max_f() |
		pbdma_acquire_timeout_en_disable_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
		fifo_runlist_timeslice_timeout_128_f() |
		fifo_runlist_timeslice_timescale_3_f() |
		fifo_runlist_timeslice_enable_true_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
		fifo_pb_timeslice_timeout_16_f() |
		fifo_pb_timeslice_timescale_0_f() |
		fifo_pb_timeslice_enable_true_f());

	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));

	return channel_gk20a_commit_userd(c);
}
static int channel_gk20a_setup_userd(struct channel_gk20a *c)
{
	BUG_ON(!c->userd_cpu_va);

	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);

	return 0;
}
static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = ch_gk20a->g;
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *engine_info =
		f->engine_info + ENGINE_GR_GK20A;

	u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block)
		>> ram_in_base_shift_v();

	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
		ch_gk20a->hw_chid, inst_ptr);

	ch_gk20a->bound = true;

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_runlist_f(~0)) |
		 ccsr_channel_runlist_f(engine_info->runlist_id));

	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
		ccsr_channel_inst_ptr_f(inst_ptr) |
		ccsr_channel_inst_target_vid_mem_f() |
		ccsr_channel_inst_bind_true_f());

	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
		 ~ccsr_channel_enable_set_f(~0)) |
		 ccsr_channel_enable_set_true_f());
}
void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = ch_gk20a->g;
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);

	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
		ccsr_channel_inst_ptr_f(0) |
		ccsr_channel_inst_bind_false_f());

	ch_gk20a->bound = false;

	/*
	 * if we are aggressive then we can destroy the syncpt
	 * resource at this point
	 * if not, then it will be destroyed at channel_free()
	 */
	mutex_lock(&ch_gk20a->sync_lock);
	if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
		ch_gk20a->sync->destroy(ch_gk20a->sync);
		ch_gk20a->sync = NULL;
	}
	mutex_unlock(&ch_gk20a->sync_lock);
}
int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
{
	int err;

	err = gk20a_alloc_inst_block(g, &ch->inst_block);
	if (err)
		return err;

	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
		ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block));

	gk20a_dbg_fn("done");
	return 0;
}

void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
{
	gk20a_free_inst_block(g, &ch->inst_block);
}
static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
{
	return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
}
void channel_gk20a_enable(struct channel_gk20a *ch)
{
	/* enable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_set_true_f());
}

void channel_gk20a_disable(struct channel_gk20a *ch)
{
	/* disable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g,
			ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_clr_true_f());
}
void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
{
	struct channel_gk20a_job *job, *n;
	bool released_job_semaphore = false;

	/* make sure new kickoffs are prevented */
	ch->has_timedout = true;

	ch->g->ops.fifo.disable_channel(ch);

	if (channel_preempt)
		gk20a_fifo_preempt(ch->g, ch);

	/* ensure no fences are pending */
	mutex_lock(&ch->sync_lock);
	if (ch->sync)
		ch->sync->set_min_eq_max(ch->sync);
	mutex_unlock(&ch->sync_lock);

	/* release all job semaphores (applies only to jobs that use
	   semaphore synchronization) */
	mutex_lock(&ch->jobs_lock);
	list_for_each_entry_safe(job, n, &ch->jobs, list) {
		if (job->post_fence->semaphore) {
			gk20a_semaphore_release(job->post_fence->semaphore);
			released_job_semaphore = true;
		}
	}
	mutex_unlock(&ch->jobs_lock);

	if (released_job_semaphore)
		wake_up_interruptible_all(&ch->semaphore_wq);

	gk20a_channel_update(ch, 0);
}
int gk20a_wait_channel_idle(struct channel_gk20a *ch)
{
	bool channel_idle = false;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));

	do {
		mutex_lock(&ch->jobs_lock);
		channel_idle = list_empty(&ch->jobs);
		mutex_unlock(&ch->jobs_lock);
		if (channel_idle)
			break;

		usleep_range(1000, 3000);
	} while (time_before(jiffies, end_jiffies)
			|| !tegra_platform_is_silicon());

	if (!channel_idle) {
		gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
				ch->hw_chid);
		return -EBUSY;
	}

	return 0;
}

void gk20a_disable_channel(struct channel_gk20a *ch)
{
	gk20a_channel_abort(ch, true);
	channel_gk20a_update_runlist(ch, false);
}
#if defined(CONFIG_GK20A_CYCLE_STATS)

static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
{
	/* disable existing cyclestats buffer */
	mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
	if (ch->cyclestate.cyclestate_buffer_handler) {
		dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
				ch->cyclestate.cyclestate_buffer);
		dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
		ch->cyclestate.cyclestate_buffer_handler = NULL;
		ch->cyclestate.cyclestate_buffer = NULL;
		ch->cyclestate.cyclestate_buffer_size = 0;
	}
	mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
}
static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
		       struct nvgpu_cycle_stats_args *args)
{
	struct dma_buf *dmabuf;
	void *virtual_address;

	/* is it allowed to handle calls for current GPU? */
	if (0 == (ch->g->gpu_characteristics.flags &
			NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
		return -ENOSYS;

	if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {

		/* set up new cyclestats buffer */
		dmabuf = dma_buf_get(args->dmabuf_fd);
		if (IS_ERR(dmabuf))
			return PTR_ERR(dmabuf);
		virtual_address = dma_buf_vmap(dmabuf);
		if (!virtual_address)
			return -ENOMEM;

		ch->cyclestate.cyclestate_buffer_handler = dmabuf;
		ch->cyclestate.cyclestate_buffer = virtual_address;
		ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
		return 0;

	} else if (!args->dmabuf_fd &&
			ch->cyclestate.cyclestate_buffer_handler) {
		gk20a_free_cycle_stats_buffer(ch);
		return 0;

	} else if (!args->dmabuf_fd &&
			!ch->cyclestate.cyclestate_buffer_handler) {
		/* no request from GL */
		return 0;

	} else {
		pr_err("channel already has cyclestats buffer\n");
		return -EBUSY;
	}
}
static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client)
		ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
	else
		ret = -EBADF;
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
				u32 dmabuf_fd,
				u32 perfmon_id_count,
				u32 *perfmon_id_start)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client) {
		ret = -EEXIST;
	} else {
		ret = gr_gk20a_css_attach(ch->g,
					dmabuf_fd,
					perfmon_id_count,
					perfmon_id_start,
					&ch->cs_client);
	}
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client) {
		ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
		ch->cs_client = NULL;
	} else {
		ret = 0;
	}
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}
static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
			struct nvgpu_cycle_stats_snapshot_args *args)
{
	int ret;

	/* is it allowed to handle calls for current GPU? */
	if (0 == (ch->g->gpu_characteristics.flags &
			NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT))
		return -ENOSYS;

	if (!args->dmabuf_fd)
		return -EINVAL;

	/* handle the command (most frequent cases first) */
	switch (args->cmd) {
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
		ret = gk20a_flush_cycle_stats_snapshot(ch);
		break;

	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
		ret = gk20a_attach_cycle_stats_snapshot(ch,
						args->dmabuf_fd,
						args->extra,
						&args->extra);
		break;

	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
		ret = gk20a_free_cycle_stats_snapshot(ch);
		break;

	default:
		pr_err("cyclestats: unknown command %u\n", args->cmd);
		ret = -EINVAL;
		break;
	}

	return ret;
}
#endif
static int gk20a_init_error_notifier(struct channel_gk20a *ch,
		struct nvgpu_set_error_notifier *args)
{
	struct device *dev = dev_from_gk20a(ch->g);
	struct dma_buf *dmabuf;
	void *va;
	u64 end = args->offset + sizeof(struct nvgpu_notification);

	if (!args->mem) {
		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
		return -EINVAL;
	}

	dmabuf = dma_buf_get(args->mem);

	if (ch->error_notifier_ref)
		gk20a_free_error_notifiers(ch);

	if (IS_ERR(dmabuf)) {
		pr_err("Invalid handle: %d\n", args->mem);
		return -EINVAL;
	}

	if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) {
		dma_buf_put(dmabuf);
		gk20a_err(dev, "gk20a_init_error_notifier: invalid offset\n");
		return -EINVAL;
	}

	/* map handle */
	va = dma_buf_vmap(dmabuf);
	if (!va) {
		dma_buf_put(dmabuf);
		pr_err("Cannot map notifier handle\n");
		return -ENOMEM;
	}

	/* set channel notifiers pointer */
	ch->error_notifier_ref = dmabuf;
	ch->error_notifier = va + args->offset;
	ch->error_notifier_va = va;
	memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));

	return 0;
}
void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
{
	if (ch->error_notifier_ref) {
		struct timespec time_data;
		u64 nsec;

		getnstimeofday(&time_data);
		nsec = ((u64)time_data.tv_sec) * 1000000000u +
				(u64)time_data.tv_nsec;
		ch->error_notifier->time_stamp.nanoseconds[0] =
				(u32)nsec;
		ch->error_notifier->time_stamp.nanoseconds[1] =
				(u32)(nsec >> 32);
		ch->error_notifier->info32 = error;
		ch->error_notifier->status = 0xffff;

		gk20a_err(dev_from_gk20a(ch->g),
			"error notifier set to %d for ch %d", error, ch->hw_chid);
	}
}
static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
{
	if (ch->error_notifier_ref) {
		dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
		dma_buf_put(ch->error_notifier_ref);
		ch->error_notifier_ref = NULL;
		ch->error_notifier = NULL;
		ch->error_notifier_va = NULL;
	}
}
/* Returns delta of cyclic integers a and b. If a is ahead of b, delta
 * is positive */
static int cyclic_delta(int a, int b)
{
	return a - b;
}
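/*
 * Illustrative example (relies on the usual two's-complement wraparound
 * of these counters): if b == INT_MAX and a has just wrapped to INT_MIN,
 * a - b overflows to 1, correctly reporting a as one step ahead of b.
 */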
static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
{
	int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
	int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);

	/* wait until all stalling irqs are handled */
	wait_event(g->sw_irq_stall_last_handled_wq,
		   cyclic_delta(stall_irq_threshold,
				atomic_read(&g->sw_irq_stall_last_handled))
		   <= 0);

	/* wait until all non-stalling irqs are handled */
	wait_event(g->sw_irq_nonstall_last_handled_wq,
		   cyclic_delta(nonstall_irq_threshold,
				atomic_read(&g->sw_irq_nonstall_last_handled))
		   <= 0);
}
static void gk20a_wait_until_counter_is_N(
	struct channel_gk20a *ch, atomic_t *counter, int wait_value,
	wait_queue_head_t *wq, const char *caller, const char *counter_name)
{
	while (true) {
		if (wait_event_timeout(
			    *wq,
			    atomic_read(counter) == wait_value,
			    msecs_to_jiffies(5000)) > 0)
			break;

		gk20a_warn(dev_from_gk20a(ch->g),
			   "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
			   caller, ch->hw_chid, counter_name,
			   atomic_read(counter), wait_value);
	}
}
/* call ONLY when no references to the channel exist: after the last put */
static void gk20a_free_channel(struct channel_gk20a *ch)
{
	struct gk20a *g = ch->g;
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	struct vm_gk20a *ch_vm = ch->vm;
	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
	struct dbg_session_gk20a *dbg_s;
	bool was_reset;

	WARN_ON(ch->g == NULL);

	trace_gk20a_free_channel(ch->hw_chid);

	/* abort channel and remove from runlist */
	gk20a_disable_channel(ch);

	/* wait until there's only our ref to the channel */
	gk20a_wait_until_counter_is_N(
		ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
		__func__, "references");

	/* wait until all pending interrupts for recently completed
	 * jobs are handled */
	gk20a_wait_for_deferred_interrupts(g);

	/* prevent new refs */
	spin_lock(&ch->ref_obtain_lock);
	if (!ch->referenceable) {
		spin_unlock(&ch->ref_obtain_lock);
		gk20a_err(dev_from_gk20a(ch->g),
			  "Extra %s() called to channel %u",
			  __func__, ch->hw_chid);
		return;
	}
	ch->referenceable = false;
	spin_unlock(&ch->ref_obtain_lock);

	/* matches with the initial reference in gk20a_open_new_channel() */
	atomic_dec(&ch->ref_count);

	/* wait until no more refs to the channel */
	gk20a_wait_until_counter_is_N(
		ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
		__func__, "references");

	/* if engine reset was deferred, perform it now */
	mutex_lock(&f->deferred_reset_mutex);
	if (g->fifo.deferred_reset_pending) {
		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
			  " deferred, running now");
		was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
		mutex_lock(&g->fifo.gr_reset_mutex);
		/* if lock is already taken, a reset is taking place
		   so no need to repeat */
		if (!was_reset)
			gk20a_fifo_reset_engine(g,
				g->fifo.deferred_fault_engines);
		mutex_unlock(&g->fifo.gr_reset_mutex);
		g->fifo.deferred_fault_engines = 0;
		g->fifo.deferred_reset_pending = false;
	}
	mutex_unlock(&f->deferred_reset_mutex);
	if (!gk20a_channel_as_bound(ch))
		goto unbind;

	gk20a_dbg_info("freeing bound channel context, timeout=%ld",
			timeout);

	gk20a_free_error_notifiers(ch);

	/* release channel ctx */
	g->ops.gr.free_channel_ctx(ch);

	gk20a_gr_flush_channel_tlb(gr);

	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));

	gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);

	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));

#if defined(CONFIG_GK20A_CYCLE_STATS)
	gk20a_free_cycle_stats_buffer(ch);
	gk20a_free_cycle_stats_snapshot(ch);
#endif

	channel_gk20a_free_priv_cmdbuf(ch);

	/* sync must be destroyed before releasing channel vm */
	mutex_lock(&ch->sync_lock);
	if (ch->sync) {
		ch->sync->destroy(ch->sync);
		ch->sync = NULL;
	}
	mutex_unlock(&ch->sync_lock);

	/* release channel binding to the as_share */
	if (ch_vm->as_share)
		gk20a_as_release_share(ch_vm->as_share);

	spin_lock(&ch->update_fn_lock);
	ch->update_fn = NULL;
	ch->update_fn_data = NULL;
	spin_unlock(&ch->update_fn_lock);
	cancel_work_sync(&ch->update_fn_work);

	/* make sure we don't have deferred interrupts pending that
	 * could still touch the channel */
	gk20a_wait_for_deferred_interrupts(g);

unbind:
	if (gk20a_is_channel_marked_as_tsg(ch))
		gk20a_tsg_unbind_channel(ch);

	g->ops.fifo.unbind_channel(ch);
	g->ops.fifo.free_inst(g, ch);

	mutex_lock(&ch->submit_lock);
	gk20a_fence_put(ch->last_submit.pre_fence);
	gk20a_fence_put(ch->last_submit.post_fence);
	ch->last_submit.pre_fence = NULL;
	ch->last_submit.post_fence = NULL;
	mutex_unlock(&ch->submit_lock);

	/* unlink all debug sessions */
	mutex_lock(&ch->dbg_s_lock);

	list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
		dbg_s->ch = NULL;
		list_del_init(&dbg_s->dbg_s_list_node);
	}

	mutex_unlock(&ch->dbg_s_lock);
	/* make sure we catch accesses of unopened channels in case
	 * there's non-refcounted channel pointers hanging around */
	ch->g = NULL;

	/* ALWAYS last */
	free_channel(f, ch);
}
/* Try to get a reference to the channel. Return nonzero on success. If fails,
 * the channel is dead or being freed elsewhere and you must not touch it.
 *
 * Always when a channel_gk20a pointer is seen and about to be used, a
 * reference must be held to it - either by you or the caller, which should be
 * documented well or otherwise clearly seen. This usually boils down to the
 * file from ioctls directly, or an explicit get in exception handlers when the
 * channel is found by a hw_chid.
 *
 * Most global functions in this file require a reference to be held by the
 * caller.
 */
struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
					 const char *caller)
{
	struct channel_gk20a *ret;

	spin_lock(&ch->ref_obtain_lock);

	if (likely(ch->referenceable)) {
		atomic_inc(&ch->ref_count);
		ret = ch;
	} else
		ret = NULL;

	spin_unlock(&ch->ref_obtain_lock);

	if (ret)
		trace_gk20a_channel_get(ch->hw_chid, caller);

	return ret;
}
void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
{
	trace_gk20a_channel_put(ch->hw_chid, caller);
	atomic_dec(&ch->ref_count);
	wake_up_all(&ch->ref_count_dec_wq);

	/* More puts than gets. Channel is probably going to get
	 * stuck. */
	WARN_ON(atomic_read(&ch->ref_count) < 0);

	/* Also, more puts than gets. ref_count can go to 0 only if
	 * the channel is closing. Channel is probably going to get
	 * stuck. */
	WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
}
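/*
 * Typical usage sketch (illustrative only, using the gk20a_channel_get()/
 * gk20a_channel_put() wrappers seen throughout this file):
 *
 *	ch = gk20a_channel_get(ch);
 *	if (!ch)
 *		return -ETIMEDOUT;
 *	... safely use ch ...
 *	gk20a_channel_put(ch);
 */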
void gk20a_channel_close(struct channel_gk20a *ch)
{
	gk20a_free_channel(ch);
}

int gk20a_channel_release(struct inode *inode, struct file *filp)
{
	struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
	struct gk20a *g = ch ? ch->g : NULL;
	int err;

	if (!ch)
		return 0;

	trace_gk20a_channel_release(dev_name(&g->dev->dev));

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
			ch->hw_chid);
		return err;
	}
	gk20a_channel_close(ch);
	gk20a_idle(g->dev);

	filp->private_data = NULL;
	return 0;
}
static void gk20a_channel_update_runcb_fn(struct work_struct *work)
{
	struct channel_gk20a *ch =
		container_of(work, struct channel_gk20a, update_fn_work);
	void (*update_fn)(struct channel_gk20a *, void *);
	void *update_fn_data;

	spin_lock(&ch->update_fn_lock);
	update_fn = ch->update_fn;
	update_fn_data = ch->update_fn_data;
	spin_unlock(&ch->update_fn_lock);

	if (update_fn)
		update_fn(ch, update_fn_data);
}
struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
		void (*update_fn)(struct channel_gk20a *, void *),
		void *update_fn_data)
{
	struct channel_gk20a *ch = gk20a_open_new_channel(g);

	if (ch) {
		spin_lock(&ch->update_fn_lock);
		ch->update_fn = update_fn;
		ch->update_fn_data = update_fn_data;
		spin_unlock(&ch->update_fn_lock);
	}

	return ch;
}
struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch;

	ch = allocate_channel(f);
	if (ch == NULL) {
		/* TBD: we want to make this virtualizable */
		gk20a_err(dev_from_gk20a(g), "out of hw chids");
		return NULL;
	}

	trace_gk20a_open_new_channel(ch->hw_chid);

	ch->g = g;

	if (g->ops.fifo.alloc_inst(g, ch)) {
		ch->g = NULL;
		free_channel(f, ch);
		gk20a_err(dev_from_gk20a(g),
			  "failed to open gk20a channel, out of inst mem");
		return NULL;
	}

	/* now the channel is in a limbo out of the free list but not marked as
	 * alive and used (i.e. get-able) yet */

	ch->pid = current->pid;

	/* By default, channel is regular (non-TSG) channel */
	ch->tsgid = NVGPU_INVALID_TSG_ID;

	/* reset timeout counter and update timestamp */
	ch->timeout_accumulated_ms = 0;
	ch->timeout_gpfifo_get = 0;
	/* set gr host default timeout */
	ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
	ch->timeout_debug_dump = true;
	ch->has_timedout = false;

	ch->clean_up.scheduled = false;

	/* The channel is *not* runnable at this point. It still needs to have
	 * an address space bound and allocate a gpfifo and grctx. */

	init_waitqueue_head(&ch->notifier_wq);
	init_waitqueue_head(&ch->semaphore_wq);
	init_waitqueue_head(&ch->submit_wq);

	mutex_init(&ch->poll_events.lock);
	ch->poll_events.events_enabled = false;
	ch->poll_events.num_pending_events = 0;

	ch->update_fn = NULL;
	ch->update_fn_data = NULL;
	spin_lock_init(&ch->update_fn_lock);
	INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);

	/* Mark the channel alive, get-able, with 1 initial use
	 * reference. The initial reference will be decreased in
	 * gk20a_free_channel() */
	ch->referenceable = true;
	atomic_set(&ch->ref_count, 1);

	return ch;
}
static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
{
	int err;
	struct channel_gk20a *ch;

	trace_gk20a_channel_open(dev_name(&g->dev->dev));

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
		return err;
	}
	ch = gk20a_open_new_channel(g);
	gk20a_idle(g->dev);
	if (!ch) {
		gk20a_err(dev_from_gk20a(g),
			"failed to get channel");
		return -ENOMEM;
	}

	filp->private_data = ch;
	return 0;
}

int gk20a_channel_open(struct inode *inode, struct file *filp)
{
	struct gk20a *g = container_of(inode->i_cdev,
			struct gk20a, channel.cdev);
	int ret;

	gk20a_dbg_fn("start");
	ret = __gk20a_channel_open(g, filp);

	gk20a_dbg_fn("end");
	return ret;
}
int gk20a_channel_open_ioctl(struct gk20a *g,
		struct nvgpu_channel_open_args *args)
{
	int err;
	int fd;
	struct file *file;
	char *name;

	err = get_unused_fd_flags(O_RDWR);
	if (err < 0)
		return err;
	fd = err;

	name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
			dev_name(&g->dev->dev), fd);
	if (!name) {
		err = -ENOMEM;
		goto clean_up;
	}

	file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
	kfree(name);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto clean_up;
	}

	err = __gk20a_channel_open(g, file);
	if (err)
		goto clean_up_file;

	fd_install(fd, file);
	args->channel_fd = fd;
	return 0;

clean_up_file:
	fput(file);
clean_up:
	put_unused_fd(fd);
	return err;
}
/* allocate private cmd buffer.
   used for inserting commands before/after user submitted buffers. */
static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	u32 size;
	int err = 0;

	/* Kernel can insert gpfifos before and after user gpfifos.
	   Before user gpfifos, kernel inserts fence_wait, which takes
	   syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
	   After user gpfifos, kernel inserts fence_get, which takes
	   wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
	   = 6 dwords.
	   Worst case, if the kernel adds both of them for every user gpfifo,
	   the max size of the priv_cmdbuf is:
	   (gpfifo entry number * (2 / 3) * (4 + 6) * 4) bytes */
	size = roundup_pow_of_two(
		c->gpfifo.entry_num * 2 * 12 * sizeof(u32) / 3);
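	/*
	 * Illustrative arithmetic (added for clarity): for a 1024-entry
	 * gpfifo this is 1024 * 2 * 12 * 4 / 3 = 32768 bytes, already a
	 * power of two, so 32 KiB of private command memory is mapped.
	 */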
	err = gk20a_gmmu_alloc_map(ch_vm, size, &q->mem);
	if (err) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		goto clean_up;
	}

	q->size = q->mem.size / sizeof(u32);

	INIT_LIST_HEAD(&q->free);

	return 0;

clean_up:
	channel_gk20a_free_priv_cmdbuf(c);
	return err;
}
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
{
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	struct list_head *pos, *tmp, *head;

	if (q->size == 0)
		return;

	gk20a_gmmu_unmap_free(ch_vm, &q->mem);

	/* free the free list */
	head = &q->free;
	list_for_each_safe(pos, tmp, head) {
		e = container_of(pos, struct priv_cmd_entry, list);
		list_del(&e->list);
		kfree(e);
	}

	memset(q, 0, sizeof(struct priv_cmd_queue));
}
/* allocate a cmd buffer with given size. size is number of u32 entries */
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
			     struct priv_cmd_entry **entry)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	u32 free_count;
	u32 size = orig_size;

	gk20a_dbg_fn("size %d", orig_size);

	/* if free space in the end is less than requested, increase the size
	 * to make the real allocated space start from beginning. */
	if (q->put + size > q->size)
		size = orig_size + (q->size - q->put);

	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
			c->hw_chid, q->get, q->put);

	free_count = (q->size - (q->put - q->get) - 1) % q->size;
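	/*
	 * Worked example (illustrative): with size = 1024, put = 100 and
	 * get = 50, free_count = (1024 - 50 - 1) % 1024 = 973; one slot is
	 * always kept unused so a full queue can be distinguished from an
	 * empty one.
	 */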
	if (size > free_count)
		return -EAGAIN;

	if (list_empty(&q->free))
		e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
	else {
		e = container_of((&q->free)->next,
				struct priv_cmd_entry, list);
		list_del(&e->list);
	}
	if (!e) {
		gk20a_err(dev_from_gk20a(c->g),
			"ch %d: fail to allocate priv cmd entry",
			c->hw_chid);
		return -ENOMEM;
	}

	e->size = orig_size;
	e->gp_get = c->gpfifo.get;
	e->gp_put = c->gpfifo.put;
	e->gp_wrap = c->gpfifo.wrap;

	/* if we have increased size to skip free space in the end, set put
	   to beginning of cmd buffer (0) + size */
	if (size != orig_size) {
		e->ptr = (u32 *)q->mem.cpu_va;
		e->gva = q->mem.gpu_va;
		q->put = orig_size;
	} else {
		e->ptr = (u32 *)q->mem.cpu_va + q->put;
		e->gva = q->mem.gpu_va + q->put * sizeof(u32);
		q->put = (q->put + orig_size) & (q->size - 1);
	}

	/* we already handled q->put + size > q->size so BUG_ON this */
	BUG_ON(q->put > q->size);
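	/*
	 * Note (added for clarity): q->size is a power of two (it comes
	 * from roundup_pow_of_two() at allocation time), so masking with
	 * (q->size - 1) above is a cheap equivalent of modulo q->size.
	 */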
	*entry = e;

	gk20a_dbg_fn("done");
	return 0;
}
/* Don't call this to free an explicit cmd entry.
 * It doesn't update priv_cmd_queue get/put */
static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;

	if (!e)
		return;

	list_add(&e->list, &q->free);
}
int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
			       struct nvgpu_alloc_gpfifo_args *args)
{
	struct gk20a *g = c->g;
	struct device *d = dev_from_gk20a(g);
	struct vm_gk20a *ch_vm;
	u32 gpfifo_size;
	int err = 0;

	/* Kernel can insert one extra gpfifo entry before user submitted gpfifos
	   and another one after, for internal usage. Triple the requested size. */
	gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
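	/*
	 * Illustrative example: a request for 300 entries becomes
	 * roundup_pow_of_two(900) = 1024 gpfifo entries.
	 */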
	if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
		c->vpr = true;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(d,
			"not bound to an address space at time of gpfifo"
			" allocation");
		return -EINVAL;
	}
	ch_vm = c->vm;

	c->cmds_pending = false;
	mutex_lock(&c->submit_lock);
	gk20a_fence_put(c->last_submit.pre_fence);
	gk20a_fence_put(c->last_submit.post_fence);
	c->last_submit.pre_fence = NULL;
	c->last_submit.post_fence = NULL;
	mutex_unlock(&c->submit_lock);

	c->ramfc.offset = 0;
	c->ramfc.size = ram_in_ramfc_s() / 8;

	if (c->gpfifo.mem.cpu_va) {
		gk20a_err(d, "channel %d :"
			   "gpfifo already allocated", c->hw_chid);
		return -EEXIST;
	}

	err = gk20a_gmmu_alloc_map(ch_vm, gpfifo_size * sizeof(struct gpfifo),
			&c->gpfifo.mem);
	if (err) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		goto clean_up;
	}

	c->gpfifo.entry_num = gpfifo_size;
	c->gpfifo.get = c->gpfifo.put = 0;

	gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
		c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);

	channel_gk20a_setup_userd(c);

	err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
					c->gpfifo.entry_num, args->flags);
	if (err)
		goto clean_up_unmap;

	/* TBD: setup engine contexts */

	err = channel_gk20a_alloc_priv_cmdbuf(c);
	if (err)
		goto clean_up_unmap;

	err = channel_gk20a_update_runlist(c, true);
	if (err)
		goto clean_up_unmap;

	g->ops.fifo.bind_channel(c);

	gk20a_dbg_fn("done");
	return 0;

clean_up_unmap:
	gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
clean_up:
	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
	gk20a_err(d, "fail");
	return err;
}
static inline bool check_gp_put(struct gk20a *g,
				struct channel_gk20a *c)
{
	u32 put;

	/* gp_put changed unexpectedly since last update? */
	put = gk20a_bar1_readl(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w());
	if (c->gpfifo.put != put) {
		/*TBD: BUG_ON/teardown on this*/
		gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
			  "since last update, channel put = %u, ram put = %u\n",
			  c->gpfifo.put, put);
		c->gpfifo.put = put;
		return false; /* surprise! */
	}
	return true; /* checked out ok */
}
/* Update with this periodically to determine how the gpfifo is draining. */
static inline u32 update_gp_get(struct gk20a *g,
				struct channel_gk20a *c)
{
	u32 new_get = gk20a_bar1_readl(g,
		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
	if (new_get < c->gpfifo.get)
		c->gpfifo.wrap = !c->gpfifo.wrap;
	c->gpfifo.get = new_get;
	return new_get;
}
static inline u32 gp_free_count(struct channel_gk20a *c)
{
	return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
		c->gpfifo.entry_num;
}
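/*
 * Worked example (illustrative): with entry_num = 1024 and put == get,
 * gp_free_count() returns (1024 - 0 - 1) % 1024 = 1023; as with the priv
 * cmd queue, one entry always stays unused to distinguish full from empty.
 */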
bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
		u32 timeout_delta_ms)
{
	u32 gpfifo_get = update_gp_get(ch->g, ch);

	/* count consecutive timeout isrs */
	if (gpfifo_get == ch->timeout_gpfifo_get) {
		/* we didn't advance since previous channel timeout check */
		ch->timeout_accumulated_ms += timeout_delta_ms;
	} else {
		/* first timeout isr encountered */
		ch->timeout_accumulated_ms = timeout_delta_ms;
	}

	ch->timeout_gpfifo_get = gpfifo_get;

	return ch->g->timeouts_enabled &&
		ch->timeout_accumulated_ms > ch->timeout_ms_max;
}
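/*
 * Illustrative example: assuming timeouts are enabled and timeout_ms_max
 * is 3000, a channel whose GET pointer has not advanced across repeated
 * timeout isrs accumulates each delta until the total exceeds 3000 ms,
 * at which point this function reports the channel as hung.
 */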
static u32 get_gp_free_count(struct channel_gk20a *c)
{
	update_gp_get(c->g, c);
	return gp_free_count(c);
}
static void trace_write_pushbuffer(struct channel_gk20a *c,
				   struct nvgpu_gpfifo *g)
{
	void *mem = NULL;
	unsigned int words;
	u64 offset;
	struct dma_buf *dmabuf = NULL;

	if (gk20a_debug_trace_cmdbuf) {
		u64 gpu_va = (u64)g->entry0 |
			(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
		int err;

		words = pbdma_gp_entry1_length_v(g->entry1);
		err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
		if (!err)
			mem = dma_buf_vmap(dmabuf);
	}

	if (mem) {
		u32 i;
		/*
		 * Write in batches of 128 as there seems to be a limit
		 * of how much you can output to ftrace at once.
		 */
		for (i = 0; i < words; i += 128U) {
			trace_gk20a_push_cmdbuf(
				c->g->dev->name,
				0,
				min(words - i, 128U),
				offset + i * sizeof(u32),
				mem);
		}
		dma_buf_vunmap(dmabuf, mem);
	}
}
static void trace_write_pushbuffer_range(struct channel_gk20a *c,
					 struct nvgpu_gpfifo *g,
					 struct nvgpu_submit_gpfifo_args *args,
					 int offset,
					 int count)
{
	u32 size;
	int i;
	struct nvgpu_gpfifo *gp;
	bool gpfifo_allocated = false;

	if (!gk20a_debug_trace_cmdbuf)
		return;

	if (!g && !args)
		return;

	if (!g) {
		size = args->num_entries * sizeof(struct nvgpu_gpfifo);
		g = nvgpu_alloc(size, false);
		if (!g)
			return;

		if (copy_from_user(g,
			(void __user *)(uintptr_t)args->gpfifo, size)) {
			nvgpu_free(g);
			return;
		}
		gpfifo_allocated = true;
	}

	gp = g + offset;
	for (i = 0; i < count; i++, gp++)
		trace_write_pushbuffer(c, gp);

	if (gpfifo_allocated)
		nvgpu_free(g);
}
static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
				  struct priv_cmd_entry *e)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	u32 cmd_entry_start;
	struct device *d = dev_from_gk20a(c->g);

	if (!e)
		return 0;

	cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
	if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
		gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);

	q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
	free_priv_cmdbuf(c, e);

	return 0;
}
static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
{
	mutex_lock(&c->clean_up.lock);

	if (c->clean_up.scheduled) {
		mutex_unlock(&c->clean_up.lock);
		return;
	}

	c->clean_up.scheduled = true;
	schedule_delayed_work(&c->clean_up.wq, 1);

	mutex_unlock(&c->clean_up.lock);
}

void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
				bool wait_for_completion)
{
	if (wait_for_completion)
		cancel_delayed_work_sync(&c->clean_up.wq);

	mutex_lock(&c->clean_up.lock);
	c->clean_up.scheduled = false;
	mutex_unlock(&c->clean_up.lock);
}
static int gk20a_channel_add_job(struct channel_gk20a *c,
				 struct gk20a_fence *pre_fence,
				 struct gk20a_fence *post_fence,
				 struct priv_cmd_entry *wait_cmd,
				 struct priv_cmd_entry *incr_cmd,
				 bool skip_buffer_refcounting)
{
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job = NULL;
	struct mapped_buffer_node **mapped_buffers = NULL;
	int err = 0, num_mapped_buffers = 0;

	/* job needs reference to this vm (released in channel_update) */
	gk20a_vm_get(vm);

	if (!skip_buffer_refcounting) {
		err = gk20a_vm_get_buffers(vm, &mapped_buffers,
					&num_mapped_buffers);
		if (err) {
			gk20a_vm_put(vm);
			return err;
		}
	}

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job) {
		gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
		gk20a_vm_put(vm);
		return -ENOMEM;
	}

	/* put() is done in gk20a_channel_update() when the job is done */
	c = gk20a_channel_get(c);

	if (c) {
		job->num_mapped_buffers = num_mapped_buffers;
		job->mapped_buffers = mapped_buffers;
		job->pre_fence = gk20a_fence_get(pre_fence);
		job->post_fence = gk20a_fence_get(post_fence);
		job->wait_cmd = wait_cmd;
		job->incr_cmd = incr_cmd;

		mutex_lock(&c->jobs_lock);
		list_add_tail(&job->list, &c->jobs);
		mutex_unlock(&c->jobs_lock);
	} else {
		kfree(job);
		return -ETIMEDOUT;
	}

	return 0;
}
static void gk20a_channel_clean_up_jobs(struct work_struct *work)
{
	struct channel_gk20a *c = container_of(to_delayed_work(work),
			struct channel_gk20a, clean_up.wq);
	struct vm_gk20a *vm;
	struct channel_gk20a_job *job, *n;
	struct gk20a_platform *platform;

	c = gk20a_channel_get(c);
	if (!c)
		return;

	if (!c->g->power_on) { /* shutdown case */
		gk20a_channel_put(c);
		return;
	}

	vm = c->vm;
	platform = gk20a_get_platform(c->g->dev);

	mutex_lock(&c->submit_lock);

	/* gp_put check needs to be done inside submit lock */
	check_gp_put(c->g, c);

	gk20a_channel_cancel_job_clean_up(c, false);

	mutex_lock(&c->jobs_lock);
	list_for_each_entry_safe(job, n, &c->jobs, list) {
		struct gk20a *g = c->g;

		bool completed = gk20a_fence_is_expired(job->post_fence);
		if (!completed)
			break;

		if (c->sync)
			c->sync->signal_timeline(c->sync);

		if (job->num_mapped_buffers)
			gk20a_vm_put_buffers(vm, job->mapped_buffers,
				job->num_mapped_buffers);

		/* Close the fences (this will unref the semaphores and release
		 * them to the pool). */
		gk20a_fence_put(job->pre_fence);
		gk20a_fence_put(job->post_fence);

		/* Free the private command buffers (wait_cmd first and
		 * then incr_cmd i.e. order of allocation) */
		gk20a_free_priv_cmdbuf(c, job->wait_cmd);
		gk20a_free_priv_cmdbuf(c, job->incr_cmd);

		/* job is done. release its vm reference (taken in add_job) */
		gk20a_vm_put(vm);
		/* another bookkeeping taken in add_job. caller must hold a ref
		 * so this wouldn't get freed here. */
		gk20a_channel_put(c);

		list_del_init(&job->list);
		kfree(job);
		gk20a_idle(g->dev);
	}

	/*
	 * If job list is empty then channel is idle and we can free
	 * the syncpt here (given aggressive_destroy flag is set)
	 * Note: check if last submit is complete before destroying
	 * the sync resource
	 */
	if (list_empty(&c->jobs)) {
		mutex_lock(&c->sync_lock);
		if (c->sync && platform->aggressive_sync_destroy &&
			  gk20a_fence_is_expired(c->last_submit.post_fence)) {
			c->sync->destroy(c->sync);
			c->sync = NULL;
		}
		mutex_unlock(&c->sync_lock);
	}
	mutex_unlock(&c->jobs_lock);
	mutex_unlock(&c->submit_lock);

	if (c->update_fn)
		schedule_work(&c->update_fn_work);

	gk20a_channel_put(c);
}
void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
{
	c = gk20a_channel_get(c);
	if (!c)
		return;

	if (!c->g->power_on) { /* shutdown case */
		gk20a_channel_put(c);
		return;
	}

	update_gp_get(c->g, c);
	wake_up(&c->submit_wq);

	trace_gk20a_channel_update(c->hw_chid);
	gk20a_channel_schedule_job_clean_up(c);

	gk20a_channel_put(c);
}
int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
				struct nvgpu_gpfifo *gpfifo,
				struct nvgpu_submit_gpfifo_args *args,
				u32 num_entries,
				u32 flags,
				struct nvgpu_fence *fence,
				struct gk20a_fence **fence_out,
				bool force_need_sync_fence)
{
	struct gk20a *g = c->g;
	struct device *d = dev_from_gk20a(g);
	int err = 0;
	u32 start, end;
	int wait_fence_fd = -1;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_fence *pre_fence = NULL;
	struct gk20a_fence *post_fence = NULL;
	/* we might need two extra gpfifo entries - one for pre fence
	 * and one for post fence. */
	const int extra_entries = 2;
	bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
	bool need_sync_fence = false;

	/*
	 * If user wants to allocate sync_fence_fd always, then respect that;
	 * otherwise, allocate sync_fence_fd based on user flags only
	 */
	if (force_need_sync_fence)
		need_sync_fence = true;

	if (c->has_timedout)
		return -ETIMEDOUT;

	/* fifo not large enough for request. Return error immediately.
	 * Kernel can insert gpfifo entries before and after user gpfifos.
	 * So, add extra_entries in user request. Also, HW with fifo size N
	 * can accept only N-1 entries and so the below condition */
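	/*
	 * Worked example (illustrative): with 1024 gpfifo entries the
	 * hardware accepts at most 1023, and reserving extra_entries = 2
	 * wrapper slots leaves at most 1021 user entries per submit.
	 */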
	if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
		gk20a_err(d, "not enough gpfifo space allocated");
		return -ENOMEM;
	}

	if (!gpfifo && !args)
		return -EINVAL;

	if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
		      NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
	    !fence)
		return -EINVAL;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(d,
			  "not bound to an address space at time of gpfifo"
			  " submission.");
		return -EINVAL;
	}

#ifdef CONFIG_DEBUG_FS
	/* update debug settings */
	if (g->ops.ltc.sync_debugfs)
		g->ops.ltc.sync_debugfs(g);
#endif

	gk20a_dbg_info("channel %d", c->hw_chid);

	/* gk20a_channel_update releases this ref. */
	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(d, "failed to host gk20a to submit gpfifo");
		return err;
	}

	trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
					  c->hw_chid,
					  num_entries,
					  flags,
					  fence ? fence->id : 0,
					  fence ? fence->value : 0);
	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	/* Make sure we have enough space for gpfifo entries. If not,
	 * wait for signals from completed submits */
	if (gp_free_count(c) < num_entries + extra_entries) {
		/* we can get here via locked ioctl and other paths too */
		int locked_path = mutex_is_locked(&c->ioctl_lock);
		if (locked_path)
			mutex_unlock(&c->ioctl_lock);

		trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
		err = wait_event_interruptible(c->submit_wq,
			get_gp_free_count(c) >= num_entries + extra_entries ||
			c->has_timedout);
		trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);

		if (locked_path)
			mutex_lock(&c->ioctl_lock);
	}

	if (c->has_timedout) {
		err = -ETIMEDOUT;
		goto clean_up;
	}

	if (err) {
		gk20a_err(d, "timeout waiting for gpfifo space");
		err = -EAGAIN;
		goto clean_up;
	}
	mutex_lock(&c->submit_lock);

	mutex_lock(&c->sync_lock);
	if (!c->sync) {
		c->sync = gk20a_channel_sync_create(c);
		if (!c->sync) {
			err = -ENOMEM;
			mutex_unlock(&c->sync_lock);
			mutex_unlock(&c->submit_lock);
			goto clean_up;
		}
		if (g->ops.fifo.resetup_ramfc)
			err = g->ops.fifo.resetup_ramfc(c);
		if (err) {
			mutex_unlock(&c->sync_lock);
			mutex_unlock(&c->submit_lock);
			goto clean_up;
		}
	}
	mutex_unlock(&c->sync_lock);

	/*
	 * optionally insert syncpt wait in the beginning of gpfifo submission
	 * when user requested and the wait hasn't expired.
	 * validate that the id makes sense, elide if not
	 * the only reason this isn't being unceremoniously killed is to
	 * keep running some tests which trigger this condition
	 */
	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
		if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
			wait_fence_fd = fence->id;
			err = c->sync->wait_fd(c->sync, wait_fence_fd,
					&wait_cmd, &pre_fence);
		} else {
			err = c->sync->wait_syncpt(c->sync, fence->id,
					fence->value, &wait_cmd, &pre_fence);
		}
	}
	if (err) {
		mutex_unlock(&c->submit_lock);
		goto clean_up;
	}

	if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
	    (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
		need_sync_fence = true;

	/* always insert syncpt increment at end of gpfifo submission
	   to keep track of method completion for idle railgating */
	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
		err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
				 &post_fence, need_wfi, need_sync_fence);
	else
		err = c->sync->incr(c->sync, &incr_cmd,
				&post_fence, need_sync_fence);
	if (err) {
		mutex_unlock(&c->submit_lock);
		goto clean_up;
	}
	if (wait_cmd) {
		((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
			u64_lo32(wait_cmd->gva);
		((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
			u64_hi32(wait_cmd->gva) |
			pbdma_gp_entry1_length_f(wait_cmd->size);
		trace_gk20a_push_cmdbuf(c->g->dev->name,
			0, wait_cmd->size, 0, wait_cmd->ptr);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		wait_cmd->gp_put = c->gpfifo.put;
	}
	/*
	 * Copy source gpfifo entries into the gpfifo ring buffer,
	 * potentially splitting into two memcpies to handle the
	 * ring buffer wrap-around case.
	 */
	start = c->gpfifo.put;
	end = start + num_entries;

	if (gpfifo) {
		if (end > c->gpfifo.entry_num) {
			int length0 = c->gpfifo.entry_num - start;
			int length1 = num_entries - length0;

			memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
				gpfifo,
				length0 * sizeof(*gpfifo));

			memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va,
				gpfifo + length0,
				length1 * sizeof(*gpfifo));

			trace_write_pushbuffer_range(c, gpfifo, NULL,
					0, length0);
			trace_write_pushbuffer_range(c, gpfifo, NULL,
					length0, length1);
		} else {
			memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
				gpfifo,
				num_entries * sizeof(*gpfifo));

			trace_write_pushbuffer_range(c, gpfifo, NULL,
					0, num_entries);
		}
	} else {
		struct nvgpu_gpfifo __user *user_gpfifo =
			(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
		if (end > c->gpfifo.entry_num) {
			int length0 = c->gpfifo.entry_num - start;
			int length1 = num_entries - length0;

			err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
				user_gpfifo,
				length0 * sizeof(*user_gpfifo));
			if (err) {
				mutex_unlock(&c->submit_lock);
				goto clean_up;
			}

			err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va,
				user_gpfifo + length0,
				length1 * sizeof(*user_gpfifo));
			if (err) {
				mutex_unlock(&c->submit_lock);
				goto clean_up;
			}

			trace_write_pushbuffer_range(c, NULL, args,
					0, length0);
			trace_write_pushbuffer_range(c, NULL, args,
					length0, length1);
		} else {
			err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
				user_gpfifo,
				num_entries * sizeof(*user_gpfifo));
			if (err) {
				mutex_unlock(&c->submit_lock);
				goto clean_up;
			}

			trace_write_pushbuffer_range(c, NULL, args,
					0, num_entries);
		}
	}
	c->gpfifo.put = (c->gpfifo.put + num_entries) &
		(c->gpfifo.entry_num - 1);

	if (incr_cmd) {
		((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
			u64_lo32(incr_cmd->gva);
		((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
			u64_hi32(incr_cmd->gva) |
			pbdma_gp_entry1_length_f(incr_cmd->size);
		trace_gk20a_push_cmdbuf(c->g->dev->name,
			0, incr_cmd->size, 0, incr_cmd->ptr);

		c->gpfifo.put = (c->gpfifo.put + 1) &
			(c->gpfifo.entry_num - 1);

		/* save gp_put */
		incr_cmd->gp_put = c->gpfifo.put;
	}

	gk20a_fence_put(c->last_submit.pre_fence);
	gk20a_fence_put(c->last_submit.post_fence);
	c->last_submit.pre_fence = pre_fence;
	c->last_submit.post_fence = post_fence;
	if (fence_out)
		*fence_out = gk20a_fence_get(post_fence);
	/* TODO! Check for errors... */
	gk20a_channel_add_job(c, pre_fence, post_fence,
				wait_cmd, incr_cmd,
				skip_buffer_refcounting);

	c->cmds_pending = true;
	gk20a_bar1_writel(g,
		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
		c->gpfifo.put);

	mutex_unlock(&c->submit_lock);

	trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
					     c->hw_chid,
					     num_entries,
					     flags,
					     post_fence->syncpt_id,
					     post_fence->syncpt_value);

	gk20a_dbg_info("post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	gk20a_dbg_fn("done");
	return err;
clean_up:
	gk20a_err(d, "fail");
	free_priv_cmdbuf(c, wait_cmd);
	free_priv_cmdbuf(c, incr_cmd);
	gk20a_fence_put(pre_fence);
	gk20a_fence_put(post_fence);
	gk20a_idle(g->dev);
	return err;
}
int gk20a_init_channel_support(struct gk20a *g, u32 chid)
{
	struct channel_gk20a *c = g->fifo.channel + chid;

	c->g = NULL;
	c->hw_chid = chid;
	c->bound = false;
	spin_lock_init(&c->ref_obtain_lock);
	atomic_set(&c->ref_count, 0);
	c->referenceable = false;
	init_waitqueue_head(&c->ref_count_dec_wq);
	mutex_init(&c->ioctl_lock);
	mutex_init(&c->jobs_lock);
	mutex_init(&c->submit_lock);
	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
	mutex_init(&c->clean_up.lock);
	mutex_init(&c->sync_lock);
	INIT_LIST_HEAD(&c->jobs);
#if defined(CONFIG_GK20A_CYCLE_STATS)
	mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
	mutex_init(&c->cs_client_mutex);
#endif
	INIT_LIST_HEAD(&c->dbg_s_list);
	mutex_init(&c->dbg_s_lock);
	list_add(&c->free_chs, &g->fifo.free_chs);

	return 0;
}
int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
{
	int err = 0;
	struct gk20a_fence *fence = ch->last_submit.post_fence;

	if (!ch->cmds_pending)
		return 0;

	/* Do not wait for a timedout channel */
	if (ch->has_timedout)
		return -ETIMEDOUT;

	gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
		     fence->syncpt_value, fence->semaphore);

	err = gk20a_fence_wait(fence, timeout);
	if (err < 0)
		dev_warn(dev_from_gk20a(ch->g),
			 "timed out waiting for gk20a channel to finish");
	else
		ch->cmds_pending = false;

	return err;
}
static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
					ulong id, u32 offset,
					u32 payload, long timeout)
{
	struct platform_device *pdev = ch->g->dev;
	struct dma_buf *dmabuf;
	void *data;
	u32 *semaphore;
	int ret = 0;
	long remain;

	/* do not wait if channel has timed out */
	if (ch->has_timedout)
		return -ETIMEDOUT;

	dmabuf = dma_buf_get(id);
	if (IS_ERR(dmabuf)) {
		gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
			   id);
		return -EINVAL;
	}

	data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
	if (!data) {
		gk20a_err(&pdev->dev, "failed to map notifier memory");
		ret = -EINVAL;
		goto cleanup_put;
	}

	semaphore = data + (offset & ~PAGE_MASK);

	remain = wait_event_interruptible_timeout(
			ch->semaphore_wq,
			*semaphore == payload || ch->has_timedout,
			timeout);

	if (remain == 0 && *semaphore != payload)
		ret = -ETIMEDOUT;
	else if (remain < 0)
		ret = remain;

	dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
cleanup_put:
	dma_buf_put(dmabuf);
	return ret;
}
static int gk20a_channel_wait(struct channel_gk20a *ch,
			      struct nvgpu_wait_args *args)
{
	struct device *d = dev_from_gk20a(ch->g);
	struct dma_buf *dmabuf;
	struct notification *notif;
	struct timespec tv;
	u64 jiffies;
	ulong id;
	u32 offset;
	unsigned long timeout;
	int remain, ret = 0;

	if (ch->has_timedout)
		return -ETIMEDOUT;

	if (args->timeout == NVGPU_NO_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT;
	else
		timeout = (u32)msecs_to_jiffies(args->timeout);

	switch (args->type) {
	case NVGPU_WAIT_TYPE_NOTIFIER:
		id = args->condition.notifier.dmabuf_fd;
		offset = args->condition.notifier.offset;

		dmabuf = dma_buf_get(id);
		if (IS_ERR(dmabuf)) {
			gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
				   id);
			return -EINVAL;
		}

		notif = dma_buf_vmap(dmabuf);
		if (!notif) {
			gk20a_err(d, "failed to map notifier memory");
			return -ENOMEM;
		}

		notif = (struct notification *)((uintptr_t)notif + offset);

		/* user should set status pending before
		 * calling this ioctl */
		remain = wait_event_interruptible_timeout(
				ch->notifier_wq,
				notif->status == 0 || ch->has_timedout,
				timeout);

		if (remain == 0 && notif->status != 0) {
			ret = -ETIMEDOUT;
			goto notif_clean_up;
		} else if (remain < 0) {
			ret = -EINTR;
			goto notif_clean_up;
		}

		/* TBD: fill in correct information */
		jiffies = get_jiffies_64();
		jiffies_to_timespec(jiffies, &tv);
		notif->timestamp.nanoseconds[0] = tv.tv_nsec;
		notif->timestamp.nanoseconds[1] = tv.tv_sec;
		notif->info32 = 0xDEADBEEF; /* should be object name */
		notif->info16 = ch->hw_chid; /* should be method offset */

notif_clean_up:
		dma_buf_vunmap(dmabuf, notif);
		return ret;

	case NVGPU_WAIT_TYPE_SEMAPHORE:
		ret = gk20a_channel_wait_semaphore(ch,
				args->condition.semaphore.dmabuf_fd,
				args->condition.semaphore.offset,
				args->condition.semaphore.payload,
				timeout);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
/* poll events for semaphores */

static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
{
	gk20a_dbg_fn("");

	mutex_lock(&ev->lock);

	ev->events_enabled = true;
	ev->num_pending_events = 0;

	mutex_unlock(&ev->lock);
}

static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
{
	gk20a_dbg_fn("");

	mutex_lock(&ev->lock);

	ev->events_enabled = false;
	ev->num_pending_events = 0;

	mutex_unlock(&ev->lock);
}

static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
{
	gk20a_dbg_fn("");

	mutex_lock(&ev->lock);

	if (ev->events_enabled &&
			ev->num_pending_events > 0)
		ev->num_pending_events--;

	mutex_unlock(&ev->lock);
}
static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
			  struct nvgpu_channel_events_ctrl_args *args)
{
	int ret = 0;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
			"channel events ctrl cmd %d", args->cmd);

	switch (args->cmd) {
	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
		gk20a_channel_events_enable(&ch->poll_events);
		break;

	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
		gk20a_channel_events_disable(&ch->poll_events);
		break;

	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
		gk20a_channel_events_clear(&ch->poll_events);
		break;

	default:
		gk20a_err(dev_from_gk20a(ch->g),
			   "unrecognized channel events ctrl cmd: 0x%x",
			   args->cmd);
		ret = -EINVAL;
		break;
	}

	return ret;
}
void gk20a_channel_event(struct channel_gk20a *ch)
{
	mutex_lock(&ch->poll_events.lock);

	if (ch->poll_events.events_enabled) {
		gk20a_dbg_info("posting event on channel id %d",
				ch->hw_chid);
		gk20a_dbg_info("%d channel events pending",
				ch->poll_events.num_pending_events);

		ch->poll_events.num_pending_events++;
		/* not waking up here, caller does that */
	}

	mutex_unlock(&ch->poll_events.lock);
}
unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
{
	unsigned int mask = 0;
	struct channel_gk20a *ch = filep->private_data;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");

	poll_wait(filep, &ch->semaphore_wq, wait);

	mutex_lock(&ch->poll_events.lock);

	if (ch->poll_events.events_enabled &&
			ch->poll_events.num_pending_events > 0) {
		gk20a_dbg_info("found pending event on channel id %d",
				ch->hw_chid);
		gk20a_dbg_info("%d channel events pending",
				ch->poll_events.num_pending_events);
		mask = (POLLPRI | POLLIN);
	}

	mutex_unlock(&ch->poll_events.lock);

	return mask;
}
static int gk20a_channel_set_priority(struct channel_gk20a *ch,
		u32 priority)
{
	u32 timeslice_timeout;

	/* set priority of graphics channel */
	switch (priority) {
	case NVGPU_PRIORITY_LOW:
		/* 64 << 3 = 512us */
		timeslice_timeout = 64;
		break;
	case NVGPU_PRIORITY_MEDIUM:
		/* 128 << 3 = 1024us */
		timeslice_timeout = 128;
		break;
	case NVGPU_PRIORITY_HIGH:
		/* 255 << 3 = 2048us */
		timeslice_timeout = 255;
		break;
	default:
		pr_err("Unsupported priority");
		return -EINVAL;
	}

	return channel_gk20a_set_schedule_params(ch,
			timeslice_timeout);
}
static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
			    struct nvgpu_zcull_bind_args *args)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;

	gk20a_dbg_fn("");

	return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
				args->gpu_va, args->mode);
}
/* in this context the "channel" is the host1x channel which
 * maps to *all* gk20a channels */
int gk20a_channel_suspend(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;
	int err;

	gk20a_dbg_fn("");

	/* wait for engine idle */
	err = g->ops.fifo.wait_engine_idle(g);
	if (err)
		return err;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = &f->channel[chid];
		if (gk20a_channel_get(ch)) {
			gk20a_dbg_info("suspend channel %d", chid);
			/* disable channel */
			g->ops.fifo.disable_channel(ch);
			/* preempt the channel */
			gk20a_fifo_preempt(ch->g, ch);
			/* wait for channel update notifiers */
			if (ch->update_fn &&
					work_pending(&ch->update_fn_work))
				flush_work(&ch->update_fn_work);
			gk20a_channel_cancel_job_clean_up(ch, true);

			channels_in_use = true;

			gk20a_channel_put(ch);
		}
	}

	if (channels_in_use) {
		g->ops.fifo.update_runlist(g, 0, ~0, false, true);

		for (chid = 0; chid < f->num_channels; chid++) {
			if (gk20a_channel_get(&f->channel[chid])) {
				g->ops.fifo.unbind_channel(&f->channel[chid]);
				gk20a_channel_put(&f->channel[chid]);
			}
		}
	}

	gk20a_dbg_fn("done");
	return 0;
}
int gk20a_channel_resume(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;

	gk20a_dbg_fn("");

	for (chid = 0; chid < f->num_channels; chid++) {
		if (gk20a_channel_get(&f->channel[chid])) {
			gk20a_dbg_info("resume channel %d", chid);
			g->ops.fifo.bind_channel(&f->channel[chid]);
			channels_in_use = true;
			gk20a_channel_put(&f->channel[chid]);
		}
	}

	if (channels_in_use)
		g->ops.fifo.update_runlist(g, 0, ~0, true, true);

	gk20a_dbg_fn("done");
	return 0;
}
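
/*
 * Broadcast a semaphore wakeup: post an event on every live channel, wake
 * its semaphore waiters, and trigger job cleanup via gk20a_channel_update().
 */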
void gk20a_channel_semaphore_wakeup(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;

	gk20a_dbg_fn("");

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *c = g->fifo.channel+chid;
		if (gk20a_channel_get(c)) {
			gk20a_channel_event(c);
			wake_up_interruptible_all(&c->semaphore_wq);
			gk20a_channel_update(c, 0);
			gk20a_channel_put(c);
		}
	}
}
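
/*
 * Thin ioctl wrapper around gk20a_submit_channel_gpfifo() that translates
 * the returned gk20a_fence into what userspace asked for: a sync fence fd
 * or a raw syncpoint id/value pair.
 */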
static int gk20a_ioctl_channel_submit_gpfifo(
	struct channel_gk20a *ch,
	struct nvgpu_submit_gpfifo_args *args)
{
	struct gk20a_fence *fence_out;
	int ret = 0;

	gk20a_dbg_fn("");

	if (ch->has_timedout)
		return -ETIMEDOUT;

	ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
					  args->flags, &args->fence,
					  &fence_out, false);

	if (ret)
		goto clean_up;

	/* Convert fence_out to something we can pass back to user space. */
	if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
		if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
			int fd = gk20a_fence_install_fd(fence_out);
			if (fd < 0)
				ret = fd;
			else
				args->fence.id = fd;
		} else {
			args->fence.id = fence_out->syncpt_id;
			args->fence.value = fence_out->syncpt_value;
		}
	}
	gk20a_fence_put(fence_out);

clean_up:
	return ret;
}
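
/* Populate the gk20a-specific fifo channel ops in the gpu_ops table. */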
void gk20a_init_channel(struct gpu_ops *gops)
{
	gops->fifo.bind_channel = channel_gk20a_bind;
	gops->fifo.unbind_channel = channel_gk20a_unbind;
	gops->fifo.disable_channel = channel_gk20a_disable;
	gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
	gops->fifo.free_inst = channel_gk20a_free_inst;
	gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
}
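
/*
 * Top-level ioctl dispatcher for a channel fd: validates the command,
 * copies the argument buffer in, takes a channel reference plus the
 * per-channel ioctl_lock, dispatches, then copies results back out.
 * Illustrative userspace call (a sketch, not part of this file):
 *
 *	struct nvgpu_set_timeout_args a = { .timeout = 2000 };
 *	ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_SET_TIMEOUT, &a);
 */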
long gk20a_channel_ioctl(struct file *filp,
	unsigned int cmd, unsigned long arg)
{
	struct channel_gk20a *ch = filp->private_data;
	struct platform_device *dev = ch->g->dev;
	u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
	int err = 0;

	gk20a_dbg_fn("start %d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
		return -EINVAL;

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	/* take a ref or return timeout if channel refs can't be taken */
	ch = gk20a_channel_get(ch);
	if (!ch)
		return -ETIMEDOUT;

	/* protect our sanity for threaded userspace - most of the channel is
	 * not thread safe */
	mutex_lock(&ch->ioctl_lock);

	/* this ioctl call keeps a ref to the file which keeps a ref to the
	 * channel */

	switch (cmd) {
	case NVGPU_IOCTL_CHANNEL_OPEN:
		err = gk20a_channel_open_ioctl(ch->g,
			(struct nvgpu_channel_open_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.gr.alloc_obj_ctx(ch,
				(struct nvgpu_alloc_obj_ctx_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.gr.free_obj_ctx(ch,
				(struct nvgpu_free_obj_ctx_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_alloc_channel_gpfifo(ch,
				(struct nvgpu_alloc_gpfifo_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
		err = gk20a_ioctl_channel_submit_gpfifo(ch,
				(struct nvgpu_submit_gpfifo_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_WAIT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}

		/* waiting is thread-safe, not dropping this mutex could
		 * deadlock in certain conditions */
		mutex_unlock(&ch->ioctl_lock);

		err = gk20a_channel_wait(ch,
				(struct nvgpu_wait_args *)buf);

		mutex_lock(&ch->ioctl_lock);

		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_zcull_bind(ch,
				(struct nvgpu_zcull_bind_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_init_error_notifier(ch,
				(struct nvgpu_set_error_notifier *)buf);
		gk20a_idle(dev);
		break;
#ifdef CONFIG_GK20A_CYCLE_STATS
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_cycle_stats(ch,
				(struct nvgpu_cycle_stats_args *)buf);
		gk20a_idle(dev);
		break;
#endif
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			  timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		break;
	}
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
		bool timeout_debug_dump = !((u32)
			((struct nvgpu_set_timeout_ex_args *)buf)->flags &
			(1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			  timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		ch->timeout_debug_dump = timeout_debug_dump;
		break;
	}
	case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
		((struct nvgpu_get_param_args *)buf)->value =
			ch->has_timedout;
		break;
	case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		gk20a_channel_set_priority(ch,
			((struct nvgpu_set_priority_args *)buf)->priority);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ENABLE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		/* enable channel */
		gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
			gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
			ccsr_channel_enable_set_true_f());
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_DISABLE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		/* disable channel */
		gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
			gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
			ccsr_channel_enable_clr_true_f());
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_PREEMPT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_fifo_preempt(ch->g, ch);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_fifo_force_reset_ch(ch, true);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
		err = gk20a_channel_events_ctrl(ch,
			(struct nvgpu_channel_events_ctrl_args *)buf);
		break;
#ifdef CONFIG_GK20A_CYCLE_STATS
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(&dev->dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_cycle_stats_snapshot(ch,
				(struct nvgpu_cycle_stats_snapshot_args *)buf);
		gk20a_idle(dev);
		break;
#endif
	default:
		dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));

	mutex_unlock(&ch->ioctl_lock);

	gk20a_channel_put(ch);

	gk20a_dbg_fn("end");

	return err;
}