drivers/gpu/nvgpu/gk20a/channel_gk20a.c
1 /*
2  * GK20A Graphics channel
3  *
4  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/nvhost.h>
20 #include <linux/list.h>
21 #include <linux/delay.h>
22 #include <linux/highmem.h> /* needed for nvmap.h */
23 #include <trace/events/gk20a.h>
24 #include <linux/scatterlist.h>
25 #include <linux/file.h>
26 #include <linux/anon_inodes.h>
27 #include <linux/dma-buf.h>
28 #include <linux/vmalloc.h>
29
30 #include "debug_gk20a.h"
31
32 #include "gk20a.h"
33 #include "dbg_gpu_gk20a.h"
34 #include "fence_gk20a.h"
35 #include "semaphore_gk20a.h"
36
37 #include "hw_ram_gk20a.h"
38 #include "hw_fifo_gk20a.h"
39 #include "hw_pbdma_gk20a.h"
40 #include "hw_ccsr_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42
43 #define NVMAP_HANDLE_PARAM_SIZE 1
44
45 #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT       64      /* channels */
46
47 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
48 static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
49
50 static void free_priv_cmdbuf(struct channel_gk20a *c,
51                              struct priv_cmd_entry *e);
52
53 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
54 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
55
56 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
57 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
58
59 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
60
61 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
62                                         bool add);
63 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
64
65 /* allocate GPU channel */
66 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
67 {
68         struct channel_gk20a *ch = NULL;
69         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
70
71         mutex_lock(&f->free_chs_mutex);
72         if (!list_empty(&f->free_chs)) {
73                 ch = list_first_entry(&f->free_chs, struct channel_gk20a,
74                                 free_chs);
75                 list_del(&ch->free_chs);
76                 WARN_ON(atomic_read(&ch->ref_count));
77                 WARN_ON(ch->referenceable);
78                 f->used_channels++;
79         }
80         mutex_unlock(&f->free_chs_mutex);
81
82         if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
83                 platform->aggressive_sync_destroy = true;
84
85         return ch;
86 }
87
88 static void free_channel(struct fifo_gk20a *f,
89                 struct channel_gk20a *ch)
90 {
91         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
92
93         trace_gk20a_release_used_channel(ch->hw_chid);
94         /* refcount is zero here and channel is in a freed/dead state */
95         mutex_lock(&f->free_chs_mutex);
96         /* add to head to increase visibility of timing-related bugs */
97         list_add(&ch->free_chs, &f->free_chs);
98         f->used_channels--;
99         mutex_unlock(&f->free_chs_mutex);
100
101         if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
102                 platform->aggressive_sync_destroy = false;
103 }
104
105 int channel_gk20a_commit_va(struct channel_gk20a *c)
106 {
107         gk20a_dbg_fn("");
108
109         if (!c->inst_block.cpu_va)
110                 return -ENOMEM;
111
112         gk20a_init_inst_block(&c->inst_block, c->vm,
113                         c->vm->gmmu_page_sizes[gmmu_page_size_big]);
114
115         return 0;
116 }
117
118 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
119 {
120         u32 addr_lo;
121         u32 addr_hi;
122         void *inst_ptr;
123
124         gk20a_dbg_fn("");
125
126         inst_ptr = c->inst_block.cpu_va;
127         if (!inst_ptr)
128                 return -ENOMEM;
129
130         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
131         addr_hi = u64_hi32(c->userd_iova);
132
133         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
134                 c->hw_chid, (u64)c->userd_iova);
135
136         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
137                  pbdma_userd_target_vid_mem_f() |
138                  pbdma_userd_addr_f(addr_lo));
139
140         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
141                  pbdma_userd_target_vid_mem_f() |
142                  pbdma_userd_hi_addr_f(addr_hi));
143
144         return 0;
145 }
146
147 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
148                                 u32 timeslice_timeout)
149 {
150         void *inst_ptr;
151         int shift = 3;
152         int value = timeslice_timeout;
153
154         inst_ptr = c->inst_block.cpu_va;
155         if (!inst_ptr)
156                 return -ENOMEM;
157
158         /* disable channel */
159         c->g->ops.fifo.disable_channel(c);
160
161         /* preempt the channel */
162         WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
163
164         /* value field is 8 bits long */
165         while (value >= 1 << 8) {
166                 value >>= 1;
167                 shift++;
168         }
169
170         /* time slice register is only 18 bits long */
171         if ((value << shift) >= 1<<19) {
172                 pr_err("Requested timeslice value is clamped to 18 bits\n");
173                 value = 255;
174                 shift = 10;
175         }
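        /*
         * Illustrative worked example (added, not in the original source):
         * for a requested timeslice_timeout of 1000 the loop above yields
         * value = 250 and shift = 5, so value << shift == 8000 == 1000 << 3,
         * i.e. the request scaled by the initial shift of 3.
         */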
176
177         /* set new timeslice */
178         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
179                 value | (shift << 12) |
180                 fifo_runlist_timeslice_enable_true_f());
181
182         /* enable channel */
183         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
184                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
185                 ccsr_channel_enable_set_true_f());
186
187         return 0;
188 }
189
190 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
191                         u64 gpfifo_base, u32 gpfifo_entries)
192 {
193         void *inst_ptr;
194
195         gk20a_dbg_fn("");
196
197         inst_ptr = c->inst_block.cpu_va;
198         if (!inst_ptr)
199                 return -ENOMEM;
200
201         memset(inst_ptr, 0, ram_fc_size_val_v());
202
203         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
204                 pbdma_gp_base_offset_f(
205                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
206
207         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
208                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
209                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
210
211         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
212                  c->g->ops.fifo.get_pbdma_signature(c->g));
213
214         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
215                 pbdma_formats_gp_fermi0_f() |
216                 pbdma_formats_pb_fermi1_f() |
217                 pbdma_formats_mp_fermi0_f());
218
219         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
220                 pbdma_pb_header_priv_user_f() |
221                 pbdma_pb_header_method_zero_f() |
222                 pbdma_pb_header_subchannel_zero_f() |
223                 pbdma_pb_header_level_main_f() |
224                 pbdma_pb_header_first_true_f() |
225                 pbdma_pb_header_type_inc_f());
226
227         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
228                 pbdma_subdevice_id_f(1) |
229                 pbdma_subdevice_status_active_f() |
230                 pbdma_subdevice_channel_dma_enable_f());
231
232         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
233
234         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
235                 pbdma_acquire_retry_man_2_f() |
236                 pbdma_acquire_retry_exp_2_f() |
237                 pbdma_acquire_timeout_exp_max_f() |
238                 pbdma_acquire_timeout_man_max_f() |
239                 pbdma_acquire_timeout_en_disable_f());
240
241         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
242                 fifo_runlist_timeslice_timeout_128_f() |
243                 fifo_runlist_timeslice_timescale_3_f() |
244                 fifo_runlist_timeslice_enable_true_f());
245
246         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
247                 fifo_pb_timeslice_timeout_16_f() |
248                 fifo_pb_timeslice_timescale_0_f() |
249                 fifo_pb_timeslice_enable_true_f());
250
251         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
252
253         return channel_gk20a_commit_userd(c);
254 }
255
256 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
257 {
258         BUG_ON(!c->userd_cpu_va);
259
260         gk20a_dbg_fn("");
261
262         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
263         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
264         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
265         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
266         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
267         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
268         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
269         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
270         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
271         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
272
273         return 0;
274 }
275
276 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
277 {
278         struct gk20a *g = ch_gk20a->g;
279         struct fifo_gk20a *f = &g->fifo;
280         struct fifo_engine_info_gk20a *engine_info =
281                 f->engine_info + ENGINE_GR_GK20A;
282
283         u32 inst_ptr = sg_phys(ch_gk20a->inst_block.sgt->sgl)
284                 >> ram_in_base_shift_v();
285
286         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
287                 ch_gk20a->hw_chid, inst_ptr);
288
289         ch_gk20a->bound = true;
290
291         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
292                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
293                  ~ccsr_channel_runlist_f(~0)) |
294                  ccsr_channel_runlist_f(engine_info->runlist_id));
295
296         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
297                 ccsr_channel_inst_ptr_f(inst_ptr) |
298                 ccsr_channel_inst_target_vid_mem_f() |
299                 ccsr_channel_inst_bind_true_f());
300
301         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
302                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
303                  ~ccsr_channel_enable_set_f(~0)) |
304                  ccsr_channel_enable_set_true_f());
305 }
306
307 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
308 {
309         struct gk20a *g = ch_gk20a->g;
310         struct gk20a_platform *platform = gk20a_get_platform(g->dev);
311
312         gk20a_dbg_fn("");
313
314         if (ch_gk20a->bound)
315                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
316                         ccsr_channel_inst_ptr_f(0) |
317                         ccsr_channel_inst_bind_false_f());
318
319         ch_gk20a->bound = false;
320
321         /*
322          * if we are aggressive then we can destroy the syncpt
323          * resource at this point
324          * if not, then it will be destroyed at channel_free()
325          */
326         if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
327                 ch_gk20a->sync->destroy(ch_gk20a->sync);
328                 ch_gk20a->sync = NULL;
329         }
330 }
331
332 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
333 {
334         int err;
335
336         gk20a_dbg_fn("");
337
338         err = gk20a_alloc_inst_block(g, &ch->inst_block);
339         if (err)
340                 return err;
341
342         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
343                 ch->hw_chid, (u64)sg_phys(ch->inst_block.sgt->sgl));
344
345         gk20a_dbg_fn("done");
346         return 0;
347 }
348
349 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
350 {
351         gk20a_free_inst_block(g, &ch->inst_block);
352 }
353
354 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
355 {
356         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
357 }
358
359 void channel_gk20a_enable(struct channel_gk20a *ch)
360 {
361         /* enable channel */
362         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
363                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
364                 ccsr_channel_enable_set_true_f());
365 }
366
367 void channel_gk20a_disable(struct channel_gk20a *ch)
368 {
369         /* disable channel */
370         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
371                 gk20a_readl(ch->g,
372                         ccsr_channel_r(ch->hw_chid)) |
373                         ccsr_channel_enable_clr_true_f());
374 }
375
376 void gk20a_channel_abort(struct channel_gk20a *ch)
377 {
378         struct channel_gk20a_job *job, *n;
379         bool released_job_semaphore = false;
380
381         gk20a_dbg_fn("");
382
383         /* make sure new kickoffs are prevented */
384         ch->has_timedout = true;
385
386         ch->g->ops.fifo.disable_channel(ch);
387
388         /* ensure no fences are pending */
389         mutex_lock(&ch->submit_lock);
390         if (ch->sync)
391                 ch->sync->set_min_eq_max(ch->sync);
392         mutex_unlock(&ch->submit_lock);
393
394         /* release all job semaphores (applies only to jobs that use
395            semaphore synchronization) */
396         mutex_lock(&ch->jobs_lock);
397         list_for_each_entry_safe(job, n, &ch->jobs, list) {
398                 if (job->post_fence->semaphore) {
399                         gk20a_semaphore_release(job->post_fence->semaphore);
400                         released_job_semaphore = true;
401                 }
402         }
403         mutex_unlock(&ch->jobs_lock);
404
405         if (released_job_semaphore) {
406                 wake_up_interruptible_all(&ch->semaphore_wq);
407                 gk20a_channel_update(ch, 0);
408         }
409 }
410
411 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
412 {
413         bool channel_idle = false;
414         unsigned long end_jiffies = jiffies +
415                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
416
417         do {
418                 mutex_lock(&ch->jobs_lock);
419                 channel_idle = list_empty(&ch->jobs);
420                 mutex_unlock(&ch->jobs_lock);
421                 if (channel_idle)
422                         break;
423
424                 usleep_range(1000, 3000);
425         } while (time_before(jiffies, end_jiffies)
426                         || !tegra_platform_is_silicon());
427
428         if (!channel_idle) {
429                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
430                                 ch->hw_chid);
431                 return -EBUSY;
432         }
433
434         return 0;
435 }
436
437 void gk20a_disable_channel(struct channel_gk20a *ch,
438                            bool finish,
439                            unsigned long finish_timeout)
440 {
441         gk20a_dbg_fn("");
442
443         if (finish) {
444                 int err = gk20a_channel_finish(ch, finish_timeout);
445                 WARN_ON(err);
446         }
447
448         /* disable the channel from hw and increment syncpoints */
449         gk20a_channel_abort(ch);
450
451         gk20a_wait_channel_idle(ch);
452
453         /* preempt the channel */
454         ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
455
456         /* remove channel from runlist */
457         channel_gk20a_update_runlist(ch, false);
458 }
459
460 #if defined(CONFIG_GK20A_CYCLE_STATS)
461
462 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
463 {
464         /* disable existing cyclestats buffer */
465         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
466         if (ch->cyclestate.cyclestate_buffer_handler) {
467                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
468                                 ch->cyclestate.cyclestate_buffer);
469                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
470                 ch->cyclestate.cyclestate_buffer_handler = NULL;
471                 ch->cyclestate.cyclestate_buffer = NULL;
472                 ch->cyclestate.cyclestate_buffer_size = 0;
473         }
474         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
475 }
476
477 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
478                        struct nvgpu_cycle_stats_args *args)
479 {
480         struct dma_buf *dmabuf;
481         void *virtual_address;
482
483         if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
484
485                 /* set up new cyclestats buffer */
486                 dmabuf = dma_buf_get(args->dmabuf_fd);
487                 if (IS_ERR(dmabuf))
488                         return PTR_ERR(dmabuf);
489                 virtual_address = dma_buf_vmap(dmabuf);
490                 if (!virtual_address)
491                         return -ENOMEM;
492
493                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
494                 ch->cyclestate.cyclestate_buffer = virtual_address;
495                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
496                 return 0;
497
498         } else if (!args->dmabuf_fd &&
499                         ch->cyclestate.cyclestate_buffer_handler) {
500                 gk20a_free_cycle_stats_buffer(ch);
501                 return 0;
502
503         } else if (!args->dmabuf_fd &&
504                         !ch->cyclestate.cyclestate_buffer_handler) {
505                 /* no request from GL */
506                 return 0;
507
508         } else {
509                 pr_err("channel already has cyclestats buffer\n");
510                 return -EINVAL;
511         }
512 }
513 #endif
514
515 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
516                 struct nvgpu_set_error_notifier *args) {
517         void *va;
518
519         struct dma_buf *dmabuf;
520
521         if (!args->mem) {
522                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
523                 return -EINVAL;
524         }
525
526         dmabuf = dma_buf_get(args->mem);
527
528         if (ch->error_notifier_ref)
529                 gk20a_free_error_notifiers(ch);
530
531         if (IS_ERR(dmabuf)) {
532                 pr_err("Invalid handle: %d\n", args->mem);
533                 return -EINVAL;
534         }
535         /* map handle */
536         va = dma_buf_vmap(dmabuf);
537         if (!va) {
538                 dma_buf_put(dmabuf);
539                 pr_err("Cannot map notifier handle\n");
540                 return -ENOMEM;
541         }
542
543         /* set channel notifiers pointer */
544         ch->error_notifier_ref = dmabuf;
545         ch->error_notifier = va + args->offset;
546         ch->error_notifier_va = va;
547         memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
548         return 0;
549 }
550
551 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
552 {
553         if (ch->error_notifier_ref) {
554                 struct timespec time_data;
555                 u64 nsec;
556                 getnstimeofday(&time_data);
557                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
558                                 (u64)time_data.tv_nsec;
559                 ch->error_notifier->time_stamp.nanoseconds[0] =
560                                 (u32)nsec;
561                 ch->error_notifier->time_stamp.nanoseconds[1] =
562                                 (u32)(nsec >> 32);
563                 ch->error_notifier->info32 = error;
564                 ch->error_notifier->status = 0xffff;
565
566                 gk20a_err(dev_from_gk20a(ch->g),
567                     "error notifier set to %d for ch %d", error, ch->hw_chid);
568         }
569 }
570
571 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
572 {
573         if (ch->error_notifier_ref) {
574                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
575                 dma_buf_put(ch->error_notifier_ref);
576                 ch->error_notifier_ref = NULL;
577                 ch->error_notifier = NULL;
578                 ch->error_notifier_va = NULL;
579         }
580 }
581
582 /* Returns delta of cyclic integers a and b. If a is ahead of b, delta
583  * is positive */
584 static int cyclic_delta(int a, int b)
585 {
586         return a - b;
587 }
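/*
 * Illustrative note (added, not in the original source): this relies on the
 * int subtraction wrapping in two's complement (kernel builds typically use
 * -fno-strict-overflow). For example, if the hw counter has wrapped to
 * INT_MIN while the last handled value is still INT_MAX, then
 * cyclic_delta(INT_MIN, INT_MAX) evaluates to 1, correctly reporting that
 * the hw counter is one step ahead despite the wrap.
 */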
588
589 static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
590 {
591         int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
592         int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);
593
594         /* wait until all stalling irqs are handled */
595         wait_event(g->sw_irq_stall_last_handled_wq,
596                    cyclic_delta(stall_irq_threshold,
597                                 atomic_read(&g->sw_irq_stall_last_handled))
598                    <= 0);
599
600         /* wait until all non-stalling irqs are handled */
601         wait_event(g->sw_irq_nonstall_last_handled_wq,
602                    cyclic_delta(nonstall_irq_threshold,
603                                 atomic_read(&g->sw_irq_nonstall_last_handled))
604                    <= 0);
605 }
606
607 static void gk20a_wait_until_counter_is_N(
608         struct channel_gk20a *ch, atomic_t *counter, int wait_value,
609         wait_queue_head_t *wq, const char *caller, const char *counter_name)
610 {
611         while (true) {
612                 if (wait_event_timeout(
613                             *wq,
614                             atomic_read(counter) == wait_value,
615                             msecs_to_jiffies(5000)) > 0)
616                         break;
617
618                 gk20a_warn(dev_from_gk20a(ch->g),
619                            "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
620                            caller, ch->hw_chid, counter_name,
621                            atomic_read(counter), wait_value);
622         }
623 }
624
625
626
627 /* call ONLY when no references to the channel exist: after the last put */
628 static void gk20a_free_channel(struct channel_gk20a *ch)
629 {
630         struct gk20a *g = ch->g;
631         struct device *d = dev_from_gk20a(g);
632         struct fifo_gk20a *f = &g->fifo;
633         struct gr_gk20a *gr = &g->gr;
634         struct vm_gk20a *ch_vm = ch->vm;
635         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
636         struct dbg_session_gk20a *dbg_s;
637
638         gk20a_dbg_fn("");
639
640         WARN_ON(ch->g == NULL);
641
642         trace_gk20a_free_channel(ch->hw_chid);
643
644         /* prevent new kickoffs */
645         ch->has_timedout = true;
646         wmb();
647
648         /* wait until there's only our ref to the channel */
649         gk20a_wait_until_counter_is_N(
650                 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
651                 __func__, "references");
652
653         /* wait until all pending interrupts for recently completed
654          * jobs are handled */
655         gk20a_wait_for_deferred_interrupts(g);
656
657         /* prevent new refs */
658         spin_lock(&ch->ref_obtain_lock);
659         if (!ch->referenceable) {
660                 spin_unlock(&ch->ref_obtain_lock);
661                 gk20a_err(dev_from_gk20a(ch->g),
662                           "Extra %s() called to channel %u",
663                           __func__, ch->hw_chid);
664                 return;
665         }
666         ch->referenceable = false;
667         spin_unlock(&ch->ref_obtain_lock);
668
669         /* matches with the initial reference in gk20a_open_new_channel() */
670         atomic_dec(&ch->ref_count);
671
672         /* wait until no more refs to the channel */
673         gk20a_wait_until_counter_is_N(
674                 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
675                 __func__, "references");
676
677         /* if engine reset was deferred, perform it now */
678         mutex_lock(&f->deferred_reset_mutex);
679         if (g->fifo.deferred_reset_pending) {
680                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
681                            " deferred, running now");
682                 gk20a_fifo_reset_engine(g, g->fifo.deferred_fault_engines);
683                 g->fifo.deferred_fault_engines = 0;
684                 g->fifo.deferred_reset_pending = false;
685         }
686         mutex_unlock(&f->deferred_reset_mutex);
687
688         if (!ch->bound)
689                 return;
690
691         if (!gk20a_channel_as_bound(ch))
692                 goto unbind;
693
694         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
695                         timeout);
696
697         gk20a_disable_channel(ch, !ch->has_timedout, timeout);
698
699         gk20a_free_error_notifiers(ch);
700
701         /* release channel ctx */
702         g->ops.gr.free_channel_ctx(ch);
703
704         gk20a_gr_flush_channel_tlb(gr);
705
706         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
707
708         /* free gpfifo */
709         if (ch->gpfifo.gpu_va)
710                 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
711                         ch->gpfifo.size, gk20a_mem_flag_none);
712         if (ch->gpfifo.cpu_va)
713                 dma_free_coherent(d, ch->gpfifo.size,
714                         ch->gpfifo.cpu_va, ch->gpfifo.iova);
715         ch->gpfifo.cpu_va = NULL;
716         ch->gpfifo.iova = 0;
717
718         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
719
720 #if defined(CONFIG_GK20A_CYCLE_STATS)
721         gk20a_free_cycle_stats_buffer(ch);
722 #endif
723
724         channel_gk20a_free_priv_cmdbuf(ch);
725
726         /* sync must be destroyed before releasing channel vm */
727         if (ch->sync) {
728                 ch->sync->destroy(ch->sync);
729                 ch->sync = NULL;
730         }
731
732         /* release channel binding to the as_share */
733         if (ch_vm->as_share)
734                 gk20a_as_release_share(ch_vm->as_share);
735         else
736                 gk20a_vm_put(ch_vm);
737
738         spin_lock(&ch->update_fn_lock);
739         ch->update_fn = NULL;
740         ch->update_fn_data = NULL;
741         spin_unlock(&ch->update_fn_lock);
742         cancel_work_sync(&ch->update_fn_work);
743
744         /* make sure we don't have deferred interrupts pending that
745          * could still touch the channel */
746         gk20a_wait_for_deferred_interrupts(g);
747
748 unbind:
749         if (gk20a_is_channel_marked_as_tsg(ch))
750                 gk20a_tsg_unbind_channel(ch);
751
752         g->ops.fifo.unbind_channel(ch);
753         g->ops.fifo.free_inst(g, ch);
754
755         ch->vpr = false;
756         ch->vm = NULL;
757
758         mutex_lock(&ch->submit_lock);
759         gk20a_fence_put(ch->last_submit.pre_fence);
760         gk20a_fence_put(ch->last_submit.post_fence);
761         ch->last_submit.pre_fence = NULL;
762         ch->last_submit.post_fence = NULL;
763         mutex_unlock(&ch->submit_lock);
764         WARN_ON(ch->sync);
765
766         /* unlink all debug sessions */
767         mutex_lock(&ch->dbg_s_lock);
768
769         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
770                 dbg_s->ch = NULL;
771                 list_del_init(&dbg_s->dbg_s_list_node);
772         }
773
774         mutex_unlock(&ch->dbg_s_lock);
775
776         /* make sure we catch accesses of unopened channels in case
777          * there are non-refcounted channel pointers hanging around */
778         ch->g = NULL;
779         wmb();
780
781         /* ALWAYS last */
782         free_channel(f, ch);
783 }
784
785 /* Try to get a reference to the channel. Return nonzero on success. If fails,
786  * the channel is dead or being freed elsewhere and you must not touch it.
787  *
788  * Whenever a channel_gk20a pointer is seen and about to be used, a
789  * reference must be held to it - either by you or by the caller, which should
790  * be documented well or otherwise clearly evident. This usually boils down to
791  * the file reference held in ioctl paths, or an explicit get in exception
792  * handlers when the channel is found by a hw_chid.
793  *
794  * Most global functions in this file require a reference to be held by the
795  * caller.
796  */
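/*
 * Illustrative usage sketch (added, not part of the original source). The
 * gk20a_channel_get()/gk20a_channel_put() wrappers used elsewhere in this
 * file expand to the helpers below:
 *
 *	ch = gk20a_channel_get(ch);
 *	if (!ch)
 *		return;		(channel is dead or being freed)
 *	... use ch ...
 *	gk20a_channel_put(ch);
 */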
797 struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
798                                          const char *caller) {
799         struct channel_gk20a *ret;
800
801         spin_lock(&ch->ref_obtain_lock);
802
803         if (likely(ch->referenceable)) {
804                 atomic_inc(&ch->ref_count);
805                 ret = ch;
806         } else
807                 ret = NULL;
808
809         spin_unlock(&ch->ref_obtain_lock);
810
811         if (ret)
812                 trace_gk20a_channel_get(ch->hw_chid, caller);
813
814         return ret;
815 }
816
817 void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
818 {
819         trace_gk20a_channel_put(ch->hw_chid, caller);
820         atomic_dec(&ch->ref_count);
821         wake_up_all(&ch->ref_count_dec_wq);
822
823         /* More puts than gets. Channel is probably going to get
824          * stuck. */
825         WARN_ON(atomic_read(&ch->ref_count) < 0);
826
827         /* Also, more puts than gets. ref_count can go to 0 only if
828          * the channel is closing. Channel is probably going to get
829          * stuck. */
830         WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
831 }
832
833 void gk20a_channel_close(struct channel_gk20a *ch)
834 {
835         gk20a_free_channel(ch);
836 }
837
838 int gk20a_channel_release(struct inode *inode, struct file *filp)
839 {
840         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
841         struct gk20a *g = ch ? ch->g : NULL;
842         int err;
843
844         if (!ch)
845                 return 0;
846
847         trace_gk20a_channel_release(dev_name(&g->dev->dev));
848
849         err = gk20a_busy(g->dev);
850         if (err) {
851                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
852                         ch->hw_chid);
853                 return err;
854         }
855         gk20a_channel_close(ch);
856         gk20a_idle(g->dev);
857
858         filp->private_data = NULL;
859         return 0;
860 }
861
862 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
863 {
864         struct channel_gk20a *ch =
865                 container_of(work, struct channel_gk20a, update_fn_work);
866         void (*update_fn)(struct channel_gk20a *, void *);
867         void *update_fn_data;
868
869         spin_lock(&ch->update_fn_lock);
870         update_fn = ch->update_fn;
871         update_fn_data = ch->update_fn_data;
872         spin_unlock(&ch->update_fn_lock);
873
874         if (update_fn)
875                 update_fn(ch, update_fn_data);
876 }
877
878 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
879                 void (*update_fn)(struct channel_gk20a *, void *),
880                 void *update_fn_data)
881 {
882         struct channel_gk20a *ch = gk20a_open_new_channel(g);
883
884         if (ch) {
885                 spin_lock(&ch->update_fn_lock);
886                 ch->update_fn = update_fn;
887                 ch->update_fn_data = update_fn_data;
888                 spin_unlock(&ch->update_fn_lock);
889         }
890
891         return ch;
892 }
893
894 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
895 {
896         struct fifo_gk20a *f = &g->fifo;
897         struct channel_gk20a *ch;
898
899         gk20a_dbg_fn("");
900
901         ch = allocate_channel(f);
902         if (ch == NULL) {
903                 /* TBD: we want to make this virtualizable */
904                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
905                 return NULL;
906         }
907
908         trace_gk20a_open_new_channel(ch->hw_chid);
909
910         BUG_ON(ch->g);
911         ch->g = g;
912
913         if (g->ops.fifo.alloc_inst(g, ch)) {
914                 ch->g = NULL;
915                 free_channel(f, ch);
916                 gk20a_err(dev_from_gk20a(g),
917                            "failed to open gk20a channel, out of inst mem");
918                 return NULL;
919         }
920
921         /* now the channel is in limbo: off the free list but not yet marked
922          * as alive and usable (i.e. get-able) */
923
924         ch->pid = current->pid;
925
926         /* By default, channel is regular (non-TSG) channel */
927         ch->tsgid = NVGPU_INVALID_TSG_ID;
928
929         /* reset timeout counter and update timestamp */
930         ch->timeout_accumulated_ms = 0;
931         ch->timeout_gpfifo_get = 0;
932         /* set gr host default timeout */
933         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
934         ch->timeout_debug_dump = true;
935         ch->has_timedout = false;
936         ch->obj_class = 0;
937
938         /* The channel is *not* runnable at this point. It still needs to have
939          * an address space bound and a gpfifo and grctx allocated. */
940
941         init_waitqueue_head(&ch->notifier_wq);
942         init_waitqueue_head(&ch->semaphore_wq);
943         init_waitqueue_head(&ch->submit_wq);
944
945         mutex_init(&ch->poll_events.lock);
946         ch->poll_events.events_enabled = false;
947         ch->poll_events.num_pending_events = 0;
948
949         ch->update_fn = NULL;
950         ch->update_fn_data = NULL;
951         spin_lock_init(&ch->update_fn_lock);
952         INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
953
954         /* Mark the channel alive and get-able, with one initial use
955          * reference. The initial reference will be dropped in
956          * gk20a_free_channel() */
957         ch->referenceable = true;
958         atomic_set(&ch->ref_count, 1);
959         wmb();
960
961         return ch;
962 }
963
964 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
965 {
966         int err;
967         struct channel_gk20a *ch;
968
969         trace_gk20a_channel_open(dev_name(&g->dev->dev));
970
971         err = gk20a_busy(g->dev);
972         if (err) {
973                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
974                 return err;
975         }
976         ch = gk20a_open_new_channel(g);
977         gk20a_idle(g->dev);
978         if (!ch) {
979                 gk20a_err(dev_from_gk20a(g),
980                         "failed to get f");
981                 return -ENOMEM;
982         }
983
984         filp->private_data = ch;
985         return 0;
986 }
987
988 int gk20a_channel_open(struct inode *inode, struct file *filp)
989 {
990         struct gk20a *g = container_of(inode->i_cdev,
991                         struct gk20a, channel.cdev);
992         int ret;
993
994         gk20a_dbg_fn("start");
995         ret = __gk20a_channel_open(g, filp);
996
997         gk20a_dbg_fn("end");
998         return ret;
999 }
1000
1001 int gk20a_channel_open_ioctl(struct gk20a *g,
1002                 struct nvgpu_channel_open_args *args)
1003 {
1004         int err;
1005         int fd;
1006         struct file *file;
1007         char *name;
1008
1009         err = get_unused_fd_flags(O_RDWR);
1010         if (err < 0)
1011                 return err;
1012         fd = err;
1013
1014         name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
1015                         dev_name(&g->dev->dev), fd);
1016         if (!name) {
1017                 err = -ENOMEM;
1018                 goto clean_up;
1019         }
1020
1021         file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
1022         kfree(name);
1023         if (IS_ERR(file)) {
1024                 err = PTR_ERR(file);
1025                 goto clean_up;
1026         }
1027
1028         err = __gk20a_channel_open(g, file);
1029         if (err)
1030                 goto clean_up_file;
1031
1032         fd_install(fd, file);
1033         args->channel_fd = fd;
1034         return 0;
1035
1036 clean_up_file:
1037         fput(file);
1038 clean_up:
1039         put_unused_fd(fd);
1040         return err;
1041 }
1042
1043 /* allocate private cmd buffer.
1044    used for inserting commands before/after user submitted buffers. */
1045 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
1046 {
1047         struct device *d = dev_from_gk20a(c->g);
1048         struct vm_gk20a *ch_vm = c->vm;
1049         struct priv_cmd_queue *q = &c->priv_cmd_q;
1050         u32 size;
1051         int err = 0;
1052         struct sg_table *sgt;
1053         dma_addr_t iova;
1054
1055         /* Kernel can insert gpfifos before and after user gpfifos.
1056            Before user gpfifos, kernel inserts fence_wait, which takes
1057            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
1058            After user gpfifos, kernel inserts fence_get, which takes
1059            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
1060            = 6 dwords.
1061            In the worst case the kernel adds both of them for every user gpfifo,
1062            so the max size of priv_cmdbuf is:
1063            (gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes). */
1064         size = roundup_pow_of_two(
1065                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
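        /*
         * Illustrative example (added): for a gpfifo of 1024 entries this is
         * 1024 * 2 * 10 * 4 / 3 = 27306 bytes, rounded up to the next power
         * of two, 32768 (32 KiB).
         */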
1066
1067         q->mem.base_cpuva = dma_alloc_coherent(d, size,
1068                                         &iova,
1069                                         GFP_KERNEL);
1070         if (!q->mem.base_cpuva) {
1071                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1072                 err = -ENOMEM;
1073                 goto clean_up;
1074         }
1075
1076         q->mem.base_iova = iova;
1077         q->mem.size = size;
1078
1079         err = gk20a_get_sgtable(d, &sgt,
1080                         q->mem.base_cpuva, q->mem.base_iova, size);
1081         if (err) {
1082                 gk20a_err(d, "%s: failed to create sg table\n", __func__);
1083                 goto clean_up;
1084         }
1085
1086         memset(q->mem.base_cpuva, 0, size);
1087
1088         q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
1089                                         size,
1090                                         0, /* flags */
1091                                         gk20a_mem_flag_none);
1092         if (!q->base_gpuva) {
1093                 gk20a_err(d, "ch %d : failed to map gpu va"
1094                            "for priv cmd buffer", c->hw_chid);
1095                 err = -ENOMEM;
1096                 goto clean_up_sgt;
1097         }
1098
1099         q->size = q->mem.size / sizeof (u32);
1100
1101         gk20a_free_sgtable(&sgt);
1102
1103         return 0;
1104
1105 clean_up_sgt:
1106         gk20a_free_sgtable(&sgt);
1107 clean_up:
1108         channel_gk20a_free_priv_cmdbuf(c);
1109         return err;
1110 }
1111
1112 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
1113 {
1114         struct device *d = dev_from_gk20a(c->g);
1115         struct vm_gk20a *ch_vm = c->vm;
1116         struct priv_cmd_queue *q = &c->priv_cmd_q;
1117
1118         if (q->size == 0)
1119                 return;
1120
1121         if (q->base_gpuva)
1122                 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
1123                                 q->mem.size, gk20a_mem_flag_none);
1124         if (q->mem.base_cpuva)
1125                 dma_free_coherent(d, q->mem.size,
1126                         q->mem.base_cpuva, q->mem.base_iova);
1127         q->mem.base_cpuva = NULL;
1128         q->mem.base_iova = 0;
1129
1130         memset(q, 0, sizeof(struct priv_cmd_queue));
1131 }
1132
1133 /* allocate a cmd buffer with given size. size is number of u32 entries */
1134 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1135                              struct priv_cmd_entry **entry)
1136 {
1137         struct priv_cmd_queue *q = &c->priv_cmd_q;
1138         struct priv_cmd_entry *e;
1139         u32 free_count;
1140         u32 size = orig_size;
1141
1142         gk20a_dbg_fn("size %d", orig_size);
1143
1144         *entry = NULL;
1145
1146         /* if the free space at the end is less than requested, increase the
1147          * size so that the real allocation starts from the beginning. */
1148         if (q->put + size > q->size)
1149                 size = orig_size + (q->size - q->put);
1150
1151         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
1152                         c->hw_chid, q->get, q->put);
1153
1154         free_count = (q->size - (q->put - q->get) - 1) % q->size;
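        /*
         * Explanatory note (added): one entry is deliberately left unused so
         * that put == get always means "empty" rather than "full". E.g. with
         * size = 8, get = 5, put = 2, five entries are in use and
         * free_count = (8 - (2 - 5) - 1) % 8 = 2.
         */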
1155
1156         if (size > free_count)
1157                 return -EAGAIN;
1158
1159         e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1160         if (!e) {
1161                 gk20a_err(dev_from_gk20a(c->g),
1162                         "ch %d: fail to allocate priv cmd entry",
1163                         c->hw_chid);
1164                 return -ENOMEM;
1165         }
1166
1167         e->size = orig_size;
1168         e->gp_get = c->gpfifo.get;
1169         e->gp_put = c->gpfifo.put;
1170         e->gp_wrap = c->gpfifo.wrap;
1171
1172         /* if we have increased the size to skip the free space at the end,
1173            set put to the beginning of the cmd buffer (0) + orig_size */
1174         if (size != orig_size) {
1175                 e->ptr = q->mem.base_cpuva;
1176                 e->gva = q->base_gpuva;
1177                 q->put = orig_size;
1178         } else {
1179                 e->ptr = q->mem.base_cpuva + q->put;
1180                 e->gva = q->base_gpuva + q->put * sizeof(u32);
1181                 q->put = (q->put + orig_size) & (q->size - 1);
1182         }
1183
1184         /* we already handled q->put + size > q->size so BUG_ON this */
1185         BUG_ON(q->put > q->size);
1186
1187         *entry = e;
1188
1189         gk20a_dbg_fn("done");
1190
1191         return 0;
1192 }
1193
1194 /* Don't call this to free an explicit cmd entry.
1195  * It doesn't update priv_cmd_queue get/put */
1196 static void free_priv_cmdbuf(struct channel_gk20a *c,
1197                              struct priv_cmd_entry *e)
1198 {
1199         kfree(e);
1200 }
1201
1202 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1203                 struct nvgpu_alloc_gpfifo_args *args)
1204 {
1205         struct gk20a *g = c->g;
1206         struct device *d = dev_from_gk20a(g);
1207         struct vm_gk20a *ch_vm;
1208         u32 gpfifo_size;
1209         int err = 0;
1210         struct sg_table *sgt;
1211         dma_addr_t iova;
1212
1213         /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
1214            and another one after, for internal usage. Triple the requested size. */
1215         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
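        /*
         * Illustrative example (added): a request for 100 entries is tripled
         * to 300 and rounded up to 512 gpfifo entries.
         */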
1216
1217         if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1218                 c->vpr = true;
1219
1220         /* an address space needs to have been bound at this point. */
1221         if (!gk20a_channel_as_bound(c)) {
1222                 gk20a_err(d,
1223                             "not bound to an address space at time of gpfifo"
1224                             " allocation.");
1225                 return -EINVAL;
1226         }
1227         ch_vm = c->vm;
1228
1229         c->cmds_pending = false;
1230         mutex_lock(&c->submit_lock);
1231         gk20a_fence_put(c->last_submit.pre_fence);
1232         gk20a_fence_put(c->last_submit.post_fence);
1233         c->last_submit.pre_fence = NULL;
1234         c->last_submit.post_fence = NULL;
1235         mutex_unlock(&c->submit_lock);
1236
1237         c->ramfc.offset = 0;
1238         c->ramfc.size = ram_in_ramfc_s() / 8;
1239
1240         if (c->gpfifo.cpu_va) {
1241                 gk20a_err(d, "channel %d :"
1242                            "gpfifo already allocated", c->hw_chid);
1243                 return -EEXIST;
1244         }
1245
1246         c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1247         c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1248                                                 c->gpfifo.size,
1249                                                 &iova,
1250                                                 GFP_KERNEL);
1251         if (!c->gpfifo.cpu_va) {
1252                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1253                 err = -ENOMEM;
1254                 goto clean_up;
1255         }
1256
1257         c->gpfifo.iova = iova;
1258         c->gpfifo.entry_num = gpfifo_size;
1259
1260         c->gpfifo.get = c->gpfifo.put = 0;
1261
1262         err = gk20a_get_sgtable(d, &sgt,
1263                         c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1264         if (err) {
1265                 gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
1266                 goto clean_up;
1267         }
1268
1269         c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1270                                         &sgt,
1271                                         c->gpfifo.size,
1272                                         0, /* flags */
1273                                         gk20a_mem_flag_none);
1274         if (!c->gpfifo.gpu_va) {
1275                 gk20a_err(d, "channel %d : failed to map"
1276                            " gpu_va for gpfifo", c->hw_chid);
1277                 err = -ENOMEM;
1278                 goto clean_up_sgt;
1279         }
1280
1281         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1282                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1283
1284         channel_gk20a_setup_userd(c);
1285
1286         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1287         if (err)
1288                 goto clean_up_unmap;
1289
1290         /* TBD: setup engine contexts */
1291
1292         err = channel_gk20a_alloc_priv_cmdbuf(c);
1293         if (err)
1294                 goto clean_up_unmap;
1295
1296         err = channel_gk20a_update_runlist(c, true);
1297         if (err)
1298                 goto clean_up_unmap;
1299
1300         g->ops.fifo.bind_channel(c);
1301
1302         gk20a_free_sgtable(&sgt);
1303
1304         gk20a_dbg_fn("done");
1305         return 0;
1306
1307 clean_up_unmap:
1308         gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1309                 c->gpfifo.size, gk20a_mem_flag_none);
1310 clean_up_sgt:
1311         gk20a_free_sgtable(&sgt);
1312 clean_up:
1313         dma_free_coherent(d, c->gpfifo.size,
1314                 c->gpfifo.cpu_va, c->gpfifo.iova);
1315         c->gpfifo.cpu_va = NULL;
1316         c->gpfifo.iova = 0;
1317         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1318         gk20a_err(d, "fail");
1319         return err;
1320 }
1321
1322 static inline bool check_gp_put(struct gk20a *g,
1323                                 struct channel_gk20a *c)
1324 {
1325         u32 put;
1326         /* gp_put changed unexpectedly since last update? */
1327         put = gk20a_bar1_readl(g,
1328                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1329         if (c->gpfifo.put != put) {
1330                 /*TBD: BUG_ON/teardown on this*/
1331                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1332                            "since last update");
1333                 c->gpfifo.put = put;
1334                 return false; /* surprise! */
1335         }
1336         return true; /* checked out ok */
1337 }
1338
1339 /* Call this periodically to refresh gpfifo.get and see how the gpfifo is draining. */
1340 static inline u32 update_gp_get(struct gk20a *g,
1341                                 struct channel_gk20a *c)
1342 {
1343         u32 new_get = gk20a_bar1_readl(g,
1344                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1345         if (new_get < c->gpfifo.get)
1346                 c->gpfifo.wrap = !c->gpfifo.wrap;
1347         c->gpfifo.get = new_get;
1348         return new_get;
1349 }
1350
1351 static inline u32 gp_free_count(struct channel_gk20a *c)
1352 {
1353         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1354                 c->gpfifo.entry_num;
1355 }
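/*
 * Explanatory note (added): one gpfifo entry is deliberately kept unused so
 * that put == get unambiguously means the fifo is empty rather than full;
 * hence the "- 1" in gp_free_count().
 */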
1356
1357 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1358                 u32 timeout_delta_ms)
1359 {
1360         u32 gpfifo_get = update_gp_get(ch->g, ch);
1361         /* Count consecutive timeout ISRs */
1362         if (gpfifo_get == ch->timeout_gpfifo_get) {
1363                 /* we didn't advance since previous channel timeout check */
1364                 ch->timeout_accumulated_ms += timeout_delta_ms;
1365         } else {
1366                 /* first timeout isr encountered */
1367                 ch->timeout_accumulated_ms = timeout_delta_ms;
1368         }
1369
1370         ch->timeout_gpfifo_get = gpfifo_get;
1371
1372         return ch->g->timeouts_enabled &&
1373                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1374 }
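/*
 * Explanatory note (added): the accumulated time is reset whenever gp_get
 * advances, so only a channel that makes no progress across consecutive
 * timeout ISRs can accumulate up to timeout_ms_max and be declared hung.
 */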
1375
1376 static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch)
1377 {
1378         struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
1379
1380         if (ch->g->timeouts_enabled && ch->g->ch_wdt_enabled &&
1381                                 platform->ch_wdt_timeout_ms)
1382                 return platform->ch_wdt_timeout_ms;
1383         else
1384                 return (u32)MAX_SCHEDULE_TIMEOUT;
1385 }
1386
1387 static u32 get_gp_free_count(struct channel_gk20a *c)
1388 {
1389         update_gp_get(c->g, c);
1390         return gp_free_count(c);
1391 }
1392
1393 static void trace_write_pushbuffer(struct channel_gk20a *c,
1394                                    struct nvgpu_gpfifo *g)
1395 {
1396         void *mem = NULL;
1397         unsigned int words;
1398         u64 offset;
1399         struct dma_buf *dmabuf = NULL;
1400
1401         if (gk20a_debug_trace_cmdbuf) {
1402                 u64 gpu_va = (u64)g->entry0 |
1403                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1404                 int err;
1405
1406                 words = pbdma_gp_entry1_length_v(g->entry1);
1407                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1408                 if (!err)
1409                         mem = dma_buf_vmap(dmabuf);
1410         }
1411
1412         if (mem) {
1413                 u32 i;
1414                 /*
1415                  * Write in batches of 128 as there seems to be a limit
1416                  * of how much you can output to ftrace at once.
1417                  */
1418                 for (i = 0; i < words; i += 128U) {
1419                         trace_gk20a_push_cmdbuf(
1420                                 c->g->dev->name,
1421                                 0,
1422                                 min(words - i, 128U),
1423                                 offset + i * sizeof(u32),
1424                                 mem);
1425                 }
1426                 dma_buf_vunmap(dmabuf, mem);
1427         }
1428 }
1429
1430 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1431                                          struct nvgpu_gpfifo *g,
1432                                          struct nvgpu_submit_gpfifo_args *args,
1433                                          int offset,
1434                                          int count)
1435 {
1436         u32 size;
1437         int i;
1438         struct nvgpu_gpfifo *gp;
1439         bool gpfifo_allocated = false;
1440
1441         if (!gk20a_debug_trace_cmdbuf)
1442                 return;
1443
1444         if (!g && !args)
1445                 return;
1446
1447         if (!g) {
1448                 size = args->num_entries * sizeof(struct nvgpu_gpfifo);
1449                 if (size) {
1450                         g = nvgpu_alloc(size, false);
1451                         if (!g)
1452                                 return;
1453
1454                         if (copy_from_user(g,
1455                                 (void __user *)(uintptr_t)args->gpfifo, size)) {
1456                                 return;
1457                         }
1458                 }
1459                 gpfifo_allocated = true;
1460         }
1461
1462         gp = g + offset;
1463         for (i = 0; i < count; i++, gp++)
1464                 trace_write_pushbuffer(c, gp);
1465
1466         if (gpfifo_allocated)
1467                 nvgpu_free(g);
1468 }
1469
1470 static void gk20a_channel_timeout_start(struct channel_gk20a *ch,
1471                 struct channel_gk20a_job *job)
1472 {
1473         mutex_lock(&ch->timeout.lock);
1474
1475         if (ch->timeout.initialized) {
1476                 mutex_unlock(&ch->timeout.lock);
1477                 return;
1478         }
1479
1480         ch->timeout.job = job;
1481         ch->timeout.initialized = true;
1482         schedule_delayed_work(&ch->timeout.wq,
1483                msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch)));
1484
1485         mutex_unlock(&ch->timeout.lock);
1486 }
1487
1488 static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
1489 {
1490         mutex_lock(&ch->timeout.lock);
1491         if (!ch->timeout.initialized) {
1492                 mutex_unlock(&ch->timeout.lock);
1493                 return;
1494         }
1495         mutex_unlock(&ch->timeout.lock);
1496
1497         cancel_delayed_work_sync(&ch->timeout.wq);
1498
1499         mutex_lock(&ch->timeout.lock);
1500         ch->timeout.initialized = false;
1501         mutex_unlock(&ch->timeout.lock);
1502 }
1503
1504 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
1505 {
1506         u32 chid;
1507         struct fifo_gk20a *f = &g->fifo;
1508
1509         for (chid = 0; chid < f->num_channels; chid++) {
1510                 struct channel_gk20a *ch = &f->channel[chid];
1511
1512                 if (gk20a_channel_get(ch)) {
1513                         mutex_lock(&ch->timeout.lock);
1514                         if (!ch->timeout.initialized) {
1515                                 mutex_unlock(&ch->timeout.lock);
1516                                 gk20a_channel_put(ch);
1517                                 continue;
1518                         }
1519                         mutex_unlock(&ch->timeout.lock);
1520
1521                         cancel_delayed_work_sync(&ch->timeout.wq);
1522                         if (!ch->has_timedout)
1523                                 schedule_delayed_work(&ch->timeout.wq,
1524                                        msecs_to_jiffies(
1525                                        gk20a_get_channel_watchdog_timeout(ch)));
1526
1527                         gk20a_channel_put(ch);
1528                 }
1529         }
1530 }
1531
1532 static void gk20a_channel_timeout_handler(struct work_struct *work)
1533 {
1534         struct channel_gk20a_job *job;
1535         struct gk20a *g;
1536         struct channel_gk20a *ch;
1537         struct channel_gk20a *failing_ch;
1538         u32 engine_id;
1539         int id = -1;
1540         bool is_tsg = false;
1541
1542         ch = container_of(to_delayed_work(work), struct channel_gk20a,
1543                         timeout.wq);
1544         ch = gk20a_channel_get(ch);
1545         if (!ch)
1546                 return;
1547
1548         g = ch->g;
1549
1550         /* Need global lock since multiple channels can timeout at a time */
1551         mutex_lock(&g->ch_wdt_lock);
1552
1553         /* Get timed out job and reset the timer */
1554         mutex_lock(&ch->timeout.lock);
1555         job = ch->timeout.job;
1556         ch->timeout.initialized = false;
1557         mutex_unlock(&ch->timeout.lock);
1558
1559         if (gk20a_fifo_disable_all_engine_activity(g, true))
1560                 goto fail_unlock;
1561
1562         if (gk20a_fence_is_expired(job->post_fence))
1563                 goto fail_enable_engine_activity;
1564
1565         gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out\n",
1566                 ch->hw_chid);
1567
1568         /* Get failing engine data */
1569         engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);
1570
1571         if (engine_id >= g->fifo.max_engines) {
1572                 /* If no failing engine, abort the channels */
1573                 if (gk20a_is_channel_marked_as_tsg(ch)) {
1574                         struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
1575
1576                         gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
1577                         gk20a_fifo_abort_tsg(g, ch->tsgid);
1578                 } else {
1579                         gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
1580                         gk20a_channel_abort(ch);
1581                 }
1582         } else {
1583                 /* If failing engine, trigger recovery */
1584                 failing_ch = gk20a_channel_get(&g->fifo.channel[id]);
1585                 if (!failing_ch)
1586                         goto fail_enable_engine_activity;
1587
1588                 if (failing_ch->hw_chid != ch->hw_chid)
1589                         gk20a_channel_timeout_start(ch, job);
1590
1591                 gk20a_fifo_recover(g, BIT(engine_id),
1592                         failing_ch->hw_chid, is_tsg,
1593                         failing_ch->timeout_debug_dump);
1594
1595                 gk20a_channel_put(failing_ch);
1596         }
1597
1598 fail_enable_engine_activity:
1599         gk20a_fifo_enable_all_engine_activity(g);
1600 fail_unlock:
1601         mutex_unlock(&g->ch_wdt_lock);
1602         gk20a_channel_put(ch);
1603 }
1604
1605 static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
1606                                         struct priv_cmd_entry *e)
1607 {
1608         struct priv_cmd_queue *q = &c->priv_cmd_q;
1609         u32 cmd_entry_start;
1610         struct device *d = dev_from_gk20a(c->g);
1611
1612         if (!e)
1613                 return 0;
1614
1615         cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.base_cpuva);
1616         if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
1617                 gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
1618
1619         q->get = (e->ptr - (u32 *)q->mem.base_cpuva) + e->size;
1620         free_priv_cmdbuf(c, e);
1621
1622         return 0;
1623 }
1624
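     /*
      * Track a submitted job on the channel's job list. References are
      * taken here on the vm, the mapped buffers (unless refcounting is
      * skipped), the pre/post fences and the channel itself; all of them
      * are dropped in gk20a_channel_update() once the post fence expires.
      */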
1625 static int gk20a_channel_add_job(struct channel_gk20a *c,
1626                                  struct gk20a_fence *pre_fence,
1627                                  struct gk20a_fence *post_fence,
1628                                  struct priv_cmd_entry *wait_cmd,
1629                                  struct priv_cmd_entry *incr_cmd,
1630                                  bool skip_buffer_refcounting)
1631 {
1632         struct vm_gk20a *vm = c->vm;
1633         struct channel_gk20a_job *job = NULL;
1634         struct mapped_buffer_node **mapped_buffers = NULL;
1635         int err = 0, num_mapped_buffers = 0;
1636
1637         /* job needs reference to this vm (released in channel_update) */
1638         gk20a_vm_get(vm);
1639
1640         if (!skip_buffer_refcounting) {
1641                 err = gk20a_vm_get_buffers(vm, &mapped_buffers,
1642                                         &num_mapped_buffers);
1643                 if (err) {
1644                         gk20a_vm_put(vm);
1645                         return err;
1646                 }
1647         }
1648
1649         job = kzalloc(sizeof(*job), GFP_KERNEL);
1650         if (!job) {
1651                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1652                 gk20a_vm_put(vm);
1653                 return -ENOMEM;
1654         }
1655
1656         /* put() is done in gk20a_channel_update() when the job is done */
1657         c = gk20a_channel_get(c);
1658
1659         if (c) {
1660                 job->num_mapped_buffers = num_mapped_buffers;
1661                 job->mapped_buffers = mapped_buffers;
1662                 job->pre_fence = gk20a_fence_get(pre_fence);
1663                 job->post_fence = gk20a_fence_get(post_fence);
1664                 job->wait_cmd = wait_cmd;
1665                 job->incr_cmd = incr_cmd;
1666
1667                 gk20a_channel_timeout_start(c, job);
1668
1669                 mutex_lock(&c->jobs_lock);
1670                 list_add_tail(&job->list, &c->jobs);
1671                 mutex_unlock(&c->jobs_lock);
1672         } else {
                        /* channel is gone: undo the refs taken above */
                        gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
                        gk20a_vm_put(vm);
                        kfree(job);
1673                 return -ETIMEDOUT;
1674         }
1675
1676         return 0;
1677 }
1678
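     /*
      * Job completion processing: walk the job list in submission order
      * and, for each job whose post fence has expired, release its
      * buffers, fences, private command buffers and references. Stop at
      * the first still-pending job and re-arm the watchdog for it.
      */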
1679 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1680 {
1681         struct vm_gk20a *vm = c->vm;
1682         struct channel_gk20a_job *job, *n;
1683         struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
1684
1685         trace_gk20a_channel_update(c->hw_chid);
1686
1687         wake_up(&c->submit_wq);
1688
1689         mutex_lock(&c->submit_lock);
1690         mutex_lock(&c->jobs_lock);
1691         list_for_each_entry_safe(job, n, &c->jobs, list) {
1692                 struct gk20a *g = c->g;
1693
1694                 bool completed = gk20a_fence_is_expired(job->post_fence);
1695                 if (!completed) {
1696                         gk20a_channel_timeout_start(c, job);
1697                         break;
1698                 }
1699
1700                 gk20a_channel_timeout_stop(c);
1701
1702                 if (c->sync)
1703                         c->sync->signal_timeline(c->sync);
1704
1705                 if (job->num_mapped_buffers)
1706                         gk20a_vm_put_buffers(vm, job->mapped_buffers,
1707                                 job->num_mapped_buffers);
1708
1709                 /* Close the fences (this will unref the semaphores and release
1710                  * them to the pool). */
1711                 gk20a_fence_put(job->pre_fence);
1712                 gk20a_fence_put(job->post_fence);
1713
1714                 /* Free the private command buffers (wait_cmd first and
1715                  * then incr_cmd i.e. order of allocation) */
1716                 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
1717                 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
1718
1719                 /* job is done. release its vm reference (taken in add_job) */
1720                 gk20a_vm_put(vm);
1721                 /* drop the channel ref taken in add_job; the caller still
1722                  * holds its own ref, so the channel cannot be freed here. */
1723                 gk20a_channel_put(c);
1724
1725                 list_del_init(&job->list);
1726                 kfree(job);
1727                 gk20a_idle(g->dev);
1728         }
1729
1730         /*
1731          * If the job list is empty, the channel is idle and we can free
1732          * the sync object here (provided the aggressive_sync_destroy
1733          * flag is set). Check that the last submit has completed before
1734          * destroying the sync resource.
1735          */
1736         if (list_empty(&c->jobs)) {
1737                 if (c->sync && platform->aggressive_sync_destroy &&
1738                           gk20a_fence_is_expired(c->last_submit.post_fence)) {
1739                         c->sync->destroy(c->sync);
1740                         c->sync = NULL;
1741                 }
1742         }
1743         mutex_unlock(&c->jobs_lock);
1744         mutex_unlock(&c->submit_lock);
1745
1746         if (c->update_fn)
1747                 schedule_work(&c->update_fn_work);
1748 }
1749
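     /*
      * Main gpfifo submit path: wait for ring space, optionally emit a
      * fence-wait command before and always a syncpoint/semaphore increment
      * after the caller's entries, copy the entries into the ring (from a
      * kernel or a user-space pointer), record the job for later cleanup,
      * and finally publish the new GP_PUT to USERD via BAR1.
      */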
1750 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1751                                 struct nvgpu_gpfifo *gpfifo,
1752                                 struct nvgpu_submit_gpfifo_args *args,
1753                                 u32 num_entries,
1754                                 u32 flags,
1755                                 struct nvgpu_fence *fence,
1756                                 struct gk20a_fence **fence_out)
1757 {
1758         struct gk20a *g = c->g;
1759         struct device *d = dev_from_gk20a(g);
1760         int err = 0;
1761         int start, end;
1762         int wait_fence_fd = -1;
1763         struct priv_cmd_entry *wait_cmd = NULL;
1764         struct priv_cmd_entry *incr_cmd = NULL;
1765         struct gk20a_fence *pre_fence = NULL;
1766         struct gk20a_fence *post_fence = NULL;
1767         /* we might need two extra gpfifo entries - one for pre fence
1768          * and one for post fence. */
1769         const int extra_entries = 2;
1770         bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1771         bool skip_buffer_refcounting = (flags &
1772                         NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
1773
1774         if (c->has_timedout)
1775                 return -ETIMEDOUT;
1776
1777         /* fifo not large enough for request. Return error immediately */
1778         if (c->gpfifo.entry_num < num_entries) {
1779                 gk20a_err(d, "not enough gpfifo space allocated");
1780                 return -ENOMEM;
1781         }
1782
1783         if (!gpfifo && !args)
1784                 return -EINVAL;
1785
1786         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1787                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1788             !fence)
1789                 return -EINVAL;
1790
1791         /* an address space needs to have been bound at this point. */
1792         if (!gk20a_channel_as_bound(c)) {
1793                 gk20a_err(d,
1794                             "not bound to an address space at time of gpfifo"
1795                             " submission.");
1796                 return -EINVAL;
1797         }
1798
1799 #ifdef CONFIG_DEBUG_FS
1800         /* update debug settings */
1801         if (g->ops.ltc.sync_debugfs)
1802                 g->ops.ltc.sync_debugfs(g);
1803 #endif
1804
1805         gk20a_dbg_info("channel %d", c->hw_chid);
1806
1807         /* gk20a_channel_update releases this ref. */
1808         err = gk20a_busy(g->dev);
1809         if (err) {
1810                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1811                 return err;
1812         }
1813
1814         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1815                                           c->hw_chid,
1816                                           num_entries,
1817                                           flags,
1818                                           fence ? fence->id : 0,
1819                                           fence ? fence->value : 0);
1820         check_gp_put(g, c);
1821         update_gp_get(g, c);
1822
1823         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1824                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1825
1826         /* Make sure we have enough space for gpfifo entries. If not,
1827          * wait for signals from completed submits */
1828         if (gp_free_count(c) < num_entries + extra_entries) {
1829                 /* we can get here via locked ioctl and other paths too */
1830                 int locked_path = mutex_is_locked(&c->ioctl_lock);
1831                 if (locked_path)
1832                         mutex_unlock(&c->ioctl_lock);
1833
1834                 trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
1835                 err = wait_event_interruptible(c->submit_wq,
1836                         get_gp_free_count(c) >= num_entries + extra_entries ||
1837                         c->has_timedout);
1838                 trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);
1839
1840                 if (locked_path)
1841                         mutex_lock(&c->ioctl_lock);
1842         }
1843
1844         if (c->has_timedout) {
1845                 err = -ETIMEDOUT;
1846                 goto clean_up;
1847         }
1848
1849         if (err) {
1850                 gk20a_err(d, "interrupted while waiting for gpfifo space");
1851                 err = -EAGAIN;
1852                 goto clean_up;
1853         }
1854
1855         mutex_lock(&c->submit_lock);
1856
1857         if (!c->sync) {
1858                 c->sync = gk20a_channel_sync_create(c);
1859                 if (!c->sync) {
1860                         err = -ENOMEM;
1861                         mutex_unlock(&c->submit_lock);
1862                         goto clean_up;
1863                 }
1864         }
1865
1866         /*
1867          * Optionally insert a syncpt wait at the beginning of the gpfifo
1868          * submission when the user requested one and the wait has not
1869          * already expired. The id is validated and the wait is elided if
1870          * it does not make sense; this lenient behaviour is kept only so
1871          * that existing tests which depend on it keep running.
1872          */
1873         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1874                 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1875                         wait_fence_fd = fence->id;
1876                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1877                                         &wait_cmd, &pre_fence);
1878                 } else {
1879                         err = c->sync->wait_syncpt(c->sync, fence->id,
1880                                         fence->value, &wait_cmd, &pre_fence);
1881                 }
1882         }
1883         if (err) {
1884                 mutex_unlock(&c->submit_lock);
1885                 goto clean_up;
1886         }
1887
1888
1889         /* always insert syncpt increment at end of gpfifo submission
1890            to keep track of method completion for idle railgating */
1891         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1892                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1893                                          &post_fence, need_wfi);
1894         else
1895                 err = c->sync->incr(c->sync, &incr_cmd,
1896                                     &post_fence);
1897         if (err) {
1898                 mutex_unlock(&c->submit_lock);
1899                 goto clean_up;
1900         }
1901
1902         if (wait_cmd) {
1903                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1904                         u64_lo32(wait_cmd->gva);
1905                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1906                         u64_hi32(wait_cmd->gva) |
1907                         pbdma_gp_entry1_length_f(wait_cmd->size);
1908                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1909                         0, wait_cmd->size, 0, wait_cmd->ptr);
1910
1911                 c->gpfifo.put = (c->gpfifo.put + 1) &
1912                         (c->gpfifo.entry_num - 1);
1913
1914                 /* save gp_put */
1915                 wait_cmd->gp_put = c->gpfifo.put;
1916         }
1917
1918         /*
1919          * Copy source gpfifo entries into the gpfifo ring buffer,
1920          * potentially splitting into two memcpies to handle the
1921          * ring buffer wrap-around case.
1922          */
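             /*
              * For example (hypothetical numbers): with entry_num = 1024,
              * put = 1020 and num_entries = 6, length0 = 4 entries go to
              * the end of the ring and length1 = 2 wrap to its beginning.
              */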
1923         start = c->gpfifo.put;
1924         end = start + num_entries;
1925
1926         if (gpfifo) {
1927                 if (end > c->gpfifo.entry_num) {
1928                         int length0 = c->gpfifo.entry_num - start;
1929                         int length1 = num_entries - length0;
1930
1931                         memcpy(c->gpfifo.cpu_va + start, gpfifo,
1932                                length0 * sizeof(*gpfifo));
1933
1934                         memcpy(c->gpfifo.cpu_va, gpfifo + length0,
1935                                length1 * sizeof(*gpfifo));
1936
1937                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1938                                         0, length0);
1939                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1940                                         length0, length1);
1941                 } else {
1942                         memcpy(c->gpfifo.cpu_va + start, gpfifo,
1943                                num_entries * sizeof(*gpfifo));
1944
1945                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1946                                         0, num_entries);
1947                 }
1948         } else {
1949                 struct nvgpu_gpfifo __user *user_gpfifo =
1950                         (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
1951                 if (end > c->gpfifo.entry_num) {
1952                         int length0 = c->gpfifo.entry_num - start;
1953                         int length1 = num_entries - length0;
1954
1955                         err = copy_from_user(c->gpfifo.cpu_va + start,
1956                                 user_gpfifo,
1957                                 length0 * sizeof(*user_gpfifo));
1958                         if (err) {
1959                                 mutex_unlock(&c->submit_lock);
1960                                 goto clean_up;
1961                         }
1962
1963                         err = copy_from_user(c->gpfifo.cpu_va,
1964                                 user_gpfifo + length0,
1965                                 length1 * sizeof(*user_gpfifo));
1966                         if (err) {
1967                                 mutex_unlock(&c->submit_lock);
1968                                 goto clean_up;
1969                         }
1970
1971                         trace_write_pushbuffer_range(c, NULL, args,
1972                                         0, length0);
1973                         trace_write_pushbuffer_range(c, NULL, args,
1974                                         length0, length1);
1975                 } else {
1976                         err = copy_from_user(c->gpfifo.cpu_va + start,
1977                                 user_gpfifo,
1978                                 num_entries * sizeof(*user_gpfifo));
1979                         if (err) {
1980                                 mutex_unlock(&c->submit_lock);
1981                                 goto clean_up;
1982                         }
1983
1984                         trace_write_pushbuffer_range(c, NULL, args,
1985                                         0, num_entries);
1986                 }
1987         }
1988
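             /*
              * Advance the put pointer past the copied entries; the mask
              * acts as a modulo on the assumption that entry_num is a
              * power of two.
              */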
1989         c->gpfifo.put = (c->gpfifo.put + num_entries) &
1990                 (c->gpfifo.entry_num - 1);
1991
1992         if (incr_cmd) {
1993                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1994                         u64_lo32(incr_cmd->gva);
1995                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1996                         u64_hi32(incr_cmd->gva) |
1997                         pbdma_gp_entry1_length_f(incr_cmd->size);
1998                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1999                         0, incr_cmd->size, 0, incr_cmd->ptr);
2000
2001                 c->gpfifo.put = (c->gpfifo.put + 1) &
2002                         (c->gpfifo.entry_num - 1);
2003
2004                 /* save gp_put */
2005                 incr_cmd->gp_put = c->gpfifo.put;
2006         }
2007
2008         gk20a_fence_put(c->last_submit.pre_fence);
2009         gk20a_fence_put(c->last_submit.post_fence);
2010         c->last_submit.pre_fence = pre_fence;
2011         c->last_submit.post_fence = post_fence;
2012         if (fence_out)
2013                 *fence_out = gk20a_fence_get(post_fence);
2014
2015         /* TODO! Check for errors... */
2016         gk20a_channel_add_job(c, pre_fence, post_fence,
2017                                 wait_cmd, incr_cmd,
2018                                 skip_buffer_refcounting);
2019
2020         c->cmds_pending = true;
2021         gk20a_bar1_writel(g,
2022                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
2023                 c->gpfifo.put);
2024
2025         mutex_unlock(&c->submit_lock);
2026
2027         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
2028                                              c->hw_chid,
2029                                              num_entries,
2030                                              flags,
2031                                              post_fence->syncpt_id,
2032                                              post_fence->syncpt_value);
2033
2034         gk20a_dbg_info("post-submit put %d, get %d, size %d",
2035                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
2036
2037         gk20a_dbg_fn("done");
2038         return err;
2039
2040 clean_up:
2041         gk20a_err(d, "gpfifo submit failed, err %d", err);
2042         free_priv_cmdbuf(c, wait_cmd);
2043         free_priv_cmdbuf(c, incr_cmd);
2044         gk20a_fence_put(pre_fence);
2045         gk20a_fence_put(post_fence);
2046         gk20a_idle(g->dev);
2047         return err;
2048 }
2049
2050 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
2051 {
2052         struct channel_gk20a *c = g->fifo.channel+chid;
2053         c->g = NULL;
2054         c->hw_chid = chid;
2055         c->bound = false;
2056         spin_lock_init(&c->ref_obtain_lock);
2057         atomic_set(&c->ref_count, 0);
2058         c->referenceable = false;
2059         init_waitqueue_head(&c->ref_count_dec_wq);
2060         mutex_init(&c->ioctl_lock);
2061         mutex_init(&c->jobs_lock);
2062         mutex_init(&c->submit_lock);
2063         mutex_init(&c->timeout.lock);
2064         INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
2065         INIT_LIST_HEAD(&c->jobs);
2066 #if defined(CONFIG_GK20A_CYCLE_STATS)
2067         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
2068 #endif
2069         INIT_LIST_HEAD(&c->dbg_s_list);
2070         mutex_init(&c->dbg_s_lock);
2071         list_add(&c->free_chs, &g->fifo.free_chs);
2072
2073         return 0;
2074 }
2075
2076 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
2077 {
2078         int err = 0;
2079         struct gk20a_fence *fence = ch->last_submit.post_fence;
2080
2081         if (!ch->cmds_pending)
2082                 return 0;
2083
2084         /* Do not wait for a timedout channel */
2085         if (ch->has_timedout)
2086                 return -ETIMEDOUT;
2087
2088         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
2089                      fence->syncpt_value, fence->semaphore);
2090
2091         err = gk20a_fence_wait(fence, timeout);
2092         if (WARN_ON(err))
2093                 dev_warn(dev_from_gk20a(ch->g),
2094                        "timed out waiting for gk20a channel to finish");
2095         else
2096                 ch->cmds_pending = false;
2097
2098         return err;
2099 }
2100
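     /*
      * Wait on a user-space semaphore: map the dma-buf page holding the
      * semaphore word and sleep until it equals the expected payload, the
      * channel times out, or the caller-supplied timeout expires.
      */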
2101 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
2102                                         ulong id, u32 offset,
2103                                         u32 payload, long timeout)
2104 {
2105         struct platform_device *pdev = ch->g->dev;
2106         struct dma_buf *dmabuf;
2107         void *data;
2108         u32 *semaphore;
2109         int ret = 0;
2110         long remain;
2111
2112         /* do not wait if channel has timed out */
2113         if (ch->has_timedout)
2114                 return -ETIMEDOUT;
2115
2116         dmabuf = dma_buf_get(id);
2117         if (IS_ERR(dmabuf)) {
2118                 gk20a_err(&pdev->dev, "invalid semaphore dmabuf handle 0x%lx",
2119                            id);
2120                 return -EINVAL;
2121         }
2122
2123         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
2124         if (!data) {
2125                 gk20a_err(&pdev->dev, "failed to map semaphore memory");
2126                 ret = -EINVAL;
2127                 goto cleanup_put;
2128         }
2129
2130         semaphore = data + (offset & ~PAGE_MASK);
2131
2132         remain = wait_event_interruptible_timeout(
2133                         ch->semaphore_wq,
2134                         *semaphore == payload || ch->has_timedout,
2135                         timeout);
2136
2137         if (remain == 0 && *semaphore != payload)
2138                 ret = -ETIMEDOUT;
2139         else if (remain < 0)
2140                 ret = remain;
2141
2142         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
2143 cleanup_put:
2144         dma_buf_put(dmabuf);
2145         return ret;
2146 }
2147
2148 static int gk20a_channel_wait(struct channel_gk20a *ch,
2149                               struct nvgpu_wait_args *args)
2150 {
2151         struct device *d = dev_from_gk20a(ch->g);
2152         struct dma_buf *dmabuf;
2153         struct notification *notif;
2154         struct timespec tv;
2155         u64 jiffies;
2156         ulong id;
2157         u32 offset;
2158         unsigned long timeout;
2159         int remain, ret = 0;
2160
2161         gk20a_dbg_fn("");
2162
2163         if (ch->has_timedout)
2164                 return -ETIMEDOUT;
2165
2166         if (args->timeout == NVGPU_NO_TIMEOUT)
2167                 timeout = MAX_SCHEDULE_TIMEOUT;
2168         else
2169                 timeout = (u32)msecs_to_jiffies(args->timeout);
2170
2171         switch (args->type) {
2172         case NVGPU_WAIT_TYPE_NOTIFIER:
2173                 id = args->condition.notifier.dmabuf_fd;
2174                 offset = args->condition.notifier.offset;
2175
2176                 dmabuf = dma_buf_get(id);
2177                 if (IS_ERR(dmabuf)) {
2178                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
2179                                    id);
2180                         return -EINVAL;
2181                 }
2182
2183                 notif = dma_buf_vmap(dmabuf);
2184                 if (!notif) {
2185                         gk20a_err(d, "failed to map notifier memory");
                                 dma_buf_put(dmabuf); /* drop the dma_buf_get() ref */
2186                         return -ENOMEM;
2187                 }
2188
2189                 notif = (struct notification *)((uintptr_t)notif + offset);
2190
2191                 /* user should set status pending before
2192                  * calling this ioctl */
2193                 remain = wait_event_interruptible_timeout(
2194                                 ch->notifier_wq,
2195                                 notif->status == 0 || ch->has_timedout,
2196                                 timeout);
2197
2198                 if (remain == 0 && notif->status != 0) {
2199                         ret = -ETIMEDOUT;
2200                         goto notif_clean_up;
2201                 } else if (remain < 0) {
2202                         ret = -EINTR;
2203                         goto notif_clean_up;
2204                 }
2205
2206                 /* TBD: fill in correct information */
2207                 jiffies = get_jiffies_64();
2208                 jiffies_to_timespec(jiffies, &tv);
2209                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
2210                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
2211                 notif->info32 = 0xDEADBEEF; /* should be object name */
2212                 notif->info16 = ch->hw_chid; /* should be method offset */
2213
2214 notif_clean_up:
                 /* unmap with the original vmap address and drop the dmabuf ref */
2215                 dma_buf_vunmap(dmabuf, (void *)((uintptr_t)notif - offset));
                 dma_buf_put(dmabuf);
2216                 return ret;
2217
2218         case NVGPU_WAIT_TYPE_SEMAPHORE:
2219                 ret = gk20a_channel_wait_semaphore(ch,
2220                                 args->condition.semaphore.dmabuf_fd,
2221                                 args->condition.semaphore.offset,
2222                                 args->condition.semaphore.payload,
2223                                 timeout);
2224
2225                 break;
2226
2227         default:
2228                 ret = -EINVAL;
2229                 break;
2230         }
2231
2232         return ret;
2233 }
2234
2235 /* poll events for semaphores */
2236
2237 static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
2238 {
2239         gk20a_dbg_fn("");
2240
2241         mutex_lock(&ev->lock);
2242
2243         ev->events_enabled = true;
2244         ev->num_pending_events = 0;
2245
2246         mutex_unlock(&ev->lock);
2247 }
2248
2249 static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
2250 {
2251         gk20a_dbg_fn("");
2252
2253         mutex_lock(&ev->lock);
2254
2255         ev->events_enabled = false;
2256         ev->num_pending_events = 0;
2257
2258         mutex_unlock(&ev->lock);
2259 }
2260
2261 static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
2262 {
2263         gk20a_dbg_fn("");
2264
2265         mutex_lock(&ev->lock);
2266
2267         if (ev->events_enabled &&
2268                         ev->num_pending_events > 0)
2269                 ev->num_pending_events--;
2270
2271         mutex_unlock(&ev->lock);
2272 }
2273
2274 static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
2275                           struct nvgpu_channel_events_ctrl_args *args)
2276 {
2277         int ret = 0;
2278
2279         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
2280                         "channel events ctrl cmd %d", args->cmd);
2281
2282         switch (args->cmd) {
2283         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
2284                 gk20a_channel_events_enable(&ch->poll_events);
2285                 break;
2286
2287         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
2288                 gk20a_channel_events_disable(&ch->poll_events);
2289                 break;
2290
2291         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
2292                 gk20a_channel_events_clear(&ch->poll_events);
2293                 break;
2294
2295         default:
2296                 gk20a_err(dev_from_gk20a(ch->g),
2297                            "unrecognized channel events ctrl cmd: 0x%x",
2298                            args->cmd);
2299                 ret = -EINVAL;
2300                 break;
2301         }
2302
2303         return ret;
2304 }
2305
2306 void gk20a_channel_event(struct channel_gk20a *ch)
2307 {
2308         mutex_lock(&ch->poll_events.lock);
2309
2310         if (ch->poll_events.events_enabled) {
2311                 gk20a_dbg_info("posting event on channel id %d",
2312                                 ch->hw_chid);
2313                 gk20a_dbg_info("%d channel events pending",
2314                                 ch->poll_events.num_pending_events);
2315
2316                 ch->poll_events.num_pending_events++;
2317                 /* not waking up here, caller does that */
2318         }
2319
2320         mutex_unlock(&ch->poll_events.lock);
2321 }
2322
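     /*
      * poll()/select() support: report the channel file as readable
      * (POLLIN | POLLPRI) when events are enabled and at least one event
      * is pending.
      */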
2323 unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
2324 {
2325         unsigned int mask = 0;
2326         struct channel_gk20a *ch = filep->private_data;
2327
2328         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
2329
2330         poll_wait(filep, &ch->semaphore_wq, wait);
2331
2332         mutex_lock(&ch->poll_events.lock);
2333
2334         if (ch->poll_events.events_enabled &&
2335                         ch->poll_events.num_pending_events > 0) {
2336                 gk20a_dbg_info("found pending event on channel id %d",
2337                                 ch->hw_chid);
2338                 gk20a_dbg_info("%d channel events pending",
2339                                 ch->poll_events.num_pending_events);
2340                 mask = (POLLPRI | POLLIN);
2341         }
2342
2343         mutex_unlock(&ch->poll_events.lock);
2344
2345         return mask;
2346 }
2347
2348 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2349                 u32 priority)
2350 {
2351         u32 timeslice_timeout;
2352         /* set priority of graphics channel */
2353         switch (priority) {
2354         case NVGPU_PRIORITY_LOW:
2355                 /* 64 << 3 = 512us */
2356                 timeslice_timeout = 64;
2357                 break;
2358         case NVGPU_PRIORITY_MEDIUM:
2359                 /* 128 << 3 = 1024us */
2360                 timeslice_timeout = 128;
2361                 break;
2362         case NVGPU_PRIORITY_HIGH:
2363                 /* 255 << 3 = 2048us */
2364                 timeslice_timeout = 255;
2365                 break;
2366         default:
2367                 pr_err("Unsupported priority");
2368                 return -EINVAL;
2369         }
2370         channel_gk20a_set_schedule_params(ch,
2371                         timeslice_timeout);
2372         return 0;
2373 }
2374
2375 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
2376                             struct nvgpu_zcull_bind_args *args)
2377 {
2378         struct gk20a *g = ch->g;
2379         struct gr_gk20a *gr = &g->gr;
2380
2381         gk20a_dbg_fn("");
2382
2383         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
2384                                 args->gpu_va, args->mode);
2385 }
2386
2387 /* in this context the "channel" is the host1x channel which
2388  * maps to *all* gk20a channels */
2389 int gk20a_channel_suspend(struct gk20a *g)
2390 {
2391         struct fifo_gk20a *f = &g->fifo;
2392         u32 chid;
2393         bool channels_in_use = false;
2394         int err;
2395
2396         gk20a_dbg_fn("");
2397
2398         /* wait for engine idle */
2399         err = g->ops.fifo.wait_engine_idle(g);
2400         if (err)
2401                 return err;
2402
2403         for (chid = 0; chid < f->num_channels; chid++) {
2404                 struct channel_gk20a *ch = &f->channel[chid];
2405                 if (gk20a_channel_get(ch)) {
2406                         gk20a_dbg_info("suspend channel %d", chid);
2407                         /* disable channel */
2408                         g->ops.fifo.disable_channel(ch);
2409                         /* preempt the channel */
2410                         g->ops.fifo.preempt_channel(g, chid);
2411                         /* wait for channel update notifiers */
2412                         if (ch->update_fn &&
2413                                         work_pending(&ch->update_fn_work))
2414                                 flush_work(&ch->update_fn_work);
2415
2416                         channels_in_use = true;
2417
2418                         gk20a_channel_put(ch);
2419                 }
2420         }
2421
2422         if (channels_in_use) {
2423                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2424
2425                 for (chid = 0; chid < f->num_channels; chid++) {
2426                         if (gk20a_channel_get(&f->channel[chid])) {
2427                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
2428                                 gk20a_channel_put(&f->channel[chid]);
2429                         }
2430                 }
2431         }
2432
2433         gk20a_dbg_fn("done");
2434         return 0;
2435 }
2436
2437 int gk20a_channel_resume(struct gk20a *g)
2438 {
2439         struct fifo_gk20a *f = &g->fifo;
2440         u32 chid;
2441         bool channels_in_use = false;
2442
2443         gk20a_dbg_fn("");
2444
2445         for (chid = 0; chid < f->num_channels; chid++) {
2446                 if (gk20a_channel_get(&f->channel[chid])) {
2447                         gk20a_dbg_info("resume channel %d", chid);
2448                         g->ops.fifo.bind_channel(&f->channel[chid]);
2449                         channels_in_use = true;
2450                         gk20a_channel_put(&f->channel[chid]);
2451                 }
2452         }
2453
2454         if (channels_in_use)
2455                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
2456
2457         gk20a_dbg_fn("done");
2458         return 0;
2459 }
2460
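     /*
      * Wake all channels' semaphore waiters and run job completion
      * processing; called when semaphore values may have been updated by
      * the GPU.
      */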
2461 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2462 {
2463         struct fifo_gk20a *f = &g->fifo;
2464         u32 chid;
2465
2466         gk20a_dbg_fn("");
2467
2468         for (chid = 0; chid < f->num_channels; chid++) {
2469                 struct channel_gk20a *c = g->fifo.channel+chid;
2470                 if (gk20a_channel_get(c)) {
2471                         wake_up_interruptible_all(&c->semaphore_wq);
2472                         gk20a_channel_update(c, 0);
2473                         gk20a_channel_put(c);
2474                 }
2475         }
2476 }
2477
2478 static int gk20a_ioctl_channel_submit_gpfifo(
2479         struct channel_gk20a *ch,
2480         struct nvgpu_submit_gpfifo_args *args)
2481 {
2482         struct gk20a_fence *fence_out;
2483         int ret = 0;
2484
2485         gk20a_dbg_fn("");
2486
2487         if (ch->has_timedout)
2488                 return -ETIMEDOUT;
2489
2490         ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
2491                                           args->flags, &args->fence,
2492                                           &fence_out);
2493
2494         if (ret)
2495                 goto clean_up;
2496
2497         /* Convert fence_out to something we can pass back to user space. */
2498         if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2499                 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2500                         int fd = gk20a_fence_install_fd(fence_out);
2501                         if (fd < 0)
2502                                 ret = fd;
2503                         else
2504                                 args->fence.id = fd;
2505                 } else {
2506                         args->fence.id = fence_out->syncpt_id;
2507                         args->fence.value = fence_out->syncpt_value;
2508                 }
2509         }
2510         gk20a_fence_put(fence_out);
2511
2512 clean_up:
2513         return ret;
2514 }
2515
2516 void gk20a_init_channel(struct gpu_ops *gops)
2517 {
2518         gops->fifo.bind_channel = channel_gk20a_bind;
2519         gops->fifo.unbind_channel = channel_gk20a_unbind;
2520         gops->fifo.disable_channel = channel_gk20a_disable;
2521         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2522         gops->fifo.free_inst = channel_gk20a_free_inst;
2523         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2524 }
2525
2526 long gk20a_channel_ioctl(struct file *filp,
2527         unsigned int cmd, unsigned long arg)
2528 {
2529         struct channel_gk20a *ch = filp->private_data;
2530         struct platform_device *dev = ch->g->dev;
2531         u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
2532         int err = 0;
2533
2534         gk20a_dbg_fn("start %d", _IOC_NR(cmd));
2535
2536         if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
2537                 (_IOC_NR(cmd) == 0) ||
2538                 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
2539                 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
2540                 return -EINVAL;
2541
2542         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2543                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2544                         return -EFAULT;
2545         }
2546
2547         /* take a ref or return timeout if channel refs can't be taken */
2548         ch = gk20a_channel_get(ch);
2549         if (!ch)
2550                 return -ETIMEDOUT;
2551
2552         /* protect our sanity for threaded userspace - most of the channel is
2553          * not thread safe */
2554         mutex_lock(&ch->ioctl_lock);
2555
2556         /* this ioctl call keeps a ref to the file which keeps a ref to the
2557          * channel */
2558
2559         switch (cmd) {
2560         case NVGPU_IOCTL_CHANNEL_OPEN:
2561                 err = gk20a_channel_open_ioctl(ch->g,
2562                         (struct nvgpu_channel_open_args *)buf);
2563                 break;
2564         case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
2565                 break;
2566         case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2567                 err = gk20a_busy(dev);
2568                 if (err) {
2569                         dev_err(&dev->dev,
2570                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2571                                 __func__, cmd);
2572                         break;
2573                 }
2574                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2575                                 (struct nvgpu_alloc_obj_ctx_args *)buf);
2576                 gk20a_idle(dev);
2577                 break;
2578         case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
2579                 err = gk20a_busy(dev);
2580                 if (err) {
2581                         dev_err(&dev->dev,
2582                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2583                                 __func__, cmd);
2584                         break;
2585                 }
2586                 err = ch->g->ops.gr.free_obj_ctx(ch,
2587                                 (struct nvgpu_free_obj_ctx_args *)buf);
2588                 gk20a_idle(dev);
2589                 break;
2590         case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
2591                 err = gk20a_busy(dev);
2592                 if (err) {
2593                         dev_err(&dev->dev,
2594                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2595                                 __func__, cmd);
2596                         break;
2597                 }
2598                 err = gk20a_alloc_channel_gpfifo(ch,
2599                                 (struct nvgpu_alloc_gpfifo_args *)buf);
2600                 gk20a_idle(dev);
2601                 break;
2602         case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2603                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2604                                 (struct nvgpu_submit_gpfifo_args *)buf);
2605                 break;
2606         case NVGPU_IOCTL_CHANNEL_WAIT:
2607                 err = gk20a_busy(dev);
2608                 if (err) {
2609                         dev_err(&dev->dev,
2610                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2611                                 __func__, cmd);
2612                         break;
2613                 }
2614
2615                 /* waiting is thread-safe; drop the mutex, since holding it
2616                  * across the wait could deadlock in certain conditions */
2617                 mutex_unlock(&ch->ioctl_lock);
2618
2619                 err = gk20a_channel_wait(ch,
2620                                 (struct nvgpu_wait_args *)buf);
2621
2622                 mutex_lock(&ch->ioctl_lock);
2623
2624                 gk20a_idle(dev);
2625                 break;
2626         case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
2627                 err = gk20a_busy(dev);
2628                 if (err) {
2629                         dev_err(&dev->dev,
2630                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2631                                 __func__, cmd);
2632                         break;
2633                 }
2634                 err = gk20a_channel_zcull_bind(ch,
2635                                 (struct nvgpu_zcull_bind_args *)buf);
2636                 gk20a_idle(dev);
2637                 break;
2638         case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2639                 err = gk20a_busy(dev);
2640                 if (err) {
2641                         dev_err(&dev->dev,
2642                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2643                                 __func__, cmd);
2644                         break;
2645                 }
2646                 err = gk20a_init_error_notifier(ch,
2647                                 (struct nvgpu_set_error_notifier *)buf);
2648                 gk20a_idle(dev);
2649                 break;
2650 #ifdef CONFIG_GK20A_CYCLE_STATS
2651         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
2652                 err = gk20a_busy(dev);
2653                 if (err) {
2654                         dev_err(&dev->dev,
2655                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2656                                 __func__, cmd);
2657                         break;
2658                 }
2659                 err = gk20a_channel_cycle_stats(ch,
2660                                 (struct nvgpu_cycle_stats_args *)buf);
2661                 gk20a_idle(dev);
2662                 break;
2663 #endif
2664         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
2665         {
2666                 u32 timeout =
2667                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2668                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2669                            timeout, ch->hw_chid);
2670                 ch->timeout_ms_max = timeout;
2671                 break;
2672         }
2673         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2674         {
2675                 u32 timeout =
2676                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2677                 bool timeout_debug_dump = !((u32)
2678                         ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
2679                         (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
2680                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2681                            timeout, ch->hw_chid);
2682                 ch->timeout_ms_max = timeout;
2683                 ch->timeout_debug_dump = timeout_debug_dump;
2684                 break;
2685         }
2686         case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
2687                 ((struct nvgpu_get_param_args *)buf)->value =
2688                         ch->has_timedout;
2689                 break;
2690         case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
2691                 err = gk20a_busy(dev);
2692                 if (err) {
2693                         dev_err(&dev->dev,
2694                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2695                                 __func__, cmd);
2696                         break;
2697                 }
2698                 gk20a_channel_set_priority(ch,
2699                         ((struct nvgpu_set_priority_args *)buf)->priority);
2700                 gk20a_idle(dev);
2701                 break;
2702         case NVGPU_IOCTL_CHANNEL_ENABLE:
2703                 err = gk20a_busy(dev);
2704                 if (err) {
2705                         dev_err(&dev->dev,
2706                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2707                                 __func__, cmd);
2708                         break;
2709                 }
2710                 /* enable channel */
2711                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2712                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2713                         ccsr_channel_enable_set_true_f());
2714                 gk20a_idle(dev);
2715                 break;
2716         case NVGPU_IOCTL_CHANNEL_DISABLE:
2717                 err = gk20a_busy(dev);
2718                 if (err) {
2719                         dev_err(&dev->dev,
2720                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2721                                 __func__, cmd);
2722                         break;
2723                 }
2724                 /* disable channel */
2725                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2726                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2727                         ccsr_channel_enable_clr_true_f());
2728                 gk20a_idle(dev);
2729                 break;
2730         case NVGPU_IOCTL_CHANNEL_PREEMPT:
2731                 err = gk20a_busy(dev);
2732                 if (err) {
2733                         dev_err(&dev->dev,
2734                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2735                                 __func__, cmd);
2736                         break;
2737                 }
2738                 err = gk20a_fifo_preempt(ch->g, ch);
2739                 gk20a_idle(dev);
2740                 break;
2741         case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
2742                 err = gk20a_busy(dev);
2743                 if (err) {
2744                         dev_err(&dev->dev,
2745                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2746                                 __func__, cmd);
2747                         break;
2748                 }
2749                 err = ch->g->ops.fifo.force_reset_ch(ch, true);
2750                 gk20a_idle(dev);
2751                 break;
2752         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
2753                 err = gk20a_channel_events_ctrl(ch,
2754                            (struct nvgpu_channel_events_ctrl_args *)buf);
2755                 break;
2756         default:
2757                 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2758                 err = -ENOTTY;
2759                 break;
2760         }
2761
2762         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2763                 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2764
2765         mutex_unlock(&ch->ioctl_lock);
2766
2767         gk20a_channel_put(ch);
2768
2769         gk20a_dbg_fn("end");
2770
2771         return err;
2772 }