/*
 * drivers/video/tegra/host/gk20a/gk20a.c
 *
 * GK20A Graphics
 *
 * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#define CREATE_TRACE_POINTS
#include <trace/events/gk20a.h>

#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/export.h>
#include <linux/file.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/pm_runtime.h>
#include <linux/thermal.h>
#include <asm/cacheflush.h>
#include <linux/debugfs.h>
#include <linux/spinlock.h>
#include <linux/tegra-powergate.h>
#include <linux/tegra_pm_domains.h>
#include <linux/clk/tegra.h>

#include <linux/sched.h>
#include <linux/input-cfboost.h>

#include "gk20a.h"
#include "debug_gk20a.h"
#include "ctrl_gk20a.h"
#include "hw_mc_gk20a.h"
#include "hw_timer_gk20a.h"
#include "hw_bus_gk20a.h"
#include "hw_sim_gk20a.h"
#include "hw_top_gk20a.h"
#include "hw_ltc_gk20a.h"
#include "gk20a_scale.h"
#include "dbg_gpu_gk20a.h"
#include "hal.h"

#ifdef CONFIG_ARM64
#define __cpuc_flush_dcache_area __flush_dcache_area
#endif

#define CLASS_NAME "nvidia-gpu"
/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
#define INTERFACE_NAME "nvhost%s-gpu"

#define GK20A_NUM_CDEVS 5

#if defined(GK20A_DEBUG)
u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK;
u32 gk20a_dbg_ftrace;
#endif

#define GK20A_WAIT_FOR_IDLE_MS  2000

static int gk20a_pm_finalize_poweron(struct device *dev);
static int gk20a_pm_prepare_poweroff(struct device *dev);

static inline void set_gk20a(struct platform_device *dev, struct gk20a *gk20a)
{
        gk20a_get_platform(dev)->g = gk20a;
}

static const struct file_operations gk20a_channel_ops = {
        .owner = THIS_MODULE,
        .release = gk20a_channel_release,
        .open = gk20a_channel_open,
#ifdef CONFIG_COMPAT
        .compat_ioctl = gk20a_channel_ioctl,
#endif
        .unlocked_ioctl = gk20a_channel_ioctl,
};

static const struct file_operations gk20a_ctrl_ops = {
        .owner = THIS_MODULE,
        .release = gk20a_ctrl_dev_release,
        .open = gk20a_ctrl_dev_open,
        .unlocked_ioctl = gk20a_ctrl_dev_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = gk20a_ctrl_dev_ioctl,
#endif
};

static const struct file_operations gk20a_dbg_ops = {
        .owner = THIS_MODULE,
        .release        = gk20a_dbg_gpu_dev_release,
        .open           = gk20a_dbg_gpu_dev_open,
        .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
        .poll           = gk20a_dbg_gpu_dev_poll,
#ifdef CONFIG_COMPAT
        .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

static const struct file_operations gk20a_as_ops = {
        .owner = THIS_MODULE,
        .release = gk20a_as_dev_release,
        .open = gk20a_as_dev_open,
#ifdef CONFIG_COMPAT
        .compat_ioctl = gk20a_as_dev_ioctl,
#endif
        .unlocked_ioctl = gk20a_as_dev_ioctl,
};

/*
 * Note: We use a different 'open' to trigger handling of the profiler session.
 * Most of the code is shared between them...  Though, at some point if the
 * code does get too tangled trying to handle each in the same path we can
 * separate them cleanly.
 */
static const struct file_operations gk20a_prof_ops = {
        .owner = THIS_MODULE,
        .release        = gk20a_dbg_gpu_dev_release,
        .open           = gk20a_prof_gpu_dev_open,
        .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
        /* .mmap           = gk20a_prof_gpu_dev_mmap,*/
        /*int (*mmap) (struct file *, struct vm_area_struct *);*/
#ifdef CONFIG_COMPAT
        .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
{
        writel(v, g->sim.regs + r);
}

static inline u32 sim_readl(struct gk20a *g, u32 r)
{
        return readl(g->sim.regs + r);
}

static void kunmap_and_free_iopage(void **kvaddr, struct page **page)
{
        if (*kvaddr) {
                kunmap(*page);
                *kvaddr = NULL;
        }
        if (*page) {
                __free_page(*page);
                *page = NULL;
        }
}

static void gk20a_free_sim_support(struct gk20a *g)
{
        /* free sim mappings, bfrs */
        kunmap_and_free_iopage(&g->sim.send_bfr.kvaddr,
                               &g->sim.send_bfr.page);

        kunmap_and_free_iopage(&g->sim.recv_bfr.kvaddr,
                               &g->sim.recv_bfr.page);

        kunmap_and_free_iopage(&g->sim.msg_bfr.kvaddr,
                               &g->sim.msg_bfr.page);
}

static void gk20a_remove_sim_support(struct sim_gk20a *s)
{
        struct gk20a *g = s->g;
        if (g->sim.regs)
                sim_writel(g, sim_config_r(), sim_config_mode_disabled_v());
        gk20a_free_sim_support(g);
}

static int alloc_and_kmap_iopage(struct device *d,
                                 void **kvaddr,
                                 phys_addr_t *phys,
                                 struct page **page)
{
        int err = 0;
        *page = alloc_page(GFP_KERNEL);

        if (!*page) {
                err = -ENOMEM;
                dev_err(d, "couldn't allocate io page\n");
                goto fail;
        }

        *kvaddr = kmap(*page);
        if (!*kvaddr) {
                err = -ENOMEM;
                dev_err(d, "couldn't kmap io page\n");
                goto fail;
        }
        *phys = page_to_phys(*page);
        return 0;

 fail:
        kunmap_and_free_iopage(kvaddr, page);
        return err;
}

static void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i,
                                            struct resource **out)
{
        struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
        if (!r)
                return NULL;
        if (out)
                *out = r;
        return devm_request_and_ioremap(&dev->dev, r);
}

/* TBD: strip from released */
static int gk20a_init_sim_support(struct platform_device *dev)
{
        int err = 0;
        struct gk20a *g = get_gk20a(dev);
        struct device *d = &dev->dev;
        phys_addr_t phys;

        g->sim.g = g;
        g->sim.regs = gk20a_ioremap_resource(dev, GK20A_SIM_IORESOURCE_MEM,
                                             &g->sim.reg_mem);
        if (!g->sim.regs) {
                dev_err(d, "failed to remap gk20a sim regs\n");
                err = -ENXIO;
                goto fail;
        }

        /* allocate sim event/msg buffers */
        err = alloc_and_kmap_iopage(d, &g->sim.send_bfr.kvaddr,
                                    &g->sim.send_bfr.phys,
                                    &g->sim.send_bfr.page);

        err = err || alloc_and_kmap_iopage(d, &g->sim.recv_bfr.kvaddr,
                                           &g->sim.recv_bfr.phys,
                                           &g->sim.recv_bfr.page);

        err = err || alloc_and_kmap_iopage(d, &g->sim.msg_bfr.kvaddr,
                                           &g->sim.msg_bfr.phys,
                                           &g->sim.msg_bfr.page);

        if (!(g->sim.send_bfr.kvaddr && g->sim.recv_bfr.kvaddr &&
              g->sim.msg_bfr.kvaddr)) {
                dev_err(d, "couldn't allocate all sim buffers\n");
                err = -ENOMEM;
                goto fail;
        }

        /* mark send ring invalid */
        sim_writel(g, sim_send_ring_r(), sim_send_ring_status_invalid_f());

        /* read get pointer and make equal to put */
        g->sim.send_ring_put = sim_readl(g, sim_send_get_r());
        sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);

        /* write send ring address and make it valid */
        /* TBD: work for >32b physmem */
        phys = g->sim.send_bfr.phys;
        sim_writel(g, sim_send_ring_hi_r(), 0);
        sim_writel(g, sim_send_ring_r(),
                   sim_send_ring_status_valid_f() |
                   sim_send_ring_target_phys_pci_coherent_f() |
                   sim_send_ring_size_4kb_f() |
                   sim_send_ring_addr_lo_f(phys >> PAGE_SHIFT));

        /* repeat for recv ring (but swap put,get as roles are opposite) */
        sim_writel(g, sim_recv_ring_r(), sim_recv_ring_status_invalid_f());

        /* read put pointer and make equal to get */
        g->sim.recv_ring_get = sim_readl(g, sim_recv_put_r());
        sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);

        /* write recv ring address and make it valid */
        /* TBD: work for >32b physmem */
        phys = g->sim.recv_bfr.phys;
        sim_writel(g, sim_recv_ring_hi_r(), 0);
        sim_writel(g, sim_recv_ring_r(),
                   sim_recv_ring_status_valid_f() |
                   sim_recv_ring_target_phys_pci_coherent_f() |
                   sim_recv_ring_size_4kb_f() |
                   sim_recv_ring_addr_lo_f(phys >> PAGE_SHIFT));

        g->sim.remove_support = gk20a_remove_sim_support;
        return 0;

 fail:
        gk20a_free_sim_support(g);
        return err;
}

static inline u32 sim_msg_header_size(void)
{
        return 24; /* TBD: fix the header to get this from NV_VGPU_MSG_HEADER */
}

static inline u32 *sim_msg_bfr(struct gk20a *g, u32 byte_offset)
{
        return (u32 *)(g->sim.msg_bfr.kvaddr + byte_offset);
}

static inline u32 *sim_msg_hdr(struct gk20a *g, u32 byte_offset)
{
        return sim_msg_bfr(g, byte_offset); /* starts at 0 */
}

static inline u32 *sim_msg_param(struct gk20a *g, u32 byte_offset)
{
        /* starts after msg header/cmn */
        return sim_msg_bfr(g, byte_offset + sim_msg_header_size());
}

static inline void sim_write_hdr(struct gk20a *g, u32 func, u32 size)
{
        /*memset(g->sim.msg_bfr.kvaddr,0,min(PAGE_SIZE,size));*/
        *sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v();
        *sim_msg_hdr(g, sim_msg_result_r())    = sim_msg_result_rpc_pending_v();
        *sim_msg_hdr(g, sim_msg_spare_r())     = sim_msg_spare__init_v();
        *sim_msg_hdr(g, sim_msg_function_r())  = func;
        *sim_msg_hdr(g, sim_msg_length_r())    = size + sim_msg_header_size();
}

static inline u32 sim_escape_read_hdr_size(void)
{
        return 12; /* TBD: fix NV_VGPU_SIM_ESCAPE_READ_HEADER */
}

static u32 *sim_send_ring_bfr(struct gk20a *g, u32 byte_offset)
{
        return (u32 *)(g->sim.send_bfr.kvaddr + byte_offset);
}

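/*
 * Send-ring protocol, as implemented below: each ring entry is a pair of
 * u32s (DMA address lo/hi words) that point the simulator host at the 4KB
 * message page. rpc_send_message() fills the entry at the current put
 * pointer, stamps the message header with a sequence number, advances the
 * put pointer modulo PAGE_SIZE, flushes the CPU dcache so the host sees
 * coherent buffers, and finally writes the put register, which traps into
 * the host.
 */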
static int rpc_send_message(struct gk20a *g)
{
        /* calculations done in units of u32s */
        u32 send_base = sim_send_put_pointer_v(g->sim.send_ring_put) * 2;
        u32 dma_offset = send_base + sim_dma_r()/sizeof(u32);
        u32 dma_hi_offset = send_base + sim_dma_hi_r()/sizeof(u32);

        *sim_send_ring_bfr(g, dma_offset*sizeof(u32)) =
                sim_dma_target_phys_pci_coherent_f() |
                sim_dma_status_valid_f() |
                sim_dma_size_4kb_f() |
                sim_dma_addr_lo_f(g->sim.msg_bfr.phys >> PAGE_SHIFT);

        *sim_send_ring_bfr(g, dma_hi_offset*sizeof(u32)) = 0; /* TBD >32b phys */

        *sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim.sequence_base++;

        g->sim.send_ring_put = (g->sim.send_ring_put + 2 * sizeof(u32)) %
                PAGE_SIZE;

        __cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
        __cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
        __cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);

        /* Update the put pointer. This will trap into the host. */
        sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);

        return 0;
}

static inline u32 *sim_recv_ring_bfr(struct gk20a *g, u32 byte_offset)
{
        return (u32 *)(g->sim.recv_bfr.kvaddr + byte_offset);
}

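/*
 * Reply path: rpc_recv_poll() spins (with no timeout) until the host
 * advances the recv-ring put pointer, then walks each entry, verifies that
 * the reply's DMA address matches the message buffer, and advances the get
 * pointer modulo PAGE_SIZE, writing it back so the host can reuse the slot.
 */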
static int rpc_recv_poll(struct gk20a *g)
{
        phys_addr_t recv_phys_addr;

        /* XXX This read is not required (?) */
        /*pVGpu->recv_ring_get = VGPU_REG_RD32(pGpu, NV_VGPU_RECV_GET);*/

        /* Spin until the host advances the recv ring put pointer */
        do {
                g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
        } while (g->sim.recv_ring_put == g->sim.recv_ring_get);

        /* process all replies */
        while (g->sim.recv_ring_put != g->sim.recv_ring_get) {
                /* these are in u32 offsets */
                u32 dma_lo_offset =
                        sim_recv_put_pointer_v(g->sim.recv_ring_get)*2 + 0;
                /*u32 dma_hi_offset = dma_lo_offset + 1;*/
                u32 recv_phys_addr_lo = sim_dma_addr_lo_v(
                        *sim_recv_ring_bfr(g, dma_lo_offset*4));

                /*u32 recv_phys_addr_hi = sim_dma_hi_addr_v(
                      (phys_addr_t)sim_recv_ring_bfr(g,dma_hi_offset*4));*/

                /* TBD >32b phys addr */
                recv_phys_addr = recv_phys_addr_lo << PAGE_SHIFT;

                if (recv_phys_addr != g->sim.msg_bfr.phys) {
                        dev_err(dev_from_gk20a(g), "%s Error in RPC reply\n",
                                __func__);
                        return -1;
                }

                /* Update GET pointer */
                g->sim.recv_ring_get = (g->sim.recv_ring_get + 2*sizeof(u32)) %
                        PAGE_SIZE;

                __cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
                __cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
                __cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);

                sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);

                g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
        }

        return 0;
}

static int issue_rpc_and_wait(struct gk20a *g)
{
        int err;

        err = rpc_send_message(g);
        if (err) {
                dev_err(dev_from_gk20a(g), "%s failed rpc_send_message\n",
                        __func__);
                return err;
        }

        err = rpc_recv_poll(g);
        if (err) {
                dev_err(dev_from_gk20a(g), "%s failed rpc_recv_poll\n",
                        __func__);
                return err;
        }

        /* Now check if RPC really succeeded */
        if (*sim_msg_hdr(g, sim_msg_result_r()) != sim_msg_result_success_v()) {
                dev_err(dev_from_gk20a(g), "%s received failed status!\n",
                        __func__);
                return -(*sim_msg_hdr(g, sim_msg_result_r()));
        }
        return 0;
}

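/*
 * Escape-read message layout, as built below (offsets are relative to the
 * parameter area): u32 index at 0, u32 count at 4, u32 data_offset at 8,
 * the NUL-terminated path string at 0xc, and the reply data at data_offset,
 * i.e. the end of the string rounded up to u32 alignment.
 */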
int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index, u32 count,
                       u32 *data)
{
        int err;
        size_t pathlen = strlen(path);
        u32 data_offset;

        sim_write_hdr(g, sim_msg_function_sim_escape_read_v(),
                      sim_escape_read_hdr_size());
        *sim_msg_param(g, 0) = index;
        *sim_msg_param(g, 4) = count;
        data_offset = roundup(0xc + pathlen + 1, sizeof(u32));
        *sim_msg_param(g, 8) = data_offset;
        strcpy((char *)sim_msg_param(g, 0xc), path);

        err = issue_rpc_and_wait(g);

        if (!err)
                memcpy(data, sim_msg_param(g, data_offset), count);
        return err;
}

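/*
 * Interrupt scheme: the hard IRQ handlers below only confirm that the GPU
 * actually raised the interrupt (the line may be shared), mask further
 * interrupts via mc_intr_en_*, and return IRQ_WAKE_THREAD; the threaded
 * handlers then dispatch to the pending units and re-enable the hardware
 * interrupt when done.
 */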
static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
{
        struct gk20a *g = dev_id;
        u32 mc_intr_0;

        if (!g->power_on)
                return IRQ_NONE;

        /* not from gpu when sharing irq with others */
        mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
        if (unlikely(!mc_intr_0))
                return IRQ_NONE;

        gk20a_writel(g, mc_intr_en_0_r(),
                mc_intr_en_0_inta_disabled_f());

        /* flush previous write */
        gk20a_readl(g, mc_intr_en_0_r());

        return IRQ_WAKE_THREAD;
}

static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
{
        struct gk20a *g = dev_id;
        u32 mc_intr_1;

        if (!g->power_on)
                return IRQ_NONE;

        /* not from gpu when sharing irq with others */
        mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
        if (unlikely(!mc_intr_1))
                return IRQ_NONE;

        gk20a_writel(g, mc_intr_en_1_r(),
                mc_intr_en_1_inta_disabled_f());

        /* flush previous write */
        gk20a_readl(g, mc_intr_en_1_r());

        return IRQ_WAKE_THREAD;
}

static void gk20a_pbus_isr(struct gk20a *g)
{
        u32 val;
        val = gk20a_readl(g, bus_intr_0_r());
        if (val & (bus_intr_0_pri_squash_m() |
                        bus_intr_0_pri_fecserr_m() |
                        bus_intr_0_pri_timeout_m())) {
                gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
                        gk20a_readl(g, mc_enable_r()));
                gk20a_err(&g->dev->dev,
                        "NV_PTIMER_PRI_TIMEOUT_SAVE_0: 0x%x\n",
                        gk20a_readl(g, timer_pri_timeout_save_0_r()));
                gk20a_err(&g->dev->dev,
                        "NV_PTIMER_PRI_TIMEOUT_SAVE_1: 0x%x\n",
                        gk20a_readl(g, timer_pri_timeout_save_1_r()));
                gk20a_err(&g->dev->dev,
                        "NV_PTIMER_PRI_TIMEOUT_FECS_ERRCODE: 0x%x\n",
                        gk20a_readl(g, timer_pri_timeout_fecs_errcode_r()));
        }

        if (val)
                gk20a_err(&g->dev->dev,
                        "Unhandled pending pbus interrupt\n");

        gk20a_writel(g, bus_intr_0_r(), val);
}

static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
{
        struct gk20a *g = dev_id;
        u32 mc_intr_0;

        gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");

        mc_intr_0 = gk20a_readl(g, mc_intr_0_r());

        gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);

        if (mc_intr_0 & mc_intr_0_pgraph_pending_f())
                gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
        if (mc_intr_0 & mc_intr_0_pfifo_pending_f())
                gk20a_fifo_isr(g);
        if (mc_intr_0 & mc_intr_0_pmu_pending_f())
                gk20a_pmu_isr(g);
        if (mc_intr_0 & mc_intr_0_priv_ring_pending_f())
                gk20a_priv_ring_isr(g);
        if (mc_intr_0 & mc_intr_0_ltc_pending_f())
                gk20a_mm_ltc_isr(g);
        if (mc_intr_0 & mc_intr_0_pbus_pending_f())
                gk20a_pbus_isr(g);

        gk20a_writel(g, mc_intr_en_0_r(),
                mc_intr_en_0_inta_hardware_f());

        /* flush previous write */
        gk20a_readl(g, mc_intr_en_0_r());

        return IRQ_HANDLED;
}

static irqreturn_t gk20a_intr_thread_nonstall(int irq, void *dev_id)
{
        struct gk20a *g = dev_id;
        u32 mc_intr_1;

        gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");

        mc_intr_1 = gk20a_readl(g, mc_intr_1_r());

        gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);

        if (mc_intr_1 & mc_intr_0_pfifo_pending_f())
                gk20a_fifo_nonstall_isr(g);
        if (mc_intr_1 & mc_intr_0_pgraph_pending_f())
                gk20a_gr_nonstall_isr(g);

        gk20a_writel(g, mc_intr_en_1_r(),
                mc_intr_en_1_inta_hardware_f());

        /* flush previous write */
        gk20a_readl(g, mc_intr_en_1_r());

        return IRQ_HANDLED;
}

static void gk20a_remove_support(struct platform_device *dev)
{
        struct gk20a *g = get_gk20a(dev);

        if (g->pmu.remove_support)
                g->pmu.remove_support(&g->pmu);

        if (g->gk20a_cdev.gk20a_cooling_dev)
                thermal_cooling_device_unregister(g->gk20a_cdev.gk20a_cooling_dev);

        if (g->gr.remove_support)
                g->gr.remove_support(&g->gr);

        if (g->fifo.remove_support)
                g->fifo.remove_support(&g->fifo);

        if (g->mm.remove_support)
                g->mm.remove_support(&g->mm);

        if (g->sim.remove_support)
                g->sim.remove_support(&g->sim);

        release_firmware(g->pmu_fw);

        free_irq(g->irq_stall, g);
        free_irq(g->irq_nonstall, g);

        /* free mappings to registers, etc */

        if (g->regs) {
                iounmap(g->regs);
                g->regs = NULL;
        }
        if (g->bar1) {
                iounmap(g->bar1);
                g->bar1 = NULL;
        }
}

static int gk20a_init_support(struct platform_device *dev)
{
        int err = 0;
        struct gk20a *g = get_gk20a(dev);

        g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
                                         &g->reg_mem);
        if (!g->regs) {
                dev_err(dev_from_gk20a(g), "failed to remap gk20a registers\n");
                err = -ENXIO;
                goto fail;
        }

        g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
                                         &g->bar1_mem);
        if (!g->bar1) {
                dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
                err = -ENXIO;
                goto fail;
        }

        /* Get interrupt numbers */
        g->irq_nonstall = platform_get_irq(dev, 1);
        if (g->irq_stall < 0 || g->irq_nonstall < 0) {
                err = -ENXIO;
                goto fail;
        }

        if (tegra_cpu_is_asim()) {
                err = gk20a_init_sim_support(dev);
                if (err)
                        goto fail;
        }

        mutex_init(&g->dbg_sessions_lock);
        mutex_init(&g->client_lock);

        g->remove_support = gk20a_remove_support;
        return 0;

 fail:
        gk20a_remove_support(dev);
        return err;
}

static int gk20a_init_client(struct platform_device *dev)
{
        struct gk20a *g = get_gk20a(dev);
        int err;

        gk20a_dbg_fn("");

#ifndef CONFIG_PM_RUNTIME
        gk20a_pm_finalize_poweron(&dev->dev);
#endif

        err = gk20a_init_mm_setup_sw(g);
        if (err)
                return err;

        if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
                gk20a_scale_hw_init(dev);
        return 0;
}

static void gk20a_deinit_client(struct platform_device *dev)
{
        gk20a_dbg_fn("");
#ifndef CONFIG_PM_RUNTIME
        gk20a_pm_prepare_poweroff(&dev->dev);
#endif
}

int gk20a_get_client(struct gk20a *g)
{
        int err = 0;

        mutex_lock(&g->client_lock);
        if (g->client_refcount == 0)
                err = gk20a_init_client(g->dev);
        if (!err)
                g->client_refcount++;
        mutex_unlock(&g->client_lock);
        return err;
}

void gk20a_put_client(struct gk20a *g)
{
        mutex_lock(&g->client_lock);
        if (g->client_refcount == 1)
                gk20a_deinit_client(g->dev);
        g->client_refcount--;
        mutex_unlock(&g->client_lock);
        WARN_ON(g->client_refcount < 0);
}

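/*
 * Poweroff ordering below: suspend channels first, then silence both IRQ
 * lines so nothing is serviced mid-teardown, then tear down the PMU (which
 * disables ELPG) before gr/mm/fifo are suspended, and finally disable the
 * GPCPLL via the clk support code.
 */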
static int gk20a_pm_prepare_poweroff(struct device *dev)
{
        struct platform_device *pdev = to_platform_device(dev);
        struct gk20a *g = get_gk20a(pdev);
        int ret = 0;

        gk20a_dbg_fn("");

        gk20a_scale_suspend(pdev);

        if (!g->power_on)
                return 0;

        ret = gk20a_channel_suspend(g);
        if (ret)
                return ret;

        /*
         * After this point, gk20a interrupts should not get
         * serviced.
         */
        disable_irq(g->irq_stall);
        disable_irq(g->irq_nonstall);

        /* disable elpg before gr or fifo suspend */
        ret |= gk20a_pmu_destroy(g);
        ret |= gk20a_gr_suspend(g);
        ret |= gk20a_mm_suspend(g);
        ret |= gk20a_fifo_suspend(g);

        /* Disable GPCPLL */
        ret |= gk20a_suspend_clk_support(g);

        g->power_on = false;

        return ret;
}

static void gk20a_detect_chip(struct gk20a *g)
{
        struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;

        u32 mc_boot_0_value = gk20a_readl(g, mc_boot_0_r());
        gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) <<
                NVHOST_GPU_ARCHITECTURE_SHIFT;
        gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value);
        gpu->rev =
                (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) |
                mc_boot_0_minor_revision_v(mc_boot_0_value);

        gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
                        g->gpu_characteristics.arch,
                        g->gpu_characteristics.impl,
                        g->gpu_characteristics.rev);
}

static int gk20a_pm_finalize_poweron(struct device *dev)
{
        struct platform_device *pdev = to_platform_device(dev);
        struct gk20a *g = get_gk20a(pdev);
        int err, nice_value;

        gk20a_dbg_fn("");

        if (g->power_on)
                return 0;

        nice_value = task_nice(current);
        set_user_nice(current, -20);

        enable_irq(g->irq_stall);
        enable_irq(g->irq_nonstall);

        g->power_on = true;

        gk20a_writel(g, mc_intr_mask_1_r(),
                        mc_intr_0_pfifo_pending_f()
                        | mc_intr_0_pgraph_pending_f());
        gk20a_writel(g, mc_intr_en_1_r(),
                mc_intr_en_1_inta_hardware_f());

        gk20a_writel(g, mc_intr_mask_0_r(),
                        mc_intr_0_pgraph_pending_f()
                        | mc_intr_0_pfifo_pending_f()
                        | mc_intr_0_priv_ring_pending_f()
                        | mc_intr_0_ltc_pending_f()
                        | mc_intr_0_pbus_pending_f());
        gk20a_writel(g, mc_intr_en_0_r(),
                mc_intr_en_0_inta_hardware_f());

        if (!tegra_platform_is_silicon())
                gk20a_writel(g, bus_intr_en_0_r(), 0x0);
        else
                gk20a_writel(g, bus_intr_en_0_r(),
                                bus_intr_en_0_pri_squash_m() |
                                bus_intr_en_0_pri_fecserr_m() |
                                bus_intr_en_0_pri_timeout_m());
        gk20a_reset_priv_ring(g);

        gk20a_detect_chip(g);
        err = gpu_init_hal(g);
        if (err)
                goto done;

        /*
         * TBD: move this after graphics init in which blcg/slcg is enabled.
         * This function removes SlowdownOnBoot which applies 32x divider
         * on gpcpll bypass path. The purpose of slowdown is to save power
         * during boot but it also significantly slows down gk20a init on
         * simulation and emulation. We should remove SOB after graphics power
         * saving features (blcg/slcg) are enabled. For now, do it here.
         */
        err = gk20a_init_clk_support(g);
        if (err) {
                gk20a_err(dev, "failed to init gk20a clk");
                goto done;
        }

        /* enable pri timeout only on silicon */
        if (tegra_platform_is_silicon()) {
                gk20a_writel(g,
                        timer_pri_timeout_r(),
                        timer_pri_timeout_period_f(0x186A0) |
                        timer_pri_timeout_en_en_enabled_f());
        } else {
                gk20a_writel(g,
                        timer_pri_timeout_r(),
                        timer_pri_timeout_period_f(0x186A0) |
                        timer_pri_timeout_en_en_disabled_f());
        }

        err = gk20a_init_fifo_reset_enable_hw(g);
        if (err) {
                gk20a_err(dev, "failed to reset gk20a fifo");
                goto done;
        }

        err = gk20a_init_mm_support(g);
        if (err) {
                gk20a_err(dev, "failed to init gk20a mm");
                goto done;
        }

        err = gk20a_init_pmu_support(g);
        if (err) {
                gk20a_err(dev, "failed to init gk20a pmu");
                goto done;
        }

        err = gk20a_init_fifo_support(g);
        if (err) {
                gk20a_err(dev, "failed to init gk20a fifo");
                goto done;
        }

        err = gk20a_init_gr_support(g);
        if (err) {
                gk20a_err(dev, "failed to init gk20a gr");
                goto done;
        }

        err = gk20a_init_therm_support(g);
        if (err) {
                gk20a_err(dev, "failed to init gk20a therm");
                goto done;
        }

        err = gk20a_init_gpu_characteristics(g);
        if (err) {
                gk20a_err(dev, "failed to init gk20a gpu characteristics");
                goto done;
        }

        wait_event(g->pmu.boot_wq, g->pmu.pmu_state == PMU_STATE_STARTED);

        gk20a_channel_resume(g);
        set_user_nice(current, nice_value);

        gk20a_scale_resume(pdev);

#ifdef CONFIG_INPUT_CFBOOST
        if (!g->boost_added) {
                gk20a_dbg_info("add touch boost");
                cfb_add_device(dev);
                g->boost_added = true;
        }
#endif
done:
        return err;
}

static struct of_device_id tegra_gk20a_of_match[] = {
#ifdef CONFIG_TEGRA_GK20A
        { .compatible = "nvidia,tegra124-gk20a",
                .data = &gk20a_tegra_platform },
#endif
        { .compatible = "nvidia,generic-gk20a",
                .data = &gk20a_generic_platform },
        { },
};

int tegra_gpu_get_max_state(struct thermal_cooling_device *cdev,
                unsigned long *max_state)
{
        struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata;

        *max_state = gk20a_gpufreq_device->gk20a_freq_table_size - 1;
        return 0;
}

int tegra_gpu_get_cur_state(struct thermal_cooling_device *cdev,
                unsigned long *cur_state)
{
        struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata;

        *cur_state = gk20a_gpufreq_device->gk20a_freq_state;
        return 0;
}

int tegra_gpu_set_cur_state(struct thermal_cooling_device *c_dev,
                unsigned long cur_state)
{
        u32 target_freq;
        struct gk20a *g;
        struct gpufreq_table_data *gpu_cooling_table;
        struct cooling_device_gk20a *gk20a_gpufreq_device = c_dev->devdata;

        BUG_ON(cur_state >= gk20a_gpufreq_device->gk20a_freq_table_size);

        g = container_of(gk20a_gpufreq_device, struct gk20a, gk20a_cdev);

        gpu_cooling_table = tegra_gpufreq_table_get();
        target_freq = gpu_cooling_table[cur_state].frequency;

        /* ensure a query for state will get the proper value */
        gk20a_gpufreq_device->gk20a_freq_state = cur_state;

        gk20a_clk_set_rate(g, target_freq);

        return 0;
}

static struct thermal_cooling_device_ops tegra_gpu_cooling_ops = {
        .get_max_state = tegra_gpu_get_max_state,
        .get_cur_state = tegra_gpu_get_cur_state,
        .set_cur_state = tegra_gpu_set_cur_state,
};

static int gk20a_create_device(
        struct platform_device *pdev, int devno, const char *cdev_name,
        struct cdev *cdev, struct device **out,
        const struct file_operations *ops)
{
        struct device *dev;
        int err;
        struct gk20a *g = get_gk20a(pdev);

        gk20a_dbg_fn("");

        cdev_init(cdev, ops);
        cdev->owner = THIS_MODULE;

        err = cdev_add(cdev, devno, 1);
        if (err) {
                dev_err(&pdev->dev,
                        "failed to add %s cdev\n", cdev_name);
                return err;
        }

        dev = device_create(g->class, NULL, devno, NULL,
                (pdev->id <= 0) ? INTERFACE_NAME : INTERFACE_NAME ".%d",
                cdev_name, pdev->id);

        if (IS_ERR(dev)) {
                err = PTR_ERR(dev);
                cdev_del(cdev);
                dev_err(&pdev->dev,
                        "failed to create %s device for %s\n",
                        cdev_name, pdev->name);
                return err;
        }

        *out = dev;
        return 0;
}

static void gk20a_user_deinit(struct platform_device *dev)
{
        struct gk20a *g = get_gk20a(dev);

        if (g->channel.node) {
                device_destroy(g->class, g->channel.cdev.dev);
                cdev_del(&g->channel.cdev);
        }

        if (g->as.node) {
                device_destroy(g->class, g->as.cdev.dev);
                cdev_del(&g->as.cdev);
        }

        if (g->ctrl.node) {
                device_destroy(g->class, g->ctrl.cdev.dev);
                cdev_del(&g->ctrl.cdev);
        }

        if (g->dbg.node) {
                device_destroy(g->class, g->dbg.cdev.dev);
                cdev_del(&g->dbg.cdev);
        }

        if (g->prof.node) {
                device_destroy(g->class, g->prof.cdev.dev);
                cdev_del(&g->prof.cdev);
        }

        if (g->cdev_region)
                unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS);

        if (g->class)
                class_destroy(g->class);
}

static int gk20a_user_init(struct platform_device *dev)
{
        int err;
        dev_t devno;
        struct gk20a *g = get_gk20a(dev);

        g->class = class_create(THIS_MODULE, CLASS_NAME);
        if (IS_ERR(g->class)) {
                err = PTR_ERR(g->class);
                g->class = NULL;
                dev_err(&dev->dev,
                        "failed to create " CLASS_NAME " class\n");
                goto fail;
        }

        err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, CLASS_NAME);
        if (err) {
                dev_err(&dev->dev, "failed to allocate devno\n");
                goto fail;
        }
        g->cdev_region = devno;

        err = gk20a_create_device(dev, devno++, "",
                                  &g->channel.cdev, &g->channel.node,
                                  &gk20a_channel_ops);
        if (err)
                goto fail;

        err = gk20a_create_device(dev, devno++, "-as",
                                  &g->as.cdev, &g->as.node,
                                  &gk20a_as_ops);
        if (err)
                goto fail;

        err = gk20a_create_device(dev, devno++, "-ctrl",
                                  &g->ctrl.cdev, &g->ctrl.node,
                                  &gk20a_ctrl_ops);
        if (err)
                goto fail;

        err = gk20a_create_device(dev, devno++, "-dbg",
                                  &g->dbg.cdev, &g->dbg.node,
                                  &gk20a_dbg_ops);
        if (err)
                goto fail;

        err = gk20a_create_device(dev, devno++, "-prof",
                                  &g->prof.cdev, &g->prof.node,
                                  &gk20a_prof_ops);
        if (err)
                goto fail;

        return 0;
fail:
        gk20a_user_deinit(dev);
        return err;
}

struct channel_gk20a *gk20a_get_channel_from_file(int fd)
{
        struct channel_gk20a *ch;
        struct file *f = fget(fd);
        if (!f)
                return NULL;

        if (f->f_op != &gk20a_channel_ops) {
                fput(f);
                return NULL;
        }

        ch = (struct channel_gk20a *)f->private_data;
        fput(f);
        return ch;
}

static int gk20a_pm_enable_clk(struct device *dev)
{
        int index = 0;
        struct gk20a_platform *platform;

        platform = dev_get_drvdata(dev);
        if (!platform)
                return -EINVAL;

        for (index = 0; index < platform->num_clks; index++) {
                int err = clk_prepare_enable(platform->clk[index]);
                if (err)
                        return -EINVAL;
        }

        return 0;
}

static int gk20a_pm_disable_clk(struct device *dev)
{
        int index = 0;
        struct gk20a_platform *platform;

        platform = dev_get_drvdata(dev);
        if (!platform)
                return -EINVAL;

        for (index = 0; index < platform->num_clks; index++)
                clk_disable_unprepare(platform->clk[index]);

        return 0;
}

static void gk20a_pm_shutdown(struct platform_device *pdev)
{
        dev_info(&pdev->dev, "shutting down");
        __pm_runtime_disable(&pdev->dev, false);
}

#ifdef CONFIG_PM
const struct dev_pm_ops gk20a_pm_ops = {
#if defined(CONFIG_PM_RUNTIME) && !defined(CONFIG_PM_GENERIC_DOMAINS)
        .runtime_resume = gk20a_pm_enable_clk,
        .runtime_suspend = gk20a_pm_disable_clk,
#endif
};
#endif

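/*
 * Railgate callbacks for the generic power domain set up in
 * gk20a_pm_initialise_domain(). Note that unrailgate takes railgate_lock,
 * so it cannot race with gk20a_do_idle(), which holds the same lock while
 * forcing the rail off.
 */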
static int gk20a_pm_railgate(struct generic_pm_domain *domain)
{
        struct gk20a *g = container_of(domain, struct gk20a, pd);
        struct gk20a_platform *platform = platform_get_drvdata(g->dev);
        int ret = 0;

        if (platform->railgate)
                ret = platform->railgate(platform->g->dev);

        return ret;
}

static int gk20a_pm_unrailgate(struct generic_pm_domain *domain)
{
        struct gk20a *g = container_of(domain, struct gk20a, pd);
        struct gk20a_platform *platform = platform_get_drvdata(g->dev);
        int ret = 0;

        if (platform->unrailgate) {
                mutex_lock(&platform->railgate_lock);
                ret = platform->unrailgate(platform->g->dev);
                mutex_unlock(&platform->railgate_lock);
        }

        return ret;
}

static int gk20a_pm_suspend(struct device *dev)
{
        struct gk20a_platform *platform = dev_get_drvdata(dev);
        int ret = 0;

        if (atomic_read(&dev->power.usage_count) > 1)
                return -EBUSY;

        ret = gk20a_pm_prepare_poweroff(dev);
        if (ret)
                return ret;

        if (platform->suspend)
                platform->suspend(dev);

        return 0;
}

static int gk20a_pm_resume(struct device *dev)
{
        return gk20a_pm_finalize_poweron(dev);
}

static int gk20a_pm_initialise_domain(struct platform_device *pdev)
{
        struct gk20a_platform *platform = platform_get_drvdata(pdev);
        struct dev_power_governor *pm_domain_gov = NULL;
        struct generic_pm_domain *domain = &platform->g->pd;
        int ret = 0;

        domain->name = "gpu";

        if (!platform->can_railgate)
                pm_domain_gov = &pm_domain_always_on_gov;

        pm_genpd_init(domain, pm_domain_gov, true);

        domain->power_off = gk20a_pm_railgate;
        domain->power_on = gk20a_pm_unrailgate;
        domain->dev_ops.start = gk20a_pm_enable_clk;
        domain->dev_ops.stop = gk20a_pm_disable_clk;
        domain->dev_ops.save_state = gk20a_pm_prepare_poweroff;
        domain->dev_ops.restore_state = gk20a_pm_finalize_poweron;
        domain->dev_ops.suspend = gk20a_pm_suspend;
        domain->dev_ops.resume = gk20a_pm_resume;

        device_set_wakeup_capable(&pdev->dev, 0);
        ret = pm_genpd_add_device(domain, &pdev->dev);

        if (platform->railgate_delay)
                pm_genpd_set_poweroff_delay(domain, platform->railgate_delay);

        return ret;
}

static int gk20a_pm_init(struct platform_device *dev)
{
        struct gk20a_platform *platform = platform_get_drvdata(dev);
        int err = 0;

        mutex_init(&platform->railgate_lock);

        /* Initialise pm runtime */
        if (platform->clockgate_delay) {
                pm_runtime_set_autosuspend_delay(&dev->dev,
                                                 platform->clockgate_delay);
                pm_runtime_use_autosuspend(&dev->dev);
        }

        pm_runtime_enable(&dev->dev);
        if (!pm_runtime_enabled(&dev->dev))
                gk20a_pm_enable_clk(&dev->dev);

        /*
         * Enable runtime railgating if possible. If not,
         * turn on the rail now.
         */
        if (platform->can_railgate && IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
                platform->railgate(dev);
        else
                platform->unrailgate(dev);

        /* genpd will take care of runtime power management if it is enabled */
        if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
                err = gk20a_pm_initialise_domain(dev);

        return err;
}

int gk20a_secure_page_alloc(struct platform_device *pdev)
{
        struct gk20a_platform *platform = platform_get_drvdata(pdev);
        int err = 0;

        if (platform->secure_page_alloc) {
                tegra_periph_reset_assert(platform->clk[0]);
                udelay(10);
                err = platform->secure_page_alloc(pdev);
                tegra_periph_reset_deassert(platform->clk[0]);
                if (!err)
                        platform->secure_alloc_ready = true;
        }

        return err;
}

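/*
 * Probe ordering below: character devices and register mappings are set up
 * first, then the platform hook chain runs (probe -> pm init -> optional
 * devfreq scaling -> late_probe -> secure page allocation), and the
 * sysfs/debugfs knobs are wired up last, before gr initialization.
 */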
static int gk20a_probe(struct platform_device *dev)
{
        struct gk20a *gk20a;
        int err;
        struct gk20a_platform *platform = NULL;
        struct cooling_device_gk20a *gpu_cdev = NULL;

        if (dev->dev.of_node) {
                const struct of_device_id *match;

                match = of_match_device(tegra_gk20a_of_match, &dev->dev);
                if (match)
                        platform = (struct gk20a_platform *)match->data;
        } else {
                platform = (struct gk20a_platform *)dev->dev.platform_data;
        }

        if (!platform) {
                dev_err(&dev->dev, "no platform data\n");
                return -ENODATA;
        }

        gk20a_dbg_fn("");

        platform_set_drvdata(dev, platform);

        gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
        if (!gk20a) {
                dev_err(&dev->dev, "couldn't allocate gk20a support");
                return -ENOMEM;
        }

        set_gk20a(dev, gk20a);
        gk20a->dev = dev;

        gk20a->irq_stall = platform_get_irq(dev, 0);
        gk20a->irq_nonstall = platform_get_irq(dev, 1);
        if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0)
                return -ENXIO;
        err = devm_request_threaded_irq(&dev->dev,
                        gk20a->irq_stall,
                        gk20a_intr_isr_stall,
                        gk20a_intr_thread_stall,
                        0, "gk20a_stall", gk20a);
        if (err) {
                dev_err(&dev->dev,
                        "failed to request stall intr irq @ %d\n",
                                gk20a->irq_stall);
                return err;
        }
        err = devm_request_threaded_irq(&dev->dev,
                        gk20a->irq_nonstall,
                        gk20a_intr_isr_nonstall,
                        gk20a_intr_thread_nonstall,
                        0, "gk20a_nonstall", gk20a);
        if (err) {
                dev_err(&dev->dev,
                        "failed to request non-stall intr irq @ %d\n",
                                gk20a->irq_nonstall);
                return err;
        }
        disable_irq(gk20a->irq_stall);
        disable_irq(gk20a->irq_nonstall);

        err = gk20a_user_init(dev);
        if (err)
                return err;

        gk20a_init_support(dev);

        init_rwsem(&gk20a->busy_lock);

        spin_lock_init(&gk20a->mc_enable_lock);

        /* Initialize the platform interface. */
        err = platform->probe(dev);
        if (err) {
                dev_err(&dev->dev, "platform probe failed");
                return err;
        }

        err = gk20a_pm_init(dev);
        if (err) {
                dev_err(&dev->dev, "pm init failed");
                return err;
        }

        /* Initialise scaling */
        if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
                gk20a_scale_init(dev);

        if (platform->late_probe) {
                err = platform->late_probe(dev);
                if (err) {
                        dev_err(&dev->dev, "late probe failed");
                        return err;
                }
        }

        err = gk20a_secure_page_alloc(dev);
        if (err)
                dev_err(&dev->dev,
                        "failed to allocate secure buffer %d\n", err);

        gk20a_debug_init(dev);

        /* Set DMA parameters to allow larger sgt lists */
        dev->dev.dma_parms = &gk20a->dma_parms;
        dma_set_max_seg_size(&dev->dev, UINT_MAX);

        gpu_cdev = &gk20a->gk20a_cdev;
        gpu_cdev->gk20a_freq_table_size = tegra_gpufreq_table_size_get();
        gpu_cdev->gk20a_freq_state = 0;
        gpu_cdev->g = gk20a;
        gpu_cdev->gk20a_cooling_dev =
                thermal_cooling_device_register("gk20a_cdev", gpu_cdev,
                                                &tegra_gpu_cooling_ops);

        gk20a->gr_idle_timeout_default =
                        CONFIG_GK20A_DEFAULT_TIMEOUT;
        gk20a->timeouts_enabled = true;

        /* Set up initial clock gating settings */
        if (tegra_platform_is_silicon()) {
                gk20a->slcg_enabled = true;
                gk20a->blcg_enabled = true;
                gk20a->elcg_enabled = true;
                gk20a->elpg_enabled = true;
                gk20a->aelpg_enabled = true;
        }

        gk20a_create_sysfs(dev);

#ifdef CONFIG_DEBUG_FS
        clk_gk20a_debugfs_init(dev);

        spin_lock_init(&gk20a->debugfs_lock);
        gk20a->mm.ltc_enabled = true;
        gk20a->mm.ltc_enabled_debug = true;
        gk20a->debugfs_ltc_enabled =
                        debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
                                 platform->debugfs,
                                 &gk20a->mm.ltc_enabled_debug);
        gk20a->debugfs_gr_idle_timeout_default =
                        debugfs_create_u32("gr_idle_timeout_default_us",
                                        S_IRUGO|S_IWUSR, platform->debugfs,
                                         &gk20a->gr_idle_timeout_default);
        gk20a->debugfs_timeouts_enabled =
                        debugfs_create_bool("timeouts_enabled",
                                        S_IRUGO|S_IWUSR,
                                        platform->debugfs,
                                        &gk20a->timeouts_enabled);
        gk20a_pmu_debugfs_init(dev);
#endif
        init_waitqueue_head(&gk20a->pmu.boot_wq);

        gk20a_init_gr(gk20a);

        return 0;
}

static int __exit gk20a_remove(struct platform_device *dev)
{
        struct gk20a *g = get_gk20a(dev);
        gk20a_dbg_fn("");

#ifdef CONFIG_INPUT_CFBOOST
        if (g->boost_added)
                cfb_remove_device(&dev->dev);
#endif

        if (g->remove_support)
                g->remove_support(dev);

        gk20a_user_deinit(dev);

        set_gk20a(dev, NULL);
#ifdef CONFIG_DEBUG_FS
        debugfs_remove(g->debugfs_ltc_enabled);
        debugfs_remove(g->debugfs_gr_idle_timeout_default);
        debugfs_remove(g->debugfs_timeouts_enabled);
#endif

        kfree(g);

#ifdef CONFIG_PM_RUNTIME
        pm_runtime_put(&dev->dev);
        pm_runtime_disable(&dev->dev);
#else
        nvhost_module_disable_clk(&dev->dev);
#endif

        return 0;
}

static struct platform_driver gk20a_driver = {
        .probe = gk20a_probe,
        .remove = __exit_p(gk20a_remove),
        .shutdown = gk20a_pm_shutdown,
        .driver = {
                .owner = THIS_MODULE,
                .name = "gk20a",
#ifdef CONFIG_OF
                .of_match_table = tegra_gk20a_of_match,
#endif
#ifdef CONFIG_PM
                .pm = &gk20a_pm_ops,
#endif
        }
};

static int __init gk20a_init(void)
{
        return platform_driver_register(&gk20a_driver);
}

static void __exit gk20a_exit(void)
{
        platform_driver_unregister(&gk20a_driver);
}

bool is_gk20a_module(struct platform_device *dev)
{
        return &gk20a_driver.driver == dev->dev.driver;
}

void gk20a_busy_noresume(struct platform_device *pdev)
{
        pm_runtime_get_noresume(&pdev->dev);
}

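/*
 * gk20a_busy()/gk20a_idle() bracket any access to the GPU: busy takes a
 * runtime PM reference (resuming the GPU if needed) and notifies devfreq,
 * idle drops the reference and lets autosuspend rail the GPU back down.
 * A minimal sketch of the expected calling pattern:
 *
 *      err = gk20a_busy(pdev);
 *      if (err)
 *              return err;
 *      ... touch GPU registers ...
 *      gk20a_idle(pdev);
 */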
int gk20a_busy(struct platform_device *pdev)
{
        int ret = 0;
        struct gk20a *g = get_gk20a(pdev);

        down_read(&g->busy_lock);

#ifdef CONFIG_PM_RUNTIME
        ret = pm_runtime_get_sync(&pdev->dev);
        if (ret < 0)
                pm_runtime_put_noidle(&pdev->dev);
#endif
        gk20a_scale_notify_busy(pdev);

        up_read(&g->busy_lock);

        return ret < 0 ? ret : 0;
}

void gk20a_idle(struct platform_device *pdev)
{
#ifdef CONFIG_PM_RUNTIME
        if (atomic_read(&pdev->dev.power.usage_count) == 1)
                gk20a_scale_notify_idle(pdev);
        pm_runtime_mark_last_busy(&pdev->dev);
        pm_runtime_put_sync_autosuspend(&pdev->dev);
#else
        gk20a_scale_notify_idle(pdev);
#endif
}

void gk20a_disable(struct gk20a *g, u32 units)
{
        u32 pmc;

        gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units);

        spin_lock(&g->mc_enable_lock);
        pmc = gk20a_readl(g, mc_enable_r());
        pmc &= ~units;
        gk20a_writel(g, mc_enable_r(), pmc);
        spin_unlock(&g->mc_enable_lock);
}

void gk20a_enable(struct gk20a *g, u32 units)
{
        u32 pmc;

        gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units);

        spin_lock(&g->mc_enable_lock);
        pmc = gk20a_readl(g, mc_enable_r());
        pmc |= units;
        gk20a_writel(g, mc_enable_r(), pmc);
        gk20a_readl(g, mc_enable_r());
        spin_unlock(&g->mc_enable_lock);

        udelay(20);
}

void gk20a_reset(struct gk20a *g, u32 units)
{
        gk20a_disable(g, units);
        udelay(20);
        gk20a_enable(g, units);
}

/**
 * gk20a_do_idle() - force the GPU to idle and railgate
 *
 * On success, this call MUST be balanced by the caller with gk20a_do_unidle()
 */
int gk20a_do_idle(void)
{
        struct platform_device *pdev = to_platform_device(
                bus_find_device_by_name(&platform_bus_type,
                NULL, "gk20a.0"));
        struct gk20a *g = get_gk20a(pdev);
        struct gk20a_platform *platform = dev_get_drvdata(&pdev->dev);
        unsigned long timeout = jiffies +
                msecs_to_jiffies(GK20A_WAIT_FOR_IDLE_MS);
        int ref_cnt;
        bool is_railgated;

        if (!platform->can_railgate)
                return -ENOSYS;

        /* acquire busy lock to block other busy() calls */
        down_write(&g->busy_lock);

        /* acquire railgate lock to prevent unrailgate in midst of do_idle() */
        mutex_lock(&platform->railgate_lock);

        /* check if it is already railgated */
        if (platform->is_railgated(pdev))
                return 0;

        /* prevent suspend by incrementing usage counter */
        pm_runtime_get_noresume(&pdev->dev);

        /* check and wait until GPU is idle (with a timeout) */
        pm_runtime_barrier(&pdev->dev);

        do {
                msleep(1);
                ref_cnt = atomic_read(&pdev->dev.power.usage_count);
        } while (ref_cnt != 1 && time_before(jiffies, timeout));

        if (ref_cnt != 1)
                goto fail;

        /*
         * if GPU is now idle, we will have only one ref count
         * drop this ref which will rail gate the GPU
         */
        pm_runtime_put_sync(&pdev->dev);

        /* add sufficient delay to allow GPU to rail gate */
        msleep(platform->railgate_delay);

        timeout = jiffies + msecs_to_jiffies(GK20A_WAIT_FOR_IDLE_MS);

        /* check in loop if GPU is railgated or not */
        do {
                msleep(1);
                is_railgated = platform->is_railgated(pdev);
        } while (!is_railgated && time_before(jiffies, timeout));

        if (is_railgated)
                return 0;
        else
                goto fail_timeout;

fail:
        pm_runtime_put_noidle(&pdev->dev);
fail_timeout:
        mutex_unlock(&platform->railgate_lock);
        up_write(&g->busy_lock);
        return -EBUSY;
}

/**
 * gk20a_do_unidle() - unblock all the tasks blocked by gk20a_do_idle()
 */
int gk20a_do_unidle(void)
{
        struct platform_device *pdev = to_platform_device(
                bus_find_device_by_name(&platform_bus_type,
                NULL, "gk20a.0"));
        struct gk20a *g = get_gk20a(pdev);
        struct gk20a_platform *platform = dev_get_drvdata(&pdev->dev);

        /* release the lock and open up all other busy() calls */
        mutex_unlock(&platform->railgate_lock);
        up_write(&g->busy_lock);

        return 0;
}

int gk20a_init_gpu_characteristics(struct gk20a *g)
{
        struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;

        gpu->L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
        gpu->on_board_video_memory_size = 0; /* integrated GPU */

        gpu->num_gpc = g->gr.gpc_count;
        gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;

        gpu->bus_type = NVHOST_GPU_BUS_TYPE_AXI; /* always AXI for now */

        gpu->big_page_size = g->mm.big_page_size;
        gpu->compression_page_size = g->mm.compression_page_size;
        gpu->pde_coverage_bit_count = g->mm.pde_stride_shift;
        gpu->reserved = 0;

        return 0;
}

static const struct firmware *
do_request_firmware(struct device *dev, const char *prefix, const char *fw_name)
{
        const struct firmware *fw;
        char *fw_path = NULL;
        int path_len, err;

        if (prefix) {
                path_len = strlen(prefix) + strlen(fw_name);
                path_len += 2; /* for the path separator and zero terminator */

                fw_path = kzalloc(sizeof(*fw_path) * path_len, GFP_KERNEL);
                if (!fw_path)
                        return NULL;

                sprintf(fw_path, "%s/%s", prefix, fw_name);
                fw_name = fw_path;
        }

        err = request_firmware(&fw, fw_name, dev);
        kfree(fw_path);
        if (err)
                return NULL;
        return fw;
}

/*
 * This is a simple wrapper around request_firmware that takes 'fw_name' and
 * applies an IP specific relative path prefix to it. The caller is
 * responsible for calling release_firmware later.
 */
const struct firmware *
gk20a_request_firmware(struct gk20a *g, const char *fw_name)
{
        struct device *dev = &g->dev->dev;
        const struct firmware *fw;

        /*
         * current->fs is NULL when calling from SYS_EXIT.
         * Add a check here to prevent crash in request_firmware.
         */
        if (!current->fs || !fw_name)
                return NULL;

        BUG_ON(!g->ops.name);
        fw = do_request_firmware(dev, g->ops.name, fw_name);

#ifdef CONFIG_TEGRA_GK20A
        /* TO BE REMOVED - Support loading from legacy SOC specific path. */
        if (!fw)
                fw = nvhost_client_request_firmware(g->dev, fw_name);
#endif

        if (!fw) {
                dev_err(dev, "failed to get firmware\n");
                return NULL;
        }

        return fw;
}

MODULE_LICENSE("GPL v2");
module_init(gk20a_init);
module_exit(gk20a_exit);