2 * drivers/video/tegra/host/gk20a/platform_gk20a_tegra.c
4 * GK20A Tegra Platform Interface
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 #include <linux/debugfs.h>
19 #include <linux/tegra-powergate.h>
20 #include <linux/platform_data/tegra_edp.h>
21 #include <linux/nvhost_ioctl.h>
22 #include <linux/dma-buf.h>
23 #include <linux/nvmap.h>
24 #include <linux/tegra_pm_domains.h>
26 #include <mach/irqs.h>
28 #include "../../../arch/arm/mach-tegra/iomap.h"
31 #include "hal_gk20a.h"
32 #include "platform_gk20a.h"
33 #include "gk20a_scale.h"
35 #define TEGRA_GK20A_INTR INT_GPU
36 #define TEGRA_GK20A_INTR_NONSTALL INT_GPU_NONSTALL
38 #define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
39 #define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
/*
 * tegra_vpr_dev and tegra_vpr_size are defined outside this file; the
 * secure allocators below allocate from tegra_vpr_dev, and the probe
 * routine checks tegra_vpr_size for zero.
 * t132_gk20a_tegra_platform is declared here ahead of its initializer
 * because gk20a_tegra_probe() refers to it before the initializer appears.
 */
41 extern struct device tegra_vpr_dev;
42 extern phys_addr_t tegra_vpr_size;
43 struct gk20a_platform t132_gk20a_tegra_platform;
45 struct gk20a_emc_params {
55 * 20.12 fixed point arithmetic
/*
 * FXFRAC is the number of fractional bits; FX_HALF is one half in that
 * representation and is added before FX_TO_INT() to round to nearest
 * (see gk20a_tegra_get_emc_rate()).
 */
58 static const int FXFRAC = 12;
59 static const int FX_HALF = (1 << 12) / 2;
/* Convert between plain integers and fixed point by shifting by FXFRAC. */
61 #define INT_TO_FX(x) ((x) << FXFRAC)
62 #define FX_TO_INT(x) ((x) >> FXFRAC)
/* Scale between MHz and Hz using integer multiply/divide by 1000000. */
64 #define MHZ_TO_HZ(x) ((x) * 1000000)
65 #define HZ_TO_MHZ(x) ((x) / 1000000)
/*
 * Fixed-point multiply: the product is formed in long long and then
 * shifted right by FXFRAC so the result is back in the same scale.
 */
67 int FXMUL(int x, int y)
69 return ((long long) x * (long long) y) >> FXFRAC;
/*
 * Fixed-point divide of x by y without a 64-bit division.
 * The visible code derives a shift amount `pos` from fls(), returns
 * 0x7FFFFFFF as a saturated value on overflow, and otherwise returns
 * (x << pos) / y.
 * NOTE(review): the statements that compute `t`, clamp `pos` and adjust
 * `y` are not visible in this excerpt - confirm against the full source.
 */
72 int FXDIV(int x, int y)
74 /* long long div operation not supported, must shift manually. This
77 * return (((long long) x) << FXFRAC) / (long long) y;
83 /* find largest allowable right shift to numerator, limit to FXFRAC */
85 pos = 31 - fls(t); /* fls can't be 32 if x != 0 */
91 return 0x7FFFFFFF; /* overflow, return MAX_FIXED */
93 return (x << pos) / y;
/*
 * Release the secure page described by secure_buffer back to
 * tegra_vpr_dev, using the size, iova and attrs stored in the buffer.
 * The iova value is passed both as the CPU-address argument (cast to a
 * pointer) and as the DMA handle.
 */
96 static void gk20a_tegra_secure_page_destroy(struct platform_device *pdev,
97 struct secure_page_buffer *secure_buffer)
99 dma_free_attrs(&tegra_vpr_dev, secure_buffer->size,
100 (void *)(uintptr_t)secure_buffer->iova,
101 secure_buffer->iova, &secure_buffer->attrs);
/*
 * Allocate one PAGE_SIZE secure buffer from tegra_vpr_dev and record it
 * in the platform's secure_buffer, together with the callback that
 * frees it.
 */
104 static int gk20a_tegra_secure_page_alloc(struct platform_device *pdev)
106 struct gk20a_platform *platform = platform_get_drvdata(pdev);
107 struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
108 DEFINE_DMA_ATTRS(attrs);
110 size_t size = PAGE_SIZE;
/* The returned pointer is discarded; success is judged from iova below. */
112 (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
113 DMA_MEMORY_NOMAP, &attrs);
114 if (dma_mapping_error(&tegra_vpr_dev, iova))
/* Remember everything gk20a_tegra_secure_page_destroy() needs later. */
117 secure_buffer->size = size;
118 secure_buffer->iova = iova;
119 secure_buffer->attrs = attrs;
120 secure_buffer->destroy = gk20a_tegra_secure_page_destroy;
/*
 * Tear down a secure context buffer: free its sg_table first, then
 * return the DMA allocation to tegra_vpr_dev using the size, iova and
 * attrs stored in the descriptor.
 */
125 static void gk20a_tegra_secure_destroy(struct platform_device *pdev,
126 struct gr_ctx_buffer_desc *desc)
128 gk20a_free_sgtable(&desc->sgt);
129 dma_free_attrs(&tegra_vpr_dev, desc->size,
130 (void *)(uintptr_t)desc->iova,
131 desc->iova, &desc->attrs);
/*
 * Allocate a secure buffer from tegra_vpr_dev for a graphics context
 * descriptor and describe it with a single-entry sg_table.
 * Refuses to run until platform->secure_alloc_ready is set.
 */
134 static int gk20a_tegra_secure_alloc(struct platform_device *pdev,
135 struct gr_ctx_buffer_desc *desc,
138 struct gk20a_platform *platform = platform_get_drvdata(pdev);
139 struct device *dev = &pdev->dev;
140 DEFINE_DMA_ATTRS(attrs);
142 struct sg_table *sgt;
146 if (!platform->secure_alloc_ready)
/* The returned pointer is discarded; success is judged from iova below. */
149 (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
150 DMA_MEMORY_NOMAP, &attrs);
151 if (dma_mapping_error(&tegra_vpr_dev, iova))
157 desc->destroy = gk20a_tegra_secure_destroy;
159 sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
161 gk20a_err(dev, "failed to allocate memory\n");
164 err = sg_alloc_table(sgt, 1, GFP_KERNEL);
166 gk20a_err(dev, "failed to allocate sg_table\n");
/* One scatterlist entry covering the whole allocation. */
169 page = phys_to_page(iova);
170 sg_set_page(sgt->sgl, page, size, 0);
171 sg_dma_address(sgt->sgl) = iova;
/*
 * Failure path: free with the same arguments gk20a_tegra_secure_destroy()
 * uses. The CPU-address argument is the iova value itself, not the
 * address of the desc->iova field.
 */
180 dma_free_attrs(&tegra_vpr_dev, desc->size,
181 (void *)(uintptr_t)desc->iova,
182 desc->iova, &desc->attrs);
187 * gk20a_tegra_get_emc_rate()
189 * This function returns the minimum emc clock based on gpu frequency
/*
 * Map a GPU frequency to an EMC rate using the calibrated parameters:
 * a linear term (emc_slope, emc_offset) minus a quadratic dip centred
 * on emc_xmid (emc_dip_slope, emc_dip_offset).
 */
192 long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq)
/* Work in MHz, fixed point. */
196 freq = INT_TO_FX(HZ_TO_MHZ(freq));
197 hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
/* Subtract dip = dip_slope * (freq - xmid)^2 + dip_offset. */
199 hz -= FXMUL(emc_params->emc_dip_slope,
200 FXMUL(freq - emc_params->emc_xmid,
201 freq - emc_params->emc_xmid)) +
202 emc_params->emc_dip_offset;
204 hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */
/* Clamp negative results to zero. */
205 hz = (hz < 0) ? 0 : hz;
211 * gk20a_tegra_postscale(profile, freq)
213 * This function sets emc frequency based on current gpu frequency
/*
 * After a GPU frequency change: read the current GPU clock rate, derive
 * the matching EMC target from the profile's calibration data, and
 * request that rate on platform->clk[2].
 * NOTE(review): index 2 is presumably the EMC entry of
 * tegra_gk20a_clocks[]; only the first table entry is visible here -
 * confirm.
 */
216 static void gk20a_tegra_postscale(struct platform_device *pdev,
219 struct gk20a_platform *platform = platform_get_drvdata(pdev);
220 struct gk20a_scale_profile *profile = platform->g->scale_profile;
221 struct gk20a_emc_params *emc_params = profile->private_data;
222 struct gk20a *g = get_gk20a(pdev);
224 long after = gk20a_clk_get_rate(g);
225 long emc_target = gk20a_tegra_get_emc_rate(emc_params, after);
227 clk_set_rate(platform->clk[2], emc_target);
231 * gk20a_tegra_prescale(pdev)
233 * This function informs EDP about changed constraints.
/*
 * Before a scaling decision: sample the normalized PMU load and report
 * it, together with the current GPU clock rate, to EDP.
 */
236 static void gk20a_tegra_prescale(struct platform_device *pdev)
238 struct gk20a *g = get_gk20a(pdev);
241 gk20a_pmu_load_norm(g, &avg);
242 tegra_edp_notify_gpu_load(avg, gk20a_clk_get_rate(g));
246 * gk20a_tegra_calibrate_emc()
248 * Compute emc scaling parameters
250 * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
254 * Rm - 3d.cbus 'middle' rate = (max + min)/2
258 * Od - emc_dip_offset
260 * this superposes a quadratic dip centered around the middle 3d
261 * frequency over a linear correlation of 3d.emc to 3d clock
264 * S, O are chosen so that the maximum 3d rate produces the
265 * maximum 3d.emc rate exactly, and the minimum 3d rate produces
266 * at least the minimum 3d.emc rate.
268 * Sd and Od are chosen to produce the largest dip that will
269 * keep 3d.emc frequencies monotonically decreasing with 3d
270 * frequencies. To achieve this, the first derivative of Remc
271 * with respect to R3d should be zero for the minimal 3d rate:
273 * R'emc = S - 2 * Sd * (R3d - Rm)
275 * S = 2 * Sd * (R3d-min - Rm)
276 * = 2 * Sd * (R3d-min - R3d-max) / 2
278 * +------------------------------+
279 * | Sd = S / (R3d-min - R3d-max) |
280 * +------------------------------+
282 * dip = Sd * (R3d - Rm)^2 + Od
284 * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
286 * Sd * (R3d-min - Rm)^2 + Od = 0
287 * Od = -Sd * ((R3d-min - R3d-max) / 2)^2
288 * = -Sd * ((R3d-min - R3d-max)^2) / 4
290 * +------------------------------+
291 * | Od = (emc-max - emc-min) / 4 |
292 * +------------------------------+
/*
 * Fill emc_params from the rate limits of the two clocks.
 * Every limit is obtained with clk_round_rate() at 0 and UINT_MAX and
 * then converted to MHz in fixed point before any arithmetic.
 */
296 void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
297 struct clk *clk_3d, struct clk *clk_3d_emc)
300 unsigned long max_emc;
301 unsigned long min_emc;
302 unsigned long min_rate_3d;
303 unsigned long max_rate_3d;
305 max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
306 max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));
308 min_emc = clk_round_rate(clk_3d_emc, 0);
309 min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
311 max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
312 max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
314 min_rate_3d = clk_round_rate(clk_3d, 0);
315 min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
/* Linear part: slope is the EMC span divided by the 3d span. */
317 emc_params->emc_slope =
318 FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
319 emc_params->emc_offset = max_emc -
320 FXMUL(emc_params->emc_slope, max_rate_3d);
321 /* Guarantee max 3d rate maps to max emc rate */
322 emc_params->emc_offset += max_emc -
323 (FXMUL(emc_params->emc_slope, max_rate_3d) +
324 emc_params->emc_offset);
/* Quadratic dip centred on the midpoint of the 3d rate range. */
326 emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
327 emc_params->emc_dip_slope =
328 -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
329 emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
/*
 * The expression below is the dip evaluated at max_rate_3d; it is
 * subtracted from emc_dip_offset as `correction`.
 * NOTE(review): the line assigning `correction` is not visible in this
 * excerpt - confirm.
 */
331 emc_params->emc_dip_offset +
332 FXMUL(emc_params->emc_dip_slope,
333 FXMUL(max_rate_3d - emc_params->emc_xmid,
334 max_rate_3d - emc_params->emc_xmid));
335 emc_params->emc_dip_offset -= correction;
339 * gk20a_tegra_is_railgated()
341 * Check status of gk20a power rail
/* Returns true when the TEGRA_POWERGATE_GPU partition is not powered. */
344 static bool gk20a_tegra_is_railgated(struct platform_device *pdev)
346 return !tegra_powergate_is_powered(TEGRA_POWERGATE_GPU);
350 * gk20a_tegra_railgate()
352 * Gate (disable) gk20a power rail
/* Power-gates the GPU partition, but only if it is currently powered. */
355 static int gk20a_tegra_railgate(struct platform_device *pdev)
357 if (tegra_powergate_is_powered(TEGRA_POWERGATE_GPU))
358 tegra_powergate_partition(TEGRA_POWERGATE_GPU);
363 * gk20a_tegra_unrailgate()
365 * Ungate (enable) gk20a power rail
/* Un-power-gates the GPU partition and captures the result in `ret`. */
368 static int gk20a_tegra_unrailgate(struct platform_device *pdev)
371 ret = tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
/*
 * Table of clocks looked up by gk20a_tegra_get_clocks(): each entry has
 * a name and a default rate, which is rounded with clk_round_rate()
 * before being applied.
 */
377 unsigned long default_rate;
378 } tegra_gk20a_clocks[] = {
379 {"PLLG_ref", UINT_MAX},
384 * gk20a_tegra_get_clocks()
386 * This function finds clocks in tegra platform and populates
387 * the clock information to gk20a platform data.
/*
 * Look up every clock listed in tegra_gk20a_clocks[], set it to its
 * rounded default rate and store it in platform->clk[]. On completion
 * platform->num_clks holds the number of clocks obtained.
 */
390 static int gk20a_tegra_get_clocks(struct platform_device *pdev)
392 struct gk20a_platform *platform = platform_get_drvdata(pdev);
/*
 * Device name used as the clock lookup key: "tegra_<name>" or
 * "tegra_<name>.<id>". It must not end in a newline, or the lookup
 * would not match.
 */
397 snprintf(devname, sizeof(devname),
398 (pdev->id <= 0) ? "tegra_%s" : "tegra_%s.%d",
399 pdev->name, pdev->id);
401 platform->num_clks = 0;
402 for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
403 long rate = tegra_gk20a_clocks[i].default_rate;
406 c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
/* Let the clock framework pick the nearest supported rate. */
411 rate = clk_round_rate(c, rate);
412 clk_set_rate(c, rate);
413 platform->clk[i] = c;
415 platform->num_clks = i;
/* Error unwind: drop the clock references taken so far. */
422 clk_put(platform->clk[i]);
/*
 * Allocate the EMC scaling parameters, calibrate them against the GPU
 * clock, and attach them to the scale profile as private_data, where
 * gk20a_tegra_postscale() reads them.
 */
426 static void gk20a_tegra_scale_init(struct platform_device *pdev)
428 struct gk20a_platform *platform = gk20a_get_platform(pdev);
429 struct gk20a_scale_profile *profile = platform->g->scale_profile;
430 struct gk20a_emc_params *emc_params;
435 emc_params = kzalloc(sizeof(*emc_params), GFP_KERNEL);
439 gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g),
442 profile->private_data = emc_params;
/* Forwards a debug-dump request to nvhost for the gk20a's device. */
445 static void gk20a_tegra_debug_dump(struct platform_device *pdev)
447 struct gk20a_platform *platform = gk20a_get_platform(pdev);
448 struct gk20a *g = platform->g;
449 nvhost_debug_dump_device(g->dev);
/*
 * Early platform probe: select the platform description for the chip,
 * disable secure page allocation when no VPR is configured, and look up
 * the clocks.
 */
452 static int gk20a_tegra_probe(struct platform_device *dev)
454 struct gk20a_platform *platform = gk20a_get_platform(dev);
/* On Tegra13, overwrite the platform data with the t132 variant, keeping g. */
456 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA13) {
457 t132_gk20a_tegra_platform.g = platform->g;
458 *platform = t132_gk20a_tegra_platform;
461 if (tegra_vpr_size == 0)
462 platform->secure_page_alloc = NULL;
/*
 * NOTE(review): the return value of gk20a_tegra_get_clocks() is not used
 * here - confirm whether a failed clock lookup should fail the probe.
 */
463 gk20a_tegra_get_clocks(dev);
/*
 * Late platform probe: register the gk20a power domain with nvhost and
 * set up the Tegra-specific EMC scaling data.
 */
468 static int gk20a_tegra_late_probe(struct platform_device *dev)
470 struct gk20a_platform *platform = gk20a_get_platform(dev);
472 /* Make gk20a power domain a subdomain of host1x */
473 nvhost_register_client_domain(&platform->g->pd);
475 /* Initialise tegra specific scaling quirks */
476 gk20a_tegra_scale_init(dev);
/* On suspend, report zero GPU load and zero frequency to EDP. */
481 static int gk20a_tegra_suspend(struct device *dev)
483 tegra_edp_notify_gpu_load(0, 0);
/*
 * Resources of the gk20a platform device, in order: BAR0 and BAR1
 * register apertures, the simulator aperture, then the stalling and
 * non-stalling interrupts.
 */
487 static struct resource gk20a_tegra_resources[] = {
489 .start = TEGRA_GK20A_BAR0_BASE,
490 .end = TEGRA_GK20A_BAR0_BASE + TEGRA_GK20A_BAR0_SIZE - 1,
491 .flags = IORESOURCE_MEM,
494 .start = TEGRA_GK20A_BAR1_BASE,
495 .end = TEGRA_GK20A_BAR1_BASE + TEGRA_GK20A_BAR1_SIZE - 1,
496 .flags = IORESOURCE_MEM,
498 { /* Used on ASIM only */
499 .start = TEGRA_GK20A_SIM_BASE,
500 .end = TEGRA_GK20A_SIM_BASE + TEGRA_GK20A_SIM_SIZE - 1,
501 .flags = IORESOURCE_MEM,
504 .start = TEGRA_GK20A_INTR,
505 .end = TEGRA_GK20A_INTR,
506 .flags = IORESOURCE_IRQ,
509 .start = TEGRA_GK20A_INTR_NONSTALL,
510 .end = TEGRA_GK20A_INTR_NONSTALL,
511 .flags = IORESOURCE_IRQ,
/*
 * Platform description that gk20a_tegra_probe() copies over the probed
 * platform data when the chip id is TEGRA_CHIPID_TEGRA13. It wires the
 * power-management, scaling, secure-allocation and debug callbacks
 * defined in this file.
 */
515 struct gk20a_platform t132_gk20a_tegra_platform = {
516 .has_syncpoints = true,
518 /* power management configuration */
519 .railgate_delay = 500,
520 .clockgate_delay = 50,
521 .can_railgate = true,
523 .probe = gk20a_tegra_probe,
524 .late_probe = gk20a_tegra_late_probe,
526 /* power management callbacks */
527 .suspend = gk20a_tegra_suspend,
528 .railgate = gk20a_tegra_railgate,
529 .unrailgate = gk20a_tegra_unrailgate,
530 .is_railgated = gk20a_tegra_is_railgated,
532 /* frequency scaling configuration */
533 .prescale = gk20a_tegra_prescale,
534 .postscale = gk20a_tegra_postscale,
535 .devfreq_governor = "nvhost_podgov",
536 .qos_id = PM_QOS_GPU_FREQ_MIN,
538 .secure_alloc = gk20a_tegra_secure_alloc,
539 .secure_page_alloc = gk20a_tegra_secure_page_alloc,
540 .dump_platform_dependencies = gk20a_tegra_debug_dump,
/*
 * Default platform description, referenced as platform_data by
 * tegra_gk20a_device. It wires the power-management, scaling,
 * secure-allocation and debug callbacks defined in this file.
 */
543 struct gk20a_platform gk20a_tegra_platform = {
544 .has_syncpoints = true,
546 /* power management configuration */
547 .railgate_delay = 500,
548 .clockgate_delay = 50,
549 .can_railgate = true,
551 .probe = gk20a_tegra_probe,
552 .late_probe = gk20a_tegra_late_probe,
554 /* power management callbacks */
555 .suspend = gk20a_tegra_suspend,
556 .railgate = gk20a_tegra_railgate,
557 .unrailgate = gk20a_tegra_unrailgate,
558 .is_railgated = gk20a_tegra_is_railgated,
560 /* frequency scaling configuration */
561 .prescale = gk20a_tegra_prescale,
562 .postscale = gk20a_tegra_postscale,
563 .devfreq_governor = "nvhost_podgov",
564 .qos_id = PM_QOS_GPU_FREQ_MIN,
566 .secure_alloc = gk20a_tegra_secure_alloc,
567 .secure_page_alloc = gk20a_tegra_secure_page_alloc,
568 .dump_platform_dependencies = gk20a_tegra_debug_dump,
/*
 * The gk20a platform device: it uses the resource table above and
 * carries gk20a_tegra_platform as its platform data.
 */
571 struct platform_device tegra_gk20a_device = {
573 .resource = gk20a_tegra_resources,
574 .num_resources = ARRAY_SIZE(gk20a_tegra_resources),
576 .platform_data = &gk20a_tegra_platform,