2 * arch/arm/mach-tegra/mcerr.c
4 * MC error code common to T3x and T11x. T20 has been left alone.
6 * Copyright (c) 2010-2016, NVIDIA Corporation. All rights reserved.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 #define pr_fmt(fmt) "mc-err: " fmt
25 #include <linux/kernel.h>
26 #include <linux/module.h>
27 #include <linux/interrupt.h>
28 #include <linux/spinlock.h>
29 #include <linux/stat.h>
30 #include <linux/sched.h>
31 #include <linux/debugfs.h>
32 #include <linux/seq_file.h>
33 #include <linux/moduleparam.h>
34 #include <linux/platform_device.h>
35 #include <linux/of_irq.h>
36 #include <linux/atomic.h>
38 #include <linux/platform/tegra/mc.h>
39 #include <linux/platform/tegra/mcerr.h>
40 #include <linux/platform/tegra/tegra_emc_err.h>
/* When true, MC error prints are throttled after a burst of errors. */
42 static bool mcerr_throttle_enabled = true;
45 static int arb_intr_mma_set(const char *arg, const struct kernel_param *kp);
46 static int arb_intr_mma_get(char *buff, const struct kernel_param *kp);
47 static void unthrottle_prints(struct work_struct *work);
/*
 * EMEM arbitration interrupt state: a moving average of the time between
 * interrupts, guarded by its own spinlock (updated in arb_intr()).
 */
49 static struct arb_emem_intr_info arb_intr_info = {
50 .lock = __SPIN_LOCK_UNLOCKED(arb_intr_info.lock),
/* Running count of EMEM arbitration interrupts; exported as module param. */
52 static int arb_intr_count;
/* Locked get/set access for the arb_intr_mma_in_ms module parameter. */
54 static struct kernel_param_ops arb_intr_mma_ops = {
55 .get = arb_intr_mma_get,
56 .set = arb_intr_mma_set,
59 module_param_cb(arb_intr_mma_in_ms, &arb_intr_mma_ops,
60 &arb_intr_info.arb_intr_mma, S_IRUGO | S_IWUSR);
61 module_param(arb_intr_count, int, S_IRUGO | S_IWUSR);
64 * Some platforms report SMMU errors via the SMMU driver.
/* Non-zero when this driver prints SMMU fault details itself (set in init). */
66 static int report_smmu_errs;
/* Human-readable page attribute strings indexed by MC_ERR_SMMU_BITS(). */
67 static const char *const smmu_page_attrib[] = {
79 * Table of known errors and their interrupt signatures.
/*
 * Each entry pairs an exact interrupt-status signature with a message,
 * flags, and the status/address registers to read for that fault.
 * Matched (by exact signature) in mcerr_default_info(); the all-zero
 * entry terminates the table.
 */
81 static const struct mc_error mc_errors[] = {
82 MC_ERR(MC_INT_DECERR_EMEM,
83 "EMEM address decode error",
84 0, MC_ERR_STATUS, MC_ERR_ADR),
85 MC_ERR(MC_INT_DECERR_VPR,
86 "MC request violates VPR requirements",
87 0, MC_ERR_VPR_STATUS, MC_ERR_VPR_ADR),
88 MC_ERR(MC_INT_SECURITY_VIOLATION,
89 "non secure access to secure region",
90 0, MC_ERR_STATUS, MC_ERR_ADR),
91 MC_ERR(MC_INT_DECERR_EMEM | MC_INT_SECURITY_VIOLATION,
92 "non secure access to secure region",
93 0, MC_ERR_STATUS, MC_ERR_ADR),
94 MC_ERR(MC_INT_SECERR_SEC,
95 "MC request violated SEC carveout requirements",
96 0, MC_ERR_SEC_STATUS, MC_ERR_SEC_ADR),
99 * SMMU related faults.
101 MC_ERR(MC_INT_INVALID_SMMU_PAGE,
102 "SMMU address translation fault",
103 E_SMMU, MC_ERR_STATUS, MC_ERR_ADR),
104 MC_ERR(MC_INT_INVALID_SMMU_PAGE | MC_INT_DECERR_EMEM,
105 "EMEM decode error on PDE or PTE entry",
106 E_SMMU, MC_ERR_STATUS, MC_ERR_ADR),
107 MC_ERR(MC_INT_INVALID_SMMU_PAGE | MC_INT_SECERR_SEC,
108 "secure SMMU address translation fault",
109 E_SMMU, MC_ERR_SEC_STATUS, MC_ERR_SEC_ADR),
110 MC_ERR(MC_INT_INVALID_SMMU_PAGE | MC_INT_DECERR_VPR,
111 "VPR SMMU address translation fault",
112 E_SMMU, MC_ERR_VPR_STATUS, MC_ERR_VPR_ADR),
113 MC_ERR(MC_INT_INVALID_SMMU_PAGE | MC_INT_DECERR_VPR |
115 "EMEM decode error on PDE or PTE entry on VPR context",
116 E_SMMU, MC_ERR_VPR_STATUS, MC_ERR_VPR_ADR),
119 * MTS access violation.
121 MC_ERR(MC_INT_DECERR_MTS,
122 "MTS carveout access violation",
123 0, MC_ERR_MTS_STATUS, MC_ERR_MTS_ADR),
126 * Generalized carveouts.
128 MC_ERR(MC_INT_DECERR_GENERALIZED_CARVEOUT,
129 "GSC access violation", 0,
130 MC_ERR_GENERALIZED_CARVEOUT_STATUS,
131 MC_ERR_GENERALIZED_CARVEOUT_ADR),
132 MC_ERR(MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_EMEM,
133 "EMEM GSC access violation", 0,
134 MC_ERR_GENERALIZED_CARVEOUT_STATUS,
135 MC_ERR_GENERALIZED_CARVEOUT_ADR),
138 * Miscellaneous errors.
140 MC_ERR(MC_INT_INVALID_APB_ASID_UPDATE,
141 "invalid APB ASID update", 0,
142 MC_ERR_STATUS, MC_ERR_ADR),
144 /* NULL terminate. */
145 MC_ERR(0, NULL, 0, 0, 0),
/* Errors seen since the last unthrottle; drives print rate limiting. */
148 static atomic_t error_count;
/* Deferred reset of error_count once an error burst subsides. */
150 static DECLARE_DELAYED_WORK(unthrottle_prints_work, unthrottle_prints);
152 static struct dentry *mcerr_debugfs_dir;
157 * Chip specific functions.
/* Defaults installed in tegra_mcerr_init(); chips may override any op. */
159 static struct mcerr_chip_specific chip_specific;
160 static struct mcerr_chip_specific *cs_ops = &chip_specific;
/*
 * Setter for the arb_intr_mma_in_ms module parameter.  The write happens
 * under arb_intr_info.lock so it cannot race with updates in arb_intr().
 */
162 static int arb_intr_mma_set(const char *arg, const struct kernel_param *kp)
167 spin_lock_irqsave(&arb_intr_info.lock, flags);
168 ret = param_set_int(arg, kp);
169 spin_unlock_irqrestore(&arb_intr_info.lock, flags);
/* Getter for arb_intr_mma_in_ms: plain integer read (no lock taken). */
173 static int arb_intr_mma_get(char *buff, const struct kernel_param *kp)
175 return param_get_int(buff, kp);
/*
 * Account one EMEM arbitration interrupt: fold the time since the
 * previous interrupt into a moving average over MMA_HISTORY_SAMPLES
 * samples, all under arb_intr_info.lock.
 */
178 static void arb_intr(void)
184 spin_lock_irqsave(&arb_intr_info.lock, flags);
186 time = sched_clock();
/* >> 20 approximates ns -> ms (divides by 2^20 ~ 1e6). */
187 time_diff_ms = (time - arb_intr_info.time) >> 20;
188 arb_intr_info.time = time;
189 arb_intr_info.arb_intr_mma =
190 ((MMA_HISTORY_SAMPLES - 1) * time_diff_ms +
191 arb_intr_info.arb_intr_mma) / MMA_HISTORY_SAMPLES;
192 spin_unlock_irqrestore(&arb_intr_info.lock, flags);
/* Delayed-work callback: clear the error counter so fault prints resume. */
195 static void unthrottle_prints(struct work_struct *work)
197 atomic_set(&error_count, 0);
/*
 * Update per-client statistics: walk each bit of @stat and account the
 * interrupt types this client triggered (reported via debugfs).
 */
200 static void mcerr_info_update(struct mc_client *c, u32 stat)
204 for (i = 0; i < (sizeof(stat) * 8); i++) {
/*
 * Decode and report one MC fault from the broadcast channel.
 *
 * Reads the masked interrupt status, matches it against mc_errors[],
 * pulls the client/secure/write/address details out of the fault's
 * status registers and hands everything to the chip-specific print
 * routine.  EMEM arbitration interrupts are handled separately and
 * stripped from the status first.
 */
210 static void log_mcerr_fault(unsigned int irq)
212 struct mc_client *client;
213 const struct mc_error *fault;
214 const char *smmu_info;
216 u32 status, write, secure, client_id;
217 int src_chan = MC_BROADCAST_CHANNEL;
218 u32 intstatus = mc_int_mask &
219 __mc_readl(src_chan, MC_INTSTATUS);
222 if (intstatus & MC_INT_ARBITRATION_EMEM) {
224 if (intstatus == MC_INT_ARBITRATION_EMEM)
226 intstatus &= ~MC_INT_ARBITRATION_EMEM;
229 fault = chip_specific.mcerr_info(intstatus & mc_int_mask);
230 if (WARN(!fault, "Unknown error! intr sig: 0x%08x\n",
231 intstatus & mc_int_mask))
/* Some faults latch no status registers; just print the message. */
234 if (fault->flags & E_NO_STATUS) {
235 mcerr_pr("MC fault - no status: %s\n", fault->msg);
239 status = __mc_readl(src_chan, fault->stat_reg);
240 addr = __mc_readl(src_chan, fault->addr_reg);
242 if (fault->flags & E_TWO_STATUS) {
243 mcerr_pr("MC fault - %s\n", fault->msg);
244 mcerr_pr("status: 0x%08x status2: 0x%08llx\n",
249 secure = !!(status & MC_ERR_STATUS_SECURE);
250 write = !!(status & MC_ERR_STATUS_WRITE);
/* Client id lives in the low 8 bits; clamp unknown ids to the last. */
251 client_id = status & 0xff;
252 client = &mc_clients[client_id <= mc_client_last
253 ? client_id : mc_client_last];
255 mcerr_info_update(client, intstatus & mc_int_mask);
258 * LPAE: make sure we get the extra 2 physical address bits available
259 * and pass them down to the printing function.
261 addr |= (((phys_addr_t)(status & MC_ERR_STATUS_ADR_HI)) << 12);
263 if (fault->flags & E_SMMU)
264 smmu_info = smmu_page_attrib[MC_ERR_SMMU_BITS(status)];
268 chip_specific.mcerr_print(fault, client, status, addr, secure, write,
/* Default chip op: mask out all MC interrupts. */
272 static void disable_interrupt(unsigned int irq)
274 mc_writel(0, MC_INTMASK);
/* Default chip op: restore the configured MC interrupt mask. */
277 static void enable_interrupt(unsigned int irq)
279 mc_writel(mc_int_mask, MC_INTMASK);
/*
 * Default chip op: clear pending MC interrupt status.  The constant
 * covers all supported error bits -- presumably write-1-to-clear;
 * NOTE(review): confirm against the MC register spec for the chip.
 */
282 static void clear_interrupt(unsigned int irq)
284 mc_writel(0x00033F40, MC_INTSTATUS);
/*
 * Threaded half of the MC error interrupt: logs the fault and re-enables
 * the interrupt that the hard handler disabled.  With throttling on,
 * once MAX_PRINTS errors accumulate the fault is only acknowledged and
 * prints stay suppressed until unthrottle_prints_work resets the
 * counter (scheduled HZ/2 later).
 */
287 static irqreturn_t tegra_mcerr_thread(int irq, void *data)
291 cancel_delayed_work(&unthrottle_prints_work);
292 count = atomic_inc_return(&error_count);
294 if (mcerr_throttle_enabled && count >= MAX_PRINTS) {
295 schedule_delayed_work(&unthrottle_prints_work, HZ/2);
296 if (count == MAX_PRINTS)
297 mcerr_pr("Too many MC errors; throttling prints\n");
298 cs_ops->clear_interrupt(irq);
302 cs_ops->log_mcerr_fault(irq);
304 cs_ops->enable_interrupt(irq);
310 * The actual error handling takes longer than is ideal so this must be
/* Hard IRQ half: mask MC interrupts and defer the work to the thread. */
313 static irqreturn_t tegra_mcerr_hard_irq(int irq, void *data)
315 trace_printk("MCERR detected.\n");
317 * Disable MC Error interrupt till the MC Error info is logged.
318 * MC Errors can be lost as MC HW holds one MC error at a time.
319 * The first MC Error is good enough to point out potential memory
320 * access issues in SW and allow debugging further.
322 cs_ops->disable_interrupt(irq);
323 return IRQ_WAKE_THREAD;
/*
 * Find the mc_errors[] entry whose signature exactly equals @intr.
 * The caller (log_mcerr_fault) WARNs when the lookup fails, i.e. the
 * interrupt combination is unknown.
 */
326 static const struct mc_error *mcerr_default_info(u32 intr)
328 const struct mc_error *err;
330 for (err = mc_errors; err->sig && err->msg; err++) {
331 if (intr != err->sig)
/*
 * Weak stub so the MC error path links without an SMMU driver; an SMMU
 * implementation may override it to dump the faulting page tables.
 */
339 void __weak smmu_dump_pagetable(int swgid, dma_addr_t addr)
344 * This will print at least 8 hex digits for address. If the address is bigger
345 * then more digits will be printed but the full 16 hex digits for a 64 bit
346 * address will not get printed by the current code.
/*
 * Default fault reporter: dumps the page table for the faulting swgroup,
 * then logs client, status, address and access attributes.  SMMU fault
 * details are included only when report_smmu_errs is set.
 */
348 static void mcerr_default_print(const struct mc_error *err,
349 const struct mc_client *client,
350 u32 status, phys_addr_t addr,
351 int secure, int rw, const char *smmu_info)
354 smmu_dump_pagetable(client->swgid, addr);
356 mcerr_pr("(%d) %s: %s\n", client->swgid, client->name, err->msg);
357 mcerr_pr(" status = 0x%08x; addr = 0x%08llx\n", status,
358 (long long unsigned int)addr);
359 if (report_smmu_errs)
360 mcerr_pr(" secure: %s, access-type: %s, SMMU fault: %s\n",
361 secure ? "yes" : "no", rw ? "write" : "read",
362 smmu_info ? smmu_info : "none");
364 mcerr_pr(" secure: %s, access-type: %s\n",
365 secure ? "yes" : "no", rw ? "write" : "read");
369 * Print the MC err stats for each client.
/*
 * debugfs seq_file show: a table with one column per interrupt type the
 * chip describes and one row per client that has recorded errors.
 */
371 static int mcerr_default_debugfs_show(struct seq_file *s, void *v)
376 seq_printf(s, "%-18s %-18s", "swgroup", "client");
377 for (i = 0; i < (sizeof(u32) * 8); i++) {
378 if (chip_specific.intr_descriptions[i])
379 seq_printf(s, " %-12s",
380 chip_specific.intr_descriptions[i]);
384 for (i = 0; i < chip_specific.nr_clients; i++) {
387 /* Only print clients who actually have errors. */
388 for (j = 0; j < (sizeof(u32) * 8); j++) {
389 if (chip_specific.intr_descriptions[j] &&
390 mc_clients[i].intr_counts[j]) {
397 seq_printf(s, "%-18s %-18s",
399 mc_clients[i].swgroup);
400 for (j = 0; j < (sizeof(u32) * 8); j++) {
401 if (!chip_specific.intr_descriptions[j])
403 seq_printf(s, " %-12u",
404 mc_clients[i].intr_counts[j]);
/* debugfs open: defer to the chip's (possibly overridden) show routine. */
413 static int mcerr_debugfs_open(struct inode *inode, struct file *file)
415 return single_open(file, chip_specific.mcerr_debugfs_show, NULL);
/* File operations for the "mcerr" debugfs statistics node. */
418 static const struct file_operations mcerr_debugfs_fops = {
419 .open = mcerr_debugfs_open,
422 .release = single_release,
/* debugfs read hook for mcerr_throttle: report the current flag. */
425 static int __get_throttle(void *data, u64 *val)
427 *val = mcerr_throttle_enabled;
/* debugfs write hook: reset the error count, then set the throttle flag. */
431 static int __set_throttle(void *data, u64 val)
433 atomic_set(&error_count, 0);
435 mcerr_throttle_enabled = (bool) val;
438 DEFINE_SIMPLE_ATTRIBUTE(mcerr_throttle_debugfs_fops, __get_throttle,
439 __set_throttle, "%llu\n");
442 * This will always be successful. However, if something goes wrong in the
443 * init a message will be printed to the kernel log. Since this is a
444 * non-essential piece of the kernel no reason to fail the entire MC init
/*
 * @mc_parent: debugfs directory under which the "err" node is created.
 * @pdev:      MC platform device; its DT node supplies "int_mask" and
 *             the MC error interrupt.
 *
 * Installs the default chip ops, lets mcerr_chip_specific_setup()
 * override them, programs MC_INTMASK, requests the threaded error IRQ
 * and creates the debugfs nodes.
 */
447 int tegra_mcerr_init(struct dentry *mc_parent, struct platform_device *pdev)
452 chip_specific.mcerr_info = mcerr_default_info;
453 chip_specific.mcerr_print = mcerr_default_print;
454 chip_specific.mcerr_debugfs_show = mcerr_default_debugfs_show;
455 chip_specific.enable_interrupt = enable_interrupt;
456 chip_specific.disable_interrupt = disable_interrupt;
457 chip_specific.clear_interrupt = clear_interrupt;
458 chip_specific.log_mcerr_fault = log_mcerr_fault;
459 chip_specific.nr_clients = 0;
/* T18x reports SMMU faults through the SMMU driver, so stay quiet here. */
461 prop = of_get_property(pdev->dev.of_node,"compatible", NULL);
462 if (prop && strcmp(prop, "nvidia,tegra-t18x-mc") == 0)
463 report_smmu_errs = 0;
465 report_smmu_errs = 1;
468 * mcerr_chip_specific_setup() can override any of the default
469 * functions as it wishes.
471 mcerr_chip_specific_setup(&chip_specific);
472 if (chip_specific.nr_clients == 0 ||
473 chip_specific.intr_descriptions == NULL) {
474 pr_err("Missing necessary chip_specific functionality!\n");
/* The interrupt mask to program comes from the device tree. */
478 prop = of_get_property(pdev->dev.of_node, "int_mask", NULL);
480 pr_err("No int_mask prop for mcerr!\n");
484 mc_int_mask = be32_to_cpup(prop);
485 mc_writel(mc_int_mask, MC_INTMASK);
486 pr_info("Set intmask: 0x%x\n", mc_readl(MC_INTMASK));
488 irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
490 pr_err("Unable to parse/map MC error interrupt\n");
494 if (request_threaded_irq(irq, tegra_mcerr_hard_irq,
495 tegra_mcerr_thread, 0, "mc_status", NULL)) {
496 pr_err("Unable to register MC error interrupt\n");
500 tegra_emcerr_init(mc_parent, pdev);
/* debugfs is best effort; failures only log, init still succeeds. */
505 mcerr_debugfs_dir = debugfs_create_dir("err", mc_parent);
506 if (mcerr_debugfs_dir == NULL) {
507 pr_err("Failed to make debugfs node: %ld\n",
508 PTR_ERR(mcerr_debugfs_dir));
511 debugfs_create_file("mcerr", 0644, mcerr_debugfs_dir, NULL,
512 &mcerr_debugfs_fops);
513 debugfs_create_file("mcerr_throttle", S_IRUGO | S_IWUSR,
514 mcerr_debugfs_dir, NULL,
515 &mcerr_throttle_debugfs_fops);
516 debugfs_create_u32("quiet", 0644, mcerr_debugfs_dir, &mcerr_silenced);