2 * arch/arm/mach-tegra/mcerr.c
4 * MC error code common to T3x and T11x. T20 has been left alone.
6 * Copyright (c) 2010-2016, NVIDIA Corporation. All rights reserved.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 #define pr_fmt(fmt) "mc-err: " fmt
25 #include <linux/kernel.h>
26 #include <linux/module.h>
27 #include <linux/interrupt.h>
28 #include <linux/spinlock.h>
29 #include <linux/stat.h>
30 #include <linux/sched.h>
31 #include <linux/debugfs.h>
32 #include <linux/seq_file.h>
33 #include <linux/moduleparam.h>
34 #include <linux/platform_device.h>
35 #include <linux/of_irq.h>
36 #include <linux/atomic.h>
38 #include <linux/platform/tegra/mc.h>
39 #include <linux/platform/tegra/mcerr.h>
40 #include <linux/platform/tegra/tegra_emc_err.h>
/* When true, MC error prints are throttled after a burst of errors. */
42 static bool mcerr_throttle_enabled = true;
45 static int arb_intr_mma_set(const char *arg, const struct kernel_param *kp);
46 static int arb_intr_mma_get(char *buff, const struct kernel_param *kp);
47 static void unthrottle_prints(struct work_struct *work);
/*
 * EMEM arbitration interrupt state: a moving average of the time between
 * interrupts, guarded by its own spinlock (updated in arb_intr()).
 */
49 static struct arb_emem_intr_info arb_intr_info = {
50 .lock = __SPIN_LOCK_UNLOCKED(arb_intr_info.lock),
/* Running count of EMEM arbitration interrupts; exported as module param. */
52 static int arb_intr_count;
/* Locked get/set access for the arb_intr_mma_in_ms module parameter. */
54 static struct kernel_param_ops arb_intr_mma_ops = {
55 .get = arb_intr_mma_get,
56 .set = arb_intr_mma_set,
59 module_param_cb(arb_intr_mma_in_ms, &arb_intr_mma_ops,
60 &arb_intr_info.arb_intr_mma, S_IRUGO | S_IWUSR);
61 module_param(arb_intr_count, int, S_IRUGO | S_IWUSR);
64 * Some platforms report SMMU errors via the SMMU driver.
/* Non-zero when this driver prints SMMU fault details itself (set in init). */
66 static int report_smmu_errs;
/* Human-readable page attribute strings indexed by MC_ERR_SMMU_BITS(). */
67 static const char *const smmu_page_attrib[] = {
79 * Table of known errors and their interrupt signatures.
/*
 * Each entry pairs an exact interrupt-status signature with a message,
 * flags, and the status/address registers to read for that fault.
 * Matched (by exact signature) in mcerr_default_info(); the all-zero
 * entry terminates the table.
 */
81 static const struct mc_error mc_errors[] = {
82 MC_ERR(MC_INT_DECERR_EMEM,
83 "EMEM address decode error",
84 0, MC_ERR_STATUS, MC_ERR_ADR),
85 MC_ERR(MC_INT_DECERR_VPR,
86 "MC request violates VPR requirements",
87 0, MC_ERR_VPR_STATUS, MC_ERR_VPR_ADR),
88 MC_ERR(MC_INT_SECURITY_VIOLATION,
89 "non secure access to secure region",
90 0, MC_ERR_STATUS, MC_ERR_ADR),
91 MC_ERR(MC_INT_DECERR_EMEM | MC_INT_SECURITY_VIOLATION,
92 "non secure access to secure region",
93 0, MC_ERR_STATUS, MC_ERR_ADR),
94 MC_ERR(MC_INT_SECERR_SEC,
95 "MC request violated SEC carveout requirements",
96 0, MC_ERR_SEC_STATUS, MC_ERR_SEC_ADR),
99 * SMMU related faults.
101 MC_ERR(MC_INT_INVALID_SMMU_PAGE,
102 "SMMU address translation fault",
103 E_SMMU, MC_ERR_STATUS, MC_ERR_ADR),
104 MC_ERR(MC_INT_INVALID_SMMU_PAGE | MC_INT_DECERR_EMEM,
105 "EMEM decode error on PDE or PTE entry",
106 E_SMMU, MC_ERR_STATUS, MC_ERR_ADR),
107 MC_ERR(MC_INT_INVALID_SMMU_PAGE | MC_INT_SECERR_SEC,
108 "secure SMMU address translation fault",
109 E_SMMU, MC_ERR_SEC_STATUS, MC_ERR_SEC_ADR),
110 MC_ERR(MC_INT_INVALID_SMMU_PAGE | MC_INT_DECERR_VPR,
111 "VPR SMMU address translation fault",
112 E_SMMU, MC_ERR_VPR_STATUS, MC_ERR_VPR_ADR),
113 MC_ERR(MC_INT_INVALID_SMMU_PAGE | MC_INT_DECERR_VPR |
115 "EMEM decode error on PDE or PTE entry on VPR context",
116 E_SMMU, MC_ERR_VPR_STATUS, MC_ERR_VPR_ADR),
119 * MTS access violation.
121 MC_ERR(MC_INT_DECERR_MTS,
122 "MTS carveout access violation",
123 0, MC_ERR_MTS_STATUS, MC_ERR_MTS_ADR),
126 * Generalized carveouts.
128 MC_ERR(MC_INT_DECERR_GENERALIZED_CARVEOUT,
129 "GSC access violation", 0,
130 MC_ERR_GENERALIZED_CARVEOUT_STATUS,
131 MC_ERR_GENERALIZED_CARVEOUT_ADR),
132 MC_ERR(MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_EMEM,
133 "EMEM GSC access violation", 0,
134 MC_ERR_GENERALIZED_CARVEOUT_STATUS,
135 MC_ERR_GENERALIZED_CARVEOUT_ADR),
138 * Miscellaneous errors.
140 MC_ERR(MC_INT_INVALID_APB_ASID_UPDATE,
141 "invalid APB ASID update", 0,
142 MC_ERR_STATUS, MC_ERR_ADR),
144 /* NULL terminate. */
145 MC_ERR(0, NULL, 0, 0, 0),
/* Errors seen since the last unthrottle; drives print rate limiting. */
148 static atomic_t error_count;
/* Deferred reset of error_count once an error burst subsides. */
150 static DECLARE_DELAYED_WORK(unthrottle_prints_work, unthrottle_prints);
152 static struct dentry *mcerr_debugfs_dir;
157 * Chip specific functions.
/* Defaults installed in tegra_mcerr_init(); chips may override any op. */
159 static struct mcerr_chip_specific chip_specific;
160 static struct mcerr_chip_specific *cs_ops = &chip_specific;
/*
 * Setter for the arb_intr_mma_in_ms module parameter.  The write happens
 * under arb_intr_info.lock so it cannot race with updates in arb_intr().
 */
162 static int arb_intr_mma_set(const char *arg, const struct kernel_param *kp)
167 spin_lock_irqsave(&arb_intr_info.lock, flags);
168 ret = param_set_int(arg, kp);
169 spin_unlock_irqrestore(&arb_intr_info.lock, flags);
/* Getter for arb_intr_mma_in_ms: plain integer read (no lock taken). */
173 static int arb_intr_mma_get(char *buff, const struct kernel_param *kp)
175 return param_get_int(buff, kp);
/*
 * Account one EMEM arbitration interrupt: fold the time since the
 * previous interrupt into a moving average over MMA_HISTORY_SAMPLES
 * samples, all under arb_intr_info.lock.
 */
178 static void arb_intr(void)
184 spin_lock_irqsave(&arb_intr_info.lock, flags);
186 time = sched_clock();
/* >> 20 approximates ns -> ms (divides by 2^20 ~ 1e6). */
187 time_diff_ms = (time - arb_intr_info.time) >> 20;
188 arb_intr_info.time = time;
189 arb_intr_info.arb_intr_mma =
190 ((MMA_HISTORY_SAMPLES - 1) * time_diff_ms +
191 arb_intr_info.arb_intr_mma) / MMA_HISTORY_SAMPLES;
192 spin_unlock_irqrestore(&arb_intr_info.lock, flags);
/* Delayed-work callback: clear the error counter so fault prints resume. */
195 static void unthrottle_prints(struct work_struct *work)
197 atomic_set(&error_count, 0);
/*
 * Update per-client statistics: walk each bit of @stat and account the
 * interrupt types this client triggered (reported via debugfs).
 */
200 static void mcerr_info_update(struct mc_client *c, u32 stat)
204 for (i = 0; i < (sizeof(stat) * 8); i++) {
/*
 * Decode and report one MC fault from the broadcast channel.
 *
 * Reads the masked interrupt status, matches it against mc_errors[],
 * pulls the client/secure/write/address details out of the fault's
 * status registers and hands everything to the chip-specific print
 * routine.  EMEM arbitration interrupts are handled separately and
 * stripped from the status first.
 */
210 static void log_mcerr_fault(unsigned int irq)
212 struct mc_client *client;
213 const struct mc_error *fault;
214 const char *smmu_info;
216 u32 status, write, secure, client_id;
217 int src_chan = MC_BROADCAST_CHANNEL;
218 u32 intstatus = mc_int_mask &
219 __mc_readl(src_chan, MC_INTSTATUS);
222 if (intstatus & MC_INT_ARBITRATION_EMEM) {
224 if (intstatus == MC_INT_ARBITRATION_EMEM)
226 intstatus &= ~MC_INT_ARBITRATION_EMEM;
229 fault = chip_specific.mcerr_info(intstatus & mc_int_mask);
230 if (WARN(!fault, "Unknown error! intr sig: 0x%08x\n",
231 intstatus & mc_int_mask))
/* Some faults latch no status registers; just print the message. */
234 if (fault->flags & E_NO_STATUS) {
235 mcerr_pr("MC fault - no status: %s\n", fault->msg);
239 status = __mc_readl(src_chan, fault->stat_reg);
240 addr = __mc_readl(src_chan, fault->addr_reg);
242 if (fault->flags & E_TWO_STATUS) {
243 mcerr_pr("MC fault - %s\n", fault->msg);
244 mcerr_pr("status: 0x%08x status2: 0x%08llx\n",
249 secure = !!(status & MC_ERR_STATUS_SECURE);
250 write = !!(status & MC_ERR_STATUS_WRITE);
/* Client id lives in the low 8 bits; clamp unknown ids to the last. */
251 client_id = status & 0xff;
252 client = &mc_clients[client_id <= mc_client_last
253 ? client_id : mc_client_last];
255 mcerr_info_update(client, intstatus & mc_int_mask);
258 * LPAE: make sure we get the extra 2 physical address bits available
259 * and pass them down to the printing function.
261 addr |= (((phys_addr_t)(status & MC_ERR_STATUS_ADR_HI)) << 12);
263 if (fault->flags & E_SMMU)
264 smmu_info = smmu_page_attrib[MC_ERR_SMMU_BITS(status)];
268 chip_specific.mcerr_print(fault, client, status, addr, secure, write,
/* Default chip op: mask out all MC interrupts. */
272 static void disable_interrupt(unsigned int irq)
274 mc_writel(0, MC_INTMASK);
/* Default chip op: restore the configured MC interrupt mask. */
277 static void enable_interrupt(unsigned int irq)
279 mc_writel(mc_int_mask, MC_INTMASK);
/*
 * Default chip op: clear pending MC interrupt status.  The constant
 * covers all supported error bits -- presumably write-1-to-clear;
 * NOTE(review): confirm against the MC register spec for the chip.
 */
282 static void clear_interrupt(unsigned int irq)
284 mc_writel(0x00033F40, MC_INTSTATUS);
/*
 * Threaded half of the MC error interrupt: logs the fault and re-enables
 * the interrupt that the hard handler disabled.  With throttling on,
 * once MAX_PRINTS errors accumulate the fault is only acknowledged and
 * prints stay suppressed until unthrottle_prints_work resets the
 * counter (scheduled HZ/2 later).
 */
287 static irqreturn_t tegra_mcerr_thread(int irq, void *data)
291 cancel_delayed_work(&unthrottle_prints_work);
292 count = atomic_inc_return(&error_count);
294 if (mcerr_throttle_enabled && count >= MAX_PRINTS) {
295 schedule_delayed_work(&unthrottle_prints_work, HZ/2);
296 if (count == MAX_PRINTS)
297 mcerr_pr("Too many MC errors; throttling prints\n");
298 cs_ops->clear_interrupt(irq);
302 cs_ops->log_mcerr_fault(irq);
304 cs_ops->enable_interrupt(irq);
310 * The actual error handling takes longer than is ideal so this must be
/* Hard IRQ half: mask MC interrupts and defer the work to the thread. */
313 static irqreturn_t tegra_mcerr_hard_irq(int irq, void *data)
315 trace_printk("MCERR detected.\n");
317 * Disable MC Error interrupt till the MC Error info is logged.
318 * MC Errors can be lost as MC HW holds one MC error at a time.
319 * The first MC Error is good enough to point out potential memory
320 * access issues in SW and allow debugging further.
322 cs_ops->disable_interrupt(irq);
323 return IRQ_WAKE_THREAD;
/*
 * Find the mc_errors[] entry whose signature exactly equals @intr.
 * The caller (log_mcerr_fault) WARNs when the lookup fails, i.e. the
 * interrupt combination is unknown.
 */
326 static const struct mc_error *mcerr_default_info(u32 intr)
328 const struct mc_error *err;
330 for (err = mc_errors; err->sig && err->msg; err++) {
331 if (intr != err->sig)
/*
 * Weak stub so the MC error path links without an SMMU driver; an SMMU
 * implementation may override it to dump the faulting page tables.
 */
339 void __weak smmu_dump_pagetable(int swgid, dma_addr_t addr)
344 * This will print at least 8 hex digits for address. If the address is bigger
345 * then more digits will be printed but the full 16 hex digits for a 64 bit
346 * address will not get printed by the current code.
/*
 * Default fault reporter: dumps the page table for the faulting swgroup,
 * then logs client, status, address and access attributes.  SMMU fault
 * details are included only when report_smmu_errs is set.
 */
348 static void mcerr_default_print(const struct mc_error *err,
349 const struct mc_client *client,
350 u32 status, phys_addr_t addr,
351 int secure, int rw, const char *smmu_info)
354 smmu_dump_pagetable(client->swgid, addr);
356 mcerr_pr("(%d) %s: %s\n", client->swgid, client->name, err->msg);
357 mcerr_pr(" status = 0x%08x; addr = 0x%08llx\n", status,
358 (long long unsigned int)addr);
359 if (report_smmu_errs)
360 mcerr_pr(" secure: %s, access-type: %s, SMMU fault: %s\n",
361 secure ? "yes" : "no", rw ? "write" : "read",
362 smmu_info ? smmu_info : "none");
364 mcerr_pr(" secure: %s, access-type: %s\n",
365 secure ? "yes" : "no", rw ? "write" : "read");
369 * Print the MC err stats for each client.
/*
 * debugfs seq_file show: a table with one column per interrupt type the
 * chip describes and one row per client that has recorded errors.
 */
371 static int mcerr_default_debugfs_show(struct seq_file *s, void *v)
376 seq_printf(s, "%-18s %-18s", "swgroup", "client");
377 for (i = 0; i < (sizeof(u32) * 8); i++) {
378 if (chip_specific.intr_descriptions[i])
379 seq_printf(s, " %-12s",
380 chip_specific.intr_descriptions[i]);
384 for (i = 0; i < chip_specific.nr_clients; i++) {
387 /* Only print clients who actually have errors. */
388 for (j = 0; j < (sizeof(u32) * 8); j++) {
389 if (chip_specific.intr_descriptions[j] &&
390 mc_clients[i].intr_counts[j]) {
397 seq_printf(s, "%-18s %-18s",
399 mc_clients[i].swgroup);
400 for (j = 0; j < (sizeof(u32) * 8); j++) {
401 if (!chip_specific.intr_descriptions[j])
403 seq_printf(s, " %-12u",
404 mc_clients[i].intr_counts[j]);
/* debugfs open: defer to the chip's (possibly overridden) show routine. */
413 static int mcerr_debugfs_open(struct inode *inode, struct file *file)
415 return single_open(file, chip_specific.mcerr_debugfs_show, NULL);
/* File operations for the "mcerr" debugfs statistics node. */
418 static const struct file_operations mcerr_debugfs_fops = {
419 .open = mcerr_debugfs_open,
422 .release = single_release,
/* debugfs read hook for mcerr_throttle: report the current flag. */
425 static int __get_throttle(void *data, u64 *val)
427 *val = mcerr_throttle_enabled;
/* debugfs write hook: reset the error count, then set the throttle flag. */
431 static int __set_throttle(void *data, u64 val)
433 atomic_set(&error_count, 0);
435 mcerr_throttle_enabled = (bool) val;
438 DEFINE_SIMPLE_ATTRIBUTE(mcerr_throttle_debugfs_fops, __get_throttle,
439 __set_throttle, "%llu\n");
442 * This will always be successful. However, if something goes wrong in the
443 * init a message will be printed to the kernel log. Since this is a
444 * non-essential piece of the kernel no reason to fail the entire MC init
/*
 * @mc_parent: debugfs directory under which the "err" node is created.
 * @pdev:      MC platform device; its DT node supplies "int_mask" and
 *             the MC error interrupt.
 *
 * Installs the default chip ops, lets mcerr_chip_specific_setup()
 * override them, programs MC_INTMASK, requests the threaded error IRQ
 * and creates the debugfs nodes.
 */
447 int tegra_mcerr_init(struct dentry *mc_parent, struct platform_device *pdev)
452 chip_specific.mcerr_info = mcerr_default_info;
453 chip_specific.mcerr_print = mcerr_default_print;
454 chip_specific.mcerr_debugfs_show = mcerr_default_debugfs_show;
455 chip_specific.enable_interrupt = enable_interrupt;
456 chip_specific.disable_interrupt = disable_interrupt;
457 chip_specific.clear_interrupt = clear_interrupt;
458 chip_specific.log_mcerr_fault = log_mcerr_fault;
459 chip_specific.nr_clients = 0;
/* T18x reports SMMU faults through the SMMU driver, so stay quiet here. */
461 prop = of_get_property(pdev->dev.of_node,"compatible", NULL);
462 if (prop && strcmp(prop, "nvidia,tegra-t18x-mc") == 0)
463 report_smmu_errs = 0;
465 report_smmu_errs = 1;
468 * mcerr_chip_specific_setup() can override any of the default
469 * functions as it wishes.
471 mcerr_chip_specific_setup(&chip_specific);
472 if (chip_specific.nr_clients == 0 ||
473 chip_specific.intr_descriptions == NULL) {
474 pr_err("Missing necessary chip_specific functionality!\n");
/* The interrupt mask to program comes from the device tree. */
478 prop = of_get_property(pdev->dev.of_node, "int_mask", NULL);
480 pr_err("No int_mask prop for mcerr!\n");
484 mc_int_mask = be32_to_cpup(prop);
485 mc_writel(mc_int_mask, MC_INTMASK);
486 pr_info("Set intmask: 0x%x\n", mc_readl(MC_INTMASK));
488 irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
490 pr_err("Unable to parse/map MC error interrupt\n");
494 if (request_threaded_irq(irq, tegra_mcerr_hard_irq,
495 tegra_mcerr_thread, 0, "mc_status", NULL)) {
496 pr_err("Unable to register MC error interrupt\n");
500 tegra_emcerr_init(mc_parent, pdev);
/* debugfs is best effort; failures only log, init still succeeds. */
505 mcerr_debugfs_dir = debugfs_create_dir("err", mc_parent);
506 if (mcerr_debugfs_dir == NULL) {
507 pr_err("Failed to make debugfs node: %ld\n",
508 PTR_ERR(mcerr_debugfs_dir));
511 debugfs_create_file("mcerr", 0644, mcerr_debugfs_dir, NULL,
512 &mcerr_debugfs_fops);
513 debugfs_create_file("mcerr_throttle", S_IRUGO | S_IWUSR,
514 mcerr_debugfs_dir, NULL,
515 &mcerr_throttle_debugfs_fops);
516 debugfs_create_u32("quiet", 0644, mcerr_debugfs_dir, &mcerr_silenced);