2 * drivers/misc/tegra-profiler/armv7_pmu.c
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 #include <linux/err.h>
20 #include <linux/bitmap.h>
21 #include <linux/slab.h>
22 #include <asm/cputype.h>
25 #include <linux/tegra_profiler.h>
28 #include "armv7_pmu.h"
29 #include "armv7_events.h"
33 static struct quadd_pmu_ctx pmu_ctx;
36 QUADD_ARM_CPU_TYPE_UNKNOWN,
37 QUADD_ARM_CPU_TYPE_CORTEX_A5,
38 QUADD_ARM_CPU_TYPE_CORTEX_A8,
39 QUADD_ARM_CPU_TYPE_CORTEX_A9,
40 QUADD_ARM_CPU_TYPE_CORTEX_A15,
43 struct quadd_pmu_info {
44 DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS);
45 u32 prev_vals[QUADD_MAX_PMU_COUNTERS];
46 int is_already_active;
49 struct quadd_cntrs_info {
56 static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);
58 static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = {
59 [QUADD_EVENT_TYPE_INSTRUCTIONS] =
60 QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE,
61 [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
62 QUADD_ARMV7_HW_EVENT_PC_WRITE,
63 [QUADD_EVENT_TYPE_BRANCH_MISSES] =
64 QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
65 [QUADD_EVENT_TYPE_BUS_CYCLES] =
66 QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES,
68 [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
69 QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
70 [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
71 QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
72 [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
73 QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
75 [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
76 QUADD_ARMV7_UNSUPPORTED_EVENT,
77 [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
78 QUADD_ARMV7_UNSUPPORTED_EVENT,
79 [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
80 QUADD_ARMV7_UNSUPPORTED_EVENT,
83 static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = {
84 [QUADD_EVENT_TYPE_INSTRUCTIONS] =
85 QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED,
86 [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
87 QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE,
88 [QUADD_EVENT_TYPE_BRANCH_MISSES] =
89 QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
90 [QUADD_EVENT_TYPE_BUS_CYCLES] = QUADD_ARMV7_HW_EVENT_BUS_CYCLES,
92 [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
93 QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL,
94 [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
95 QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL,
96 [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
97 QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
99 [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
100 QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL,
101 [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
102 QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL,
103 [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
104 QUADD_ARMV7_UNSUPPORTED_EVENT,
108 armv7_pmu_pmnc_read(void)
112 /* Read Performance MoNitor Control (PMNC) register */
113 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
118 armv7_pmu_pmnc_write(u32 val)
120 /* Write Performance MoNitor Control (PMNC) register */
121 asm volatile("mcr p15, 0, %0, c9, c12, 0" : :
122 "r"(val & QUADD_ARMV7_PMNC_MASK));
126 armv7_pmu_cntens_read(void)
130 /* Read CouNT ENable Set (CNTENS) register */
131 asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(val));
136 armv7_pmu_cntens_write(u32 val)
138 /* Write CouNT ENable Set (CNTENS) register */
139 asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
143 armv7_pmu_cntenc_write(u32 val)
145 /* Write CouNT ENable Clear (CNTENC) register */
146 asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
150 armv7_pmu_pmnxsel_write(u32 val)
152 /* Read Performance Counter SELection (PMNXSEL) register */
153 asm volatile("mcr p15, 0, %0, c9, c12, 5" : :
154 "r" (val & QUADD_ARMV7_SELECT_MASK));
158 armv7_pmu_ccnt_read(void)
162 /* Read Cycle CouNT (CCNT) register */
163 asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
168 armv7_pmu_ccnt_write(u32 val)
170 /* Write Cycle CouNT (CCNT) register */
171 asm volatile ("mcr p15, 0, %0, c9, c13, 0" : : "r"(val));
175 armv7_pmu_pmcnt_read(void)
179 /* Read Performance Monitor CouNT (PMCNTx) registers */
180 asm volatile ("mrc p15, 0, %0, c9, c13, 2" : "=r"(val));
185 armv7_pmu_pmcnt_write(u32 val)
187 /* Write Performance Monitor CouNT (PMCNTx) registers */
188 asm volatile ("mcr p15, 0, %0, c9, c13, 2" : : "r"(val));
192 armv7_pmu_evtsel_write(u32 event)
194 /* Write Event SELection (EVTSEL) register */
195 asm volatile("mcr p15, 0, %0, c9, c13, 1" : :
196 "r" (event & QUADD_ARMV7_EVTSEL_MASK));
200 armv7_pmu_intens_read(void)
204 /* Read INTerrupt ENable Set (INTENS) register */
205 asm volatile ("mrc p15, 0, %0, c9, c14, 1" : "=r"(val));
210 armv7_pmu_intens_write(u32 val)
212 /* Write INTerrupt ENable Set (INTENS) register */
213 asm volatile ("mcr p15, 0, %0, c9, c14, 1" : : "r"(val));
217 armv7_pmu_intenc_write(u32 val)
219 /* Write INTerrupt ENable Clear (INTENC) register */
220 asm volatile ("mcr p15, 0, %0, c9, c14, 2" : : "r"(val));
/* Enable one counter: idx is a bit position in CNTENS (CCNT or PMCNTx). */
static void enable_counter(int idx)
{
	armv7_pmu_cntens_write(1UL << idx);
}
/* Disable one counter: idx is a bit position in CNTENC (CCNT or PMCNTx). */
static void disable_counter(int idx)
{
	armv7_pmu_cntenc_write(1UL << idx);
}
/* Select which event counter subsequent PMCNT/EVTSEL accesses target. */
static void select_counter(unsigned int counter)
{
	armv7_pmu_pmnxsel_write(counter);
}
238 static int is_pmu_enabled(void)
240 u32 pmnc = armv7_pmu_pmnc_read();
242 if (pmnc & QUADD_ARMV7_PMNC_E) {
243 u32 cnten = armv7_pmu_cntens_read();
244 cnten &= pmu_ctx.counters_mask | QUADD_ARMV7_CCNT;
245 return cnten ? 1 : 0;
251 static u32 read_counter(int idx)
255 if (idx == QUADD_ARMV7_CCNT_BIT) {
256 val = armv7_pmu_ccnt_read();
259 val = armv7_pmu_pmcnt_read();
265 static void write_counter(int idx, u32 value)
267 if (idx == QUADD_ARMV7_CCNT_BIT) {
268 armv7_pmu_ccnt_write(value);
271 armv7_pmu_pmcnt_write(value);
276 get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
281 cntens = armv7_pmu_cntens_read();
282 cntens = ~cntens & (pmu_ctx.counters_mask | QUADD_ARMV7_CCNT);
284 bitmap_zero(bitmap, nbits);
285 bitmap_copy(bitmap, (unsigned long *)&cntens,
286 BITS_PER_BYTE * sizeof(u32));
288 cc = (cntens & QUADD_ARMV7_CCNT) ? 1 : 0;
293 return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
296 static u32 armv7_pmu_adjust_value(u32 value, int event_id)
299 * Cortex A8/A9: l1 cache performance counters
300 * don't differentiate between read and write data accesses/misses,
301 * so currently we are devided by two
303 if (pmu_ctx.l1_cache_rw &&
304 (pmu_ctx.arch.type == QUADD_ARM_CPU_TYPE_CORTEX_A8 ||
305 pmu_ctx.arch.type == QUADD_ARM_CPU_TYPE_CORTEX_A9) &&
306 (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES ||
307 event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) {
313 static void __maybe_unused
314 disable_interrupt(int idx)
316 armv7_pmu_intenc_write(1UL << idx);
320 disable_all_interrupts(void)
322 u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
323 armv7_pmu_intenc_write(val);
327 armv7_pmnc_reset_overflow_flags(void)
329 u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
330 asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
/* Program event counter idx to count the given hardware event number. */
static void
select_event(unsigned int idx, unsigned int event)
{
	select_counter(idx);
	armv7_pmu_evtsel_write(event);
}
340 static void disable_all_counters(void)
344 /* Disable all counters */
345 val = armv7_pmu_pmnc_read();
346 if (val & QUADD_ARMV7_PMNC_E)
347 armv7_pmu_pmnc_write(val & ~QUADD_ARMV7_PMNC_E);
349 armv7_pmu_cntenc_write(QUADD_ARMV7_CCNT | pmu_ctx.counters_mask);
352 static void enable_all_counters(void)
356 /* Enable all counters */
357 val = armv7_pmu_pmnc_read();
358 val |= QUADD_ARMV7_PMNC_E | QUADD_ARMV7_PMNC_X;
359 armv7_pmu_pmnc_write(val);
362 static void reset_all_counters(void)
366 val = armv7_pmu_pmnc_read();
367 val |= QUADD_ARMV7_PMNC_P | QUADD_ARMV7_PMNC_C;
368 armv7_pmu_pmnc_write(val);
/* Bring the PMU to a clean state: no pending overflow flags and no
 * overflow interrupts enabled. */
static void quadd_init_pmu(void)
{
	armv7_pmnc_reset_overflow_flags();
	disable_all_interrupts();
}
/* Event-source "enable" hook: nothing to reserve at hardware level here,
 * just log the reservation. Returns 0 (success). */
static int pmu_enable(void)
{
	pr_info("pmu was reserved\n");
	return 0;
}
383 static void __pmu_disable(void *arg)
385 struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
387 if (!pi->is_already_active) {
388 pr_info("[%d] reset all counters\n",
391 disable_all_counters();
392 reset_all_counters();
396 for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
397 pr_info("[%d] reset counter: %d\n",
398 smp_processor_id(), idx);
400 disable_counter(idx);
401 write_counter(idx, 0);
406 static void pmu_disable(void)
408 on_each_cpu(__pmu_disable, NULL, 1);
409 pr_info("pmu was released\n");
/*
 * pmu_start() - per-CPU start hook: assign each configured event in
 * pmu_ctx.used_events to a free hardware counter (CCNT for cpu-cycles,
 * otherwise the next free event counter), zero and enable it, then enable
 * the PMU globally unless another user already had it active.
 *
 * NOTE(review): this listing is truncated — braces, several locals
 * (e.g. `event`, `index`), the error-path returns and the else-branches
 * are missing from view; the surviving lines are kept verbatim below.
 */
412 static void pmu_start(void)
414 int idx = 0, pcntrs, ccntr;
416 DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
417 struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
418 u32 *prevp = pi->prev_vals;
419 struct quadd_pmu_event_info *ei;
/* Forget any counter assignment from a previous run. */
421 bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS);
/* If the PMU is already running (another user), do not reset it. */
423 if (is_pmu_enabled()) {
424 pi->is_already_active = 1;
426 disable_all_counters();
429 pi->is_already_active = 0;
432 pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);
434 list_for_each_entry(ei, &pmu_ctx.used_events, list) {
439 event = ei->hw_value;
/* cpu-cycles gets the dedicated cycle counter (CCNT). */
441 if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
443 pr_err_once("Error: cpu cycles counter is already occupied\n");
446 index = QUADD_ARMV7_CCNT_BIT;
449 pr_err_once("Error: too many performance events\n");
/* Other events: claim the next free event counter. */
453 index = find_next_bit(free_bitmap,
454 QUADD_MAX_PMU_COUNTERS, idx);
455 if (index >= QUADD_MAX_PMU_COUNTERS) {
456 pr_err_once("Error: too many events\n");
460 select_event(index, event);
462 set_bit(index, pi->used_cntrs);
/* Start the claimed counter from zero. */
464 write_counter(index, 0);
465 enable_counter(index);
/* Only touch the global enable if we own the PMU. */
468 if (!pi->is_already_active) {
469 reset_all_counters();
470 enable_all_counters();
473 qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
476 static void pmu_stop(void)
479 struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
481 if (!pi->is_already_active) {
482 disable_all_counters();
483 reset_all_counters();
485 for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
486 disable_counter(idx);
487 write_counter(idx, 0);
491 qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
/*
 * pmu_read() - read the current value of every configured event into
 * events[], at most max_events entries. Each value is post-processed by
 * armv7_pmu_adjust_value() and paired with the previously stored value
 * from pi->prev_vals.
 *
 * NOTE(review): this listing is truncated — braces, locals (`i`, `val`,
 * `index`), error-path returns, the prev_vals update and the final return
 * are missing from view; the surviving lines are kept verbatim below.
 */
494 static int __maybe_unused
495 pmu_read(struct event_data *events, int max_events)
499 struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
500 u32 *prevp = pi->prev_vals;
501 struct quadd_pmu_event_info *ei;
/* pmu_start() must have assigned counters before we can read them. */
503 if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) {
504 pr_err_once("Error: counters were not initialized\n")
508 list_for_each_entry(ei, &pmu_ctx.used_events, list) {
/* cpu-cycles is always mapped to the cycle counter bit. */
511 if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
512 if (!test_bit(QUADD_ARMV7_CCNT_BIT, pi->used_cntrs)) {
513 pr_err_once("Error: ccntr is not used\n");
516 index = QUADD_ARMV7_CCNT_BIT;
518 index = find_next_bit(pi->used_cntrs,
519 QUADD_MAX_PMU_COUNTERS, idx);
522 if (index >= QUADD_MAX_PMU_COUNTERS) {
523 pr_err_once("Error: perf counter is not used\n");
528 val = read_counter(index);
/* A8/A9 shared L1 read/write events may be halved here. */
529 val = armv7_pmu_adjust_value(val, ei->quadd_event_id);
531 events->event_source = QUADD_EVENT_SOURCE_PMU;
532 events->event_id = ei->quadd_event_id;
535 events->prev_val = *prevp;
539 qm_debug_read_counter(events->event_id, events->prev_val,
542 if (++i >= max_events)
/*
 * pmu_read_emulate() - debug stand-in for pmu_read(): produces synthetic
 * counter values instead of touching the hardware (selected via
 * QUADD_USE_EMULATE_COUNTERS). The static `val` presumably advances
 * between calls to mimic monotonic counters — TODO confirm, the update
 * lines are not visible in this truncated listing.
 */
552 static int __maybe_unused
553 pmu_read_emulate(struct event_data *events, int max_events)
556 static u32 val = 100;
557 struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
558 u32 *prevp = pi->prev_vals;
559 struct quadd_pmu_event_info *ei;
561 list_for_each_entry(ei, &pmu_ctx.used_events, list) {
565 events->event_id = *prevp;
571 if (++i >= max_events)
581 static void __get_free_counters(void *arg)
584 DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
585 struct quadd_cntrs_info *ci = arg;
587 pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);
589 spin_lock(&ci->lock);
591 ci->pcntrs = min_t(int, pcntrs, ci->pcntrs);
596 pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n",
597 smp_processor_id(), pcntrs, ccntr, free_bitmap[0]);
599 spin_unlock(&ci->lock);
602 static void free_events(struct list_head *head)
604 struct quadd_pmu_event_info *entry, *next;
606 list_for_each_entry_safe(entry, next, head, list) {
607 list_del(&entry->list);
/*
 * set_events() - validate the requested generic event ids and build
 * pmu_ctx.used_events (one allocated quadd_pmu_event_info per event),
 * checking the request against the counters free on every CPU.
 *
 * NOTE(review): this listing is truncated — braces, error-path returns
 * (and presumably the goto-cleanup labels feeding the free_events() call
 * at the end) are missing from view; surviving lines kept verbatim.
 */
612 static int set_events(int *events, int size)
614 int free_pcntrs, err;
615 int i, nr_l1_r = 0, nr_l1_w = 0;
616 struct quadd_cntrs_info free_ci;
618 pmu_ctx.l1_cache_rw = 0;
/* Drop any previously configured event list before building a new one. */
620 free_events(&pmu_ctx.used_events);
622 if (!events || !size)
625 if (!pmu_ctx.current_map) {
626 pr_err("Invalid current_map\n");
/* Gather the minimum free-counter availability across all CPUs. */
630 spin_lock_init(&free_ci.lock);
631 free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS;
634 on_each_cpu(__get_free_counters, &free_ci, 1);
636 free_pcntrs = free_ci.pcntrs;
637 pr_info("free counters: pcntrs/ccntr: %d/%d\n",
638 free_pcntrs, free_ci.ccntr);
640 for (i = 0; i < size; i++) {
641 struct quadd_pmu_event_info *ei;
/*
 * NOTE(review): off-by-one — current_map[] has QUADD_EVENT_TYPE_MAX
 * entries (valid indices 0..QUADD_EVENT_TYPE_MAX-1), so this check
 * should be ">=": events[i] == QUADD_EVENT_TYPE_MAX would read one
 * past the end of the map below.
 */
643 if (events[i] > QUADD_EVENT_TYPE_MAX) {
644 pr_err("Error event: %d\n", events[i]);
649 ei = kzalloc(sizeof(*ei), GFP_KERNEL);
655 INIT_LIST_HEAD(&ei->list);
656 list_add_tail(&ei->list, &pmu_ctx.used_events);
/* cpu-cycles claims the dedicated cycle counter; others consume one
 * generic event counter each. */
658 if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
659 ei->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
660 if (!free_ci.ccntr) {
661 pr_err("Error: cpu cycles counter is already occupied\n");
666 if (!free_pcntrs--) {
667 pr_err("Error: too many performance events\n");
672 ei->hw_value = pmu_ctx.current_map[events[i]];
675 ei->quadd_event_id = events[i];
/* Track whether both L1 read and write miss events are requested
 * (enables the A8/A9 halving in armv7_pmu_adjust_value()). */
677 if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
679 else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
682 pr_info("Event has been added: id/pmu value: %s/%#x\n",
683 quadd_get_event_str(events[i]),
687 if (nr_l1_r > 0 && nr_l1_w > 0)
688 pmu_ctx.l1_cache_rw = 1;
/* Error path: release everything built so far. */
693 free_events(&pmu_ctx.used_events);
697 static int get_supported_events(int *events, int max_events)
699 int i, nr_events = 0;
701 max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events);
703 for (i = 0; i < max_events; i++) {
704 if (pmu_ctx.current_map[i] != QUADD_ARMV7_UNSUPPORTED_EVENT)
705 events[nr_events++] = i;
710 static int get_current_events(int *events, int max_events)
713 struct quadd_pmu_event_info *ei;
715 list_for_each_entry(ei, &pmu_ctx.used_events, list) {
716 events[i++] = ei->quadd_event_id;
725 static struct quadd_arch_info *get_arch(void)
727 return &pmu_ctx.arch;
/*
 * Event-source interface exported to the profiler core.
 *
 * NOTE(review): this listing is truncated — the initializer entries
 * between .disable and the #ifndef (presumably .start/.stop), the
 * #ifndef's first branch (presumably .read = pmu_read), the #else/#endif
 * lines and the closing brace are missing from view. As shown,
 * pmu_read_emulate appears under #ifndef QUADD_USE_EMULATE_COUNTERS,
 * which is almost certainly the #else branch in the full file — confirm
 * against the original source.
 */
730 static struct quadd_event_source_interface pmu_armv7_int = {
731 .enable = pmu_enable,
732 .disable = pmu_disable,
737 #ifndef QUADD_USE_EMULATE_COUNTERS
740 .read = pmu_read_emulate,
742 .set_events = set_events,
743 .get_supported_events = get_supported_events,
744 .get_current_events = get_current_events,
745 .get_arch = get_arch,
/*
 * quadd_armv7_pmu_init() - probe the CPU via MIDR (read_cpuid_id()) and,
 * for recognized parts, select the event map and counter mask and return
 * the event-source interface; returns NULL (pmu stays NULL) for
 * unrecognized CPUs. Only Cortex-A9 and Cortex-A15 cases are visible here,
 * although the driver's cpu-type enum also lists A5/A8.
 *
 * NOTE(review): this listing is truncated — braces, break statements, the
 * default case body and the final `return pmu;` are missing from view.
 */
748 struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
750 struct quadd_event_source_interface *pmu = NULL;
751 unsigned long cpu_id, cpu_implementer, part_number;
/* MIDR: implementer in bits [31:24], primary part number in [15:4]. */
753 cpu_id = read_cpuid_id();
754 cpu_implementer = cpu_id >> 24;
755 part_number = cpu_id & 0xFFF0;
757 pmu_ctx.arch.type = QUADD_ARM_CPU_TYPE_UNKNOWN;
758 pmu_ctx.arch.ver = 0;
/* strncpy may not NUL-terminate, but the name is explicitly terminated
 * below before use. */
759 strncpy(pmu_ctx.arch.name, "Unknown",
760 sizeof(pmu_ctx.arch.name));
762 if (cpu_implementer == ARM_CPU_IMP_ARM) {
763 switch (part_number) {
764 case ARM_CPU_PART_CORTEX_A9:
765 pmu_ctx.arch.type = QUADD_ARM_CPU_TYPE_CORTEX_A9;
766 strncpy(pmu_ctx.arch.name, "Cortex A9",
767 sizeof(pmu_ctx.arch.name));
769 pmu_ctx.counters_mask =
770 QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9;
771 pmu_ctx.current_map = quadd_armv7_a9_events_map;
772 pmu = &pmu_armv7_int;
775 case ARM_CPU_PART_CORTEX_A15:
776 pmu_ctx.arch.type = QUADD_ARM_CPU_TYPE_CORTEX_A15;
777 strncpy(pmu_ctx.arch.name, "Cortex A15",
778 sizeof(pmu_ctx.arch.name));
780 pmu_ctx.counters_mask =
781 QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15;
782 pmu_ctx.current_map = quadd_armv7_a15_events_map;
783 pmu = &pmu_armv7_int;
/* Unrecognized part: leave pmu == NULL so the caller falls back. */
787 pmu_ctx.arch.type = QUADD_ARM_CPU_TYPE_UNKNOWN;
788 pmu_ctx.current_map = NULL;
793 INIT_LIST_HEAD(&pmu_ctx.used_events);
/* Guarantee NUL-termination of the (possibly strncpy-truncated) name. */
795 pmu_ctx.arch.name[sizeof(pmu_ctx.arch.name) - 1] = '\0';
796 pr_info("arch: %s, type: %d, ver: %d\n",
797 pmu_ctx.arch.name, pmu_ctx.arch.type, pmu_ctx.arch.ver);
802 void quadd_armv7_pmu_deinit(void)
804 free_events(&pmu_ctx.used_events);