/*
 * drivers/misc/tegra-profiler/armv8_pmu.c
 *
 * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>
#include <linux/types.h>
#include <linux/string.h>

#include <linux/version.h>
#include <linux/err.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/percpu.h>

#include <asm/cputype.h>

#include "arm_pmu.h"
#include "armv8_pmu.h"
#include "armv8_events.h"
#include "quadd.h"
#include "debug.h"

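/*
 * Per-CPU counter state: which hardware counters this profiler claimed
 * (used_cntrs), the previous raw reading of each counter for delta
 * computation (prev_vals), and whether the PMU was already running when
 * profiling started (is_already_active).
 */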
struct quadd_pmu_info {
	DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS);
	u32 prev_vals[QUADD_MAX_PMU_COUNTERS];
	int is_already_active;
};

struct quadd_cntrs_info {
	int pcntrs;
	int ccntr;

	spinlock_t lock;
};

static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);

static struct quadd_pmu_ctx pmu_ctx;

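/*
 * Tables mapping generic quadd event types to ARMv8 PMUv3 event numbers.
 * The generic ARM map is the default; Cortex-A57 and NVIDIA Denver get
 * their own maps because they differ in which L1/L2 cache refill events
 * they implement.  Entries set to QUADD_ARMV8_UNSUPPORTED_EVENT are
 * filtered out by get_supported_events().
 */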
static unsigned
quadd_armv8_pmuv3_arm_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};

static unsigned
quadd_armv8_pmuv3_a57_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_LD,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_ST,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_LD,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_ST,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};

static unsigned
quadd_armv8_pmuv3_denver_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};

/*********************************************************************/

static inline u32
armv8_pmu_pmcr_read(void)
{
	u32 val;

	/* Read Performance Monitors Control Register */
	asm volatile("mrs %0, pmcr_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmcr_write(u32 val)
{
	/* Write Performance Monitors Control Register */
	asm volatile("msr pmcr_el0, %0" : :
		     "r" (val & QUADD_ARMV8_PMCR_WR_MASK));
}

static inline u32
armv8_pmu_pmceid_read(void)
{
	u32 val;

	/* Read Performance Monitors Common Event Identification Register */
	asm volatile("mrs %0, pmceid0_el0" : "=r" (val));
	return val;
}

static inline u32
armv8_pmu_pmcntenset_read(void)
{
	u32 val;

	/* Read Performance Monitors Count Enable Set Register */
	asm volatile("mrs %0, pmcntenset_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmcntenset_write(u32 val)
{
	/* Write Performance Monitors Count Enable Set Register */
	asm volatile("msr pmcntenset_el0, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmcntenclr_write(u32 val)
{
	/* Write Performance Monitors Count Enable Clear Register */
	asm volatile("msr pmcntenclr_el0, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmselr_write(u32 val)
{
	/* Write Performance Monitors Event Counter Selection Register */
	asm volatile("msr pmselr_el0, %0" : :
		     "r" (val & QUADD_ARMV8_SELECT_MASK));
}

static inline u64
armv8_pmu_pmccntr_read(void)
{
	u64 val;

	/* Read Performance Monitors Cycle Count Register */
	asm volatile("mrs %0, pmccntr_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmccntr_write(u64 val)
{
	/* Write Performance Monitors Cycle Count Register */
	asm volatile("msr pmccntr_el0, %0" : : "r" (val));
}

static inline u32
armv8_pmu_pmxevcntr_read(void)
{
	u32 val;

	/* Read Performance Monitors Selected Event Count Register */
	asm volatile("mrs %0, pmxevcntr_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmxevcntr_write(u32 val)
{
	/* Write Performance Monitors Selected Event Count Register */
	asm volatile("msr pmxevcntr_el0, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmxevtyper_write(u32 event)
{
	/* Write Performance Monitors Selected Event Type Register */
	asm volatile("msr pmxevtyper_el0, %0" : :
		     "r" (event & QUADD_ARMV8_EVTSEL_MASK));
}

static inline u32
armv8_pmu_pmintenset_read(void)
{
	u32 val;

	/* Read Performance Monitors Interrupt Enable Set Register */
	asm volatile("mrs %0, pmintenset_el1" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmintenset_write(u32 val)
{
	/* Write Performance Monitors Interrupt Enable Set Register */
	asm volatile("msr pmintenset_el1, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmintenclr_write(u32 val)
{
	/* Write Performance Monitors Interrupt Enable Clear Register */
	asm volatile("msr pmintenclr_el1, %0" : : "r" (val));
}

static inline u32
armv8_pmu_pmovsclr_read(void)
{
	u32 val;

	/* Read Performance Monitors Overflow Flag Status Register */
	asm volatile("mrs %0, pmovsclr_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmovsclr_write(u32 mask)
{
	/*
	 * Write Performance Monitors Overflow Flag Status Register.
	 * Callers (see reset_overflow_flags()) pass a counter mask, so
	 * write the mask directly instead of BIT(idx).
	 */
	asm volatile("msr pmovsclr_el0, %0" : : "r" (mask));
}

static inline u32
armv8_id_afr0_el1_read(void)
{
	u32 val;

	/* Read Auxiliary Feature Register 0 */
	asm volatile("mrs %0, id_afr0_el1" : "=r" (val));
	return val;
}

static void enable_counter(int idx)
{
	armv8_pmu_pmcntenset_write(BIT(idx));
}

static void disable_counter(int idx)
{
	armv8_pmu_pmcntenclr_write(BIT(idx));
}

static void select_counter(unsigned int counter)
{
	armv8_pmu_pmselr_write(counter);
}

static int is_pmu_enabled(void)
{
	u32 pmcr = armv8_pmu_pmcr_read();

	if (pmcr & QUADD_ARMV8_PMCR_E) {
		u32 pmcnten = armv8_pmu_pmcntenset_read();

		pmcnten &= pmu_ctx.counters_mask | QUADD_ARMV8_CCNT;
		return pmcnten ? 1 : 0;
	}

	return 0;
}

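/*
 * Event counters are accessed indirectly: the counter index is first
 * programmed into PMSELR_EL0, after which PMXEVCNTR_EL0 reads or writes
 * the selected counter.  The cycle counter has its own register
 * (PMCCNTR_EL0) and is addressed by the reserved index
 * QUADD_ARMV8_CCNT_BIT.
 */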
static u32 read_counter(int idx)
{
	u32 val;

	if (idx == QUADD_ARMV8_CCNT_BIT) {
		val = armv8_pmu_pmccntr_read();
	} else {
		select_counter(idx);
		val = armv8_pmu_pmxevcntr_read();
	}

	return val;
}

static void write_counter(int idx, u32 value)
{
	if (idx == QUADD_ARMV8_CCNT_BIT) {
		armv8_pmu_pmccntr_write(value);
	} else {
		select_counter(idx);
		armv8_pmu_pmxevcntr_write(value);
	}
}

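/*
 * Build a bitmap of counters that are not currently enabled in
 * PMCNTENSET_EL0.  Returns the number of free event counters; *ccntr
 * is set to 1 if the cycle counter is also free.
 */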
static int
get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
{
	int cc;
	unsigned long cntens;

	cntens = armv8_pmu_pmcntenset_read();
	cntens = ~cntens & (pmu_ctx.counters_mask | QUADD_ARMV8_CCNT);

	bitmap_zero(bitmap, nbits);
	/*
	 * The source must be an unsigned long: bitmap_copy() copies
	 * whole longs, so a u32 on the stack would be read past its end
	 * on 64-bit.
	 */
	bitmap_copy(bitmap, &cntens, BITS_PER_BYTE * sizeof(u32));

	cc = (cntens & QUADD_ARMV8_CCNT) ? 1 : 0;

	if (ccntr)
		*ccntr = cc;

	return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
}

static void __maybe_unused
disable_interrupt(int idx)
{
	armv8_pmu_pmintenclr_write(BIT(idx));
}

static void
disable_all_interrupts(void)
{
	u32 val = QUADD_ARMV8_CCNT | pmu_ctx.counters_mask;

	armv8_pmu_pmintenclr_write(val);
}

static void
reset_overflow_flags(void)
{
	u32 val = QUADD_ARMV8_CCNT | pmu_ctx.counters_mask;

	armv8_pmu_pmovsclr_write(val);
}

static void
select_event(unsigned int idx, unsigned int event)
{
	select_counter(idx);
	armv8_pmu_pmxevtyper_write(event);
}

static void disable_all_counters(void)
{
	u32 val;

	/* Disable all counters */
	val = armv8_pmu_pmcr_read();
	if (val & QUADD_ARMV8_PMCR_E)
		armv8_pmu_pmcr_write(val & ~QUADD_ARMV8_PMCR_E);

	armv8_pmu_pmcntenclr_write(QUADD_ARMV8_CCNT | pmu_ctx.counters_mask);
}

static void enable_all_counters(void)
{
	u32 val;

	/* Enable all counters */
	val = armv8_pmu_pmcr_read();
	val |= QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
	armv8_pmu_pmcr_write(val);
}

static void reset_all_counters(void)
{
	u32 val;

	val = armv8_pmu_pmcr_read();
	val |= QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
	armv8_pmu_pmcr_write(val);
}

static void quadd_init_pmu(void)
{
	reset_overflow_flags();
	disable_all_interrupts();
}

static int pmu_enable(void)
{
	pr_info("pmu was reserved\n");
	return 0;
}

static void __pmu_disable(void *arg)
{
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

	if (!pi->is_already_active) {
		pr_info("[%d] reset all counters\n",
			smp_processor_id());

		disable_all_counters();
		reset_all_counters();
	} else {
		int idx;

		for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
			pr_info("[%d] reset counter: %d\n",
				smp_processor_id(), idx);

			disable_counter(idx);
			write_counter(idx, 0);
		}
	}
}

static void pmu_disable(void)
{
	on_each_cpu(__pmu_disable, NULL, 1);
	pr_info("pmu was released\n");
}

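/*
 * Program and enable a hardware counter for each requested event: the
 * cycle counter serves QUADD_EVENT_TYPE_CPU_CYCLES, all other events
 * are placed on free event counters.  If the PMU was already active,
 * global PMU state is left alone and only the counters allocated here
 * are reset.
 */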
static void pmu_start(void)
{
	int idx = 0, pcntrs, ccntr;
	u32 event;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS);

	if (is_pmu_enabled()) {
		pi->is_already_active = 1;
	} else {
		disable_all_counters();
		quadd_init_pmu();

		pi->is_already_active = 0;
	}

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	list_for_each_entry(ei, &pmu_ctx.used_events, list) {
		int index;

		*prevp++ = 0;

		event = ei->hw_value;

		if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
			if (!ccntr) {
				pr_err_once("Error: cpu cycles counter is already occupied\n");
				return;
			}
			index = QUADD_ARMV8_CCNT_BIT;
		} else {
			if (!pcntrs--) {
				pr_err_once("Error: too many performance events\n");
				return;
			}

			index = find_next_bit(free_bitmap,
					      QUADD_MAX_PMU_COUNTERS, idx);
			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: too many events\n");
				return;
			}
			idx = index + 1;
			select_event(index, event);
		}
		set_bit(index, pi->used_cntrs);

		write_counter(index, 0);
		enable_counter(index);
	}

	if (!pi->is_already_active) {
		reset_all_counters();
		enable_all_counters();
	}

	qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
}

static void pmu_stop(void)
{
	int idx;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

	if (!pi->is_already_active) {
		disable_all_counters();
		reset_all_counters();
	} else {
		for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
			disable_counter(idx);
			write_counter(idx, 0);
		}
	}

	qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
}

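/*
 * Read the current raw value of every active counter into the events
 * array.  prev_vals keeps the previous reading per counter so callers
 * can compute deltas; the counters themselves are not reset on read.
 */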
static int __maybe_unused
pmu_read(struct event_data *events, int max_events)
{
	u32 val;
	int idx = 0, i = 0;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) {
		pr_err_once("Error: counters were not initialized\n");
		return 0;
	}

	list_for_each_entry(ei, &pmu_ctx.used_events, list) {
		int index;

		if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
			if (!test_bit(QUADD_ARMV8_CCNT_BIT, pi->used_cntrs)) {
				pr_err_once("Error: ccntr is not used\n");
				return 0;
			}
			index = QUADD_ARMV8_CCNT_BIT;
		} else {
			index = find_next_bit(pi->used_cntrs,
					      QUADD_MAX_PMU_COUNTERS, idx);
			idx = index + 1;

			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: perf counter is not used\n");
				return 0;
			}
		}

		val = read_counter(index);

		events->event_source = QUADD_EVENT_SOURCE_PMU;
		events->event_id = ei->quadd_event_id;

		events->val = val;
		events->prev_val = *prevp;

		*prevp = val;

		qm_debug_read_counter(events->event_id, events->prev_val,
				      events->val);

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}

/*
 * Debug-only path (QUADD_USE_EMULATE_COUNTERS): returns synthetic,
 * monotonically increasing values instead of reading the hardware.
 */
static int __maybe_unused
pmu_read_emulate(struct event_data *events, int max_events)
{
	int i = 0;
	static u32 val = 100;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	list_for_each_entry(ei, &pmu_ctx.used_events, list) {
		if (val > 200)
			val = 100;

		events->event_id = *prevp;
		events->val = val;

		*prevp = val;
		val += 5;

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}

static void __get_free_counters(void *arg)
{
	int pcntrs, ccntr;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_cntrs_info *ci = arg;

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	spin_lock(&ci->lock);

	ci->pcntrs = min_t(int, pcntrs, ci->pcntrs);

	if (!ccntr)
		ci->ccntr = 0;

	pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n",
		smp_processor_id(), pcntrs, ccntr, free_bitmap[0]);

	spin_unlock(&ci->lock);
}

static void free_events(struct list_head *head)
{
	struct quadd_pmu_event_info *entry, *next;

	list_for_each_entry_safe(entry, next, head, list) {
		list_del(&entry->list);
		kfree(entry);
	}
}

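/*
 * Validate the requested event list and translate each event through
 * current_map into a hardware event number, checking against the number
 * of counters actually free across all CPUs.  On any error the
 * partially built list is freed and no events remain configured.
 */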
static int set_events(int *events, int size)
{
	int free_pcntrs, err;
	int i, nr_l1_r = 0, nr_l1_w = 0;
	struct quadd_cntrs_info free_ci;

	pmu_ctx.l1_cache_rw = 0;

	free_events(&pmu_ctx.used_events);

	if (!events || !size)
		return 0;

	if (!pmu_ctx.current_map) {
		pr_err("Invalid current_map\n");
		return -ENODEV;
	}

	spin_lock_init(&free_ci.lock);
	free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS;
	free_ci.ccntr = 1;

	on_each_cpu(__get_free_counters, &free_ci, 1);

	free_pcntrs = free_ci.pcntrs;
	pr_info("free counters: pcntrs/ccntr: %d/%d\n",
		free_pcntrs, free_ci.ccntr);

	pr_info("event identification register: %#x\n",
		armv8_pmu_pmceid_read());

	for (i = 0; i < size; i++) {
		struct quadd_pmu_event_info *ei;

		/* Valid event ids are 0..QUADD_EVENT_TYPE_MAX-1 */
		if (events[i] >= QUADD_EVENT_TYPE_MAX) {
			pr_err("error event: %d\n", events[i]);
			err = -EINVAL;
			goto out_free;
		}

		ei = kzalloc(sizeof(*ei), GFP_KERNEL);
		if (!ei) {
			err = -ENOMEM;
			goto out_free;
		}

		INIT_LIST_HEAD(&ei->list);
		list_add_tail(&ei->list, &pmu_ctx.used_events);

		if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
			ei->hw_value = QUADD_ARMV8_CPU_CYCLE_EVENT;
			if (!free_ci.ccntr) {
				pr_err("error: cpu cycles counter is already occupied\n");
				err = -EBUSY;
				goto out_free;
			}
		} else {
			if (!free_pcntrs--) {
				pr_err("error: too many performance events\n");
				err = -ENOSPC;
				goto out_free;
			}

			ei->hw_value = pmu_ctx.current_map[events[i]];
		}

		ei->quadd_event_id = events[i];

		if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
			nr_l1_r++;
		else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
			nr_l1_w++;

		pr_info("Event has been added: id/pmu value: %s/%#x\n",
			quadd_get_event_str(events[i]),
			ei->hw_value);
	}

	if (nr_l1_r > 0 && nr_l1_w > 0)
		pmu_ctx.l1_cache_rw = 1;

	return 0;

out_free:
	free_events(&pmu_ctx.used_events);
	return err;
}

static int get_supported_events(int *events, int max_events)
{
	int i, nr_events = 0;

	max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events);

	for (i = 0; i < max_events; i++) {
		if (pmu_ctx.current_map[i] != QUADD_ARMV8_UNSUPPORTED_EVENT)
			events[nr_events++] = i;
	}
	return nr_events;
}

static int get_current_events(int *events, int max_events)
{
	int i = 0;
	struct quadd_pmu_event_info *ei;

	list_for_each_entry(ei, &pmu_ctx.used_events, list) {
		events[i++] = ei->quadd_event_id;

		if (i >= max_events)
			break;
	}

	return i;
}

static struct quadd_arch_info *get_arch(void)
{
	return &pmu_ctx.arch;
}

static struct quadd_event_source_interface pmu_armv8_int = {
	.enable			= pmu_enable,
	.disable		= pmu_disable,

	.start			= pmu_start,
	.stop			= pmu_stop,

#ifndef QUADD_USE_EMULATE_COUNTERS
	.read			= pmu_read,
#else
	.read			= pmu_read_emulate,
#endif
	.set_events		= set_events,
	.get_supported_events	= get_supported_events,
	.get_current_events	= get_current_events,
	.get_arch		= get_arch,
};

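/*
 * Detect the PMU at probe time: ID_AA64DFR0_EL1.PMUVer must indicate
 * PMUv3, then the implementor and idcode fields of PMCR_EL0 select the
 * per-CPU event map (generic ARM, Cortex-A57 or NVIDIA Denver).
 * Returns the event-source interface on success, NULL otherwise.
 */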
struct quadd_event_source_interface *quadd_armv8_pmu_init(void)
{
	u32 pmcr, imp, idcode;
	struct quadd_event_source_interface *pmu = NULL;

	/* ID_AA64DFR0_EL1.PMUVer, bits [11:8] */
	u64 aa64_dfr = read_cpuid(ID_AA64DFR0_EL1);
	aa64_dfr = (aa64_dfr >> 8) & 0x0f;

	strncpy(pmu_ctx.arch.name, "Unknown", sizeof(pmu_ctx.arch.name));
	pmu_ctx.arch.type = QUADD_AA64_CPU_TYPE_UNKNOWN;
	pmu_ctx.arch.ver = 0;
	pmu_ctx.current_map = NULL;

	switch (aa64_dfr) {
	case QUADD_AA64_PMUVER_PMUV3:
		strncpy(pmu_ctx.arch.name, "AA64 PmuV3",
			sizeof(pmu_ctx.arch.name));
		pmu_ctx.arch.name[sizeof(pmu_ctx.arch.name) - 1] = '\0';

		pmu_ctx.counters_mask =
			QUADD_ARMV8_COUNTERS_MASK_PMUV3;
		pmu_ctx.current_map =
			quadd_armv8_pmuv3_arm_events_map;

		pmcr = armv8_pmu_pmcr_read();

		idcode = (pmcr >> QUADD_ARMV8_PMCR_IDCODE_SHIFT) &
			QUADD_ARMV8_PMCR_IDCODE_MASK;
		imp = pmcr >> QUADD_ARMV8_PMCR_IMP_SHIFT;

		pr_info("imp: %#x, idcode: %#x\n", imp, idcode);

		/*
		 * Leave room for the terminating NUL: strncat() appends
		 * up to n characters plus '\0', so pass size - len - 1.
		 */
		if (imp == ARM_CPU_IMP_ARM) {
			strncat(pmu_ctx.arch.name, " ARM",
				sizeof(pmu_ctx.arch.name) -
				strlen(pmu_ctx.arch.name) - 1);

			if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A53) {
				pmu_ctx.arch.type =
					QUADD_AA64_CPU_TYPE_CORTEX_A53;

				strncat(pmu_ctx.arch.name, " CORTEX-A53",
					sizeof(pmu_ctx.arch.name) -
					strlen(pmu_ctx.arch.name) - 1);
			} else if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A57) {
				pmu_ctx.arch.type =
					QUADD_AA64_CPU_TYPE_CORTEX_A57;
				pmu_ctx.current_map =
					quadd_armv8_pmuv3_a57_events_map;

				strncat(pmu_ctx.arch.name, " CORTEX-A57",
					sizeof(pmu_ctx.arch.name) -
					strlen(pmu_ctx.arch.name) - 1);
			} else {
				pmu_ctx.arch.type = QUADD_AA64_CPU_TYPE_ARM;
			}
		} else if (imp == QUADD_AA64_CPU_IMP_NVIDIA) {
			u32 ext_ver = armv8_id_afr0_el1_read();

			ext_ver = (ext_ver >> QUADD_ARMV8_PMU_NVEXT_SHIFT) &
				  QUADD_ARMV8_PMU_NVEXT_MASK;

			strncat(pmu_ctx.arch.name, " NVIDIA (Denver)",
				sizeof(pmu_ctx.arch.name) -
				strlen(pmu_ctx.arch.name) - 1);

			pmu_ctx.arch.type = QUADD_AA64_CPU_TYPE_DENVER;
			pmu_ctx.arch.ver = ext_ver;
			pmu_ctx.current_map =
				quadd_armv8_pmuv3_denver_events_map;
		} else {
			strncat(pmu_ctx.arch.name, " Unknown implementor code",
				sizeof(pmu_ctx.arch.name) -
				strlen(pmu_ctx.arch.name) - 1);
			pmu_ctx.arch.type = QUADD_AA64_CPU_TYPE_UNKNOWN_IMP;
		}

		pmu = &pmu_armv8_int;
		break;

	default:
		pr_err("error: incorrect PMUVer\n");
		break;
	}

	INIT_LIST_HEAD(&pmu_ctx.used_events);

	pmu_ctx.arch.name[sizeof(pmu_ctx.arch.name) - 1] = '\0';
	pr_info("arch: %s, type: %d, ver: %d\n",
		pmu_ctx.arch.name, pmu_ctx.arch.type, pmu_ctx.arch.ver);

	return pmu;
}

void quadd_armv8_pmu_deinit(void)
{
	free_events(&pmu_ctx.used_events);
}