1 INTERFACE [{ia32,ux,amd64}-perf_cnt]:
6 EXTENSION class Perf_cnt
12 static Perf_read_fn read_pmc[Max_slot];
13 virtual void init_loadcnt() = 0;
14 virtual void start_pmc(Mword) = 0;
16 static Perf_cnt_arch *pcnt;
19 static Perf_read_fn *read_pmc_fns;
20 static Perf_read_fn read_pmc_fn[Max_slot];
21 static char const *perf_type_str;
22 static Perf_event_type perf_event_type;
28 // basic initialization
29 virtual int init() = 0;
31 // set event the counter should count
32 virtual void set_pmc_event(Mword slot) = 0;
34 inline void touch_watchdog()
35 { Cpu::wrmsr(hold_watchdog, _ctr_reg0 + pmc_watchdog); }
45 char user; // 1=count in user mode
46 char kern; // 1=count in kernel mode
47 char edge; // 1=count edge / 0=count duration
48 Mword pmc; // # of performance counter
49 Mword bitmask; // counter bitmask
50 Mword evnt; // event selector
53 static Mword pmc_watchdog; // # perfcounter of watchdog
54 static Mword pmc_loadcnt; // # perfcounter of loadcnt
55 static Signed64 hold_watchdog;
56 static Event pmc_event[Perf_cnt::Max_slot]; // index is slot number
57 static char pmc_alloc[Perf_cnt::Max_pmc]; // index is # of perfcounter
// CPU-specific performance-counter drivers.  Each variant overrides only
// the Perf_cnt_arch hooks that differ for its hardware:
class Perf_cnt_p5 : public Perf_cnt_arch {};  // Intel Pentium / Pentium MMX
class Perf_cnt_p6 : public Perf_cnt_arch {};  // Intel PPro / PII / PIII
class Perf_cnt_k7 : public Perf_cnt_p6 {};    // AMD K7/K8 (P6-style evntsel MSRs)
class Perf_cnt_p4 : public Perf_cnt_arch {};  // Intel Pentium 4 (ESCR/CCCR scheme)
class Perf_cnt_ap : public Perf_cnt_p6 {};    // Intel Arch Perfmon (Core and later)
66 IMPLEMENTATION [{ia32,ux,amd64}-perf_cnt]:
73 #include "static_init.h"
// Per-slot counter read functions.  Every slot starts out on the dummy
// reader (returns 0) and is switched to a real rdpmc/rdmsr reader via
// Perf_cnt::set_pmc_fn() once a hardware counter is allocated for it.
Perf_cnt::Perf_read_fn Perf_cnt::read_pmc[Perf_cnt::Max_slot] =
{ dummy_read_pmc, dummy_read_pmc };
Perf_cnt::Perf_read_fn Perf_cnt::read_pmc_fn[Perf_cnt::Max_slot] =
{ dummy_read_pmc, dummy_read_pmc };
81 Perf_cnt::Perf_read_fn *Perf_cnt::read_pmc_fns;
82 Perf_cnt::Perf_event_type Perf_cnt::perf_event_type;
83 Perf_cnt_arch *Perf_cnt::pcnt;
84 char const *Perf_cnt::perf_type_str = "n/a";
86 Mword Perf_cnt_arch::pmc_watchdog = (Mword)-1;
87 Mword Perf_cnt_arch::pmc_loadcnt = (Mword)-1;
88 Signed64 Perf_cnt_arch::hold_watchdog;
89 Perf_cnt_arch::Event Perf_cnt_arch::pmc_event[Perf_cnt::Max_slot];
90 char Perf_cnt_arch::pmc_alloc[Perf_cnt::Max_pmc];
92 static Perf_cnt_p5 perf_cnt_p5 __attribute__ ((init_priority(PERF_CNT_INIT_PRIO)));
93 static Perf_cnt_p6 perf_cnt_p6 __attribute__ ((init_priority(PERF_CNT_INIT_PRIO)));
94 static Perf_cnt_k7 perf_cnt_k7 __attribute__ ((init_priority(PERF_CNT_INIT_PRIO)));
95 static Perf_cnt_p4 perf_cnt_p4 __attribute__ ((init_priority(PERF_CNT_INIT_PRIO)));
96 static Perf_cnt_ap perf_cnt_ap __attribute__ ((init_priority(PERF_CNT_INIT_PRIO)));
100 Alloc_none = 0, // unallocated
101 Alloc_perf = 1, // allocated as performance counter
102 Alloc_watchdog = 2, // allocated for watchdog
111 P5_evntsel_user = 0x00000080,
112 P5_evntsel_kern = 0x00000040,
113 P5_evntsel_duration = 0x00000100,
116 Msr_p6_perfctr0 = 0xC1,
117 Msr_p6_evntsel0 = 0x186,
118 P6_evntsel_enable = 0x00400000,
119 P6_evntsel_int = 0x00100000,
120 P6_evntsel_user = 0x00010000,
121 P6_evntsel_kern = 0x00020000,
122 P6_evntsel_edge = 0x00040000,
125 Msr_k7_evntsel0 = 0xC0010000,
126 Msr_k7_perfctr0 = 0xC0010004,
127 K7_evntsel_enable = P6_evntsel_enable,
128 K7_evntsel_int = P6_evntsel_int,
129 K7_evntsel_user = P6_evntsel_user,
130 K7_evntsel_kern = P6_evntsel_kern,
131 K7_evntsel_edge = P6_evntsel_edge,
134 Msr_p4_misc_enable = 0x1A0,
135 Msr_p4_perfctr0 = 0x300,
136 Msr_p4_bpu_counter0 = 0x300,
137 Msr_p4_cccr0 = 0x360,
138 Msr_p4_fsb_escr0 = 0x3A2,
139 P4_escr_user = (1<<2),
140 P4_escr_kern = (1<<3),
141 Msr_p4_bpu_cccr0 = 0x360,
142 P4_cccr_ovf = (1<<31),
143 P4_cccr_ovf_pmi = (1<<26),
144 P4_cccr_complement = (1<<19),
145 P4_cccr_compare = (1<<18),
146 P4_cccr_required = (3<<16),
147 P4_cccr_enable = (1<<12),
149 Msr_ap_perfctr0 = 0xC1,
150 Msr_ap_evntsel0 = 0x186,
151 AP_evntsel_enable = P6_evntsel_enable,
152 AP_evntsel_int = P6_evntsel_int,
153 AP_evntsel_user = P6_evntsel_user,
154 AP_evntsel_kern = P6_evntsel_kern,
155 AP_evntsel_edge = P6_evntsel_edge,
158 // -----------------------------------------------------------------------
// CPU type IDs as passed to the external perfctr library
// (perfctr_set_cputype() / perfctr_cpu_event_set()).
Perfctr_x86_generic = 0, /* any x86 with rdtsc */
Perfctr_x86_intel_p5 = 1, /* no rdpmc */
Perfctr_x86_intel_p5mmx = 2,
Perfctr_x86_intel_p6 = 3,
Perfctr_x86_intel_pii = 4,
Perfctr_x86_intel_piii = 5,
Perfctr_x86_intel_p4 = 11, /* model 0 and 1 */
Perfctr_x86_intel_p4m2 = 12, /* model 2 */
Perfctr_x86_intel_p4m3 = 16, /* model 3 and above */
Perfctr_x86_intel_pentm = 14,
Perfctr_x86_amd_k7 = 9,
Perfctr_x86_amd_k8 = 13,
// NOTE(review): value 14 collides with Perfctr_x86_intel_pentm above, so a
// Pentium M and an arch-perfmon CPU report the same ID to the perfctr
// library -- confirm against the library's canonical cpu-type list.
Perfctr_x86_arch_perfmon = 14,
177 enum perfctr_unit_mask_type
179 perfctr_um_type_fixed, /* one fixed (required) value */
180 perfctr_um_type_exclusive, /* exactly one of N values */
181 perfctr_um_type_bitmask, /* bitwise 'or' of N power-of-2 values */
184 struct perfctr_unit_mask_value
187 const char *description; /* [NAME:]text */
190 struct perfctr_unit_mask
192 unsigned int default_value;
193 enum perfctr_unit_mask_type type:16;
194 unsigned short nvalues;
195 struct perfctr_unit_mask_value values[1/*nvalues*/];
200 unsigned int evntsel;
201 unsigned int counters_set; /* P4 force this to be CPU-specific */
202 const struct perfctr_unit_mask *unit_mask;
204 const char *description;
207 struct perfctr_event_set
209 unsigned int cpu_type;
210 const char *event_prefix;
211 const struct perfctr_event_set *include;
212 unsigned int nevents;
213 const struct perfctr_event *events;
216 // The following functions are only available if the perfctr module
// is linked into the kernel. If not, all symbols perfctr_* are 0.
220 extern "C" void perfctr_set_cputype(unsigned)
221 __attribute__((weak));
222 extern "C" const struct perfctr_event* perfctr_lookup_event(unsigned,
224 __attribute__((weak));
225 extern "C" const struct perfctr_event* perfctr_index_event(unsigned)
226 __attribute__((weak));
227 extern "C" unsigned perfctr_get_max_event(void)
228 __attribute__((weak));
229 extern "C" const struct perfctr_event_set *perfctr_cpu_event_set(unsigned)
230 __attribute__((weak));
234 clear_msr_range(Mword base, Mword n)
236 for (Mword i=0; i<n; i++)
237 Cpu::wrmsr(0, base+i);
240 //--------------------------------------------------------------------
243 static Mword dummy_read_pmc() { return 0; }
245 //--------------------------------------------------------------------
246 // Intel P5 (Pentium/Pentium MMX) has 2 performance counters. No overflow
// interrupt available. Some events are not symmetric.
248 PUBLIC inline NOEXPORT
249 Perf_cnt_p5::Perf_cnt_p5()
250 : Perf_cnt_arch(Msr_p5_cesr, Msr_p5_ctr0, 2, 0)
257 Cpu::wrmsr(0, Msr_p5_cesr); // disable both counters
262 Perf_cnt_p5::set_pmc_event(Mword slot)
267 event = pmc_event[slot].evnt;
268 if (pmc_event[slot].user)
269 event += P5_evntsel_user;
270 if (pmc_event[slot].kern)
271 event += P5_evntsel_kern;
272 if (pmc_event[slot].kern)
273 event += P5_evntsel_kern;
274 if (!pmc_event[slot].edge)
275 event += P5_evntsel_duration;
277 msr = Cpu::rdmsr(Msr_p5_cesr);
278 if (pmc_event[slot].pmc == 0)
286 msr |= (event << 16);
288 Cpu::wrmsr(event, Msr_p5_cesr);
291 static Mword p5_read_pmc_0()
292 { return Cpu::rdmsr(Msr_p5_ctr0); }
293 static Mword p5_read_pmc_1()
294 { return Cpu::rdmsr(Msr_p5_ctr1); }
296 static Perf_cnt::Perf_read_fn p5_read_pmc_fns[] =
297 { &p5_read_pmc_0, &p5_read_pmc_1 };
300 //--------------------------------------------------------------------
301 // Intel P6 (PPro/PII/PIII) has 2 performance counters. Overflow interrupt
// is available. Some events are not symmetric.
303 PUBLIC inline NOEXPORT
304 Perf_cnt_p6::Perf_cnt_p6()
305 : Perf_cnt_arch(Msr_p6_evntsel0, Msr_p6_perfctr0, 2, 1)
309 Perf_cnt_p6::Perf_cnt_p6(Mword sel_reg0, Mword ctr_reg0,
310 Mword nr_regs, Mword watchdog)
311 : Perf_cnt_arch(sel_reg0, ctr_reg0, nr_regs, watchdog)
318 for (Mword i=0; i<_nr_regs; i++)
320 Cpu::wrmsr(0, _sel_reg0+i);
321 Cpu::wrmsr(0, _ctr_reg0+i);
327 Perf_cnt_p6::set_pmc_event(Mword slot)
331 event = pmc_event[slot].evnt;
332 if (pmc_event[slot].user)
333 event += P6_evntsel_user;
334 if (pmc_event[slot].kern)
335 event += P6_evntsel_kern;
336 if (pmc_event[slot].edge)
337 event += P6_evntsel_edge;
340 Cpu::wrmsr(event, _sel_reg0+pmc_event[slot].pmc);
344 Perf_cnt_p6::start_pmc(Mword /*reg_nr*/)
348 msr = Cpu::rdmsr(_sel_reg0);
349 msr |= P6_evntsel_enable; // enable both!! counters
350 Cpu::wrmsr(msr, _sel_reg0);
354 Perf_cnt_p6::init_watchdog()
358 msr = P6_evntsel_int // Int enable: enable interrupt on overflow
359 | P6_evntsel_kern // Monitor kernel-level events
360 | P6_evntsel_user // Monitor user-level events
361 | 0x79; // #clocks CPU is not halted
362 Cpu::wrmsr(msr, _sel_reg0+pmc_watchdog);
366 Perf_cnt_p6::init_loadcnt()
370 msr = P6_evntsel_kern // Monitor kernel-level events
371 | P6_evntsel_user // Monitor user-level events
372 | 0x79; // #clocks CPU is not halted
373 Cpu::wrmsr(msr, _sel_reg0+pmc_loadcnt);
375 printf("Load counter initialized (read with rdpmc(0x%02lX))\n", pmc_loadcnt);
379 Perf_cnt_p6::start_watchdog()
383 msr = Cpu::rdmsr(_sel_reg0+pmc_watchdog);
384 msr |= P6_evntsel_int; // Int enable: enable interrupt on overflow
385 Cpu::wrmsr(msr, _sel_reg0+pmc_watchdog);
// Stop the watchdog: mask the overflow interrupt of the watchdog counter.
Perf_cnt_p6::stop_watchdog()
  msr = Cpu::rdmsr(_sel_reg0+pmc_watchdog);
  msr &= ~P6_evntsel_int; // Int disable: no interrupt on overflow
  Cpu::wrmsr(msr, _sel_reg0+pmc_watchdog);
398 static Mword p6_read_pmc_0() { return Cpu::rdpmc(0, 0xC1); }
399 static Mword p6_read_pmc_1() { return Cpu::rdpmc(1, 0xC2); }
401 static Perf_cnt::Perf_read_fn p6_read_pmc_fns[] =
402 { &p6_read_pmc_0, &p6_read_pmc_1 };
405 //--------------------------------------------------------------------
406 // AMD K7 (Athlon, K8=Athlon64) has 4 performance counters. All events
407 // seem to be symmetric. Overflow interrupts available.
408 PUBLIC inline NOEXPORT
409 Perf_cnt_k7::Perf_cnt_k7()
410 : Perf_cnt_p6(Msr_k7_evntsel0, Msr_k7_perfctr0, 4, 1)
414 Perf_cnt_k7::start_pmc(Mword reg_nr)
418 msr = Cpu::rdmsr(_sel_reg0+reg_nr);
419 msr |= K7_evntsel_enable;
420 Cpu::wrmsr(msr, _sel_reg0+reg_nr);
424 Perf_cnt_k7::init_watchdog()
428 msr = K7_evntsel_int // Int enable: enable interrupt on overflow
429 | K7_evntsel_kern // Monitor kernel-level events
430 | K7_evntsel_user // Monitor user-level events
431 | 0x76; // #clocks CPU is running
432 Cpu::wrmsr(msr, _sel_reg0+pmc_watchdog);
436 Perf_cnt_k7::init_loadcnt()
440 msr = K7_evntsel_kern // Monitor kernel-level events
441 | K7_evntsel_user // Monitor user-level events
442 | 0x76; // #clocks CPU is running
443 Cpu::wrmsr(msr, _sel_reg0+pmc_loadcnt);
445 printf("Load counter initialized (read with rdpmc(0x%02lX))\n", pmc_loadcnt);
448 static Mword k7_read_pmc_0() { return Cpu::rdpmc(0, 0xC0010004); }
449 static Mword k7_read_pmc_1() { return Cpu::rdpmc(1, 0xC0010005); }
450 static Mword k7_read_pmc_2() { return Cpu::rdpmc(2, 0xC0010006); }
451 static Mword k7_read_pmc_3() { return Cpu::rdpmc(3, 0xC0010007); }
453 static Perf_cnt::Perf_read_fn k7_read_pmc_fns[] =
454 { &k7_read_pmc_0, &k7_read_pmc_1, &k7_read_pmc_2, &k7_read_pmc_3 };
457 //--------------------------------------------------------------------
458 // Arch Perfmon. Intel Core architecture
459 PUBLIC inline NOEXPORT
460 Perf_cnt_ap::Perf_cnt_ap()
461 : Perf_cnt_p6(Msr_ap_evntsel0, Msr_ap_perfctr0, 2, 1)
463 Unsigned32 eax, ebx, ecx;
464 Cpu::boot_cpu()->arch_perfmon_info(&eax, &ebx, &ecx);
465 _nr_regs = (eax & 0x0000ff00) >> 8;
469 Perf_cnt_ap::start_pmc(Mword reg_nr)
473 msr = Cpu::rdmsr(_sel_reg0 + reg_nr);
474 msr |= AP_evntsel_enable;
475 Cpu::wrmsr(msr, _sel_reg0 + reg_nr);
479 Perf_cnt_ap::init_watchdog()
483 msr = AP_evntsel_int // Int enable: enable interrupt on overflow
484 | AP_evntsel_kern // Monitor kernel-level events
485 | AP_evntsel_user // Monitor user-level events
486 | 0x3C; // #clocks CPU is running
487 Cpu::wrmsr(msr, _sel_reg0 + pmc_watchdog);
491 Perf_cnt_ap::init_loadcnt()
495 msr = AP_evntsel_kern // Monitor kernel-level events
496 | AP_evntsel_user // Monitor user-level events
497 | 0x3C; // #clocks CPU is running
498 Cpu::wrmsr(msr, _sel_reg0 + pmc_loadcnt);
500 printf("Load counter initialized (read with rdpmc(0x%02lX))\n", pmc_loadcnt);
504 //--------------------------------------------------------------------
506 PUBLIC inline NOEXPORT
507 Perf_cnt_p4::Perf_cnt_p4()
508 : Perf_cnt_arch(Msr_p4_bpu_cccr0, Msr_p4_bpu_counter0, 2, 1)
511 static inline NOEXPORT
513 Perf_cnt_p4::escr_event_select(Mword n)
516 static inline NOEXPORT
518 Perf_cnt_p4::escr_event_mask(Mword n)
521 static inline NOEXPORT
523 Perf_cnt_p4::cccr_threshold(Mword n)
526 static inline NOEXPORT
528 Perf_cnt_p4::cccr_escr_select(Mword n)
535 Unsigned32 misc_enable = Cpu::rdmsr(Msr_p4_misc_enable);
537 // performance monitoring available?
538 if (!(misc_enable & (1<<7)))
541 // disable precise event based sampling
542 if (!(misc_enable & (1<<12)))
543 clear_msr_range(0x3F1, 2);
545 // ensure sane state of performance counter registers
546 clear_msr_range(0x3A0, 26);
547 if (Cpu::boot_cpu()->model() <= 2)
548 clear_msr_range(0x3BA, 2);
549 clear_msr_range(0x3BC, 3);
550 clear_msr_range(0x3C0, 6);
551 clear_msr_range(0x3C8, 6);
552 clear_msr_range(0x3E0, 2);
553 clear_msr_range(Msr_p4_cccr0, 18);
554 clear_msr_range(Msr_p4_perfctr0, 18);
560 Perf_cnt_p4::set_pmc_event(Mword /*slot*/)
564 Perf_cnt_p4::start_pmc(Mword reg_nr)
568 msr = Cpu::rdmsr(Msr_p4_bpu_cccr0 + reg_nr);
569 msr |= P4_cccr_enable;
570 Cpu::wrmsr(msr, Msr_p4_bpu_cccr0 + reg_nr);
574 Perf_cnt_p4::init_watchdog()
578 msr = escr_event_select(0x13) // global power events
579 | escr_event_mask(1) // the processor is active (non-halted)
580 | P4_escr_kern // Monitor kernel-level events
581 | P4_escr_user; // Monitor user-level events
582 Cpu::wrmsr(msr, Msr_p4_fsb_escr0);
584 msr = P4_cccr_ovf_pmi // performance monitor interrupt on overflow
585 | P4_cccr_required // must be set
586 | cccr_escr_select(6); // select ESCR to select events to be counted
587 Cpu::wrmsr(msr, Msr_p4_bpu_cccr0 + pmc_watchdog);
591 Perf_cnt_p4::init_loadcnt()
595 msr = escr_event_select(0x13) // global power events
596 | escr_event_mask(1) // the processor is active (non-halted)
597 | P4_escr_kern // Monitor kernel-level events
598 | P4_escr_user; // Monitor user-level events
599 Cpu::wrmsr(msr, Msr_p4_fsb_escr0);
601 msr = P4_cccr_required // must be set
602 | cccr_escr_select(6); // select ESCR to select events to be counted
604 Cpu::wrmsr(msr, Msr_p4_bpu_cccr0 + pmc_loadcnt);
606 printf("Load counter initialized (read with rdpmc(0x%02lX))\n",
// Start the watchdog: enable counting and the overflow PMI on the
// watchdog's CCCR, and clear any stale overflow flag.
Perf_cnt_p4::start_watchdog()
  // NOTE(review): reads CCCR0 but writes CCCR0 + pmc_watchdog below; the
  // P6/K7 variants read and write the same register -- confirm the rdmsr
  // should not also add pmc_watchdog.
  msr = Cpu::rdmsr(Msr_p4_bpu_cccr0);
  msr |= P4_cccr_ovf_pmi | P4_cccr_enable; // Int enable, Ctr enable
  msr &= ~P4_cccr_ovf; // clear Overflow
  Cpu::wrmsr(msr, Msr_p4_bpu_cccr0 + pmc_watchdog);
// Stop the watchdog: disable counting and the overflow PMI on the
// watchdog's CCCR.
Perf_cnt_p4::stop_watchdog()
  // NOTE(review): reads CCCR0 but writes CCCR0 + pmc_watchdog below --
  // confirm the rdmsr should not also add pmc_watchdog.
  msr = Cpu::rdmsr(Msr_p4_bpu_cccr0);
  msr &= ~(P4_cccr_ovf_pmi | P4_cccr_enable); // Int disable, Ctr disable
  Cpu::wrmsr(msr, Msr_p4_bpu_cccr0 + pmc_watchdog);
633 p4_read_pmc() { return 0; }
635 static Perf_cnt::Perf_read_fn p4_read_pmc_fns[] = { &p4_read_pmc };
638 //--------------------------------------------------------------------
641 Perf_cnt_arch::Perf_cnt_arch(Mword sel_reg0, Mword ctr_reg0,
642 Mword nr_regs, Mword watchdog)
644 _sel_reg0 = sel_reg0;
645 _ctr_reg0 = ctr_reg0;
647 _watchdog = watchdog;
649 for (Mword slot=0; slot<Perf_cnt::Max_slot; slot++)
651 pmc_event[slot].pmc = (Mword)-1;
652 pmc_event[slot].edge = 0;
658 Perf_cnt_arch::watchdog_allocated()
659 { return (pmc_watchdog != (Mword)-1); }
663 Perf_cnt_arch::loadcnt_allocated()
664 { return (pmc_loadcnt != (Mword)-1); }
667 Perf_cnt_arch::alloc_watchdog()
670 panic("Watchdog not available");
672 for (Mword pmc=0; pmc<_nr_regs; pmc++)
673 if (pmc_alloc[pmc] == Alloc_none)
675 pmc_alloc[pmc] = Alloc_watchdog;
680 panic("No performance counter available for watchdog");
684 Perf_cnt_arch::alloc_loadcnt()
686 if (pmc_alloc[0] == Alloc_watchdog)
688 // allocate the watchdog counter
689 pmc_alloc[0] = Alloc_perf;
691 // move the watchdog to another counter
692 pmc_watchdog = (Mword)-1;
697 if (pmc_alloc[0] == Alloc_none)
699 pmc_alloc[0] = Alloc_perf;
704 panic("No performance counter available for loadcounter");
707 // allocate a performance counter according to bitmask (some events depend
708 // on specific counters)
711 Perf_cnt_arch::alloc_pmc(Mword slot, Mword bitmask)
713 // free previous allocated counter
714 Mword pmc = pmc_event[slot].pmc;
715 if (pmc != (Mword)-1 && pmc_alloc[pmc] == Alloc_perf)
717 pmc_event[slot].pmc = (Mword)-1;
718 pmc_alloc[pmc] = Alloc_none;
721 // search counter according to bitmask
722 for (pmc=0; pmc<_nr_regs; pmc++)
723 if ((pmc_alloc[pmc] == Alloc_none) && (bitmask & (1<<pmc)))
725 pmc_event[slot].pmc = pmc;
726 pmc_alloc[pmc] = Alloc_perf;
727 Perf_cnt::set_pmc_fn(slot, pmc);
// did not find an appropriate free counter (restricted by bitmask), so try
732 // to re-assign the watchdog because the watchdog usually uses a more general
733 // counter with no restrictions
734 if (watchdog_allocated() && (bitmask & (1<<pmc_watchdog)))
736 // allocate the watchdog counter
737 pmc_event[slot].pmc = pmc_watchdog;
738 pmc_alloc[pmc_watchdog] = Alloc_perf;
739 Perf_cnt::set_pmc_fn(slot, pmc_watchdog);
740 // move the watchdog to another counter
741 pmc_watchdog = (Mword)-1;
751 Perf_cnt_arch::clear_pmc(Mword reg_nr)
752 { Cpu::wrmsr(0, _ctr_reg0+reg_nr); }
756 Perf_cnt_arch::mode(Mword slot, const char **mode,
757 Mword *event, Mword *user, Mword *kern, Mword *edge)
759 static const char * const mode_str[2][2][2] =
760 { { { "off", "off" }, { "d.K", "e.K" } },
761 { { "d.U", "e.U" }, { "d.K+U", "e.K+U" } } };
763 *mode = mode_str[(int)pmc_event[slot].user]
764 [(int)pmc_event[slot].kern]
765 [(int)pmc_event[slot].edge];
766 *event = pmc_event[slot].evnt;
767 *user = pmc_event[slot].user;
768 *kern = pmc_event[slot].kern;
769 *edge = pmc_event[slot].edge;
774 Perf_cnt_arch::setup_pmc(Mword slot, Mword bitmask,
775 Mword event, Mword user, Mword kern, Mword edge)
777 if (alloc_pmc(slot, bitmask))
779 pmc_event[slot].user = user;
780 pmc_event[slot].kern = kern;
781 pmc_event[slot].edge = edge;
782 pmc_event[slot].evnt = event;
783 pmc_event[slot].bitmask = bitmask;
785 clear_pmc(pmc_event[slot].pmc);
786 start_pmc(pmc_event[slot].pmc);
792 Perf_cnt_arch::start_pmc(Mword /*reg_nr*/)
794 // nothing to do per default
797 // watchdog supported by performance counter architecture?
800 Perf_cnt_arch::have_watchdog()
801 { return _watchdog; }
805 Perf_cnt_arch::setup_watchdog(Mword timeout)
808 if (watchdog_allocated())
810 hold_watchdog = ((Signed64)((Cpu::boot_cpu()->frequency() >> 16) * timeout)) << 16;
811 // The maximum value a performance counter register can be written to
812 // is 0x7ffffffff. The 31st bit is extracted to the bits 32-39 (see
813 // "IA-32 Intel Architecture Software Developer's Manual. Volume 3:
814 // Programming Guide" section 14.10.2: PerfCtr0 and PerfCtr1 MSRs.
815 if (hold_watchdog > 0x7fffffff)
816 hold_watchdog = 0x7fffffff;
817 hold_watchdog = -hold_watchdog;
821 start_pmc(pmc_watchdog);
827 Perf_cnt_arch::setup_loadcnt()
830 if (loadcnt_allocated())
833 start_pmc(pmc_loadcnt);
839 Perf_cnt_arch::init_watchdog()
840 {} // no watchdog per default
844 Perf_cnt_arch::init_loadcnt()
845 { panic("Cannot initialize load counter"); }
847 // start watchdog (enable generation of overflow interrupt)
850 Perf_cnt_arch::start_watchdog()
851 {} // no watchdog per default
853 // stop watchdog (disable generation of overflow interrupt)
856 Perf_cnt_arch::stop_watchdog()
857 {} // no watchdog per default
859 //--------------------------------------------------------------------
861 STATIC_INITIALIZE_P(Perf_cnt, PERF_CNT_INIT_PRIO);
863 // basic perfcounter detection
864 PUBLIC static FIASCO_INIT_CPU
868 Cpu const &cpu = *Cpu::boot_cpu();
869 Mword perfctr_type = Perfctr_x86_generic;
870 Unsigned32 eax, ebx, ecx;
872 for (Mword i=0; i<Perf_cnt::Max_slot; i++)
873 read_pmc_fn[i] = dummy_read_pmc;
875 if (cpu.tsc() && cpu.can_wrmsr())
877 cpu.arch_perfmon_info(&eax, &ebx, &ecx);
878 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
880 perfctr_type = Perfctr_x86_arch_perfmon;
881 perf_type_str = "PA";
882 read_pmc_fns = p6_read_pmc_fns;
885 if (perfctr_type == Perfctr_x86_generic)
887 if (cpu.vendor() == cpu.Vendor_intel)
890 switch (cpu.family())
893 perf_event_type = P5;
894 if (cpu.local_features() & Cpu::Lf_rdpmc)
896 perfctr_type = Perfctr_x86_intel_p5mmx;
897 perf_type_str = "P5MMX";
898 read_pmc_fns = p6_read_pmc_fns;
902 perfctr_type = Perfctr_x86_intel_p5;
903 perf_type_str = "P5";
904 read_pmc_fns = p5_read_pmc_fns;
910 perf_event_type = P6;
911 if (cpu.model() == 9 || cpu.model() == 13)
913 perfctr_type = Perfctr_x86_intel_pentm;
914 perf_type_str = "PntM";
916 else if (cpu.model() >= 7)
918 perfctr_type = Perfctr_x86_intel_piii;
919 perf_type_str = "PIII";
921 else if (cpu.model() >= 3)
923 perfctr_type = Perfctr_x86_intel_pii;
924 perf_type_str = "PII";
928 perfctr_type = Perfctr_x86_intel_p6;
929 perf_type_str = "PPro";
931 read_pmc_fns = p6_read_pmc_fns;
936 perf_event_type = P4;
937 if (cpu.model() >= 3)
939 perfctr_type = Perfctr_x86_intel_p4m3;
940 perf_type_str = "P4M3";
942 else if (cpu.model() >= 2)
944 perfctr_type = Perfctr_x86_intel_p4m2;
945 perf_type_str = "P4M2";
949 perfctr_type = Perfctr_x86_intel_p4;
950 perf_type_str = "P4";
952 read_pmc_fns = p4_read_pmc_fns;
957 else if (cpu.vendor() == Cpu::Vendor_amd)
960 switch (cpu.family())
965 if (cpu.family() == 15)
967 perf_type_str = "K8";
968 perfctr_type = Perfctr_x86_amd_k8;
972 perf_type_str = "K7";
973 perfctr_type = Perfctr_x86_amd_k7;
975 perf_event_type = P6;
976 read_pmc_fns = k7_read_pmc_fns;
983 // set PCE-Flag in CR4 to enable read of performance measurement
984 // counters in usermode. PMC were introduced in Pentium MMX and
986 if (cpu.local_features() & Cpu::Lf_rdpmc)
990 if (pcnt && !pcnt->init())
992 perfctr_type = Perfctr_x86_generic;
993 pcnt = 0; // init failed, no performance counters available
996 if (perfctr_cpu_event_set != 0 && perfctr_cpu_event_set(perfctr_type) == 0)
998 perfctr_type = Perfctr_x86_generic;
999 pcnt = 0; // init failed, no performance counters available
1002 // tell perflib the cpu type
1003 if (perfctr_set_cputype != 0)
1004 perfctr_set_cputype(perfctr_type);
1008 PUBLIC static inline void FIASCO_INIT_CPU
1013 Perf_cnt::pcnt->init();
1014 Perf_cnt::pcnt->init_loadcnt();
1015 Perf_cnt::pcnt->start_pmc(0);
1019 PUBLIC static inline NOEXPORT void
1020 Perf_cnt::set_pmc_fn(Mword slot, Mword nr)
1021 { read_pmc_fn[slot] = read_pmc_fns[nr]; }
1023 // watchdog supported by performance counter architecture?
1024 PUBLIC static inline
1026 Perf_cnt::have_watchdog()
1027 { return (pcnt && pcnt->have_watchdog()); }
1029 // setup watchdog function with timeout in seconds
1030 PUBLIC static inline
1032 Perf_cnt::setup_watchdog(Mword timeout)
1035 pcnt->setup_watchdog(timeout);
1038 PUBLIC static inline
1040 Perf_cnt::setup_loadcnt()
1043 pcnt->setup_loadcnt();
1046 PUBLIC static inline
1048 Perf_cnt::start_watchdog()
1050 if (pcnt && pcnt->watchdog_allocated())
1052 pcnt->touch_watchdog();
1053 pcnt->start_watchdog();
1057 PUBLIC static inline
1059 Perf_cnt::stop_watchdog()
1061 if (pcnt && pcnt->watchdog_allocated())
1062 pcnt->stop_watchdog();
1065 PUBLIC static inline
1067 Perf_cnt::touch_watchdog()
1069 if (pcnt && pcnt->watchdog_allocated())
1070 pcnt->touch_watchdog();
1073 // return human-readable type of performance counters
1074 PUBLIC static inline
1076 Perf_cnt::perf_type()
1077 { return perf_type_str; }
1079 // set performance counter counting the selected event in slot #slot
1082 Perf_cnt::setup_pmc(Mword slot, Mword event, Mword user, Mword kern, Mword edge)
1087 unsigned nr, evntsel;
1088 Mword bitmask, unit_mask;
1089 const struct perfctr_event *pe = 0;
1091 split_event(event, &evntsel, &unit_mask);
1092 if (perfctr_lookup_event != 0)
1093 pe = perfctr_lookup_event(evntsel, &nr);
1094 bitmask = pe ? pe->counters_set : 0xffff;
1095 pcnt->setup_pmc(slot, bitmask, event, user, kern, edge);
1096 Tb_entry::set_rdcnt(slot, (kern | user) ? read_pmc_fn[slot] : 0);
1100 // return current selected event for a slot #slot
1103 Perf_cnt::mode(Mword slot, const char **mode, const char **name,
1104 Mword *event, Mword *user, Mword *kern, Mword *edge)
1106 if (!perf_type() || !pcnt)
1109 *event = *user = *kern = 0;
1113 unsigned nr, evntsel;
1115 const struct perfctr_event *pe = 0;
1117 pcnt->mode(slot, mode, event, user, kern, edge);
1118 split_event(*event, &evntsel, &unit_mask);
1119 if (perfctr_lookup_event != 0)
1120 pe = perfctr_lookup_event(evntsel, &nr);
1121 *name = pe ? pe->name : "";
1126 Perf_cnt::get_max_perf_event()
1127 { return (perfctr_get_max_event != 0) ? perfctr_get_max_event() : 0; }
1130 Perf_cnt::get_perf_event(Mword nr, unsigned *evntsel,
1131 const char **name, const char **desc)
1133 const struct perfctr_event *pe = 0;
1135 if (perfctr_index_event != 0)
1136 pe = perfctr_index_event(nr);
1138 *name = pe ? pe->name : 0;
1139 *desc = pe ? pe->description : 0;
1140 *evntsel = pe ? pe->evntsel : 0;
1144 Perf_cnt::lookup_event(unsigned evntsel)
1148 if (perfctr_lookup_event != 0 && perfctr_lookup_event(evntsel, &nr) != 0)
1154 Perf_cnt::get_unit_mask(Mword nr, Unit_mask_type *type,
1155 Mword *default_value, Mword *nvalues)
1157 const struct perfctr_event *event = 0;
1159 if (perfctr_index_event != 0)
1160 event = perfctr_index_event(nr);
1163 if (event && event->unit_mask)
1165 *default_value = event->unit_mask->default_value;
1166 switch (event->unit_mask->type)
1168 case perfctr_um_type_fixed: *type = Fixed; break;
1169 case perfctr_um_type_exclusive: *type = Exclusive; break;
1170 case perfctr_um_type_bitmask: *type = Bitmask; break;
1172 *nvalues = event->unit_mask->nvalues;
1177 Perf_cnt::get_unit_mask_entry(Mword nr, Mword idx,
1178 Mword *value, const char **desc)
1180 const struct perfctr_event *event = 0;
1182 if (perfctr_index_event != 0)
1183 event = perfctr_index_event(nr);
1187 if (event && event->unit_mask && (idx < event->unit_mask->nvalues))
1189 *value = event->unit_mask->values[idx].value;
1190 *desc = event->unit_mask->values[idx].description;
1194 /** Split event into event selector and unit mask (depending on perftype). */
1197 Perf_cnt::split_event(Mword event, unsigned *evntsel, Mword *unit_mask)
1199 switch (perf_event_type)
1206 *evntsel = event & 0x000000ff;
1207 *unit_mask = (event & 0x0000ff00) >> 8;
1217 /** Combine event from selector and unit mask. */
1220 Perf_cnt::combine_event(Mword evntsel, Mword unit_mask, Mword *event)
1222 switch (perf_event_type)
1228 *event = (evntsel & 0x000000ff) + ((unit_mask & 0x000000ff) << 8);