2 /*--------------------------------------------------------------------*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Callgrind, a Valgrind tool for call graph
11 Copyright (C) 2002-2010, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
13 This tool is derived from and contains code from Cachegrind
14 Copyright (C) 2002-2010 Nicholas Nethercote (njn@valgrind.org)
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
31 The GNU General Public License is contained in the file COPYING.
35 #include "callgrind.h"
38 #include "pub_tool_threadstate.h"
39 #include "pub_tool_gdbserver.h"
41 #include "cg_branchpred.c"
43 /*------------------------------------------------------------*/
44 /*--- Global variables ---*/
45 /*------------------------------------------------------------*/
/* Tool-wide command line options; fields presumably filled by the clo
   parsing module -- not visible in this listing. */
48 CommandLineOptions CLG_(clo);
/* Internal bookkeeping counters, reset by CLG_(init_statistics) below. */
49 Statistics CLG_(stat);
50 Bool CLG_(instrument_state) = True; /* Instrumentation on ? */
52 /* thread and signal handler specific */
53 exec_state CLG_(current_state);
56 /*------------------------------------------------------------*/
57 /*--- Statistics ---*/
58 /*------------------------------------------------------------*/
/* Reset all internal statistics counters to zero.
   NOTE: this listing is elided (original lines 61-64, 66-68, etc. are not
   shown); additional counters are presumably zeroed in the gaps. */
60 static void CLG_(init_statistics)(Statistics* s)
65 s->rec_call_counter = 0;
69 s->context_counter = 0;
70 s->bb_retranslations = 0;
73 s->distinct_files = 0;
75 s->distinct_contexts = 0;
77 s->distinct_bbccs = 0;
78 s->distinct_instrs = 0;
79 s->distinct_skips = 0;
81 s->bb_hash_resizes = 0;
82 s->bbcc_hash_resizes = 0;
83 s->jcc_hash_resizes = 0;
84 s->cxt_hash_resizes = 0;
85 s->fn_array_resizes = 0;
86 s->call_stack_resizes = 0;
87 s->fn_stack_resizes = 0;
89 s->full_debug_BBs = 0;
90 s->file_line_debug_BBs = 0;
91 s->fn_name_debug_BBs = 0;
93 s->bbcc_lru_misses = 0;
94 s->jcc_lru_misses = 0;
95 s->cxt_lru_misses = 0;
100 /*------------------------------------------------------------*/
101 /*--- Simple callbacks (not cache simulator)                 ---*/
102 /*------------------------------------------------------------*/
/* Helper called from generated IR on a global bus event (CAS, LOCK prefix,
   LL/SC -- see flushEvents).  Increments the EG_BUS counter both in the
   thread/signal-handler state and in the per-instruction cost array.
   (Listing elided: the declaration of cost_Bus, the 'else' of the
   nonskipped test, and the final increment through cost_Bus are in gaps.) */
105 static void log_global_event(InstrInfo* ii)
109 CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n",
110 CLG_(bb_base) + ii->instr_offset, ii->instr_size);
/* Nothing to record while collection is switched off. */
112 if (!CLG_(current_state).collect) return;
/* The instruction's event set must contain the bus-event group. */
114 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );
116 CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
/* Inside a skipped function, costs go to the 'skipped' cost array of the
   last non-skipped BBCC; otherwise to this instruction's own slot. */
118 if (CLG_(current_state).nonskipped)
119 cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
121 cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
126 /* For branches, we consult two different predictors, one which
127 predicts taken/untaken for conditional branches, and the other
128 which predicts the branch target address for indirect branches
129 (jump-to-register style ones). */
/* Helper called from generated IR for a conditional branch.  Feeds the
   taken/not-taken outcome to the branch predictor simulation (from the
   included cg_branchpred.c) and counts one Bc event plus, on a mispredict,
   one Bcm event (the '+1' slot).  (Listing elided: declarations of miss,
   cost_Bc, fullOffset_Bc, the 'else', and the mispredict guard are in gaps.) */
132 void log_cond_branch(InstrInfo* ii, Word taken)
138 CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %lu\n",
139 CLG_(bb_base) + ii->instr_offset, taken);
/* Always consult the predictor, even when not collecting, so its state
   stays consistent across collection toggles. */
141 miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);
143 if (!CLG_(current_state).collect) return;
145 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );
147 if (CLG_(current_state).nonskipped)
148 cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
150 cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];
152 fullOffset_Bc = fullOffset(EG_BC);
153 CLG_(current_state).cost[ fullOffset_Bc ]++;
/* Mispredict counter lives at the next slot after the Bc counter. */
156 CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
/* Helper called from generated IR for an indirect branch (target only known
   at run time).  Mirrors log_cond_branch, but uses the branch-target
   predictor and the EG_BI event group.  (Listing elided as above.) */
162 void log_ind_branch(InstrInfo* ii, UWord actual_dst)
168 CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n",
169 CLG_(bb_base) + ii->instr_offset, actual_dst);
171 miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);
173 if (!CLG_(current_state).collect) return;
175 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );
177 if (CLG_(current_state).nonskipped)
178 cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
180 cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];
182 fullOffset_Bi = fullOffset(EG_BI);
183 CLG_(current_state).cost[ fullOffset_Bi ]++;
/* Mispredict counter in the slot after the Bi counter. */
186 CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
191 /*------------------------------------------------------------*/
192 /*--- Instrumentation structures and event queue handling ---*/
193 /*------------------------------------------------------------*/
195 /* Maintain an ordered list of memory events which are outstanding, in
196 the sense that no IR has yet been generated to do the relevant
197 helper calls. The BB is scanned top to bottom and memory events
198 are added to the end of the list, merging with the most recent
199 notified event where possible (Dw immediately following Dr and
200 having the same size and EA can be merged).
202 This merging is done so that for architectures which have
203 load-op-store instructions (x86, amd64), the insn is treated as if
204 it makes just one memory reference (a modify), rather than two (a
205 read followed by a write at the same address).
207 At various points the list will need to be flushed, that is, IR
208 generated from it. That must happen before any possible exit from
209 the block (the end, or an IRStmt_Exit). Flushing also takes place
210 when there is no space to add a new event.
212 If we require the simulation statistics to be up to date with
213 respect to possible memory exceptions, then the list would have to
214 be flushed before each memory reference. That would however lose
215 performance by inhibiting event-merging during flushing.
217 Flushing the list consists of walking it start to end and emitting
218 instrumentation IR for each event, in the order in which they
219 appear. It may be possible to emit a single call for two adjacent
220 events in order to reduce the number of helper function calls made.
221 For example, it could well be profitable to handle two adjacent Ir
222 events with a single helper call. */
/* Fragment of the event-tag enumeration (Ev_Dr/Ev_Dw presumably sit in the
   elided lines) and, below, one field of the Event struct. */
230 Ev_Ir, // Instruction read
233 Ev_Dm, // Data modify (read then write)
234 Ev_Bc, // branch conditional
235 Ev_Bi, // branch indirect (to unknown destination)
236 Ev_G // Global bus event
/* Guard expression of a conditional-branch (Ev_Bc) event. */
260 IRAtom* taken; /* :: Ity_I1 */
/* Zero-initialize an Event record before it is filled in by an addEvent_*
   function. */
271 static void init_Event ( Event* ev ) {
272 VG_(memset)(ev, 0, sizeof(Event));
/* Return the effective-address atom of a data event (Dr/Dw/Dm only;
   asserts on any other tag). */
275 static IRAtom* get_Event_dea ( Event* ev ) {
277 case Ev_Dr: return ev->Ev.Dr.ea;
278 case Ev_Dw: return ev->Ev.Dw.ea;
279 case Ev_Dm: return ev->Ev.Dm.ea;
280 default: tl_assert(0);
/* Return the access size in bytes of a data event (Dr/Dw/Dm only;
   asserts on any other tag). */
284 static Int get_Event_dszB ( Event* ev ) {
286 case Ev_Dr: return ev->Ev.Dr.szB;
287 case Ev_Dw: return ev->Ev.Dw.szB;
288 case Ev_Dm: return ev->Ev.Dm.szB;
289 default: tl_assert(0);
294 /* Up to this many unnotified events are allowed. Number is
295 arbitrary. Larger numbers allow more event merging to occur, but
296 potentially induce more spilling due to extending live ranges of
297 address temporaries. */
301 /* A struct which holds all the running state during instrumentation.
302 Mostly to avoid passing loads of parameters everywhere. */
/* Fragment of the ClgState struct: only the events array is visible here;
   the bb, seen_before, events_used, ii_index, instr_offset and sbOut
   fields (used throughout below) are in elided lines. */
304 /* The current outstanding-memory-event list. */
305 Event events[N_EVENTS];
308 /* The array of InstrInfo's is part of BB struct. */
311 /* BB seen before (ie. re-instrumentation) */
314 /* Number InstrInfo bins 'used' so far. */
317 // current offset of guest instructions from BB start
320 /* The output SB being constructed. */
/* Debug pretty-printer for a queued Event; one branch per tag.
   (The switch header and case labels are in elided lines.) */
325 static void showEvent ( Event* ev )
329 VG_(printf)("Ir (InstrInfo %p) at +%d\n",
330 ev->inode, ev->inode->instr_offset);
333 VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=",
334 ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
335 ppIRExpr(ev->Ev.Dr.ea);
339 VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=",
340 ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
341 ppIRExpr(ev->Ev.Dw.ea);
345 VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=",
346 ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
347 ppIRExpr(ev->Ev.Dm.ea);
351 VG_(printf)("Bc %p GA=", ev->inode);
352 ppIRExpr(ev->Ev.Bc.taken);
356 VG_(printf)("Bi %p DST=", ev->inode);
357 ppIRExpr(ev->Ev.Bi.dst);
361 VG_(printf)("G %p\n", ev->inode);
369 /* Generate code for all outstanding memory events, and mark the queue
370 empty. Code is generated into cgs->sbOut, and this activity
371 'consumes' slots in cgs->bb. */
/* Emit instrumentation IR for every queued event and empty the queue.
   Two phases:
     1. On first translation of the BB, extend each InstrInfo's event set
        with the counter groups the queued events require.
     2. Walk the queue, merging adjacent events where a combined helper
        exists (Ir+Dr, Ir+Dw/Dm, Ir+Ir+Ir, Ir+Ir), and add one dirty
        helper call per emitted group.
   (Listing elided: the tag switch headers, 'inew' advancement, regparms
   computation and helper declarations live in the gaps.) */
373 static void flushEvents ( ClgState* clgs )
375 Int i, regparms, inew;
385 if (!clgs->seen_before) {
386 // extend event sets as needed
387 // available sets: D0 Dr
388 for(i=0; i<clgs->events_used; i++) {
389 ev = &clgs->events[i];
392 // Ir event always is first for a guest instruction
393 CLG_ASSERT(ev->inode->eventset == 0);
394 ev->inode->eventset = CLG_(sets).base;
397 // extend event set by Dr counters
398 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
403 // extend event set by Dw counters
404 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
408 // extend event set by Bc counters
409 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
413 // extend event set by Bi counters
414 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
418 // extend event set by Bus counter
419 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
/* Phase 2: walk the queue; 'inew' (set in elided lines) skips over the
   events consumed by each merge. */
428 for(i = 0; i < clgs->events_used; i = inew) {
435 /* generate IR to notify event i and possibly the ones
436 immediately following it. */
437 tl_assert(i >= 0 && i < clgs->events_used);
439 ev = &clgs->events[i];
440 ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
441 ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );
444 VG_(printf)(" flush ");
448 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
450 /* Decide on helper fn to call and args to pass it, and advance
452 Dm events have same effect as Dw events */
455 /* Merge an Ir with a following Dr. */
456 if (ev2 && ev2->tag == Ev_Dr) {
457 /* Why is this true? It's because we're merging an Ir
458 with a following Dr. The Ir derives from the
459 instruction's IMark and the Dr from data
460 references which follow it. In short it holds
461 because each insn starts with an IMark, hence an
462 Ev_Ir, and so these Dr must pertain to the
463 immediately preceding Ir. Same applies to analogous
464 assertions in the subsequent cases. */
465 tl_assert(ev2->inode == ev->inode);
466 helperName = CLG_(cachesim).log_1I1Dr_name;
467 helperAddr = CLG_(cachesim).log_1I1Dr;
468 argv = mkIRExprVec_3( i_node_expr,
470 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
474 /* Merge an Ir with a following Dw/Dm. */
476 if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
477 tl_assert(ev2->inode == ev->inode);
478 helperName = CLG_(cachesim).log_1I1Dw_name;
479 helperAddr = CLG_(cachesim).log_1I1Dw;
480 argv = mkIRExprVec_3( i_node_expr,
482 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
486 /* Merge an Ir with two following Irs. */
488 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
489 helperName = CLG_(cachesim).log_3I0D_name;
490 helperAddr = CLG_(cachesim).log_3I0D;
491 argv = mkIRExprVec_3( i_node_expr,
492 mkIRExpr_HWord( (HWord)ev2->inode ),
493 mkIRExpr_HWord( (HWord)ev3->inode ) );
497 /* Merge an Ir with one following Ir. */
499 if (ev2 && ev2->tag == Ev_Ir) {
500 helperName = CLG_(cachesim).log_2I0D_name;
501 helperAddr = CLG_(cachesim).log_2I0D;
502 argv = mkIRExprVec_2( i_node_expr,
503 mkIRExpr_HWord( (HWord)ev2->inode ) );
507 /* No merging possible; emit as-is. */
509 helperName = CLG_(cachesim).log_1I0D_name;
510 helperAddr = CLG_(cachesim).log_1I0D;
511 argv = mkIRExprVec_1( i_node_expr );
517 /* Data read or modify */
518 helperName = CLG_(cachesim).log_0I1Dr_name;
519 helperAddr = CLG_(cachesim).log_0I1Dr;
520 argv = mkIRExprVec_3( i_node_expr,
522 mkIRExpr_HWord( get_Event_dszB(ev) ) );
529 helperName = CLG_(cachesim).log_0I1Dw_name;
530 helperAddr = CLG_(cachesim).log_0I1Dw;
531 argv = mkIRExprVec_3( i_node_expr,
533 mkIRExpr_HWord( get_Event_dszB(ev) ) );
538 /* Conditional branch */
539 helperName = "log_cond_branch";
540 helperAddr = &log_cond_branch;
541 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
546 /* Branch to an unknown destination */
547 helperName = "log_ind_branch";
548 helperAddr = &log_ind_branch;
549 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
554 /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
555 helperName = "log_global_event";
556 helperAddr = &log_global_event;
557 argv = mkIRExprVec_1( i_node_expr );
567 VG_(printf)(" merge ");
571 VG_(printf)(" merge ");
575 VG_(printf)(" call %s (%p)\n",
576 helperName, helperAddr);
579 /* helper could be unset depending on the simulator used */
580 if (helperAddr == 0) continue;
582 /* Add the helper. */
583 tl_assert(helperName);
584 tl_assert(helperAddr);
586 di = unsafeIRDirty_0_N( regparms,
587 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
589 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
/* Queue is now fully drained. */
592 clgs->events_used = 0;
/* Queue an instruction-read event for 'inode'.  Only meaningful when the
   cache simulator is enabled; flushes the queue first if it is full
   (flushEvents call is in an elided line). */
595 static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
598 tl_assert(clgs->seen_before || (inode->eventset == 0));
599 if (!CLG_(clo).simulate_cache) return;
601 if (clgs->events_used == N_EVENTS)
603 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
604 evt = &clgs->events[clgs->events_used];
/* Queue a data-read event of 'datasize' bytes at effective address 'ea'
   for the instruction 'inode'.  Cache simulation only. */
612 void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
615 tl_assert(isIRAtom(ea));
/* Sizes are clamped to MIN_LINE_SIZE by callers (see the Dirty case in
   CLG_(instrument)). */
616 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
617 if (!CLG_(clo).simulate_cache) return;
619 if (clgs->events_used == N_EVENTS)
621 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
622 evt = &clgs->events[clgs->events_used];
626 evt->Ev.Dr.szB = datasize;
/* Queue a data-write event; if it immediately follows a read of the same
   size/address from the same instruction, upgrade that read to a modify
   (Ev_Dm) instead -- this models x86-style load-op-store insns as a single
   memory reference. */
632 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
636 tl_assert(isIRAtom(ea));
637 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
638 if (!CLG_(clo).simulate_cache) return;
640 /* Is it possible to merge this write with the preceding read? */
/* NOTE(review): &events[events_used-1] is computed before the
   events_used > 0 guard below; the pointer is never dereferenced in that
   case, but forming it is technically out of bounds when the queue is
   empty -- consider moving this inside the 'if'. */
641 lastEvt = &clgs->events[clgs->events_used-1];
642 if (clgs->events_used > 0
643 && lastEvt->tag == Ev_Dr
644 && lastEvt->Ev.Dr.szB == datasize
645 && lastEvt->inode == inode
646 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
648 lastEvt->tag = Ev_Dm;
652 /* No. Add as normal. */
653 if (clgs->events_used == N_EVENTS)
655 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
656 evt = &clgs->events[clgs->events_used];
660 evt->Ev.Dw.szB = datasize;
/* Queue a conditional-branch event; 'guard' must already be widened to a
   host word (done by the Ist_Exit handling in CLG_(instrument)).  Only
   when branch simulation is enabled. */
666 void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
669 tl_assert(isIRAtom(guard));
670 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
671 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
672 if (!CLG_(clo).simulate_branch) return;
674 if (clgs->events_used == N_EVENTS)
676 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
677 evt = &clgs->events[clgs->events_used];
681 evt->Ev.Bc.taken = guard;
/* Queue an indirect-branch event; 'whereTo' is the host-word-sized
   destination expression.  Only when branch simulation is enabled. */
686 void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
689 tl_assert(isIRAtom(whereTo));
690 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
691 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
692 if (!CLG_(clo).simulate_branch) return;
694 if (clgs->events_used == N_EVENTS)
696 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
697 evt = &clgs->events[clgs->events_used];
701 evt->Ev.Bi.dst = whereTo;
/* Queue a global bus event (used for CAS and LL/SC statements).  Only when
   --collect-bus is enabled. */
706 void addEvent_G ( ClgState* clgs, InstrInfo* inode )
709 if (!CLG_(clo).collect_bus) return;
711 if (clgs->events_used == N_EVENTS)
713 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
714 evt = &clgs->events[clgs->events_used];
721 /* Initialise or check (if already seen before) an InstrInfo for next insn.
722 We only can set instr_offset/instr_size here. The required event set and
723 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
724 instructions. The event set is extended as required on flush of the event
725 queue (when Dm events were determined), cost offsets are determined at
726 end of BB instrumentation. */
/* Advance to (and return) the InstrInfo slot for the next guest
   instruction.  On re-instrumentation the stored offset/size must match
   what was recorded the first time; otherwise they are filled in now.
   (ii_index increment and the return are in elided lines.) */
728 InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
731 tl_assert(clgs->ii_index >= 0);
732 tl_assert(clgs->ii_index < clgs->bb->instr_count);
733 ii = &clgs->bb->instr[ clgs->ii_index ];
735 if (clgs->seen_before) {
736 CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
737 CLG_ASSERT(ii->instr_size == instr_size);
740 ii->instr_offset = clgs->instr_offset;
741 ii->instr_size = instr_size;
747 clgs->instr_offset += instr_size;
748 CLG_(stat).distinct_instrs++;
753 // return total number of cost values needed for this BB
/* Assign each instruction's offset into the BB's cost array (the event
   sets are final only after all flushEvents calls) and return the total
   number of cost slots needed.  On re-instrumentation, verify instead. */
755 UInt update_cost_offsets( ClgState* clgs )
759 UInt cost_offset = 0;
761 CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
762 for(i=0; i<clgs->ii_index; i++) {
763 ii = &clgs->bb->instr[i];
764 if (clgs->seen_before) {
765 CLG_ASSERT(ii->cost_offset == cost_offset);
767 ii->cost_offset = cost_offset;
/* Instructions that produced no events have a NULL eventset and use no
   cost slots. */
768 cost_offset += ii->eventset ? ii->eventset->size : 0;
774 /*------------------------------------------------------------*/
775 /*--- Instrumentation ---*/
776 /*------------------------------------------------------------*/
778 #if defined(VG_BIGENDIAN)
779 # define CLGEndness Iend_BE
780 #elif defined(VG_LITTLEENDIAN)
781 # define CLGEndness Iend_LE
783 # error "Unknown endianness"
/* Convert a VEX IRConst holding a guest address into an Addr, checking the
   constant's width against the host address size.  (The actual addr
   assignments are in elided lines.) */
787 Addr IRConst2Addr(IRConst* con)
791 if (sizeof(Addr) == 4) {
792 CLG_ASSERT( con->tag == Ico_U32 );
795 else if (sizeof(Addr) == 8) {
796 CLG_ASSERT( con->tag == Ico_U64 );
800 VG_(tool_panic)("Callgrind: invalid Addr type");
805 /* First pass over a BB to instrument, counting instructions and jumps
806 * This is needed for the size of the BB struct to allocate
808 * Called from CLG_(get_bb)
/* First pass over a superblock: count instructions and conditional jumps
   (counting statements are in elided lines) and precompute whether the
   last conditional branch was inverted by VEX (it is assumed inverted when
   it targets the instruction immediately following it in memory). */
810 void CLG_(collectBlockInfo)(IRSB* sbIn,
811 /*INOUT*/ UInt* instrs,
812 /*INOUT*/ UInt* cjmps,
813 /*INOUT*/ Bool* cjmp_inverted)
817 Addr instrAddr =0, jumpDst;
819 Bool toNextInstr = False;
821 // Ist_Exit has to be ignored in preamble code, before first IMark:
822 // preamble code is added by VEX for self modifying code, and has
823 // nothing to do with client code
824 Bool inPreamble = True;
828 for (i = 0; i < sbIn->stmts_used; i++) {
830 if (Ist_IMark == st->tag) {
833 instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
834 instrLen = st->Ist.IMark.len;
839 if (inPreamble) continue;
840 if (Ist_Exit == st->tag) {
841 jumpDst = IRConst2Addr(st->Ist.Exit.dst);
842 toNextInstr = (jumpDst == instrAddr + instrLen);
848 /* if the last instruction of the BB conditionally jumps to the next
849 * instruction (= first instruction of next BB in memory), it was inverted by VEX.
851 *cjmp_inverted = toNextInstr;
/* Emit an IR store of the 32-bit constant 'val' to the fixed address
   'addr' (used to update CLG_(current_state).jmps_passed from generated
   code).  NOTE(review): the stored value is always IRConst_U32 even when
   hWordTy is Ity_I64 -- fine as long as the target variable is a UInt;
   confirm against the declaration of jmps_passed. */
855 void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
857 addStmtToIRSB( bbOut,
858 IRStmt_Store(CLGEndness,
859 IRExpr_Const(hWordTy == Ity_I32 ?
860 IRConst_U32( addr ) :
861 IRConst_U64( addr )),
862 IRExpr_Const(IRConst_U32(val)) ));
866 /* add helper call to setup_bbcc, with pointer to BB struct as argument
868 * precondition for setup_bbcc:
869 * - jmps_passed has number of cond.jumps passed in last executed BB
870 * - current_bbcc has a pointer to the BBCC of the last executed BB
871 * Thus, if bbcc_jmpkind is != -1 (JmpNone),
872 * current_bbcc->bb->jmp_addr
873 * gives the address of the jump source.
875 * the setup does 2 things:
877 * * Unwind own call stack, i.e sync our ESP with real ESP
878 * This is for ESP manipulation (longjmps, C++ exec handling) and RET
879 * * For CALLs or JMPs crossing objects, record call arg +
880 * push are on own call stack
882 * - prepare for cache log functions:
883 * set current_bbcc to BBCC that gets the costs for this BB execution
/* Prepend a dirty helper call to CLG_(setup_bbcc) with the BB struct as
   its single argument; runs at the start of every executed BB (see the
   long precondition comment above). */
887 void addBBSetupCall(ClgState* clgs)
890 IRExpr *arg1, **argv;
892 arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
893 argv = mkIRExprVec_1(arg1);
894 di = unsafeIRDirty_0_N( 1, "setup_bbcc",
895 VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
897 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
/* The tool's main instrumentation callback: translate one guest superblock
   'sbIn' into an instrumented copy.  Outline:
     - bail out (copy-through, in elided lines) if instrumentation is off;
     - copy the VEX preamble, locate the first IMark, look up/create the
       BB struct, and add the setup_bbcc call;
     - walk the statements, queueing Ir/Dr/Dw/Bc/Bi/G events and flushing
       the queue at every possible exit;
     - finally record (or, on re-instrumentation, verify) per-BB totals.
   (Listing elided: variable declarations, several case labels, closing
   braces and the copy-through return path are in the gaps.) */
902 IRSB* CLG_(instrument)( VgCallbackClosure* closure,
904 VexGuestLayout* layout,
905 VexGuestExtents* vge,
906 IRType gWordTy, IRType hWordTy )
911 Addr64 cia; /* address of current insn */
912 InstrInfo* curr_inode = NULL;
917 if (gWordTy != hWordTy) {
918 /* We don't currently support this case. */
919 VG_(tool_panic)("host/guest word size mismatch");
922 // No instrumentation if it is switched off
923 if (! CLG_(instrument_state)) {
924 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
925 (Addr)closure->readdr);
929 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);
931 /* Set up SB for instrumented IR */
932 clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
934 // Copy verbatim any IR preamble preceding the first IMark
936 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
937 addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
941 // Get the first statement, and origAddr from it
942 CLG_ASSERT(sbIn->stmts_used >0);
943 CLG_ASSERT(i < sbIn->stmts_used);
945 CLG_ASSERT(Ist_IMark == st->tag);
947 origAddr = (Addr)st->Ist.IMark.addr;
948 cia = st->Ist.IMark.addr;
949 isize = st->Ist.IMark.len;
950 CLG_ASSERT(origAddr == st->Ist.IMark.addr); // XXX: check no overflow
952 /* Get BB struct (creating if necessary).
953 * JS: The hash table is keyed with orig_addr_noredir -- important!
954 * JW: Why? If it is because of different chasing of the redirection,
955 * this is not needed, as chasing is switched off in callgrind
957 clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));
959 addBBSetupCall(&clgs);
961 // Set up running state
962 clgs.events_used = 0;
964 clgs.instr_offset = 0;
966 for (/*use current i*/; i < sbIn->stmts_used; i++) {
969 CLG_ASSERT(isFlatIRStmt(st));
/* --- Ist_IMark: start of a new guest instruction. --- */
980 cia = st->Ist.IMark.addr;
981 isize = st->Ist.IMark.len;
982 CLG_ASSERT(clgs.instr_offset == (Addr)cia - origAddr);
983 // If Vex fails to decode an instruction, the size will be zero.
984 // Pretend otherwise.
985 if (isize == 0) isize = VG_MIN_INSTR_SZB;
987 // Sanity-check size.
988 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
989 || VG_CLREQ_SZB == isize );
991 // Init the inode, record it as the current one.
992 // Subsequent Dr/Dw/Dm events from the same instruction will
994 curr_inode = next_InstrInfo (&clgs, isize);
996 addEvent_Ir( &clgs, curr_inode );
/* --- Ist_WrTmp: a load shows up as a WrTmp of an Iex_Load. --- */
1001 IRExpr* data = st->Ist.WrTmp.data;
1002 if (data->tag == Iex_Load) {
1003 IRExpr* aexpr = data->Iex.Load.addr;
1004 // Note also, endianness info is ignored. I guess
1005 // that's not interesting.
1006 addEvent_Dr( &clgs, curr_inode,
1007 sizeofIRType(data->Iex.Load.ty), aexpr );
/* --- Ist_Store. --- */
1013 IRExpr* data = st->Ist.Store.data;
1014 IRExpr* aexpr = st->Ist.Store.addr;
1015 addEvent_Dw( &clgs, curr_inode,
1016 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
/* --- Ist_Dirty: helper calls may touch guest memory. --- */
1022 IRDirty* d = st->Ist.Dirty.details;
1023 if (d->mFx != Ifx_None) {
1024 /* This dirty helper accesses memory. Collect the details. */
1025 tl_assert(d->mAddr != NULL);
1026 tl_assert(d->mSize != 0);
1027 dataSize = d->mSize;
1028 // Large (eg. 28B, 108B, 512B on x86) data-sized
1029 // instructions will be done inaccurately, but they're
1030 // very rare and this avoids errors from hitting more
1031 // than two cache lines in the simulation.
1032 if (dataSize > MIN_LINE_SIZE)
1033 dataSize = MIN_LINE_SIZE;
1034 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1035 addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
1036 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1037 addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
1039 tl_assert(d->mAddr == NULL);
1040 tl_assert(d->mSize == 0);
/* --- Ist_CAS: atomic compare-and-swap. --- */
1046 /* We treat it as a read and a write of the location. I
1047 think that is the same behaviour as it was before IRCAS
1048 was introduced, since prior to that point, the Vex
1049 front ends would translate a lock-prefixed instruction
1050 into a (normal) read followed by a (normal) write. */
1052 IRCAS* cas = st->Ist.CAS.details;
1053 CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
1054 CLG_ASSERT(cas->dataLo);
1055 dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
1056 if (cas->dataHi != NULL)
1057 dataSize *= 2; /* since this is a doubleword-cas */
1058 addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
1059 addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
1060 addEvent_G( &clgs, curr_inode );
/* --- Ist_LLSC: load-linked / store-conditional. --- */
1066 if (st->Ist.LLSC.storedata == NULL) {
1068 dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
1069 addEvent_Dr( &clgs, curr_inode,
1070 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1073 dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
1074 addEvent_Dw( &clgs, curr_inode,
1075 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1076 /* I don't know whether the global-bus-lock cost should
1077 be attributed to the LL or the SC, but it doesn't
1078 really matter since they always have to be used in
1079 pairs anyway. Hence put it (quite arbitrarily) on
1081 addEvent_G( &clgs, curr_inode );
/* --- Ist_Exit: conditional side exit. --- */
1087 Bool guest_exit, inverted;
1089 /* VEX code generation sometimes inverts conditional branches.
1090 * As Callgrind counts (conditional) jumps, it has to correct
1091 * inversions. The heuristic is the following:
1092 * (1) Callgrind switches off SB chasing and unrolling, and
1093 * therefore it assumes that a candidate for inversion only is
1094 * the last conditional branch in an SB.
1095 * (2) inversion is assumed if the branch jumps to the address of
1096 * the next guest instruction in memory.
1097 * This heuristic is precalculated in CLG_(collectBlockInfo)().
1099 * Branching behavior is also used for branch prediction. Note that
1100 * above heuristic is different from what Cachegrind does.
1101 * Cachegrind uses (2) for all branches.
1103 if (cJumps+1 == clgs.bb->cjmp_count)
1104 inverted = clgs.bb->cjmp_inverted;
1108 // call branch predictor only if this is a branch in guest code
1109 guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
1110 (st->Ist.Exit.jk == Ijk_Call) ||
1111 (st->Ist.Exit.jk == Ijk_Ret);
1114 /* Stuff to widen the guard expression to a host word, so
1115 we can pass it to the branch predictor simulation
1116 functions easily. */
1117 IRType tyW = hWordTy;
1118 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1119 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1120 IRTemp guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
1121 IRTemp guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
1122 IRTemp guard = newIRTemp(clgs.sbOut->tyenv, tyW);
1123 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1124 : IRExpr_Const(IRConst_U64(1));
1126 /* Widen the guard expression. */
1127 addStmtToIRSB( clgs.sbOut,
1128 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1129 addStmtToIRSB( clgs.sbOut,
1130 IRStmt_WrTmp( guardW,
1132 IRExpr_RdTmp(guard1))) );
1133 /* If the exit is inverted, invert the sense of the guard. */
1138 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1139 : IRExpr_RdTmp(guardW)
1141 /* And post the event. */
1142 addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
1145 /* We may never reach the next statement, so need to flush
1146 all outstanding transactions now. */
1147 flushEvents( &clgs );
1149 CLG_ASSERT(clgs.ii_index>0);
1150 if (!clgs.seen_before) {
1151 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1152 clgs.bb->jmp[cJumps].skip = False;
1155 /* Update global variable jmps_passed before the jump
1156 * A correction is needed if VEX inverted the last jump condition
1158 addConstMemStoreStmt( clgs.sbOut,
1159 (UWord) &CLG_(current_state).jmps_passed,
1160 inverted ? cJumps+1 : cJumps, hWordTy);
1171 /* Copy the original statement */
1172 addStmtToIRSB( clgs.sbOut, st );
1175 VG_(printf)(" pass ");
1181 /* Deal with branches to unknown destinations. Except ignore ones
1182 which are function returns as we assume the return stack
1183 predictor never mispredicts. */
1184 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
1185 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1186 switch (sbIn->next->tag) {
1188 break; /* boring - branch to known address */
1190 /* looks like an indirect branch (branch to unknown) */
1191 addEvent_Bi( &clgs, curr_inode, sbIn->next );
1194 /* shouldn't happen - if the incoming IR is properly
1195 flattened, should only have tmp and const cases to
1201 /* At the end of the bb. Flush outstandings. */
1202 flushEvents( &clgs );
1204 /* Always update global variable jmps_passed at end of bb.
1205 * A correction is needed if VEX inverted the last jump condition
1208 UInt jmps_passed = cJumps;
1209 if (clgs.bb->cjmp_inverted) jmps_passed--;
1210 addConstMemStoreStmt( clgs.sbOut,
1211 (UWord) &CLG_(current_state).jmps_passed,
1212 jmps_passed, hWordTy);
1214 CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
/* NOTE(review): '=' below is an assignment inside an assert, unlike the
   '==' comparison on the line above -- almost certainly meant to be '=='.
   As written the assert can never fire (ii_index > 0 is asserted earlier)
   and it silently overwrites instr_count. */
1215 CLG_ASSERT(clgs.bb->instr_count = clgs.ii_index);
1217 /* This stores the instr of the call/ret at BB end */
1218 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1220 if (clgs.seen_before) {
1221 CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
/* NOTE(review): same '=' vs '==' suspect as above -- on the seen_before
   path this should verify instr_len, not assign it. */
1222 CLG_ASSERT(clgs.bb->instr_len = clgs.instr_offset);
1223 CLG_ASSERT(clgs.bb->jmpkind == sbIn->jumpkind);
1226 clgs.bb->cost_count = update_cost_offsets(&clgs);
1227 clgs.bb->instr_len = clgs.instr_offset;
1228 clgs.bb->jmpkind = sbIn->jumpkind;
1231 CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
1232 origAddr, clgs.bb->instr_len,
1233 clgs.bb->cjmp_count, clgs.bb->cost_count);
1235 CLG_DEBUG(3, " [ ");
1236 for (i=0;i<cJumps;i++)
1237 CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
1238 CLG_DEBUG(3, "], last inverted: %s \n",
1239 clgs.bb->cjmp_inverted ? "yes":"no");
1245 /*--------------------------------------------------------------------*/
1246 /*--- Discarding BB info ---*/
1247 /*--------------------------------------------------------------------*/
1249 // Called when a translation is removed from the translation cache for
1250 // any reason at all: to free up space, because the guest code was
1251 // unmapped or modified, or for any arbitrary reason.
/* Core callback: a translation was dropped from the translation cache, so
   discard the matching BB record (keyed by the original, unredirected
   address -- see the comment below). */
1253 void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
1255 Addr orig_addr = (Addr)orig_addr64;
1257 tl_assert(vge.n_used > 0);
1260 VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
1261 (void*)(Addr)orig_addr,
1262 (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
1264 // Get BB info, remove from table, free BB info. Simple! Note that we
1265 // use orig_addr, not the first instruction address in vge.
1266 CLG_(delete_bb)(orig_addr);
1270 /*------------------------------------------------------------*/
1271 /*--- CLG_(fini)() and related function ---*/
1272 /*------------------------------------------------------------*/
/* Zero all accumulated costs of one thread: reset call counters of active
   call-stack entries, zero all BBCCs, and snapshot the current cost as the
   thread's last-dump baseline. */
1276 static void zero_thread_cost(thread_info* t)
1280 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1281 if (!CLG_(current_call_stack).entry[i].jcc) continue;
1283 /* reset call counters to current for active calls */
1284 CLG_(copy_cost)( CLG_(sets).full,
1285 CLG_(current_call_stack).entry[i].enter_cost,
1286 CLG_(current_state).cost );
1287 CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
1290 CLG_(forall_bbccs)(CLG_(zero_bbcc));
1292 /* set counter for last dump */
1293 CLG_(copy_cost)( CLG_(sets).full,
1294 t->lastdump_cost, CLG_(current_state).cost );
/* Public entry point to zero cost counters, either for the current thread
   only or for every thread (the 'else' before the forall call is in an
   elided line). */
1297 void CLG_(zero_all_cost)(Bool only_current_thread)
1299 if (VG_(clo_verbosity) > 1)
1300 VG_(message)(Vg_DebugMsg, "  Zeroing costs...\n");
1302 if (only_current_thread)
1303 zero_thread_cost(CLG_(get_current_thread)());
1305 CLG_(forall_threads)(zero_thread_cost);
1307 if (VG_(clo_verbosity) > 1)
1308 VG_(message)(Vg_DebugMsg, "  ...done\n");
/* Fully unwind one thread's simulated state: leave any pending signal
   handlers, pop the whole call stack, and reset execution/function-stack
   state so context generation starts fresh. */
1312 void unwind_thread(thread_info* t)
1314 /* unwind signal handlers */
1315 while(CLG_(current_state).sig !=0)
1316 CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
1318 /* unwind regular call stack */
1319 while(CLG_(current_call_stack).sp>0)
1320 CLG_(pop_call_stack)();
1322 /* reset context and function stack for context generation */
1323 CLG_(init_exec_state)( &CLG_(current_state) );
1324 CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
1328 void zero_state_cost(thread_info* t)
1330 CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
/* Forward declaration bypassing the core tool headers ("Ups, this can
   go wrong..." in the original): keep the prototype in sync with the
   core's definition by hand. */
extern void VG_(discard_translations) ( Addr64 start, ULong range );
1336 void CLG_(set_instrument_state)(Char* reason, Bool state)
1338 if (CLG_(instrument_state) == state) {
1339 CLG_DEBUG(2, "%s: instrumentation already %s\n",
1340 reason, state ? "ON" : "OFF");
1343 CLG_(instrument_state) = state;
1344 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
1345 reason, state ? "ON" : "OFF");
1347 VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl);
1349 /* reset internal state: call stacks, simulator */
1350 CLG_(forall_threads)(unwind_thread);
1351 CLG_(forall_threads)(zero_state_cost);
1352 (*CLG_(cachesim).clear)();
1354 if (VG_(clo_verbosity) > 1)
1355 VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
1356 reason, state ? "ON" : "OFF");
1359 static void print_monitor_help ( void )
1361 VG_(gdb_printf) ("\n");
1362 VG_(gdb_printf) ("callgrind monitor commands:\n");
1363 VG_(gdb_printf) (" ct.dump [<dump_hint>]\n");
1364 VG_(gdb_printf) (" dump counters\n");
1365 VG_(gdb_printf) (" ct.zero\n");
1366 VG_(gdb_printf) (" zero counters\n");
1367 VG_(gdb_printf) ("\n");
1370 /* return True if request recognised, False otherwise */
1371 static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
1374 Char s[VG_(strlen(req))]; /* copy for strtok_r */
1377 VG_(strcpy) (s, req);
1379 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
1380 switch (VG_(keyword_id) ("help ct.dump ct.zero",
1381 wcmd, kwd_report_duplicated_matches)) {
1382 case -2: /* multiple matches */
1384 case -1: /* not found */
1387 print_monitor_help();
1389 case 1: { /* ct.dump */
1390 CLG_(dump_profile)(req, False);
1393 case 2: { /* ct.zero */
1394 CLG_(zero_all_cost)(False);
1405 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
1407 if (!VG_IS_TOOL_USERREQ('C','T',args[0])
1408 && VG_USERREQ__GDB_MONITOR_COMMAND != args[0])
1412 case VG_USERREQ__DUMP_STATS:
1413 CLG_(dump_profile)("Client Request", True);
1414 *ret = 0; /* meaningless */
1417 case VG_USERREQ__DUMP_STATS_AT:
1420 VG_(sprintf)(buf,"Client Request: %s", (Char*)args[1]);
1421 CLG_(dump_profile)(buf, True);
1422 *ret = 0; /* meaningless */
1426 case VG_USERREQ__ZERO_STATS:
1427 CLG_(zero_all_cost)(True);
1428 *ret = 0; /* meaningless */
1431 case VG_USERREQ__TOGGLE_COLLECT:
1432 CLG_(current_state).collect = !CLG_(current_state).collect;
1433 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
1434 CLG_(current_state).collect ? "ON" : "OFF");
1435 *ret = 0; /* meaningless */
1438 case VG_USERREQ__START_INSTRUMENTATION:
1439 CLG_(set_instrument_state)("Client Request", True);
1440 *ret = 0; /* meaningless */
1443 case VG_USERREQ__STOP_INSTRUMENTATION:
1444 CLG_(set_instrument_state)("Client Request", False);
1445 *ret = 0; /* meaningless */
1448 case VG_USERREQ__GDB_MONITOR_COMMAND: {
1449 Bool handled = handle_gdb_monitor_command (tid, (Char*)args[1]);
1464 /* Syscall Timing */
1466 /* struct timeval syscalltime[VG_N_THREADS]; */
1467 #if CLG_MICROSYSTIME
1468 #include <sys/time.h>
1469 #include <sys/syscall.h>
1470 extern Int VG_(do_syscall) ( UInt, ... );
1472 ULong syscalltime[VG_N_THREADS];
1474 UInt syscalltime[VG_N_THREADS];
1478 void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
1479 UWord* args, UInt nArgs)
1481 if (CLG_(clo).collect_systime) {
1482 #if CLG_MICROSYSTIME
1483 struct vki_timeval tv_now;
1484 VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
1485 syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
1487 syscalltime[tid] = VG_(read_millisecond_timer)();
1493 void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
1494 UWord* args, UInt nArgs, SysRes res)
1496 if (CLG_(clo).collect_systime &&
1497 CLG_(current_state).bbcc) {
1499 #if CLG_MICROSYSTIME
1500 struct vki_timeval tv_now;
1503 VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
1504 diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
1506 UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
1509 /* offset o is for "SysCount", o+1 for "SysTime" */
1510 o = fullOffset(EG_SYS);
1512 CLG_DEBUG(0," Time (Off %d) for Syscall %d: %ull\n", o, syscallno, diff);
1514 CLG_(current_state).cost[o] ++;
1515 CLG_(current_state).cost[o+1] += diff;
1516 if (!CLG_(current_state).bbcc->skipped)
1517 CLG_(init_cost_lz)(CLG_(sets).full,
1518 &(CLG_(current_state).bbcc->skipped));
1519 CLG_(current_state).bbcc->skipped[o] ++;
1520 CLG_(current_state).bbcc->skipped[o+1] += diff;
1524 static UInt ULong_width(ULong n)
1532 return w + (w-1)/3; // add space for commas
1536 void branchsim_printstat(int l1, int l2, int l3)
1538 static Char buf1[128], buf2[128], buf3[128], fmt[128];
1540 ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
1541 ULong B_total_b, B_total_mp;
1543 total = CLG_(total_cost);
1544 Bc_total_b = total[ fullOffset(EG_BC) ];
1545 Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
1546 Bi_total_b = total[ fullOffset(EG_BI) ];
1547 Bi_total_mp = total[ fullOffset(EG_BI)+1 ];
1549 /* Make format string, getting width right for numbers */
1550 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1553 if (0 == Bc_total_b) Bc_total_b = 1;
1554 if (0 == Bi_total_b) Bi_total_b = 1;
1555 B_total_b = Bc_total_b + Bi_total_b;
1556 B_total_mp = Bc_total_mp + Bi_total_mp;
1559 VG_(umsg)(fmt, "Branches: ",
1560 B_total_b, Bc_total_b, Bi_total_b);
1562 VG_(umsg)(fmt, "Mispredicts: ",
1563 B_total_mp, Bc_total_mp, Bi_total_mp);
1565 VG_(percentify)(B_total_mp, B_total_b, 1, l1+1, buf1);
1566 VG_(percentify)(Bc_total_mp, Bc_total_b, 1, l2+1, buf2);
1567 VG_(percentify)(Bi_total_mp, Bi_total_b, 1, l3+1, buf3);
1569 VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3);
  /* NOTE(review): interior of finish() — the function header, the
     declarations of l1/l2/l3, total and BB_lookups, and several closing
     braces are elided from this extract; confirm against the full file. */
  Char buf[32+COSTS_LEN], fmt[128];

  CLG_DEBUG(0, "finish()\n");

  /* let the cache simulator finalize its state before dumping */
  (*CLG_(cachesim).finish)();

  /* pop all remaining items from CallStack for correct sum */
  CLG_(forall_threads)(unwind_thread);

  CLG_(dump_profile)(0, False);

  CLG_(finish_command)();

  if (VG_(clo_verbosity) == 0) return;

  /* Hash table stats */
  if (VG_(clo_stats)) {
    /* NOTE(review): the declaration/assignment of the BB-lookup total
       (sum of the four debug-info categories) is elided here. */
    CLG_(stat).full_debug_BBs +
    CLG_(stat).fn_name_debug_BBs +
    CLG_(stat).file_line_debug_BBs +
    CLG_(stat).no_debug_BBs;

    VG_(message)(Vg_DebugMsg, "\n");
    VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
                 CLG_(stat).distinct_objs);
    VG_(message)(Vg_DebugMsg, "Distinct files: %d\n",
                 CLG_(stat).distinct_files);
    VG_(message)(Vg_DebugMsg, "Distinct fns: %d\n",
                 CLG_(stat).distinct_fns);
    VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
                 CLG_(stat).distinct_contexts);
    VG_(message)(Vg_DebugMsg, "Distinct BBs: %d\n",
                 CLG_(stat).distinct_bbs);
    VG_(message)(Vg_DebugMsg, "Cost entries: %d (Chunks %d)\n",
                 CLG_(costarray_entries), CLG_(costarray_chunks));
    VG_(message)(Vg_DebugMsg, "Distinct BBCCs: %d\n",
                 CLG_(stat).distinct_bbccs);
    VG_(message)(Vg_DebugMsg, "Distinct JCCs: %d\n",
                 CLG_(stat).distinct_jccs);
    VG_(message)(Vg_DebugMsg, "Distinct skips: %d\n",
                 CLG_(stat).distinct_skips);
    VG_(message)(Vg_DebugMsg, "BB lookups: %d\n",
    /* NOTE(review): the BB_lookups argument line is elided here. */
    /* Percentages of BBs per debug-info category, relative to lookups. */
    VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)\n",
                 CLG_(stat).full_debug_BBs * 100 / BB_lookups,
                 CLG_(stat).full_debug_BBs);
    VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
                 CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
                 CLG_(stat).file_line_debug_BBs);
    VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)\n",
                 CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
                 CLG_(stat).fn_name_debug_BBs);
    VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)\n",
                 CLG_(stat).no_debug_BBs * 100 / BB_lookups,
                 CLG_(stat).no_debug_BBs);
    VG_(message)(Vg_DebugMsg, "BBCC Clones: %d\n",
                 CLG_(stat).bbcc_clones);
    VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d\n",
                 CLG_(stat).bb_retranslations);
    VG_(message)(Vg_DebugMsg, "Distinct instrs: %d\n",
                 CLG_(stat).distinct_instrs);
    VG_(message)(Vg_DebugMsg, "");

    VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
                 CLG_(stat).cxt_lru_misses);
    VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d\n",
                 CLG_(stat).bbcc_lru_misses);
    VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d\n",
                 CLG_(stat).jcc_lru_misses);
    VG_(message)(Vg_DebugMsg, "BBs Executed: %llu\n",
                 CLG_(stat).bb_executions);
    VG_(message)(Vg_DebugMsg, "Calls: %llu\n",
                 CLG_(stat).call_counter);
    VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu\n",
                 CLG_(stat).jcnd_counter);
    VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu\n",
                 CLG_(stat).jump_counter);
    VG_(message)(Vg_DebugMsg, "Recursive calls: %llu\n",
                 CLG_(stat).rec_call_counter);
    VG_(message)(Vg_DebugMsg, "Returns: %llu\n",
                 CLG_(stat).ret_counter);

    VG_(message)(Vg_DebugMsg, "");

  /* print the collected event names and totals */
  CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
  VG_(message)(Vg_UserMsg, "Events : %s\n", buf);
  CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
  VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
  VG_(message)(Vg_UserMsg, "\n");

  /* determine value widths for statistics */
  total = CLG_(total_cost);
  l1 = ULong_width( total[fullOffset(EG_IR)] );
  if (CLG_(clo).simulate_cache) {
    l2 = ULong_width( total[fullOffset(EG_DR)] );
    l3 = ULong_width( total[fullOffset(EG_DW)] );
  /* branch columns may need to be wider than the cache columns */
  if (CLG_(clo).simulate_branch) {
    int l2b = ULong_width( total[fullOffset(EG_BC)] );
    int l3b = ULong_width( total[fullOffset(EG_BI)] );
    if (l2b > l2) l2 = l2b;
    if (l3b > l3) l3 = l3b;

  /* Make format string, getting width right for numbers */
  VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

  /* Always print this */
  VG_(umsg)(fmt, "I refs: ", total[fullOffset(EG_IR)] );

  if (CLG_(clo).simulate_cache)
    (*CLG_(cachesim).printstat)(l1, l2, l3);

  if (CLG_(clo).simulate_branch)
    branchsim_printstat(l1, l2, l3);
1703 void CLG_(fini)(Int exitcode)
1709 /*--------------------------------------------------------------------*/
1711 /*--------------------------------------------------------------------*/
1713 static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
1715 static ULong last_blocks_done = 0;
1718 VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);
1720 /* throttle calls to CLG_(run_thread) by number of BBs executed */
1721 if (blocks_done - last_blocks_done < 5000) return;
1722 last_blocks_done = blocks_done;
1724 CLG_(run_thread)( tid );
/* Post command-line-option initialization: configures VEX, applies
   option defaults, initializes event sets, statistics, hash tables and
   thread state, and announces interactive control.
   NOTE(review): several lines (closing braces, possibly init calls
   between the dump_line block and init_command) are elided from this
   extract — confirm against the full file before editing. */
void CLG_(post_clo_init)(void)
   /* disable VEX optimizations that would merge/unroll superblocks and
      confuse per-BB cost attribution */
   VG_(clo_vex_control).iropt_unroll_thresh = 0;
   VG_(clo_vex_control).guest_chase_thresh = 0;
   CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
   CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers);
   CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions);
   /* default position granularity: source line */
   if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
       VG_(message)(Vg_UserMsg, "Using source line as position.\n");
       CLG_(clo).dump_line = True;
   CLG_(init_command)();
   (*CLG_(cachesim).post_clo_init)();
   CLG_(init_eventsets)();
   CLG_(init_statistics)(& CLG_(stat));
   CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );
   /* initialize hash tables */
   CLG_(init_obj_table)();
   CLG_(init_cxt_table)();
   CLG_(init_bb_hash)();
   CLG_(init_threads)();
   CLG_(run_thread)(1);
   /* instrumentation may start disabled (--instr-atstart=no) */
   CLG_(instrument_state) = CLG_(clo).instrument_atstart;
   /* NOTE(review): parenthesis placement looks odd — VG_(clo_verbosity > 0)
      instead of VG_(clo_verbosity) > 0; it happens to expand identically
      via token pasting, but should be normalized in the full file. */
   if (VG_(clo_verbosity > 0)) {
      VG_(message)(Vg_UserMsg,
                   "For interactive control, run 'callgrind_control -h'.\n");
/* Pre command-line-option initialization: registers tool details,
   core callbacks and needs with the Valgrind core.
   NOTE(review): the remaining arguments of VG_(basic_tool_funcs)
   (instrumentation and fini callbacks) are elided from this extract. */
void CLG_(pre_clo_init)(void)
   VG_(details_name) ("Callgrind");
   VG_(details_version) (NULL);
   VG_(details_description) ("a call-graph generating cache profiler");
   VG_(details_copyright_author)("Copyright (C) 2002-2010, and GNU GPL'd, "
                                 "by Josef Weidendorfer et al.");
   VG_(details_bug_reports_to) (VG_BUGS_TO);
   /* hint for the core's translation-storage sizing */
   VG_(details_avg_translation_sizeB) ( 500 );
   VG_(basic_tool_funcs) (CLG_(post_clo_init),
   /* notified when cached translations are discarded */
   VG_(needs_superblock_discards)(clg_discard_superblock_info);
   VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
                                   CLG_(print_debug_usage));
   VG_(needs_client_requests)(CLG_(handle_client_request));
   /* wrap syscalls to measure system time (--collect-systime) */
   VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                              CLG_(post_syscalltime));
   VG_(track_start_client_code) ( & clg_start_client_code_callback );
   VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
   VG_(track_post_deliver_signal)( & CLG_(post_signal) );
   CLG_(set_clo_defaults)();
1800 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
1802 /*--------------------------------------------------------------------*/
1803 /*--- end main.c ---*/
1804 /*--------------------------------------------------------------------*/