2 /*--------------------------------------------------------------------*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Callgrind, a Valgrind tool for call graph
11 Copyright (C) 2002-2010, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
13 This tool is derived from and contains code from Cachegrind
14 Copyright (C) 2002-2010 Nicholas Nethercote (njn@valgrind.org)
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
31 The GNU General Public License is contained in the file COPYING.
35 #include "callgrind.h"
38 #include <pub_tool_threadstate.h>
40 /*------------------------------------------------------------*/
41 /*--- Global variables ---*/
42 /*------------------------------------------------------------*/
/* Command line options in effect for this run (set during clo parsing). */
CommandLineOptions CLG_(clo);
/* Global profiling/bookkeeping counters; reset by CLG_(init_statistics). */
Statistics CLG_(stat);
Bool CLG_(instrument_state) = True; /* Instrumentation on ? */

/* thread and signal handler specific */
exec_state CLG_(current_state);
53 /*------------------------------------------------------------*/
54 /*--- Statistics ---*/
55 /*------------------------------------------------------------*/
/* Reset all profiling counters in *s to zero.  Called once at startup
   (from CLG_(post_clo_init)) on the global CLG_(stat) instance. */
static void CLG_(init_statistics)(Statistics* s)
   /* dynamic event counters */
   s->rec_call_counter = 0;
   s->context_counter = 0;
   s->bb_retranslations = 0;
   /* sizes of the "distinct object" tables */
   s->distinct_files = 0;
   s->distinct_contexts = 0;
   s->distinct_bbccs = 0;
   s->distinct_instrs = 0;
   s->distinct_skips = 0;
   /* resize counts of the hash tables and stacks */
   s->bb_hash_resizes = 0;
   s->bbcc_hash_resizes = 0;
   s->jcc_hash_resizes = 0;
   s->cxt_hash_resizes = 0;
   s->fn_array_resizes = 0;
   s->call_stack_resizes = 0;
   s->fn_stack_resizes = 0;
   /* translated BBs classified by available debug info */
   s->full_debug_BBs = 0;
   s->file_line_debug_BBs = 0;
   s->fn_name_debug_BBs = 0;
   /* misses in the per-structure LRU lookup caches */
   s->bbcc_lru_misses = 0;
   s->jcc_lru_misses = 0;
   s->cxt_lru_misses = 0;
97 /*------------------------------------------------------------*/
98 /*--- Instrumentation structures and event queue handling ---*/
99 /*------------------------------------------------------------*/
101 /* Maintain an ordered list of memory events which are outstanding, in
102 the sense that no IR has yet been generated to do the relevant
103 helper calls. The BB is scanned top to bottom and memory events
104 are added to the end of the list, merging with the most recent
105 notified event where possible (Dw immediately following Dr and
106 having the same size and EA can be merged).
108 This merging is done so that for architectures which have
109 load-op-store instructions (x86, amd64), the insn is treated as if
110 it makes just one memory reference (a modify), rather than two (a
111 read followed by a write at the same address).
113 At various points the list will need to be flushed, that is, IR
114 generated from it. That must happen before any possible exit from
115 the block (the end, or an IRStmt_Exit). Flushing also takes place
116 when there is no space to add a new event.
118 If we require the simulation statistics to be up to date with
119 respect to possible memory exceptions, then the list would have to
120 be flushed before each memory reference. That would however lose
121 performance by inhibiting event-merging during flushing.
123 Flushing the list consists of walking it start to end and emitting
124 instrumentation IR for each event, in the order in which they
125 appear. It may be possible to emit a single call for two adjacent
126 events in order to reduce the number of helper function calls made.
127 For example, it could well be profitable to handle two adjacent Ir
128 events with a single helper call. */
136 Ev_Ir, // Instruction read
139 Ev_Dm, // Data modify (read then write)
/* Zero-initialise an Event record before (re)use in the event queue. */
static void init_Event ( Event* ev ) {
   VG_(memset)(ev, 0, sizeof(Event));
/* Return the effective-address atom of a data event.
   Only meaningful for Dr/Dw/Dm events; asserts on any other tag. */
static IRAtom* get_Event_dea ( Event* ev ) {
      case Ev_Dr: return ev->Ev.Dr.ea;
      case Ev_Dw: return ev->Ev.Dw.ea;
      case Ev_Dm: return ev->Ev.Dm.ea;
      default: tl_assert(0);
/* Return the access size in bytes of a data event.
   Only meaningful for Dr/Dw/Dm events; asserts on any other tag. */
static Int get_Event_dszB ( Event* ev ) {
      case Ev_Dr: return ev->Ev.Dr.szB;
      case Ev_Dw: return ev->Ev.Dw.szB;
      case Ev_Dm: return ev->Ev.Dm.szB;
      default: tl_assert(0);
189 /* Up to this many unnotified events are allowed. Number is
190 arbitrary. Larger numbers allow more event merging to occur, but
191 potentially induce more spilling due to extending live ranges of
192 address temporaries. */
196 /* A struct which holds all the running state during instrumentation.
197 Mostly to avoid passing loads of parameters everywhere. */
199 /* The current outstanding-memory-event list. */
200 Event events[N_EVENTS];
203 /* The array of InstrInfo's is part of BB struct. */
206 /* BB seen before (ie. re-instrumentation) */
209 /* Number InstrInfo bins 'used' so far. */
212 // current offset of guest instructions from BB start
215 /* The output SB being constructed. */
/* Debug helper: print a one-line description of an event, selected by
   its Ev_* tag (instruction fetch, data read, write, or modify). */
static void showEvent ( Event* ev )
      /* Ev_Ir: instruction fetch at offset from BB start */
      VG_(printf)("Ir (InstrInfo %p) at +%d\n",
                  ev->inode, ev->inode->instr_offset);
      /* Ev_Dr: data read, with size and effective address */
      VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
      ppIRExpr(ev->Ev.Dr.ea);
      /* Ev_Dw: data write */
      VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
      ppIRExpr(ev->Ev.Dw.ea);
      /* Ev_Dm: data modify (read then write of same location) */
      VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
      ppIRExpr(ev->Ev.Dm.ea);
251 /* Generate code for all outstanding memory events, and mark the queue
252 empty. Code is generated into cgs->sbOut, and this activity
253 'consumes' slots in cgs->bb. */
/* Generate IR for all outstanding memory events and mark the queue
   empty.  Code is emitted into clgs->sbOut.  On first translation of a
   BB (!seen_before) the per-instruction event sets (UIr/UIrDr/...) are
   extended to cover the queued Dr/Dw events.  Adjacent events of the
   same guest instruction are merged into a single helper call where a
   combined simulator helper exists (e.g. log_1I1Dr). */
static void flushEvents ( ClgState* clgs )
   Int        i, regparms, inew;

   if (!clgs->seen_before) {
      // extend event sets as needed
      // available sets: D0 Dr
      for(i=0; i<clgs->events_used; i++) {
         ev = &clgs->events[i];
            // Ir event always is first for a guest instruction
            CLG_ASSERT(ev->inode->eventset == 0);
            ev->inode->eventset = CLG_(sets).UIr;
            // extend event set by Dr counter
            if ((ev->inode->eventset == CLG_(sets).UIrDr) ||
                (ev->inode->eventset == CLG_(sets).UIrDrDw) ||
                (ev->inode->eventset == CLG_(sets).UIrDwDr))
            if (ev->inode->eventset == CLG_(sets).UIrDw) {
               ev->inode->eventset = CLG_(sets).UIrDwDr;
            CLG_ASSERT(ev->inode->eventset == CLG_(sets).UIr);
            ev->inode->eventset = CLG_(sets).UIrDr;
            // extend event set by Dw counter
            if ((ev->inode->eventset == CLG_(sets).UIrDw) ||
                (ev->inode->eventset == CLG_(sets).UIrDwDr) ||
                (ev->inode->eventset == CLG_(sets).UIrDrDw))
            if (ev->inode->eventset == CLG_(sets).UIrDr) {
               ev->inode->eventset = CLG_(sets).UIrDrDw;
            CLG_ASSERT(ev->inode->eventset == CLG_(sets).UIr);
            ev->inode->eventset = CLG_(sets).UIrDw;

   /* Walk the queue front to back; inew is advanced past any events
      consumed by merging, so each iteration emits one helper call. */
   for(i = 0; i < clgs->events_used; i = inew) {

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < clgs->events_used);

      ev  = &clgs->events[i];
      ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
      ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );

         VG_(printf)(" flush ");

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
         Dm events have same effect as Dw events */
            /* Merge an Ir with a following Dr. */
            if (ev2 && ev2->tag == Ev_Dr) {
               /* Why is this true?  It's because we're merging an Ir
                  with a following Dr.  The Ir derives from the
                  instruction's IMark and the Dr from data
                  references which follow it.  In short it holds
                  because each insn starts with an IMark, hence an
                  Ev_Ir, and so these Dr must pertain to the
                  immediately preceding Ir.  Same applies to analogous
                  assertions in the subsequent cases. */
               tl_assert(ev2->inode == ev->inode);
               helperName = CLG_(cachesim).log_1I1Dr_name;
               helperAddr = CLG_(cachesim).log_1I1Dr;
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
            /* Merge an Ir with a following Dw/Dm. */
            if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
               tl_assert(ev2->inode == ev->inode);
               helperName = CLG_(cachesim).log_1I1Dw_name;
               helperAddr = CLG_(cachesim).log_1I1Dw;
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
            /* Merge an Ir with two following Irs. */
            if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
               helperName = CLG_(cachesim).log_3I0D_name;
               helperAddr = CLG_(cachesim).log_3I0D;
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ),
                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
            /* Merge an Ir with one following Ir. */
            if (ev2 && ev2->tag == Ev_Ir) {
               helperName = CLG_(cachesim).log_2I0D_name;
               helperAddr = CLG_(cachesim).log_2I0D;
               argv = mkIRExprVec_2( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
            /* No merging possible; emit as-is. */
               helperName = CLG_(cachesim).log_1I0D_name;
               helperAddr = CLG_(cachesim).log_1I0D;
               argv = mkIRExprVec_1( i_node_expr );
         /* Data read or modify */
            helperName = CLG_(cachesim).log_0I1Dr_name;
            helperAddr = CLG_(cachesim).log_0I1Dr;
            argv = mkIRExprVec_3( i_node_expr,
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            /* Data write */
            helperName = CLG_(cachesim).log_0I1Dw_name;
            helperAddr = CLG_(cachesim).log_0I1Dw;
            argv = mkIRExprVec_3( i_node_expr,
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            VG_(printf)(" merge ");
            VG_(printf)(" merge ");
         VG_(printf)(" call %s (%p)\n",
                     helperName, helperAddr);
      /* helper could be unset depending on the simulator used */
      if (helperAddr == 0) continue;

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
      addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );

   /* queue is now empty */
   clgs->events_used = 0;
/* Queue an instruction-fetch event for inode.  No-op unless cache
   simulation is enabled; flushes the queue first if it is full. */
static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
   /* on first instrumentation the inode's event set must still be unset */
   tl_assert(clgs->seen_before || (inode->eventset == 0));
   if (!CLG_(clo).simulate_cache) return;

   if (clgs->events_used == N_EVENTS)
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
/* Queue a data-read event of datasize bytes at address ea for the
   instruction described by inode.  No-op unless cache simulation is
   enabled; flushes the queue first if it is full. */
void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
   tl_assert(isIRAtom(ea));
   /* accesses are clamped to MIN_LINE_SIZE by the callers */
   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
   if (!CLG_(clo).simulate_cache) return;

   if (clgs->events_used == N_EVENTS)
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   evt->Ev.Dr.szB = datasize;
491 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
495 tl_assert(isIRAtom(ea));
496 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
497 if (!CLG_(clo).simulate_cache) return;
499 /* Is it possible to merge this write with the preceding read? */
500 lastEvt = &clgs->events[clgs->events_used-1];
501 if (clgs->events_used > 0
502 && lastEvt->tag == Ev_Dr
503 && lastEvt->Ev.Dr.szB == datasize
504 && lastEvt->inode == inode
505 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
507 lastEvt->tag = Ev_Dm;
511 /* No. Add as normal. */
512 if (clgs->events_used == N_EVENTS)
514 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
515 evt = &clgs->events[clgs->events_used];
519 evt->Ev.Dw.szB = datasize;
524 /* Initialise or check (if already seen before) an InstrInfo for next insn.
525 We only can set instr_offset/instr_size here. The required event set and
526 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
527 instructions. The event set is extended as required on flush of the event
528 queue (when Dm events were determined), cost offsets are determined at
529 end of BB instrumentation. */
InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
   tl_assert(clgs->ii_index >= 0);
   tl_assert(clgs->ii_index < clgs->bb->instr_count);
   ii = &clgs->bb->instr[ clgs->ii_index ];

   if (clgs->seen_before) {
      /* re-instrumentation: recorded layout must match exactly */
      CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
      CLG_ASSERT(ii->instr_size == instr_size);
      /* first instrumentation: record offset/size of this insn */
      ii->instr_offset = clgs->instr_offset;
      ii->instr_size = instr_size;

   /* advance running offset; each insn is counted once per translation */
   clgs->instr_offset += instr_size;
   CLG_(stat).distinct_instrs++;
556 // return total number of cost values needed for this BB
UInt update_cost_offsets( ClgState* clgs )
   UInt cost_offset = 0;

   /* all InstrInfo bins of the BB must have been consumed */
   CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
   for(i=0; i<clgs->ii_index; i++) {
      ii = &clgs->bb->instr[i];
      if (clgs->seen_before) {
         /* re-instrumentation: offsets must be unchanged */
         CLG_ASSERT(ii->cost_offset == cost_offset);
      ii->cost_offset = cost_offset;
      /* instructions with an empty event set need no cost slots */
      cost_offset += ii->eventset ? ii->eventset->size : 0;
577 /*------------------------------------------------------------*/
578 /*--- Instrumentation ---*/
579 /*------------------------------------------------------------*/
581 #if defined(VG_BIGENDIAN)
582 # define CLGEndness Iend_BE
583 #elif defined(VG_LITTLEENDIAN)
584 # define CLGEndness Iend_LE
586 # error "Unknown endianness"
/* Convert an IR constant (Ico_U32 or Ico_U64, depending on the host
   word size) into a guest address; panics for any other Addr width. */
Addr IRConst2Addr(IRConst* con)
   if (sizeof(Addr) == 4) {
      CLG_ASSERT( con->tag == Ico_U32 );
   else if (sizeof(Addr) == 8) {
      CLG_ASSERT( con->tag == Ico_U64 );
      VG_(tool_panic)("Callgrind: invalid Addr type");
608 /* First pass over a BB to instrument, counting instructions and jumps
609 * This is needed for the size of the BB struct to allocate
611 * Called from CLG_(get_bb)
void CLG_(collectBlockInfo)(IRSB* sbIn,
			    /*INOUT*/ UInt* instrs,
			    /*INOUT*/ UInt* cjmps,
			    /*INOUT*/ Bool* cjmp_inverted)
   /* address/length of the insn currently being scanned */
   Addr instrAddr =0, jumpDst;
   /* does the last seen conditional exit target the next insn? */
   Bool toNextInstr = False;

   // Ist_Exit has to be ignored in preamble code, before first IMark:
   // preamble code is added by VEX for self modifying code, and has
   // nothing to do with client code
   Bool inPreamble = True;

   for (i = 0; i < sbIn->stmts_used; i++) {
      if (Ist_IMark == st->tag) {
	 instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
	 instrLen = st->Ist.IMark.len;
      if (inPreamble) continue;
      if (Ist_Exit == st->tag) {
	 jumpDst = IRConst2Addr(st->Ist.Exit.dst);
	 toNextInstr = (jumpDst == instrAddr + instrLen);

   /* if the last instructions of BB conditionally jumps to next instruction
    * (= first instruction of next BB in memory), this is a inverted by VEX. */
   *cjmp_inverted = toNextInstr;
/* Emit an IR statement storing the 32-bit constant <val> to the
   absolute address <addr>; the width of the address constant follows
   the host word type (I32 vs I64). */
void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
   addStmtToIRSB( bbOut,
		  IRStmt_Store(CLGEndness,
			       IRExpr_Const(hWordTy == Ity_I32 ?
					    IRConst_U32( addr ) :
					    IRConst_U64( addr )),
			       IRExpr_Const(IRConst_U32(val)) ));
/* add helper call to setup_bbcc, with pointer to BB struct as argument
 *
 * precondition for setup_bbcc:
 * - jmps_passed has number of cond.jumps passed in last executed BB
 * - current_bbcc has a pointer to the BBCC of the last executed BB
 *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
 *     current_bbcc->bb->jmp_addr
 *   gives the address of the jump source.
 *
 * the setup does 2 things:
 * * Unwind own call stack, i.e sync our ESP with real ESP
 *   This is for ESP manipulation (longjmps, C++ exec handling) and RET
 * * For CALLs or JMPs crossing objects, record call arg +
 *   push are on own call stack
 *
 * - prepare for cache log functions:
 *   set current_bbcc to BBCC that gets the costs for this BB execution
 */
void addBBSetupCall(ClgState* clgs)
   IRExpr *arg1, **argv;

   arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
   argv = mkIRExprVec_1(arg1);
   di = unsafeIRDirty_0_N( 1, "setup_bbcc",
			   VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
   addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
705 IRSB* CLG_(instrument)( VgCallbackClosure* closure,
707 VexGuestLayout* layout,
708 VexGuestExtents* vge,
709 IRType gWordTy, IRType hWordTy )
714 InstrInfo* curr_inode = NULL;
719 if (gWordTy != hWordTy) {
720 /* We don't currently support this case. */
721 VG_(tool_panic)("host/guest word size mismatch");
724 // No instrumentation if it is switched off
725 if (! CLG_(instrument_state)) {
726 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
727 (Addr)closure->readdr);
731 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);
733 /* Set up SB for instrumented IR */
734 clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
736 // Copy verbatim any IR preamble preceding the first IMark
738 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
739 addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
743 // Get the first statement, and origAddr from it
744 CLG_ASSERT(sbIn->stmts_used >0);
745 CLG_ASSERT(i < sbIn->stmts_used);
747 CLG_ASSERT(Ist_IMark == st->tag);
749 origAddr = (Addr)st->Ist.IMark.addr;
750 CLG_ASSERT(origAddr == st->Ist.IMark.addr); // XXX: check no overflow
752 /* Get BB struct (creating if necessary).
753 * JS: The hash table is keyed with orig_addr_noredir -- important!
754 * JW: Why? If it is because of different chasing of the redirection,
755 * this is not needed, as chasing is switched off in callgrind
757 clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));
759 addBBSetupCall(&clgs);
761 // Set up running state
762 clgs.events_used = 0;
764 clgs.instr_offset = 0;
766 for (/*use current i*/; i < sbIn->stmts_used; i++) {
769 CLG_ASSERT(isFlatIRStmt(st));
780 CLG_ASSERT(clgs.instr_offset == (Addr)st->Ist.IMark.addr - origAddr);
781 isize = st->Ist.IMark.len;
782 // If Vex fails to decode an instruction, the size will be zero.
783 // Pretend otherwise.
784 if (isize == 0) isize = VG_MIN_INSTR_SZB;
786 // Sanity-check size.
787 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
788 || VG_CLREQ_SZB == isize );
790 // Init the inode, record it as the current one.
791 // Subsequent Dr/Dw/Dm events from the same instruction will
793 curr_inode = next_InstrInfo (&clgs, isize);
795 addEvent_Ir( &clgs, curr_inode );
800 IRExpr* data = st->Ist.WrTmp.data;
801 if (data->tag == Iex_Load) {
802 IRExpr* aexpr = data->Iex.Load.addr;
803 // Note also, endianness info is ignored. I guess
804 // that's not interesting.
805 addEvent_Dr( &clgs, curr_inode,
806 sizeofIRType(data->Iex.Load.ty), aexpr );
812 IRExpr* data = st->Ist.Store.data;
813 IRExpr* aexpr = st->Ist.Store.addr;
814 addEvent_Dw( &clgs, curr_inode,
815 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
821 IRDirty* d = st->Ist.Dirty.details;
822 if (d->mFx != Ifx_None) {
823 /* This dirty helper accesses memory. Collect the details. */
824 tl_assert(d->mAddr != NULL);
825 tl_assert(d->mSize != 0);
827 // Large (eg. 28B, 108B, 512B on x86) data-sized
828 // instructions will be done inaccurately, but they're
829 // very rare and this avoids errors from hitting more
830 // than two cache lines in the simulation.
831 if (dataSize > MIN_LINE_SIZE)
832 dataSize = MIN_LINE_SIZE;
833 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
834 addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
835 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
836 addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
838 tl_assert(d->mAddr == NULL);
839 tl_assert(d->mSize == 0);
845 /* We treat it as a read and a write of the location. I
846 think that is the same behaviour as it was before IRCAS
847 was introduced, since prior to that point, the Vex
848 front ends would translate a lock-prefixed instruction
849 into a (normal) read followed by a (normal) write. */
851 IRCAS* cas = st->Ist.CAS.details;
852 CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
853 CLG_ASSERT(cas->dataLo);
854 dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
855 if (cas->dataHi != NULL)
856 dataSize *= 2; /* since this is a doubleword-cas */
857 addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
858 addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
864 if (st->Ist.LLSC.storedata == NULL) {
866 dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
867 addEvent_Dr( &clgs, curr_inode,
868 sizeofIRType(dataTy), st->Ist.LLSC.addr );
871 dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
872 addEvent_Dw( &clgs, curr_inode,
873 sizeofIRType(dataTy), st->Ist.LLSC.addr );
881 /* We may never reach the next statement, so need to flush
882 all outstanding transactions now. */
883 flushEvents( &clgs );
885 CLG_ASSERT(clgs.ii_index>0);
886 if (!clgs.seen_before) {
887 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
888 clgs.bb->jmp[cJumps].skip = False;
891 /* Update global variable jmps_passed before the jump
892 * A correction is needed if VEX inverted the last jump condition
894 jmps_passed = cJumps;
895 if ((cJumps+1 == clgs.bb->cjmp_count) && clgs.bb->cjmp_inverted)
897 addConstMemStoreStmt( clgs.sbOut,
898 (UWord) &CLG_(current_state).jmps_passed,
899 jmps_passed, hWordTy);
910 /* Copy the original statement */
911 addStmtToIRSB( clgs.sbOut, st );
914 VG_(printf)(" pass ");
920 /* At the end of the bb. Flush outstandings. */
921 flushEvents( &clgs );
923 /* Always update global variable jmps_passed at end of bb.
924 * A correction is needed if VEX inverted the last jump condition
927 UInt jmps_passed = cJumps;
928 if (clgs.bb->cjmp_inverted) jmps_passed--;
929 addConstMemStoreStmt( clgs.sbOut,
930 (UWord) &CLG_(current_state).jmps_passed,
931 jmps_passed, hWordTy);
933 CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
934 CLG_ASSERT(clgs.bb->instr_count = clgs.ii_index);
936 /* This stores the instr of the call/ret at BB end */
937 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
939 if (clgs.seen_before) {
940 CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
941 CLG_ASSERT(clgs.bb->instr_len = clgs.instr_offset);
942 CLG_ASSERT(clgs.bb->jmpkind == sbIn->jumpkind);
945 clgs.bb->cost_count = update_cost_offsets(&clgs);
946 clgs.bb->instr_len = clgs.instr_offset;
947 clgs.bb->jmpkind = sbIn->jumpkind;
950 CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
951 origAddr, clgs.bb->instr_len,
952 clgs.bb->cjmp_count, clgs.bb->cost_count);
955 for (i=0;i<cJumps;i++)
956 CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
957 CLG_DEBUG(3, "], last inverted: %s \n",
958 clgs.bb->cjmp_inverted ? "yes":"no");
964 /*--------------------------------------------------------------------*/
965 /*--- Discarding BB info ---*/
966 /*--------------------------------------------------------------------*/
// Called when a translation is removed from the translation cache for
// any reason at all: to free up space, because the guest code was
// unmapped or modified, or for any arbitrary reason.
// Note: only orig_addr64 is used for the lookup; vge is only printed.
void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
   Addr orig_addr = (Addr)orig_addr64;

   tl_assert(vge.n_used > 0);

      VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
		   (void*)(Addr)orig_addr,
		   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info. Simple! Note that we
   // use orig_addr, not the first instruction address in vge.
   CLG_(delete_bb)(orig_addr);
989 /*------------------------------------------------------------*/
990 /*--- CLG_(fini)() and related function ---*/
991 /*------------------------------------------------------------*/
/* Reset cost counters of one thread: for every active frame on the
   call stack, set its enter_cost to the current cost (so differences
   computed at function exit restart from zero) and clear its call
   counter; zero all BBCC costs; record the current cost as the
   thread's last-dump baseline.
   NOTE(review): reads CLG_(current_call_stack)/CLG_(current_state) —
   presumably the caller switches to thread t first; confirm at call
   sites (CLG_(forall_threads)). */
static void zero_thread_cost(thread_info* t)
   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
      if (!CLG_(current_call_stack).entry[i].jcc) continue;

      /* reset call counters to current for active calls */
      CLG_(copy_cost)( CLG_(sets).full,
		       CLG_(current_call_stack).entry[i].enter_cost,
		       CLG_(current_state).cost );
      CLG_(current_call_stack).entry[i].jcc->call_counter = 0;

   CLG_(forall_bbccs)(CLG_(zero_bbcc));

   /* set counter for last dump */
   CLG_(copy_cost)( CLG_(sets).full,
		    t->lastdump_cost, CLG_(current_state).cost );
1016 void CLG_(zero_all_cost)(Bool only_current_thread)
1018 if (VG_(clo_verbosity) > 1)
1019 VG_(message)(Vg_DebugMsg, " Zeroing costs...\n");
1021 if (only_current_thread)
1022 zero_thread_cost(CLG_(get_current_thread)());
1024 CLG_(forall_threads)(zero_thread_cost);
1026 if (VG_(clo_verbosity) > 1)
1027 VG_(message)(Vg_DebugMsg, " ...done\n");
/* Fully unwind the state of one thread: leave all pending signal
   handlers, pop the whole call stack, and reset the execution state
   and function stack used for context generation. */
void unwind_thread(thread_info* t)
   /* unwind signal handlers */
   while(CLG_(current_state).sig !=0)
      CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);

   /* unwind regular call stack */
   while(CLG_(current_call_stack).sp>0)
      CLG_(pop_call_stack)();

   /* reset context and function stack for context generation */
   CLG_(init_exec_state)( &CLG_(current_state) );
   CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
/* Zero the currently active cost counters (full event set). */
void zero_state_cost(thread_info* t)
   CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
/* Ups, this can go wrong... */
extern void VG_(discard_translations) ( Addr64 start, ULong range );

/* Switch instrumentation on/off at runtime (command line or client
   request).  Discards all existing translations so future execution is
   retranslated in the new mode, then resets per-thread call stacks,
   zeroes state costs and clears the cache simulator. */
void CLG_(set_instrument_state)(Char* reason, Bool state)
   if (CLG_(instrument_state) == state) {
      CLG_DEBUG(2, "%s: instrumentation already %s\n",
		reason, state ? "ON" : "OFF");
   CLG_(instrument_state) = state;
   CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
	     reason, state ? "ON" : "OFF");

   /* discard translations over (almost) the whole address space */
   VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl);

   /* reset internal state: call stacks, simulator */
   CLG_(forall_threads)(unwind_thread);
   CLG_(forall_threads)(zero_state_cost);
   (*CLG_(cachesim).clear)();

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
		   reason, state ? "ON" : "OFF");
/* Dispatch Callgrind client requests (namespace 'C','T'): profile
   dumps, cost zeroing, collection toggling, instrumentation on/off.
   Returns False for requests not addressed to this tool. */
Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
   if (!VG_IS_TOOL_USERREQ('C','T',args[0]))

   case VG_USERREQ__DUMP_STATS:
      CLG_(dump_profile)("Client Request", True);
      *ret = 0; /* meaningless */

   case VG_USERREQ__DUMP_STATS_AT:
      /* args[1] is a client-supplied trigger description */
      VG_(sprintf)(buf,"Client Request: %s", (Char*)args[1]);
      CLG_(dump_profile)(buf, True);
      *ret = 0; /* meaningless */

   case VG_USERREQ__ZERO_STATS:
      CLG_(zero_all_cost)(True);
      *ret = 0; /* meaningless */

   case VG_USERREQ__TOGGLE_COLLECT:
      CLG_(current_state).collect = !CLG_(current_state).collect;
      CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
		CLG_(current_state).collect ? "ON" : "OFF");
      *ret = 0; /* meaningless */

   case VG_USERREQ__START_INSTRUMENTATION:
      CLG_(set_instrument_state)("Client Request", True);
      *ret = 0; /* meaningless */

   case VG_USERREQ__STOP_INSTRUMENTATION:
      CLG_(set_instrument_state)("Client Request", False);
      *ret = 0; /* meaningless */
1130 /* Syscall Timing */
1132 /* struct timeval syscalltime[VG_N_THREADS]; */
1133 #if CLG_MICROSYSTIME
1134 #include <sys/time.h>
1135 #include <sys/syscall.h>
1136 extern Int VG_(do_syscall) ( UInt, ... );
1138 ULong syscalltime[VG_N_THREADS];
1140 UInt syscalltime[VG_N_THREADS];
/* Syscall entry hook: when --collect-systime is active, record a
   per-thread timestamp (microseconds via gettimeofday if
   CLG_MICROSYSTIME, milliseconds otherwise) so the post-handler can
   charge the elapsed system time. */
void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
			   UWord* args, UInt nArgs)
   if (CLG_(clo).collect_systime) {
#if CLG_MICROSYSTIME
     struct vki_timeval tv_now;
     VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
     syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
     /* (millisecond-resolution branch; #else not visible here) */
     syscalltime[tid] = VG_(read_millisecond_timer)();
/* Syscall exit hook: charge one syscall event and the elapsed system
   time (since the matching pre-hook) to the current state's cost and
   to the skipped-cost array of the current BBCC, at the off_full_systime
   offsets. */
void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
			    UWord* args, UInt nArgs, SysRes res)
   if (CLG_(clo).collect_systime &&
       CLG_(current_state).bbcc) {
      Int o = CLG_(sets).off_full_systime;
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
      diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
      UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];

      /* NOTE(review): "%ull" is parsed as %u followed by literal "ll";
	 "%llu" was probably intended — but the argument's type differs
	 between the #if branches above, so a cast would be needed too. */
      CLG_DEBUG(0," Time (Off %d) for Syscall %d: %ull\n", o, syscallno, diff);

      /* per-syscall count in slot o, accumulated time in slot o+1 */
      CLG_(current_state).cost[o] ++;
      CLG_(current_state).cost[o+1] += diff;
      if (!CLG_(current_state).bbcc->skipped)
	 CLG_(init_cost_lz)(CLG_(sets).full,
			    &(CLG_(current_state).bbcc->skipped));
      CLG_(current_state).bbcc->skipped[o] ++;
      CLG_(current_state).bbcc->skipped[o+1] += diff;
   /* (body of the final shutdown/statistics routine; the enclosing
      function header is not visible in this view) */
   char buf[RESULTS_BUF_LEN];

   CLG_DEBUG(0, "finish()\n");

   (*CLG_(cachesim).finish)();

   /* pop all remaining items from CallStack for correct sum */
   CLG_(forall_threads)(unwind_thread);

   CLG_(dump_profile)(0, False);

   CLG_(finish_command)();

   if (VG_(clo_verbosity) == 0) return;

   /* Hash table stats */
   if (VG_(clo_stats)) {
      /* sum of translated BBs over all debug-info quality classes
	 (assigned to a local, BB_lookups, declared on an elided line) */
	 CLG_(stat).full_debug_BBs +
	 CLG_(stat).fn_name_debug_BBs +
	 CLG_(stat).file_line_debug_BBs +
	 CLG_(stat).no_debug_BBs;

      VG_(message)(Vg_DebugMsg, "\n");
      VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
		   CLG_(stat).distinct_objs);
      VG_(message)(Vg_DebugMsg, "Distinct files: %d\n",
		   CLG_(stat).distinct_files);
      VG_(message)(Vg_DebugMsg, "Distinct fns: %d\n",
		   CLG_(stat).distinct_fns);
      VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
		   CLG_(stat).distinct_contexts);
      VG_(message)(Vg_DebugMsg, "Distinct BBs: %d\n",
		   CLG_(stat).distinct_bbs);
      VG_(message)(Vg_DebugMsg, "Cost entries: %d (Chunks %d)\n",
		   CLG_(costarray_entries), CLG_(costarray_chunks));
      VG_(message)(Vg_DebugMsg, "Distinct BBCCs: %d\n",
		   CLG_(stat).distinct_bbccs);
      VG_(message)(Vg_DebugMsg, "Distinct JCCs: %d\n",
		   CLG_(stat).distinct_jccs);
      VG_(message)(Vg_DebugMsg, "Distinct skips: %d\n",
		   CLG_(stat).distinct_skips);
      VG_(message)(Vg_DebugMsg, "BB lookups: %d\n",
      VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)\n",
		   CLG_(stat).full_debug_BBs * 100 / BB_lookups,
		   CLG_(stat).full_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
		   CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
		   CLG_(stat).file_line_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)\n",
		   CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
		   CLG_(stat).fn_name_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)\n",
		   CLG_(stat).no_debug_BBs * 100 / BB_lookups,
		   CLG_(stat).no_debug_BBs);
      VG_(message)(Vg_DebugMsg, "BBCC Clones: %d\n",
		   CLG_(stat).bbcc_clones);
      VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d\n",
		   CLG_(stat).bb_retranslations);
      VG_(message)(Vg_DebugMsg, "Distinct instrs: %d\n",
		   CLG_(stat).distinct_instrs);
      VG_(message)(Vg_DebugMsg, "");

      VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
		   CLG_(stat).cxt_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d\n",
		   CLG_(stat).bbcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d\n",
		   CLG_(stat).jcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "BBs Executed: %llu\n",
		   CLG_(stat).bb_executions);
      VG_(message)(Vg_DebugMsg, "Calls: %llu\n",
		   CLG_(stat).call_counter);
      VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu\n",
		   CLG_(stat).jcnd_counter);
      VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu\n",
		   CLG_(stat).jump_counter);
      VG_(message)(Vg_DebugMsg, "Recursive calls: %llu\n",
		   CLG_(stat).rec_call_counter);
      VG_(message)(Vg_DebugMsg, "Returns: %llu\n",
		   CLG_(stat).ret_counter);

      VG_(message)(Vg_DebugMsg, "");

   /* print totals over the event mapping used for dumping */
   CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
   VG_(message)(Vg_UserMsg, "Events : %s\n", buf);
   CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
   VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
   VG_(message)(Vg_UserMsg, "\n");

   // if (CLG_(clo).simulate_cache)
   (*CLG_(cachesim).printstat)();
1292 void CLG_(fini)(Int exitcode)
1298 /*--------------------------------------------------------------------*/
1300 /*--------------------------------------------------------------------*/
/* Core callback: invoked whenever client code resumes running in a
   thread.  Throttled to roughly every 5000 executed superblocks before
   switching Callgrind's active-thread state via CLG_(run_thread). */
static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
   static ULong last_blocks_done = 0;

      VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);

   /* throttle calls to CLG_(run_thread) by number of BBs executed */
   if (blocks_done - last_blocks_done < 5000) return;
   last_blocks_done = blocks_done;

   CLG_(run_thread)( tid );
1317 void CLG_(post_clo_init)(void)
1319 VG_(clo_vex_control).iropt_unroll_thresh = 0;
1320 VG_(clo_vex_control).guest_chase_thresh = 0;
1322 CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
1323 CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers);
1324 CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions);
1326 if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
1327 VG_(message)(Vg_UserMsg, "Using source line as position.\n");
1328 CLG_(clo).dump_line = True;
1332 CLG_(init_command)();
1334 (*CLG_(cachesim).post_clo_init)();
1336 CLG_(init_eventsets)(0);
1337 CLG_(init_statistics)(& CLG_(stat));
1338 CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );
1340 /* initialize hash tables */
1341 CLG_(init_obj_table)();
1342 CLG_(init_cxt_table)();
1343 CLG_(init_bb_hash)();
1345 CLG_(init_threads)();
1346 CLG_(run_thread)(1);
1348 CLG_(instrument_state) = CLG_(clo).instrument_atstart;
1350 if (VG_(clo_verbosity > 0)) {
1351 VG_(message)(Vg_UserMsg,
1352 "For interactive control, run 'callgrind_control -h'.\n");
/* Tool registration, run before command line processing: announce tool
   details and register all callbacks/needs with the Valgrind core.
   Referenced by VG_DETERMINE_INTERFACE_VERSION below. */
void CLG_(pre_clo_init)(void)
   VG_(details_name) ("Callgrind");
   VG_(details_version) (NULL);
   VG_(details_description) ("a call-graph generating cache profiler");
   VG_(details_copyright_author)("Copyright (C) 2002-2010, and GNU GPL'd, "
				 "by Josef Weidendorfer et al.");
   VG_(details_bug_reports_to) (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   VG_(basic_tool_funcs) (CLG_(post_clo_init),

   VG_(needs_superblock_discards)(clg_discard_superblock_info);

   VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
				   CLG_(print_debug_usage));

   VG_(needs_client_requests)(CLG_(handle_client_request));
   VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
			      CLG_(post_syscalltime));

   VG_(track_start_client_code) ( & clg_start_client_code_callback );
   VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
   VG_(track_post_deliver_signal)( & CLG_(post_signal) );

   CLG_(set_clo_defaults)();
1389 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
1391 /*--------------------------------------------------------------------*/
1392 /*--- end main.c ---*/
1393 /*--------------------------------------------------------------------*/