2 /*--------------------------------------------------------------------*/
3 /*--- Cachegrind: everything but the simulation itself. ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Cachegrind, a Valgrind tool for cache
11 Copyright (C) 2002-2010 Nicholas Nethercote
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_tool_basics.h"
33 #include "pub_tool_vki.h"
34 #include "pub_tool_debuginfo.h"
35 #include "pub_tool_libcbase.h"
36 #include "pub_tool_libcassert.h"
37 #include "pub_tool_libcfile.h"
38 #include "pub_tool_libcprint.h"
39 #include "pub_tool_libcproc.h"
40 #include "pub_tool_machine.h"
41 #include "pub_tool_mallocfree.h"
42 #include "pub_tool_options.h"
43 #include "pub_tool_oset.h"
44 #include "pub_tool_tooliface.h"
45 #include "pub_tool_xarray.h"
46 #include "pub_tool_clientstate.h"
47 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
51 #include "cg_branchpred.c"
53 /*------------------------------------------------------------*/
55 /*------------------------------------------------------------*/
57 /* Set to 1 for very verbose debugging */
60 #define MIN_LINE_SIZE 16
61 #define FILE_LEN VKI_PATH_MAX
64 /*------------------------------------------------------------*/
66 /*------------------------------------------------------------*/
/* Command-line option values (filled in during option processing).
   Cache simulation defaults to on, branch-prediction simulation to off.
   The output-file template may contain "%p", which is expanded to the
   pid when the file name is built at dump time (important for forked
   children writing separate files). */
68 static Bool clo_cache_sim = True; /* do cache simulation? */
69 static Bool clo_branch_sim = False; /* do branch simulation? */
70 static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
72 /*------------------------------------------------------------*/
73 /*--- Types and Data Structures ---*/
74 /*------------------------------------------------------------*/
78 ULong a; /* total # memory accesses of this kind */
79 ULong m1; /* misses in the first level cache */
80 ULong m2; /* misses in the second level cache */
86 ULong b; /* total # branches of this kind */
87 ULong mp; /* number of branches mispredicted */
91 //------------------------------------------------------------
92 // Primary data structure #1: CC table
93 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
94 // - an ordered set of CCs. CC indexing done by file/function/line (as
95 // determined from the instrAddr).
96 // - Traversed for dumping stats at end in file/func/line hierarchy.
106 CodeLoc loc; /* Source location that these counts pertain to */
107 CacheCC Ir; /* Insn read counts */
108 CacheCC Dr; /* Data read counts */
109 CacheCC Dw; /* Data write/modify counts */
110 BranchCC Bc; /* Conditional branch counts */
111 BranchCC Bi; /* Indirect branch counts */
114 // First compare file, then fn, then line.
// OSet comparator for CC_table: orders stored LineCC elements ('vcc')
// against a CodeLoc lookup key ('vloc').
// NOTE(review): this listing omits some intermediate lines (e.g. the
// early-return checks after each strcmp); comments cover visible code only.
115 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
118 CodeLoc* a = (CodeLoc*)vloc;
119 CodeLoc* b = &(((LineCC*)vcc)->loc);
121 res = VG_(strcmp)(a->file, b->file);
125 res = VG_(strcmp)(a->fn, b->fn);
// File and function equal: final tie-break on line number.
129 return a->line - b->line;
// Ordered set of LineCC nodes, keyed by (file, fn, line) through
// cmp_CodeLoc_LineCC; traversed at exit when the stats are dumped.
132 static OSet* CC_table;
134 //------------------------------------------------------------
135 // Primary data structure #2: InstrInfo table
136 // - Holds the cached info about each instr that is used for simulation.
137 // - table(SB_start_addr, list(InstrInfo))
138 // - For each SB, each InstrInfo in the list holds info about the
139 // instruction (instrLen, instrAddr, etc), plus a pointer to its line
140 // CC. This node is what's passed to the simulation function.
141 // - When SBs are discarded the relevant list(instr_details) is freed.
143 typedef struct _InstrInfo InstrInfo;
147 LineCC* parent; // parent line-CC
150 typedef struct _SB_info SB_info;
152 Addr SB_addr; // key; MUST BE FIRST
// Ordered set of SB_info nodes, keyed by superblock start address
// (SB_addr is the first field and serves as the OSet key).
157 static OSet* instrInfoTable;
159 //------------------------------------------------------------
160 // Secondary data structure: string table
161 // - holds strings, avoiding dups
162 // - used for filenames and function names, each of which will be
163 // pointed to by one or more CCs.
164 // - it also allows equality checks just by pointer comparison, which
165 // is good when printing the output file at the end.
// Ordered set of interned (Char*) strings; see get_perm_string() and
// stringCmp().  Interning permits pointer-equality name comparisons.
167 static OSet* stringTable;
169 //------------------------------------------------------------
// Summary statistics.  The distinct_* counters track how many unique
// files/functions/lines/instructions have been seen (increment sites
// are on lines omitted from this listing).
171 static Int distinct_files = 0;
172 static Int distinct_fns = 0;
173 static Int distinct_lines = 0;
174 static Int distinct_instrs = 0;
// Debug-info lookup outcome counts, updated in get_debug_info().
176 static Int full_debugs = 0;
177 static Int file_line_debugs = 0;
178 static Int fn_debugs = 0;
179 static Int no_debugs = 0;
181 /*------------------------------------------------------------*/
182 /*--- String table operations ---*/
183 /*------------------------------------------------------------*/
// OSet comparator for stringTable: elements are (Char*) nodes compared
// by string content, so duplicate strings are detected on lookup/insert.
185 static Word stringCmp( const void* key, const void* elem )
187 return VG_(strcmp)(*(Char**)key, *(Char**)elem);
190 // Get a permanent string; either pull it out of the string table if it's
191 // been encountered before, or dup it and put it into the string table.
// Interning guarantees equal strings share one pointer, so CC-table
// code can compare file/function names by pointer identity.
192 static Char* get_perm_string(Char* s)
// Content-based lookup (stringCmp); hit path returns the stored copy.
194 Char** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
// Miss: allocate a node, take a permanent heap copy, and insert it.
198 Char** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(Char*));
199 *s_node = VG_(strdup)("cg.main.gps.1", s);
200 VG_(OSetGen_Insert)(stringTable, s_node);
205 /*------------------------------------------------------------*/
206 /*--- CC table operations ---*/
207 /*------------------------------------------------------------*/
// Fetch filename, function name and line number for 'instr_addr' from
// Valgrind's debug-info layer, substituting "???" when information is
// missing, and update the debug-info outcome statistics.
// NOTE(review): some intermediate lines are omitted from this listing.
209 static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
210 Char fn[FN_LEN], Int* line)
214 Bool found_file_line = VG_(get_filename_linenum)(
217 dir, FILE_LEN, &found_dirname,
220 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
222 if (!found_file_line) {
// Conventional placeholder for unknown source locations.
223 VG_(strcpy)(file, "???");
227 VG_(strcpy)(fn, "???");
// A directory was found: build "dir/file" in 'dir', then copy back so
// 'file' carries the combined path.  The assert bounds the result.
232 tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN);
233 VG_(strcat)(dir, "/"); // Append '/'
234 VG_(strcat)(dir, file); // Append file to dir
235 VG_(strcpy)(file, dir); // Move dir+file to file
// Classify this lookup for the summary statistics.
238 if (found_file_line) {
239 if (found_fn) full_debugs++;
240 else file_line_debugs++;
242 if (found_fn) fn_debugs++;
247 // Do a three step traversal: by file, then fn, then line.
248 // Returns a pointer to the line CC, creates a new one if necessary.
// On a CC_table miss, the new node's file/fn strings are interned via
// get_perm_string() so later comparisons are pointer comparisons.
// NOTE(review): loc initialisation and the miss-test line are omitted
// from this listing.
249 static LineCC* get_lineCC(Addr origAddr)
251 Char file[FILE_LEN], fn[FN_LEN];
256 get_debug_info(origAddr, file, fn, &line);
262 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
264 // Allocate and zero a new node.
265 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
266 lineCC->loc.file = get_perm_string(loc.file);
267 lineCC->loc.fn = get_perm_string(loc.fn);
268 lineCC->loc.line = loc.line;
282 VG_(OSetGen_Insert)(CC_table, lineCC);
288 /*------------------------------------------------------------*/
289 /*--- Cache simulation functions ---*/
290 /*------------------------------------------------------------*/
// Dirty-helper: simulate the I1 (and, on miss, L2) lookup for a single
// instruction fetch.  Installed by flushEvents(); miss counts go into
// the instruction's per-source-line cost centre (n->parent).
293 void log_1I_0D_cache_access(InstrInfo* n)
295 //VG_(printf)("1I_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
296 // n, n->instr_addr, n->instr_len);
297 cachesim_I1_doref(n->instr_addr, n->instr_len,
298 &n->parent->Ir.m1, &n->parent->Ir.m2);
// Dirty-helper: fused variant logging two consecutive instruction
// fetches with one call (see the Ir+Ir merge in flushEvents).
303 void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2)
305 //VG_(printf)("2I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
306 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
307 // n, n->instr_addr, n->instr_len,
308 // n2, n2->instr_addr, n2->instr_len);
309 cachesim_I1_doref(n->instr_addr, n->instr_len,
310 &n->parent->Ir.m1, &n->parent->Ir.m2);
312 cachesim_I1_doref(n2->instr_addr, n2->instr_len,
313 &n2->parent->Ir.m1, &n2->parent->Ir.m2);
// Dirty-helper: fused variant logging three consecutive instruction
// fetches with one call (see the Ir+Ir+Ir merge in flushEvents).
318 void log_3I_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
320 //VG_(printf)("3I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
321 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
322 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
323 // n, n->instr_addr, n->instr_len,
324 // n2, n2->instr_addr, n2->instr_len,
325 // n3, n3->instr_addr, n3->instr_len);
326 cachesim_I1_doref(n->instr_addr, n->instr_len,
327 &n->parent->Ir.m1, &n->parent->Ir.m2);
329 cachesim_I1_doref(n2->instr_addr, n2->instr_len,
330 &n2->parent->Ir.m1, &n2->parent->Ir.m2);
332 cachesim_I1_doref(n3->instr_addr, n3->instr_len,
333 &n3->parent->Ir.m1, &n3->parent->Ir.m2);
// Dirty-helper: one instruction fetch plus one data read from the same
// instruction (Ir merged with a following Dr/Dm in flushEvents).
338 void log_1I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
340 //VG_(printf)("1I_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
341 // " daddr=0x%010lx, dsize=%lu\n",
342 // n, n->instr_addr, n->instr_len, data_addr, data_size);
343 cachesim_I1_doref(n->instr_addr, n->instr_len,
344 &n->parent->Ir.m1, &n->parent->Ir.m2);
// Data-side reference charged to the same line's Dr counters.
347 cachesim_D1_doref(data_addr, data_size,
348 &n->parent->Dr.m1, &n->parent->Dr.m2);
// Dirty-helper: one instruction fetch plus one data write from the same
// instruction (Ir merged with a following Dw in flushEvents).
353 void log_1I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
355 //VG_(printf)("1I_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
356 // " daddr=0x%010lx, dsize=%lu\n",
357 // n, n->instr_addr, n->instr_len, data_addr, data_size);
358 cachesim_I1_doref(n->instr_addr, n->instr_len,
359 &n->parent->Ir.m1, &n->parent->Ir.m2);
// Data-side reference charged to the same line's Dw counters.
362 cachesim_D1_doref(data_addr, data_size,
363 &n->parent->Dw.m1, &n->parent->Dw.m2);
// Dirty-helper: a data read (or the read half of a modify) with no
// accompanying instruction fetch event.
368 void log_0I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
370 //VG_(printf)("0I_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
371 // n, data_addr, data_size);
372 cachesim_D1_doref(data_addr, data_size,
373 &n->parent->Dr.m1, &n->parent->Dr.m2);
// Dirty-helper: a data write with no accompanying instruction fetch
// event.
378 void log_0I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
380 //VG_(printf)("0I_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
381 // n, data_addr, data_size);
382 cachesim_D1_doref(data_addr, data_size,
383 &n->parent->Dw.m1, &n->parent->Dw.m2);
387 /* For branches, we consult two different predictors, one which
388 predicts taken/untaken for conditional branches, and the other
389 which predicts the branch target address for indirect branches
390 (jump-to-register style ones). */
// Dirty-helper: record one conditional branch and whether the taken/
// untaken predictor got it right.
393 void log_cond_branch(InstrInfo* n, Word taken)
395 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
// Mispredict accumulation; the target lvalue (presumably
// n->parent->Bc.mp) is on a line omitted from this listing -- confirm.
399 += (1 & do_cond_branch_predict(n->instr_addr, taken));
// Dirty-helper: record one indirect branch and whether the branch-
// target predictor guessed 'actual_dst' correctly.
403 void log_ind_branch(InstrInfo* n, UWord actual_dst)
405 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
// Mispredict accumulation; the target lvalue (presumably
// n->parent->Bi.mp) is on a line omitted from this listing -- confirm.
409 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
413 /*------------------------------------------------------------*/
414 /*--- Instrumentation types and structures ---*/
415 /*------------------------------------------------------------*/
417 /* Maintain an ordered list of memory events which are outstanding, in
418 the sense that no IR has yet been generated to do the relevant
419 helper calls. The BB is scanned top to bottom and memory events
420 are added to the end of the list, merging with the most recent
421 notified event where possible (Dw immediately following Dr and
422 having the same size and EA can be merged).
424 This merging is done so that for architectures which have
425 load-op-store instructions (x86, amd64), the insn is treated as if
426 it makes just one memory reference (a modify), rather than two (a
427 read followed by a write at the same address).
429 At various points the list will need to be flushed, that is, IR
430 generated from it. That must happen before any possible exit from
431 the block (the end, or an IRStmt_Exit). Flushing also takes place
432 when there is no space to add a new event.
434 If we require the simulation statistics to be up to date with
435 respect to possible memory exceptions, then the list would have to
436 be flushed before each memory reference. That would however lose
437 performance by inhibiting event-merging during flushing.
439 Flushing the list consists of walking it start to end and emitting
440 instrumentation IR for each event, in the order in which they
441 appear. It may be possible to emit a single call for two adjacent
442 events in order to reduce the number of helper function calls made.
443 For example, it could well be profitable to handle two adjacent Ir
444 events with a single helper call. */
452 Ev_Ir, // Instruction read
455 Ev_Dm, // Data modify (read then write)
456 Ev_Bc, // branch conditional
457 Ev_Bi // branch indirect (to unknown destination)
481 IRAtom* taken; /* :: Ity_I1 */
// Zero-fill an Event so its tag and union members start in a known
// state before the caller sets the relevant fields.
490 static void init_Event ( Event* ev ) {
491 VG_(memset)(ev, 0, sizeof(Event));
// Return the effective-address atom of a data event.  Only valid for
// Dr/Dw/Dm events; anything else asserts.
494 static IRAtom* get_Event_dea ( Event* ev ) {
496 case Ev_Dr: return ev->Ev.Dr.ea;
497 case Ev_Dw: return ev->Ev.Dw.ea;
498 case Ev_Dm: return ev->Ev.Dm.ea;
499 default: tl_assert(0);
// Return the access size in bytes of a data event.  Only valid for
// Dr/Dw/Dm events; anything else asserts.
503 static Int get_Event_dszB ( Event* ev ) {
505 case Ev_Dr: return ev->Ev.Dr.szB;
506 case Ev_Dw: return ev->Ev.Dw.szB;
507 case Ev_Dm: return ev->Ev.Dm.szB;
508 default: tl_assert(0);
513 /* Up to this many unnotified events are allowed. Number is
514 arbitrary. Larger numbers allow more event merging to occur, but
515 potentially induce more spilling due to extending live ranges of
516 address temporaries. */
520 /* A struct which holds all the running state during instrumentation.
521 Mostly to avoid passing loads of parameters everywhere. */
524 /* The current outstanding-memory-event list. */
525 Event events[N_EVENTS];
528 /* The array of InstrInfo bins for the BB. */
531 /* Number InstrInfo bins 'used' so far. */
534 /* The output SB being constructed. */
540 /*------------------------------------------------------------*/
541 /*--- Instrumentation main ---*/
542 /*------------------------------------------------------------*/
544 // Note that origAddr is the real origAddr, not the address of the first
545 // instruction in the block (they can be different due to redirection).
// Allocate and register the SB_info node for a superblock that is about
// to be instrumented; must not already be present in instrInfoTable.
547 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
553 // Count number of original instrs in SB
555 for (i = 0; i < sbIn->stmts_used; i++) {
557 if (Ist_IMark == st->tag) n_instrs++;
560 // Check that we don't have an entry for this BB in the instr-info table.
561 // If this assertion fails, there has been some screwup: some
562 // translations must have been discarded but Cachegrind hasn't discarded
563 // the corresponding entries in the instr-info table.
564 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
565 tl_assert(NULL == sbInfo);
567 // BB never translated before (at this address, at least; could have
568 // been unloaded and then reloaded elsewhere in memory)
// One allocation covers the SB_info header plus n_instrs trailing
// InstrInfo slots; keyed by SB_addr (first field of SB_info).
569 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
570 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
571 sbInfo->SB_addr = origAddr;
572 sbInfo->n_instrs = n_instrs;
573 VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
// Debug printer: dump one queued Event (tag, inode, size/operand) in
// human-readable form.  Used from the verbose paths in flushEvents.
580 static void showEvent ( Event* ev )
584 VG_(printf)("Ir %p\n", ev->inode);
587 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
588 ppIRExpr(ev->Ev.Dr.ea);
592 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
593 ppIRExpr(ev->Ev.Dw.ea);
597 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
598 ppIRExpr(ev->Ev.Dm.ea);
602 VG_(printf)("Bc %p GA=", ev->inode);
603 ppIRExpr(ev->Ev.Bc.taken);
607 VG_(printf)("Bi %p DST=", ev->inode);
608 ppIRExpr(ev->Ev.Bi.dst);
617 // Reserve and initialise an InstrInfo for the first mention of a new insn.
// Takes the next unused slot in the SB's InstrInfo array and binds the
// instruction to its per-source-line cost centre up front, so the
// simulation helpers can bump counters without any further lookups.
619 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
622 tl_assert(cgs->sbInfo_i >= 0);
623 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
624 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
625 i_node->instr_addr = instr_addr;
626 i_node->instr_len = instr_len;
627 i_node->parent = get_lineCC(instr_addr);
633 /* Generate code for all outstanding memory events, and mark the queue
634 empty. Code is generated into cgs->bbOut, and this activity
635 'consumes' slots in cgs->sbInfo. */
// For each (possibly merged) run of events, one dirty-helper call is
// emitted; the merge rules below pick the most specific log_* helper.
// NOTE(review): this listing omits many original lines (declarations,
// switch labels, loop-advance code, braces); comments cover visible
// code only.
637 static void flushEvents ( CgState* cgs )
650 while (i < cgs->events_used) {
657 /* generate IR to notify event i and possibly the ones
658 immediately following it. */
659 tl_assert(i >= 0 && i < cgs->events_used);
// Current event plus up to two lookahead events for merging.
661 ev = &cgs->events[i];
662 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
663 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
666 VG_(printf)(" flush ");
// The current event's InstrInfo, passed to every helper as arg 1.
670 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
672 /* Decide on helper fn to call and args to pass it, and advance
676 /* Merge an Ir with a following Dr/Dm. */
677 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
678 /* Why is this true? It's because we're merging an Ir
679 with a following Dr or Dm. The Ir derives from the
680 instruction's IMark and the Dr/Dm from data
681 references which follow it. In short it holds
682 because each insn starts with an IMark, hence an
683 Ev_Ir, and so these Dr/Dm must pertain to the
684 immediately preceding Ir. Same applies to analogous
685 assertions in the subsequent cases. */
686 tl_assert(ev2->inode == ev->inode);
687 helperName = "log_1I_1Dr_cache_access";
688 helperAddr = &log_1I_1Dr_cache_access;
689 argv = mkIRExprVec_3( i_node_expr,
691 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
695 /* Merge an Ir with a following Dw. */
697 if (ev2 && ev2->tag == Ev_Dw) {
698 tl_assert(ev2->inode == ev->inode);
699 helperName = "log_1I_1Dw_cache_access";
700 helperAddr = &log_1I_1Dw_cache_access;
701 argv = mkIRExprVec_3( i_node_expr,
703 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
707 /* Merge an Ir with two following Irs. */
709 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir)
711 helperName = "log_3I_0D_cache_access";
712 helperAddr = &log_3I_0D_cache_access;
713 argv = mkIRExprVec_3( i_node_expr,
714 mkIRExpr_HWord( (HWord)ev2->inode ),
715 mkIRExpr_HWord( (HWord)ev3->inode ) );
719 /* Merge an Ir with one following Ir. */
721 if (ev2 && ev2->tag == Ev_Ir) {
722 helperName = "log_2I_0D_cache_access";
723 helperAddr = &log_2I_0D_cache_access;
724 argv = mkIRExprVec_2( i_node_expr,
725 mkIRExpr_HWord( (HWord)ev2->inode ) );
729 /* No merging possible; emit as-is. */
731 helperName = "log_1I_0D_cache_access";
732 helperAddr = &log_1I_0D_cache_access;
733 argv = mkIRExprVec_1( i_node_expr );
740 /* Data read or modify */
741 helperName = "log_0I_1Dr_cache_access";
742 helperAddr = &log_0I_1Dr_cache_access;
743 argv = mkIRExprVec_3( i_node_expr,
745 mkIRExpr_HWord( get_Event_dszB(ev) ) );
// Standalone data write (no preceding Ir to merge with).
751 helperName = "log_0I_1Dw_cache_access";
752 helperAddr = &log_0I_1Dw_cache_access;
753 argv = mkIRExprVec_3( i_node_expr,
755 mkIRExpr_HWord( get_Event_dszB(ev) ) );
760 /* Conditional branch */
761 helperName = "log_cond_branch";
762 helperAddr = &log_cond_branch;
763 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
768 /* Branch to an unknown destination */
769 helperName = "log_ind_branch";
770 helperAddr = &log_ind_branch;
771 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
779 /* Add the helper. */
780 tl_assert(helperName);
781 tl_assert(helperAddr);
// Wrap the chosen helper in a dirty call and append it to the output SB.
783 di = unsafeIRDirty_0_N( regparms,
784 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
786 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
// Queue fully consumed.
789 cgs->events_used = 0;
// Queue an instruction-fetch event for 'inode', flushing the queue
// first if it is full.
792 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
795 if (cgs->events_used == N_EVENTS)
797 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
798 evt = &cgs->events[cgs->events_used];
// Queue a data-read event of 'datasize' bytes at address atom 'ea',
// flushing first if the queue is full.  Sizes are capped by callers at
// MIN_LINE_SIZE so a reference never spans more than two cache lines.
806 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
809 tl_assert(isIRAtom(ea));
810 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
813 if (cgs->events_used == N_EVENTS)
815 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
816 evt = &cgs->events[cgs->events_used];
820 evt->Ev.Dr.szB = datasize;
// Queue a data-write event; if the immediately preceding event is a
// read of the same size/address from the same instruction, upgrade it
// to a single Dm (modify) event instead of queueing a separate write.
826 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
831 tl_assert(isIRAtom(ea));
832 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
837 /* Is it possible to merge this write with the preceding read? */
// NOTE(review): this address is formed before the events_used > 0 guard
// below, so when the queue is empty &events[-1] is computed (never
// dereferenced, but formally out of bounds) -- consider moving it
// inside the if.
838 lastEvt = &cgs->events[cgs->events_used-1];
839 if (cgs->events_used > 0
840 && lastEvt->tag == Ev_Dr
841 && lastEvt->Ev.Dr.szB == datasize
842 && lastEvt->inode == inode
843 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
// Matching read directly before this write: fold into a modify.
845 lastEvt->tag = Ev_Dm;
849 /* No. Add as normal. */
850 if (cgs->events_used == N_EVENTS)
852 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
853 evt = &cgs->events[cgs->events_used];
857 evt->Ev.Dw.szB = datasize;
// Queue a conditional-branch event; 'guard' must already be widened to
// the host word type (checked by the typeOfIRExpr assertion).
863 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
866 tl_assert(isIRAtom(guard));
867 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
868 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
871 if (cgs->events_used == N_EVENTS)
873 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
874 evt = &cgs->events[cgs->events_used];
878 evt->Ev.Bc.taken = guard;
// Queue an indirect-branch event; 'whereTo' is the destination atom,
// already host-word sized (checked by the typeOfIRExpr assertion).
883 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
886 tl_assert(isIRAtom(whereTo));
887 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
888 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
891 if (cgs->events_used == N_EVENTS)
893 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
894 evt = &cgs->events[cgs->events_used];
898 evt->Ev.Bi.dst = whereTo;
902 ////////////////////////////////////////////////////////////
// Core instrumentation pass: copy each superblock statement-by-
// statement into a new IRSB, queueing Ir/Dr/Dw/Dm/Bc/Bi events as the
// statements are seen and flushing the queue (as dirty-helper calls)
// before every possible exit from the block.
// NOTE(review): this listing omits a number of original lines (loop
// braces, switch labels, some declarations); comments below describe
// only the visible code.
906 IRSB* cg_instrument ( VgCallbackClosure* closure,
908 VexGuestLayout* layout,
909 VexGuestExtents* vge,
910 IRType gWordTy, IRType hWordTy )
914 Addr64 cia; /* address of current insn */
916 IRTypeEnv* tyenv = sbIn->tyenv;
917 InstrInfo* curr_inode = NULL;
919 if (gWordTy != hWordTy) {
920 /* We don't currently support this case. */
921 VG_(tool_panic)("host/guest word size mismatch");
// Output SB starts as an empty copy (same tyenv, no statements).
925 cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
927 // Copy verbatim any IR preamble preceding the first IMark
929 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
930 addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
934 // Get the first statement, and initial cia from it
935 tl_assert(sbIn->stmts_used > 0);
936 tl_assert(i < sbIn->stmts_used);
938 tl_assert(Ist_IMark == st->tag);
940 cia = st->Ist.IMark.addr;
941 isize = st->Ist.IMark.len;
942 // If Vex fails to decode an instruction, the size will be zero.
943 // Pretend otherwise.
944 if (isize == 0) isize = VG_MIN_INSTR_SZB;
946 // Set up running state and get block info
947 tl_assert(closure->readdr == vge->base[0]);
949 cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr);
953 VG_(printf)("\n\n---------- cg_instrument ----------\n");
955 // Traverse the block, initialising inodes, adding events and flushing as
957 for (/*use current i*/; i < sbIn->stmts_used; i++) {
960 tl_assert(isFlatIRStmt(st));
// --- IMark: a new guest instruction begins here. ---
971 cia = st->Ist.IMark.addr;
972 isize = st->Ist.IMark.len;
974 // If Vex fails to decode an instruction, the size will be zero.
975 // Pretend otherwise.
976 if (isize == 0) isize = VG_MIN_INSTR_SZB;
978 // Sanity-check size.
979 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
980 || VG_CLREQ_SZB == isize );
982 // Get space for and init the inode, record it as the current one.
983 // Subsequent Dr/Dw/Dm events from the same instruction will
985 curr_inode = setup_InstrInfo(&cgs, cia, isize);
987 addEvent_Ir( &cgs, curr_inode );
// --- WrTmp: a load shows up as a tmp assigned from Iex_Load. ---
991 IRExpr* data = st->Ist.WrTmp.data;
992 if (data->tag == Iex_Load) {
993 IRExpr* aexpr = data->Iex.Load.addr;
994 // Note also, endianness info is ignored. I guess
995 // that's not interesting.
996 addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
// --- Store: a plain data write. ---
1003 IRExpr* data = st->Ist.Store.data;
1004 IRExpr* aexpr = st->Ist.Store.addr;
1005 addEvent_Dw( &cgs, curr_inode,
1006 sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
// --- Dirty helper: may itself read/write guest memory. ---
1012 IRDirty* d = st->Ist.Dirty.details;
1013 if (d->mFx != Ifx_None) {
1014 /* This dirty helper accesses memory. Collect the details. */
1015 tl_assert(d->mAddr != NULL);
1016 tl_assert(d->mSize != 0);
1017 dataSize = d->mSize;
1018 // Large (eg. 28B, 108B, 512B on x86) data-sized
1019 // instructions will be done inaccurately, but they're
1020 // very rare and this avoids errors from hitting more
1021 // than two cache lines in the simulation.
1022 if (dataSize > MIN_LINE_SIZE)
1023 dataSize = MIN_LINE_SIZE;
1024 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1025 addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
1026 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1027 addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
1029 tl_assert(d->mAddr == NULL);
1030 tl_assert(d->mSize == 0);
// --- CAS: modelled as a read plus a write of the location. ---
1036 /* We treat it as a read and a write of the location. I
1037 think that is the same behaviour as it was before IRCAS
1038 was introduced, since prior to that point, the Vex
1039 front ends would translate a lock-prefixed instruction
1040 into a (normal) read followed by a (normal) write. */
1042 IRCAS* cas = st->Ist.CAS.details;
1043 tl_assert(cas->addr != NULL);
1044 tl_assert(cas->dataLo != NULL);
1045 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1046 if (cas->dataHi != NULL)
1047 dataSize *= 2; /* since it's a doubleword-CAS */
1048 /* I don't think this can ever happen, but play safe. */
1049 if (dataSize > MIN_LINE_SIZE)
1050 dataSize = MIN_LINE_SIZE;
1051 addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
1052 addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
// --- LLSC: load-linked is a read, store-conditional a write. ---
1058 if (st->Ist.LLSC.storedata == NULL) {
1060 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1061 addEvent_Dr( &cgs, curr_inode,
1062 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1065 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1066 addEvent_Dw( &cgs, curr_inode,
1067 sizeofIRType(dataTy), st->Ist.LLSC.addr );
// --- Exit: conditional side exit == a conditional branch. ---
1073 /* Stuff to widen the guard expression to a host word, so
1074 we can pass it to the branch predictor simulation
1075 functions easily. */
1079 IRType tyW = hWordTy;
1080 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1081 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1082 IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
1083 IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
1084 IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW);
1085 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1086 : IRExpr_Const(IRConst_U64(1));
1088 /* First we need to figure out whether the side exit got
1089 inverted by the ir optimiser. To do that, figure out
1090 the next (fallthrough) instruction's address and the
1091 side exit address and see if they are the same. */
1092 nia = cia + (Addr64)isize;
// Mask to 32 bits on 32-bit hosts before comparing addresses.
1094 nia &= 0xFFFFFFFFULL;
1096 /* Side exit address */
1097 dst = st->Ist.Exit.dst;
1098 if (tyW == Ity_I32) {
1099 tl_assert(dst->tag == Ico_U32);
1100 sea = (Addr64)(UInt)dst->Ico.U32;
1102 tl_assert(tyW == Ity_I64);
1103 tl_assert(dst->tag == Ico_U64);
1107 inverted = nia == sea;
1109 /* Widen the guard expression. */
1110 addStmtToIRSB( cgs.sbOut,
1111 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1112 addStmtToIRSB( cgs.sbOut,
1113 IRStmt_WrTmp( guardW,
1115 IRExpr_RdTmp(guard1))) );
1116 /* If the exit is inverted, invert the sense of the guard. */
1121 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1122 : IRExpr_RdTmp(guardW)
1124 /* And post the event. */
1125 addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
1127 /* We may never reach the next statement, so need to flush
1128 all outstanding transactions now. */
1129 flushEvents( &cgs );
1138 /* Copy the original statement */
1139 addStmtToIRSB( cgs.sbOut, st );
1147 /* Deal with branches to unknown destinations. Except ignore ones
1148 which are function returns as we assume the return stack
1149 predictor never mispredicts. */
1150 if (sbIn->jumpkind == Ijk_Boring) {
1151 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1152 switch (sbIn->next->tag) {
1154 break; /* boring - branch to known address */
1156 /* looks like an indirect branch (branch to unknown) */
1157 addEvent_Bi( &cgs, curr_inode, sbIn->next );
1160 /* shouldn't happen - if the incoming IR is properly
1161 flattened, should only have tmp and const cases to
1167 /* At the end of the bb. Flush outstandings. */
1168 flushEvents( &cgs );
1170 /* done. stay sane ... */
1171 tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
1174 VG_(printf)( "goto {");
1175 ppIRJumpKind(sbIn->jumpkind);
1177 ppIRExpr( sbIn->next );
1178 VG_(printf)( "}\n");
1184 /*------------------------------------------------------------*/
1185 /*--- Cache configuration ---*/
1186 /*------------------------------------------------------------*/
// Per-level cache geometry from the command line; all-(-1) means "not
// specified", which DEFINED() in configure_caches() tests for.
1188 #define UNDEFINED_CACHE { -1, -1, -1 }
1190 static cache_t clo_I1_cache = UNDEFINED_CACHE;
1191 static cache_t clo_D1_cache = UNDEFINED_CACHE;
1192 static cache_t clo_L2_cache = UNDEFINED_CACHE;
1194 /* Checks cache config is ok; makes it so if not. */
// Validates geometry constraints the simulator depends on: power-of-two
// set count and line size, line size >= MIN_LINE_SIZE, size > line
// size, and assoc <= size/line_size.  Bad configs print a message
// (exit paths are on lines omitted from this listing).
1196 void check_cache(cache_t* cache, Char *name)
1198 /* Simulator requires line size and set count to be powers of two */
1199 if (( cache->size % (cache->line_size * cache->assoc) != 0) ||
1200 (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc))) {
1201 VG_(umsg)("error: %s set count not a power of two; aborting.\n", name);
1205 if (-1 == VG_(log2)(cache->line_size)) {
1206 VG_(umsg)("error: %s line size of %dB not a power of two; aborting.\n",
1207 name, cache->line_size);
1211 // Then check line size >= 16 -- any smaller and a single instruction could
1212 // straddle three cache lines, which breaks a simulation assertion and is
1214 if (cache->line_size < MIN_LINE_SIZE) {
1215 VG_(umsg)("error: %s line size of %dB too small; aborting.\n",
1216 name, cache->line_size);
1220 /* Then check cache size > line size (causes seg faults if not). */
1221 if (cache->size <= cache->line_size) {
1222 VG_(umsg)("error: %s cache size of %dB <= line size of %dB; aborting.\n",
1223 name, cache->size, cache->line_size);
1227 /* Then check assoc <= (size / line size) (seg faults otherwise). */
// NOTE(review): this message says "warning:" yet also "aborting" --
// inconsistent with the "error:" prefix used by the checks above.
1228 if (cache->assoc > (cache->size / cache->line_size)) {
1229 VG_(umsg)("warning: %s associativity > (size / line size); aborting.\n",
// Determine the final I1/D1/L2 geometries: start from auto-detection
// (skipped only when all three were given on the command line), then
// overlay any command-line values, then validate with check_cache().
1236 void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
1238 #define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
1242 // Count how many were defined on the command line.
1243 if (DEFINED(clo_I1_cache)) { n_clos++; }
1244 if (DEFINED(clo_D1_cache)) { n_clos++; }
1245 if (DEFINED(clo_L2_cache)) { n_clos++; }
1247 // Set the cache config (using auto-detection, if supported by the
1249 VG_(configure_caches)( I1c, D1c, L2c, (3 == n_clos) );
1251 // Then replace with any defined on the command line.
1252 if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
1253 if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
1254 if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }
1256 // Then check values and fix if not acceptable.
1257 check_cache(I1c, "I1");
1258 check_cache(D1c, "D1");
1259 check_cache(L2c, "L2");
1261 if (VG_(clo_verbosity) >= 2) {
1262 VG_(umsg)("Cache configuration used:\n");
1263 VG_(umsg)(" I1: %dB, %d-way, %dB lines\n",
1264 I1c->size, I1c->assoc, I1c->line_size);
1265 VG_(umsg)(" D1: %dB, %d-way, %dB lines\n",
1266 D1c->size, D1c->assoc, D1c->line_size);
1267 VG_(umsg)(" L2: %dB, %d-way, %dB lines\n",
1268 L2c->size, L2c->assoc, L2c->line_size);
// NOTE(review): the #undef below names CMD_LINE_DEFINED, but the macro
// defined above is DEFINED, so DEFINED leaks past this function --
// looks like a leftover from a rename; should be '#undef DEFINED'.
1270 #undef CMD_LINE_DEFINED
1273 /*------------------------------------------------------------*/
1274 /*--- cg_fini() and related function ---*/
1275 /*------------------------------------------------------------*/
1277 // Total reads/writes/misses. Calculated during CC traversal at the end.
// One global accumulator per event class: instruction reads, data
// reads, data writes, conditional branches, indirect branches.
1279 static CacheCC Ir_total;
1280 static CacheCC Dr_total;
1281 static CacheCC Dw_total;
1282 static BranchCC Bc_total;
1283 static BranchCC Bi_total;
// Write the whole cost-centre table to the cachegrind output file in
// cg_annotate's format ("desc:"/"cmd:"/"events:" header followed by
// per-source-line CCs and a final "summary:" line), and accumulate the
// global *_total counters as a side effect of the traversal.
1285 static void fprint_CC_table_and_calc_totals(void)
1289 Char buf[512], *currFile = NULL, *currFn = NULL;
1292 // Setup output filename. Nb: it's important to do this now, ie. as late
1293 // as possible. If we do it at start-up and the program forks and the
1294 // output file format string contains a %p (pid) specifier, both the
1295 // parent and child will incorrectly write to the same file; this
1296 // happened in 3.3.0.
1297 Char* cachegrind_out_file =
1298 VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
1300 sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1301 VKI_S_IRUSR|VKI_S_IWUSR);
1302 if (sr_isError(sres)) {
1303 // If the file can't be opened for whatever reason (conflict
1304 // between multiple cachegrinded processes?), give up now.
1305 VG_(umsg)("error: can't open cache simulation output file '%s'\n",
1306 cachegrind_out_file );
1307 VG_(umsg)(" ... so simulation results will be missing.\n");
1308 VG_(free)(cachegrind_out_file);
// The filename string is heap-allocated by VG_(expand_file_name) and is
// no longer needed once the file is open, so free it on both paths.
1312 VG_(free)(cachegrind_out_file);
1315 // "desc:" lines (giving I1/D1/L2 cache configuration). The spaces after
1316 // the 2nd colon makes cg_annotate's output look nicer.
1317 VG_(sprintf)(buf, "desc: I1 cache: %s\n"
1318 "desc: D1 cache: %s\n"
1319 "desc: L2 cache: %s\n",
1320 I1.desc_line, D1.desc_line, L2.desc_line);
1321 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
// "cmd:" line: the client executable name followed by its arguments.
1324 VG_(strcpy)(buf, "cmd:");
1325 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1326 if (VG_(args_the_exename)) {
1327 VG_(write)(fd, " ", 1);
1328 VG_(write)(fd, VG_(args_the_exename),
1329 VG_(strlen)( VG_(args_the_exename) ));
1331 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1332 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1334 VG_(write)(fd, " ", 1);
1335 VG_(write)(fd, arg, VG_(strlen)( arg ));
// "events:" line: names the per-CC counters.  Which counters appear
// depends on which of the two simulations are enabled; both off is
// rejected during option processing, hence the assert below.
1339 if (clo_cache_sim && clo_branch_sim) {
1340 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
1343 else if (clo_cache_sim && !clo_branch_sim) {
1344 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
1347 else if (!clo_cache_sim && clo_branch_sim) {
1348 VG_(sprintf)(buf, "\nevents: Ir "
1352 tl_assert(0); /* can't happen */
1354 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1356 // Traverse every lineCC
1357 VG_(OSetGen_ResetIter)(CC_table);
1358 while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
1359 Bool just_hit_a_new_file = False;
1360 // If we've hit a new file, print a "fl=" line. Note that because
1361 // each string is stored exactly once in the string table, we can use
1362 // pointer comparison rather than strcmp() to test for equality, which
1363 // is good because most of the time the comparisons are equal and so
1364 // the whole strings would have to be checked.
1365 if ( lineCC->loc.file != currFile ) {
1366 currFile = lineCC->loc.file;
1367 VG_(sprintf)(buf, "fl=%s\n", currFile);
1368 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1370 just_hit_a_new_file = True;
1372 // If we've hit a new function, print a "fn=" line. We know to do
1373 // this when the function name changes, and also every time we hit a
1374 // new file (in which case the new function name might be the same as
1375 // in the old file, hence the just_hit_a_new_file test).
1376 if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
1377 currFn = lineCC->loc.fn;
1378 VG_(sprintf)(buf, "fn=%s\n", currFn);
1379 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
// Print this line's CC; field order must match the "events:" header
// chosen above, so the same three-way split is repeated here.
1384 if (clo_cache_sim && clo_branch_sim) {
1385 VG_(sprintf)(buf, "%u %llu %llu %llu"
1388 " %llu %llu %llu %llu\n",
1390 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2,
1391 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
1392 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2,
1393 lineCC->Bc.b, lineCC->Bc.mp,
1394 lineCC->Bi.b, lineCC->Bi.mp);
1396 else if (clo_cache_sim && !clo_branch_sim) {
1397 VG_(sprintf)(buf, "%u %llu %llu %llu"
1399 " %llu %llu %llu\n",
1401 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2,
1402 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
1403 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2);
1405 else if (!clo_cache_sim && clo_branch_sim) {
1406 VG_(sprintf)(buf, "%u %llu"
1407 " %llu %llu %llu %llu\n",
1410 lineCC->Bc.b, lineCC->Bc.mp,
1411 lineCC->Bi.b, lineCC->Bi.mp);
1416 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1418 // Update summary stats
1419 Ir_total.a += lineCC->Ir.a;
1420 Ir_total.m1 += lineCC->Ir.m1;
1421 Ir_total.m2 += lineCC->Ir.m2;
1422 Dr_total.a += lineCC->Dr.a;
1423 Dr_total.m1 += lineCC->Dr.m1;
1424 Dr_total.m2 += lineCC->Dr.m2;
1425 Dw_total.a += lineCC->Dw.a;
1426 Dw_total.m1 += lineCC->Dw.m1;
1427 Dw_total.m2 += lineCC->Dw.m2;
1428 Bc_total.b += lineCC->Bc.b;
1429 Bc_total.mp += lineCC->Bc.mp;
1430 Bi_total.b += lineCC->Bi.b;
1431 Bi_total.mp += lineCC->Bi.mp;
1436 // Summary stats must come after rest of table, since we calculate them
1437 // during traversal. */
1438 if (clo_cache_sim && clo_branch_sim) {
1439 VG_(sprintf)(buf, "summary:"
1443 " %llu %llu %llu %llu\n",
1444 Ir_total.a, Ir_total.m1, Ir_total.m2,
1445 Dr_total.a, Dr_total.m1, Dr_total.m2,
1446 Dw_total.a, Dw_total.m1, Dw_total.m2,
1447 Bc_total.b, Bc_total.mp,
1448 Bi_total.b, Bi_total.mp);
1450 else if (clo_cache_sim && !clo_branch_sim) {
1451 VG_(sprintf)(buf, "summary:"
1454 " %llu %llu %llu\n",
1455 Ir_total.a, Ir_total.m1, Ir_total.m2,
1456 Dr_total.a, Dr_total.m1, Dr_total.m2,
1457 Dw_total.a, Dw_total.m1, Dw_total.m2);
1459 else if (!clo_cache_sim && clo_branch_sim) {
1460 VG_(sprintf)(buf, "summary:"
1462 " %llu %llu %llu %llu\n",
1464 Bc_total.b, Bc_total.mp,
1465 Bi_total.b, Bi_total.mp);
1470 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
// Return the width in characters of 'n' printed in decimal with ","
// thousands separators (as produced by the "%,llu" formats used by
// cg_fini to right-align its columns).
1474 static UInt ULong_width(ULong n)
// w holds the plain decimal digit count of n; one separator character
// is needed for every complete group of three digits beyond the first.
1482 return w + (w-1)/3; // add space for commas
// Tool finalisation: dump the CC table to the output file (which also
// computes the global totals), then print a human-readable summary of
// cache and/or branch results, plus internal stats under --stats=yes.
1485 static void cg_fini(Int exitcode)
// NOTE(review): buf4[123] looks like a typo for [128] — harmless as
// long as percentify output stays short, but confirm.
1487 static Char buf1[128], buf2[128], buf3[128], buf4[123], fmt[128];
1491 ULong L2_total_m, L2_total_mr, L2_total_mw,
1492 L2_total, L2_total_r, L2_total_w;
1495 /* Running with both cache and branch simulation disabled is not
1496 allowed (checked during command line option processing). */
1497 tl_assert(clo_cache_sim || clo_branch_sim);
1499 fprint_CC_table_and_calc_totals();
1501 if (VG_(clo_verbosity) == 0)
1504 // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1505 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))
1507 /* I cache results. Use the I_refs value to determine the first column
// Column widths: l1 from the I-ref count, l2/l3 from whichever of the
// data/branch counts is larger, so all report lines line up.
1509 l1 = ULong_width(Ir_total.a);
1510 l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
1511 l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
1513 /* Make format string, getting width right for numbers */
1514 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
1516 /* Always print this */
1517 VG_(umsg)(fmt, "I refs: ", Ir_total.a);
1519 /* If cache profiling is enabled, show D access numbers and all
1521 if (clo_cache_sim) {
1522 VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1);
1523 VG_(umsg)(fmt, "L2i misses: ", Ir_total.m2);
// Clamp zero denominators to 1 to avoid division by zero in the
// percentages.  This mutates the global totals, which is acceptable
// only because nothing reads them after this point.
1525 if (0 == Ir_total.a) Ir_total.a = 1;
1526 VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
1527 VG_(umsg)("I1 miss rate: %s\n", buf1);
1529 VG_(percentify)(Ir_total.m2, Ir_total.a, 2, l1+1, buf1);
1530 VG_(umsg)("L2i miss rate: %s\n", buf1);
1533 /* D cache results. Use the D_refs.rd and D_refs.wr values to
1534 * determine the width of columns 2 & 3. */
1535 D_total.a = Dr_total.a + Dw_total.a;
1536 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1537 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1539 /* Make format string, getting width right for numbers */
1540 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
1543 VG_(umsg)(fmt, "D refs: ",
1544 D_total.a, Dr_total.a, Dw_total.a);
1545 VG_(umsg)(fmt, "D1 misses: ",
1546 D_total.m1, Dr_total.m1, Dw_total.m1);
1547 VG_(umsg)(fmt, "L2d misses: ",
1548 D_total.m2, Dr_total.m2, Dw_total.m2);
1550 if (0 == D_total.a) D_total.a = 1;
1551 if (0 == Dr_total.a) Dr_total.a = 1;
1552 if (0 == Dw_total.a) Dw_total.a = 1;
1553 VG_(percentify)( D_total.m1, D_total.a, 1, l1+1, buf1);
1554 VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
1555 VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
1556 VG_(umsg)("D1 miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
1558 VG_(percentify)( D_total.m2, D_total.a, 1, l1+1, buf1);
1559 VG_(percentify)(Dr_total.m2, Dr_total.a, 1, l2+1, buf2);
1560 VG_(percentify)(Dw_total.m2, Dw_total.a, 1, l3+1, buf3);
1561 VG_(umsg)("L2d miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
1564 /* L2 overall results */
// L2 references are exactly the L1 misses (I1 + D1), split into the
// read-like (instruction + data reads) and write components.
1566 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1567 L2_total_r = Dr_total.m1 + Ir_total.m1;
1568 L2_total_w = Dw_total.m1;
1569 VG_(umsg)(fmt, "L2 refs: ",
1570 L2_total, L2_total_r, L2_total_w);
1572 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1573 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1574 L2_total_mw = Dw_total.m2;
1575 VG_(umsg)(fmt, "L2 misses: ",
1576 L2_total_m, L2_total_mr, L2_total_mw);
// L2 miss rates are relative to the total number of memory accesses,
// not the number of L2 references.
1578 VG_(percentify)(L2_total_m, (Ir_total.a + D_total.a), 1, l1+1, buf1);
1579 VG_(percentify)(L2_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
1580 VG_(percentify)(L2_total_mw, Dw_total.a, 1, l3+1, buf3);
1581 VG_(umsg)("L2 miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
1584 /* If branch profiling is enabled, show branch overall results. */
1585 if (clo_branch_sim) {
1586 /* Make format string, getting width right for numbers */
1587 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1590 if (0 == Bc_total.b) Bc_total.b = 1;
1591 if (0 == Bi_total.b) Bi_total.b = 1;
1592 B_total.b = Bc_total.b + Bi_total.b;
1593 B_total.mp = Bc_total.mp + Bi_total.mp;
1596 VG_(umsg)(fmt, "Branches: ",
1597 B_total.b, Bc_total.b, Bi_total.b);
1599 VG_(umsg)(fmt, "Mispredicts: ",
1600 B_total.mp, Bc_total.mp, Bi_total.mp);
1602 VG_(percentify)(B_total.mp, B_total.b, 1, l1+1, buf1);
1603 VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
1604 VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);
1606 VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3);
// Internal statistics, printed only under --stats=yes.
1610 if (VG_(clo_stats)) {
1611 Int debug_lookups = full_debugs + fn_debugs +
1612 file_line_debugs + no_debugs;
1615 VG_(dmsg)("cachegrind: distinct files: %d\n", distinct_files);
1616 VG_(dmsg)("cachegrind: distinct fns: %d\n", distinct_fns);
1617 VG_(dmsg)("cachegrind: distinct lines: %d\n", distinct_lines);
1618 VG_(dmsg)("cachegrind: distinct instrs:%d\n", distinct_instrs);
1619 VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups);
1621 VG_(percentify)(full_debugs, debug_lookups, 1, 6, buf1);
1622 VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
1623 VG_(percentify)(fn_debugs, debug_lookups, 1, 6, buf3);
1624 VG_(percentify)(no_debugs, debug_lookups, 1, 6, buf4);
1625 VG_(dmsg)("cachegrind: with full info:%s (%d)\n",
1627 VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n",
1628 buf2, file_line_debugs);
1629 VG_(dmsg)("cachegrind: with fn name info:%s (%d)\n",
1631 VG_(dmsg)("cachegrind: with zero info:%s (%d)\n",
1634 VG_(dmsg)("cachegrind: string table size: %lu\n",
1635 VG_(OSetGen_Size)(stringTable));
1636 VG_(dmsg)("cachegrind: CC table size: %lu\n",
1637 VG_(OSetGen_Size)(CC_table));
1638 VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
1639 VG_(OSetGen_Size)(instrInfoTable));
1643 /*--------------------------------------------------------------------*/
1644 /*--- Discarding BB info ---*/
1645 /*--------------------------------------------------------------------*/
1647 // Called when a translation is removed from the translation cache for
1648 // any reason at all: to free up space, because the guest code was
1649 // unmapped or modified, or for any arbitrary reason.
// Remove and free the instr-info node for a discarded translation.
// The removal key is vge.base[0], the first guest address of the
// superblock; it must match the key used at insertion time.  Note the
// orig_addr64 parameter itself is NOT used for the lookup.
1651 void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
1654 Addr orig_addr = (Addr)vge.base[0];
1656 tl_assert(vge.n_used > 0);
1659 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
1660 (void*)(Addr)orig_addr,
1661 (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
1663 // Get BB info, remove from table, free BB info. Simple! Note that we
1664 // key on vge.base[0] (copied into orig_addr above), not on orig_addr64.
1665 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
1666 tl_assert(NULL != sbInfo);
1667 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
1670 /*--------------------------------------------------------------------*/
1671 /*--- Command line processing ---*/
1672 /*--------------------------------------------------------------------*/
// Parse a "--I1/--D1/--L2" option argument of the form
// "<size>,<assoc>,<line_size>" into *cache.  On malformed input or on
// values that do not fit in an Int, reports via VG_(umsg)/
// VG_(err_bad_option) rather than returning an error code.
1674 static void parse_cache_opt ( cache_t* cache, Char* opt )
1679 // Option argument looks like "65536,2,64". Extract them.
1680 i1 = VG_(strtoll10)(opt, &endptr); if (*endptr != ',') goto bad;
1681 i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',') goto bad;
1682 i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
1684 // Check for overflow.
// The values were parsed into wider integers; assigning to the Int
// fields and comparing back detects any truncation.
1685 cache->size = (Int)i1;
1686 cache->assoc = (Int)i2;
1687 cache->line_size = (Int)i3;
1688 if (cache->size != i1) goto overflow;
1689 if (cache->assoc != i2) goto overflow;
1690 if (cache->line_size != i3) goto overflow;
1695 VG_(umsg)("one of the cache parameters was too large and overflowed\n");
1697 // XXX: this omits the "--I1/D1/L2=" part from the message, but that's
1699 VG_(err_bad_option)(opt);
// Handle one Cachegrind-specific command-line option; returns True iff
// the option was recognised (core handles the rest).
1702 static Bool cg_process_cmd_line_option(Char* arg)
1706 // VG_STR_CLO matches "--I1=<string>" and leaves the value in tmp_str.
1707 if VG_STR_CLO(arg, "--I1", tmp_str)
1708 parse_cache_opt(&clo_I1_cache, tmp_str);
1709 else if VG_STR_CLO(arg, "--D1", tmp_str)
1710 parse_cache_opt(&clo_D1_cache, tmp_str);
1711 else if VG_STR_CLO(arg, "--L2", tmp_str)
1712 parse_cache_opt(&clo_L2_cache, tmp_str);
1714 else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1715 else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {}
1716 else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
// Print the tool-specific portion of the --help text.  Nb: "%%p" is
// doubled because this text goes through a printf-style formatter.
1723 static void cg_print_usage(void)
1726 " --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1727 " --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
1728 " --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1729 " --cache-sim=yes|no [yes] collect cache stats?\n"
1730 " --branch-sim=yes|no [no] collect branch prediction stats?\n"
1731 " --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
1735 static void cg_print_debug_usage(void)
1742 /*--------------------------------------------------------------------*/
1744 /*--------------------------------------------------------------------*/
1746 static void cg_post_clo_init(void); /* just below */
// Tool registration, run before command-line option processing: fill in
// the tool "details" block and register the basic tool functions plus
// the callbacks Cachegrind needs (superblock discards, option handling,
// usage printing).
1748 static void cg_pre_clo_init(void)
1750 VG_(details_name) ("Cachegrind");
1751 VG_(details_version) (NULL);
1752 VG_(details_description) ("a cache and branch-prediction profiler");
1753 VG_(details_copyright_author)(
1754 "Copyright (C) 2002-2010, and GNU GPL'd, by Nicholas Nethercote et al.");
1755 VG_(details_bug_reports_to) (VG_BUGS_TO);
// Hint to the core about average translation size, for allocation.
1756 VG_(details_avg_translation_sizeB) ( 500 );
1758 VG_(basic_tool_funcs) (cg_post_clo_init,
// Must be told when translations are discarded so the per-SB instr-info
// can be freed (see cg_discard_superblock_info).
1762 VG_(needs_superblock_discards)(cg_discard_superblock_info);
1763 VG_(needs_command_line_options)(cg_process_cmd_line_option,
1765 cg_print_debug_usage);
// Initialisation run after option processing: validate the simulation
// switches, create the OSet tables, and initialise the three cache
// simulators from the configured geometries.
1768 static void cg_post_clo_init(void)
1770 cache_t I1c, D1c, L2c;
1772 /* Can't disable both cache and branch profiling */
1773 if ((!clo_cache_sim) && (!clo_branch_sim)) {
1774 VG_(umsg)("ERROR: --cache-sim=no --branch-sim=no is not allowed.\n");
1775 VG_(umsg)("You must select cache profiling, "
1776 "or branch profiling, or both.\n");
// Three tables are created below; the first is keyed on the LineCC's
// embedded CodeLoc.  NOTE(review): the assignment targets (presumably
// CC_table, instrInfoTable, stringTable, matching the "cg.main.cpci.N"
// cost-centre names) are on lines not shown here — confirm.
1781 VG_(OSetGen_Create)(offsetof(LineCC, loc),
1783 VG_(malloc), "cg.main.cpci.1",
1786 VG_(OSetGen_Create)(/*keyOff*/0,
1788 VG_(malloc), "cg.main.cpci.2",
1791 VG_(OSetGen_Create)(/*keyOff*/0,
1793 VG_(malloc), "cg.main.cpci.3",
1796 configure_caches(&I1c, &D1c, &L2c);
1798 cachesim_I1_initcache(I1c);
1799 cachesim_D1_initcache(D1c);
1800 cachesim_L2_initcache(L2c);
1803 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1805 /*--------------------------------------------------------------------*/
1807 /*--------------------------------------------------------------------*/