2 /*--------------------------------------------------------------------*/
3 /*--- Management of the translation table and cache. ---*/
4 /*--- m_transtab.c ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2000-2010 Julian Seward
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_core_basics.h"
33 #include "pub_core_debuglog.h"
34 #include "pub_core_machine.h" // For VG(machine_get_VexArchInfo)
35 #include "pub_core_libcbase.h"
36 #include "pub_core_libcassert.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_options.h"
39 #include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB
40 #include "pub_core_transtab.h"
41 #include "pub_core_aspacemgr.h"
42 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
44 // JRS FIXME get rid of this somehow
45 #if defined(VGP_arm_linux)
46 # include "pub_core_vkiscnums.h" // __ARM_NR_cacheflush
47 # include "pub_core_syscall.h" // VG_(do_syscallN)
51 /* #define DEBUG_TRANSTAB */
54 /*-------------------------------------------------------------*/
55 /*--- Management of the FIFO-based translation table+cache. ---*/
56 /*-------------------------------------------------------------*/
58 /*------------------ CONSTANTS ------------------*/
60 /* Number of sectors the TC is divided into. If you need a larger
61 overall translation cache, increase this value. */
64 /* Number of TC entries in each sector. This needs to be a prime
65 number to work properly, it must be <= 65535 (so that a TT index
66 fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
67 'deleted') and it is strongly recommended not to change this.
68 65521 is the largest prime <= 65535. */
69 #define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
71 /* Because each sector contains a hash table of TTEntries, we need to
72 specify the maximum allowable loading, after which the sector is
74 #define SECTOR_TT_LIMIT_PERCENT 65
76 /* The sector is deemed full when this many entries are in it. */
77 #define N_TTES_PER_SECTOR_USABLE \
78 ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
80 /* Equivalence classes for fast address range deletion. There are 1 +
81 2^ECLASS_WIDTH bins. The highest one, ECLASS_MISC, describes an
82 address range which does not fall cleanly within any specific bin.
83 Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
84 #define ECLASS_SHIFT 11
85 #define ECLASS_WIDTH 8
86 #define ECLASS_MISC (1 << ECLASS_WIDTH)
87 #define ECLASS_N (1 + ECLASS_MISC)
89 #define EC2TTE_DELETED 0xFFFF /* 16-bit special value */
92 /*------------------ TYPES ------------------*/
94 /* A translation-table entry. This indicates precisely which areas of
95 guest code are included in the translation, and contains all other
96 auxiliary info too. */
99 /* Profiling only: the count and weight (arbitrary meaning) for
100 this translation. Weight is a property of the translation
101 itself and computed once when the translation is created.
102 Count is an entry count for the translation and is
103 incremented by 1 every time the translation is used, if we
108 /* Status of the slot. Note, we need to be able to do lazy
109 deletion, hence the Deleted state. */
110 enum { InUse, Deleted, Empty } status;
112 /* 64-bit aligned pointer to one or more 64-bit words containing
113 the corresponding host code (must be in the same sector!)
114 This is a pointer into the sector's tc (code) area. */
117 /* This is the original guest address that purportedly is the
118 entry point of the translation. You might think that .entry
119 should be the same as .vge->base[0], and most of the time it
120 is. However, when doing redirections, that is not the case.
121 .vge must always correctly describe the guest code sections
122 from which this translation was made. However, .entry may or
123 may not be a lie, depending on whether or not we're doing
127 /* This structure describes precisely what ranges of guest code
128 the translation covers, so we can decide whether or not to
129 delete it when translations of a given address range are
133 /* Address range summary info: these are pointers back to
134 eclass[] entries in the containing Sector. Those entries in
135 turn point back here -- the two structures are mutually
136 redundant but both necessary to make fast deletions work.
137 The eclass info is similar to, and derived from, this entry's
138 'vge' field, but it is not the same */
139 UShort n_tte2ec; // # tte2ec pointers (1 to 3)
140 UShort tte2ec_ec[3]; // for each, the eclass #
141 UInt tte2ec_ix[3]; // and the index within the eclass.
142 // for i in 0 .. n_tte2ec-1
143 // sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
144 // should be the index
145 // of this TTEntry in the containing Sector's tt array.
150 /* Finally, a sector itself. Each sector contains an array of
151 TCEntries, which hold code, and an array of TTEntries, containing
152 all required administrative info. Profiling is supported using the
153 TTEntry .count and .weight fields, if required. Each sector is
154 independent in that no cross-sector references are allowed.
156 If the sector is not in use, all three pointers are NULL and
161 /* The TCEntry area. Size of this depends on the average
162 translation size. We try and size it so it becomes full
163 precisely when this sector's translation table (tt) reaches
164 its load limit (SECTOR_TT_LIMIT_PERCENT). */
167 /* The TTEntry array. This is a fixed size, always containing
168 exactly N_TTES_PER_SECTOR entries. */
171 /* This points to the current allocation point in tc. */
174 /* The count of tt entries with state InUse. */
177 /* Expandable arrays of tt indices for each of the ECLASS_N
178 address range equivalence classes. These hold indices into
179 the containing sector's tt array, which in turn should point
181 Int ec2tte_size[ECLASS_N];
182 Int ec2tte_used[ECLASS_N];
183 UShort* ec2tte[ECLASS_N];
188 /*------------------ DECLS ------------------*/
190 /* The root data structure is an array of sectors. The index of the
191 youngest sector is recorded, and new translations are put into that
192 sector. When it fills up, we move along to the next sector and
193 start to fill that up, wrapping around at the end of the array.
194 That way, once all N_TC_SECTORS have been bought into use for the
195 first time, and are full, we then re-use the oldest sector,
198 When running, youngest sector should be between >= 0 and <
199 N_TC_SECTORS. The initial -1 value indicates the TT/TC system is
202 static Sector sectors[N_SECTORS];
203 static Int youngest_sector = -1;
205 /* The number of ULongs in each TCEntry area. This is computed once
206 at startup and does not change. */
207 static Int tc_sector_szQ;
210 /* A list of sector numbers, in the order which they should be
211 searched to find translations. This is an optimisation to be used
212 when searching for translations and should not affect
213 correctness. -1 denotes "no entry". */
214 static Int sector_search_order[N_SECTORS];
217 /* Fast helper for the TC. A direct-mapped cache which holds a set of
218 recently used (guest address, host address) pairs. This array is
219 referred to directly from m_dispatch/dispatch-<platform>.S.
221 Entries in tt_fast may refer to any valid TC entry, regardless of
222 which sector it's in. Consequently we must be very careful to
223 invalidate this cache when TC entries are changed or disappear.
225 A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
226 pointed at to cause that cache entry to miss. This relies on the
227 assumption that no guest code actually has that address, hence a
228 value 0x1 seems good. m_translate gives the client a synthetic
229 segfault if it tries to execute at this address.
239 /*global*/ __attribute__((aligned(16)))
240 FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
242 #define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
245 /* For profiling, we have a parallel array of pointers to .count
246 fields in TT entries. Again, these pointers must be invalidated
247 when translations disappear. A NULL pointer suffices to indicate
250 When not profiling (the normal case, VG_(clo_profile_flags) == 0),
251 all tt_fastN entries are set to NULL at startup and never read nor
254 When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
255 change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
256 then the corresponding tt_fastN[i] must be null. If
257 tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
258 to the .count field of the corresponding TT entry.
260 tt_fast and tt_fastN are referred to from assembly code
263 /*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
266 /* Make sure we're not used before initialisation. */
267 static Bool init_done = False;
270 /*------------------ STATS DECLS ------------------*/
272 /* Number of fast-cache updates and flushes done. */
273 ULong n_fast_flushes = 0;
274 ULong n_fast_updates = 0;
276 /* Number of full lookups done. */
277 ULong n_full_lookups = 0;
278 ULong n_lookup_probes = 0;
280 /* Number/osize/tsize of translations entered; also the number of
281 those for which self-checking was requested. */
282 ULong n_in_count = 0;
283 ULong n_in_osize = 0;
284 ULong n_in_tsize = 0;
285 ULong n_in_sc_count = 0;
287 /* Number/osize of translations discarded due to lack of space. */
288 ULong n_dump_count = 0;
289 ULong n_dump_osize = 0;
291 /* Number/osize of translations discarded due to requests to do so. */
292 ULong n_disc_count = 0;
293 ULong n_disc_osize = 0;
296 /*-------------------------------------------------------------*/
297 /*--- Address-range equivalence class stuff ---*/
298 /*-------------------------------------------------------------*/
300 /* Return equivalence class number for a range. */
/* Maps guest range [start, start+len-1] to a bin by comparing bits
   [ECLASS_SHIFT .. ECLASS_SHIFT+ECLASS_WIDTH-1] of the low and high
   addresses.  Only the low 32 bits of 'start' are used (the cast to
   UInt truncates).  NOTE(review): this listing has dropped lines (the
   opening brace and return paths are missing); code left byte-identical. */
302 static Int range_to_eclass ( Addr64 start, UInt len )
304 UInt mask = (1 << ECLASS_WIDTH) - 1;
305 UInt lo = (UInt)start;
/* hi may wrap if len == 0; caller presumably guarantees len >= 1 --
   TODO confirm from call sites. */
306 UInt hi = lo + len - 1;
307 UInt loBits = (lo >> ECLASS_SHIFT) & mask;
308 UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
/* Range fits in a single bin only when low/high map to the same bits. */
309 if (loBits == hiBits) {
310 vg_assert(loBits < ECLASS_N-1);
318 /* Calculates the equivalence class numbers for any VexGuestExtent.
319 These are written in *eclasses, which must be big enough to hold 3
320 Ints. The number written, between 1 and 3, is returned. The
321 eclasses are presented in order, and any duplicates are removed.
325 Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
326 VexGuestExtents* vge )
/* Local swap helper for the sorting network below. */
328 # define SWAP(_lv1,_lv2) \
329 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
333 vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
/* Compute the eclass of each extent, collecting only distinct values. */
336 for (i = 0; i < vge->n_used; i++) {
337 r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
/* An extent that doesn't fit a single bin forces the MISC class
   (handling not fully visible in this listing). */
338 if (r == ECLASS_MISC)
340 /* only add if we haven't already seen it */
341 for (j = 0; j < n_ec; j++)
342 if (eclasses[j] == r)
345 eclasses[n_ec++] = r;
/* Sort the (at most 3) collected eclasses into ascending order:
   2-element case first ... */
353 if (eclasses[0] > eclasses[1])
354 SWAP(eclasses[0], eclasses[1]);
/* ... then a 3-element sorting network. */
360 if (eclasses[0] > eclasses[2])
361 SWAP(eclasses[0], eclasses[2]);
362 if (eclasses[0] > eclasses[1])
363 SWAP(eclasses[0], eclasses[1]);
364 if (eclasses[1] > eclasses[2])
365 SWAP(eclasses[1], eclasses[2]);
/* Fallback: collapse everything into the catch-all MISC class. */
373 eclasses[0] = ECLASS_MISC;
380 /* Add tteno to the set of entries listed for equivalence class ec in
381 this sector. Returns used location in eclass array. */
384 UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
386 Int old_sz, new_sz, i, r;
387 UShort *old_ar, *new_ar;
389 vg_assert(ec >= 0 && ec < ECLASS_N);
390 vg_assert(tteno < N_TTES_PER_SECTOR);
392 if (0) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno);
/* Array full: grow it before appending. */
394 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
396 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
398 old_sz = sec->ec2tte_size[ec];
399 old_ar = sec->ec2tte[ec];
/* Growth policy: start at 8, double while small (<64), then grow
   by 1.5x to limit waste. */
400 new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
401 new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
402 new_sz * sizeof(UShort));
403 for (i = 0; i < old_sz; i++)
404 new_ar[i] = old_ar[i];
/* NOTE(review): relies on VG_(arena_free) tolerating old_ar == NULL
   on the very first growth -- confirm against m_mallocfree. */
406 VG_(arena_free)(VG_AR_TTAUX, old_ar);
407 sec->ec2tte_size[ec] = new_sz;
408 sec->ec2tte[ec] = new_ar;
410 if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
/* Append and return the slot index used. */
414 r = sec->ec2tte_used[ec]++;
415 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
416 sec->ec2tte[ec][r] = tteno;
421 /* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate
422 eclass entries to 'sec'. */
425 void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
427 Int i, r, eclasses[3];
429 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
431 tte = &sec->tt[tteno];
/* Derive 1..3 distinct, sorted eclass numbers from the entry's extents. */
432 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
434 vg_assert(r >= 1 && r <= 3);
/* Record both directions of the tte <-> eclass mapping: the forward
   links here, and the back-links via addEClassNo(). */
437 for (i = 0; i < r; i++) {
438 tte->tte2ec_ec[i] = eclasses[i];
439 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
444 /* Check the eclass info in 'sec' to ensure it is consistent. Returns
445 True if OK, False if something's not right. Expensive. */
447 static Bool sanity_check_eclasses_in_sector ( Sector* sec )
/* On failure, record a diagnostic string and jump to the reporting code. */
449 # define BAD(_str) do { whassup = (_str); goto bad; } while (0)
451 HChar* whassup = NULL;
452 Int i, j, k, n, ec_num, ec_idx;
457 /* Basic checks on this sector */
458 if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
459 BAD("invalid sec->tt_n_inuse");
461 if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
462 BAD("sec->tc_next points outside tc");
464 /* For each eclass ... */
465 for (i = 0; i < ECLASS_N; i++) {
466 if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
467 BAD("ec2tte_size/ec2tte mismatch(1)");
468 if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
469 BAD("ec2tte_size/ec2tte mismatch(2)");
470 if (sec->ec2tte_used[i] < 0
471 || sec->ec2tte_used[i] > sec->ec2tte_size[i])
472 BAD("implausible ec2tte_used");
473 if (sec->ec2tte_used[i] == 0)
476 /* For each tt reference in each eclass .. ensure the reference
477 is to a valid tt entry, and that the entry's address ranges
478 really include this eclass. */
480 for (j = 0; j < sec->ec2tte_used[i]; j++) {
481 tteno = sec->ec2tte[i][j];
/* Lazily-deleted slot: skip (skip logic not visible in this listing). */
482 if (tteno == EC2TTE_DELETED)
484 if (tteno >= N_TTES_PER_SECTOR)
485 BAD("implausible tteno");
486 tte = &sec->tt[tteno];
487 if (tte->status != InUse)
488 BAD("tteno points to non-inuse tte");
489 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
490 BAD("tte->n_tte2ec out of range");
491 /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
492 must equal i. Inspect tte's eclass info. */
494 for (k = 0; k < tte->n_tte2ec; k++) {
/* Forward links must be strictly ascending (duplicates forbidden). */
495 if (k < tte->n_tte2ec-1
496 && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
497 BAD("tte->tte2ec_ec[..] out of order");
498 ec_num = tte->tte2ec_ec[k];
499 if (ec_num < 0 || ec_num >= ECLASS_N)
500 BAD("tte->tte2ec_ec[..] out of range");
503 ec_idx = tte->tte2ec_ix[k];
504 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
505 BAD("tte->tte2ec_ix[..] out of range");
510 BAD("tteno does not point back at eclass");
514 /* That establishes that for each forward pointer from TTEntrys
515 there is a corresponding backward pointer from the eclass[]
516 arrays. However, it doesn't rule out the possibility of other,
517 bogus pointers in the eclass[] arrays. So do those similarly:
518 scan through them and check the TTEntries they point at point
521 for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {
524 if (tte->status == Empty || tte->status == Deleted) {
525 if (tte->n_tte2ec != 0)
526 BAD("tte->n_eclasses nonzero for unused tte");
530 vg_assert(tte->status == InUse);
532 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
533 BAD("tte->n_eclasses out of range(2)");
535 for (j = 0; j < tte->n_tte2ec; j++) {
536 ec_num = tte->tte2ec_ec[j];
537 if (ec_num < 0 || ec_num >= ECLASS_N)
538 BAD("tte->eclass[..] out of range");
539 ec_idx = tte->tte2ec_ix[j];
540 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
541 BAD("tte->ec_idx[..] out of range(2)");
/* Round-trip check: the eclass slot must name this very tt index. */
542 if (sec->ec2tte[ec_num][ec_idx] != i)
543 BAD("ec2tte does not point back to tte");
/* Failure reporting path ('bad' label itself not visible in listing):
   dump the diagnostic plus whatever state is at hand. */
551 VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
554 VG_(printf)("eclass = %d\n", i);
555 VG_(printf)("tteno = %d\n", (Int)tteno);
556 switch (tte->status) {
557 case InUse: VG_(printf)("InUse\n"); break;
558 case Deleted: VG_(printf)("Deleted\n"); break;
559 case Empty: VG_(printf)("Empty\n"); break;
561 if (tte->status != Empty) {
562 for (k = 0; k < tte->vge.n_used; k++)
563 VG_(printf)("0x%llx %d\n", tte->vge.base[k],
564 (Int)tte->vge.len[k]);
574 /* Sanity check absolutely everything. True == check passed. */
577 static Bool sanity_check_redir_tt_tc ( void );
578 static Bool sanity_check_fastcache ( void );
/* Validate sector_search_order[]: it must be a prefix of valid,
   pairwise-distinct sector numbers followed only by -1 entries, and the
   number of listed sectors must equal the number of sectors in use. */
580 static Bool sanity_check_sector_search_order ( void )
583 /* assert the array is the right size */
584 vg_assert(N_SECTORS == (sizeof(sector_search_order)
585 / sizeof(sector_search_order[0])));
586 /* Check it's of the form valid_sector_numbers ++ [-1, -1, ..] */
587 for (i = 0; i < N_SECTORS; i++) {
588 if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
/* After the first -1 (or invalid) entry, everything must be -1. */
592 for (/* */; i < N_SECTORS; i++) {
593 if (sector_search_order[i] != -1)
598 /* Check each sector number only appears once */
599 for (i = 0; i < N_SECTORS; i++) {
600 if (sector_search_order[i] == -1)
602 for (j = i+1; j < N_SECTORS; j++) {
603 if (sector_search_order[j] == sector_search_order[i])
607 /* Check that the number of listed sectors equals the number
608 in use, by counting nListed back down. */
609 for (i = 0; i < N_SECTORS; i++) {
610 if (sectors[i].tc != NULL)
/* Top-level sanity check: every in-use sector's eclass info, the
   redirection TT/TC, the fast cache, and the search order. */
618 static Bool sanity_check_all_sectors ( void )
623 for (sno = 0; sno < N_SECTORS; sno++) {
627 sane = sanity_check_eclasses_in_sector( sec );
631 if ( !sanity_check_redir_tt_tc() )
633 if ( !sanity_check_fastcache() )
635 if ( !sanity_check_sector_search_order() )
642 /*-------------------------------------------------------------*/
643 /*--- Add/find translations ---*/
644 /*-------------------------------------------------------------*/
/* Total original (guest) size in bytes covered by a VexGuestExtents:
   sum of its per-extent lengths. */
646 static UInt vge_osize ( VexGuestExtents* vge )
649 for (i = 0; i < vge->n_used; i++)
650 n += (UInt)vge->len[i];
/* True iff 'sector' is a legal index into sectors[]. */
654 static Bool isValidSector ( Int sector )
656 if (sector < 0 || sector >= N_SECTORS)
/* Hash a 64-bit guest address into a TT slot index: fold the halves
   together, rotate, then reduce mod the (prime) table size. */
661 static inline UInt HASH_TT ( Addr64 key )
663 UInt kHi = (UInt)(key >> 32);
664 UInt kLo = (UInt)key;
665 UInt k32 = kHi ^ kLo;
/* NOTE(review): 'ror' is declared on a line not visible in this
   listing; presumably a small constant rotation amount -- confirm. */
668 k32 = (k32 >> ror) | (k32 << (32-ror));
669 return k32 % N_TTES_PER_SECTOR;
/* Install (guest addr, host code ptr) into the direct-mapped fast
   cache; when profiling, also record the pointer to the entry's
   .count field in the parallel tt_fastN array. */
672 static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
674 UInt cno = (UInt)VG_TT_FAST_HASH(key);
675 VG_(tt_fast)[cno].guest = (Addr)key;
676 VG_(tt_fast)[cno].host = (Addr)tcptr;
677 if (VG_(clo_profile_flags) > 0)
678 VG_(tt_fastN)[cno] = count;
680 /* This shouldn't fail. It should be assured by m_translate
681 which should reject any attempt to make translation of code
682 starting at TRANSTAB_BOGUS_GUEST_ADDR. */
683 vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
686 /* Invalidate the fast cache's counter array, VG_(tt_fastN). */
/* NULL every slot; the loop is unrolled by 4, which is why the size
   must be a multiple of 4 (asserted below). */
687 static void invalidateFastNCache ( void )
690 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
691 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
692 VG_(tt_fastN)[j+0] = NULL;
693 VG_(tt_fastN)[j+1] = NULL;
694 VG_(tt_fastN)[j+2] = NULL;
695 VG_(tt_fastN)[j+3] = NULL;
697 vg_assert(j == VG_TT_FAST_SIZE);
700 /* Invalidate the fast cache VG_(tt_fast). If profiling, also
701 invalidate the fast cache's counter array VG_(tt_fastN), otherwise
703 static void invalidateFastCache ( void )
706 /* This loop is popular enough to make it worth unrolling a
707 bit, at least on ppc32. */
708 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
/* Point every slot at the bogus guest address, which can never match
   a real lookup and therefore forces a fast-cache miss. */
709 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
710 VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
711 VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
712 VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
713 VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
716 if (VG_(clo_profile_flags) > 0)
717 invalidateFastNCache();
719 vg_assert(j == VG_TT_FAST_SIZE);
/* Check the tt_fast / tt_fastN invariant: when profiling, a slot's
   counter pointer is NULL exactly when the slot holds the bogus guest
   address; when not profiling, every counter pointer must be NULL. */
723 static Bool sanity_check_fastcache ( void )
726 if (0) VG_(printf)("sanity check fastcache\n");
727 if (VG_(clo_profile_flags) > 0) {
729 for (j = 0; j < VG_TT_FAST_SIZE; j++) {
730 if (VG_(tt_fastN)[j] == NULL
731 && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
733 if (VG_(tt_fastN)[j] != NULL
734 && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
/* Not profiling: tt_fastN must be entirely unused (all NULL). */
739 for (j = 0; j < VG_TT_FAST_SIZE; j++) {
740 if (VG_(tt_fastN)[j] != NULL)
/* Make sector 'sno' ready to receive translations.  Two cases: a
   never-used sector gets its tc/tt areas mmap'd and is appended to
   sector_search_order[]; a previously-used sector has all its old
   translations dumped (notifying the tool) and its eclass arrays
   freed.  Either way the fast cache is invalidated afterwards, since
   old entries may now be stale. */
747 static void initialiseSector ( Int sno )
752 vg_assert(isValidSector(sno));
754 { Bool sane = sanity_check_sector_search_order();
759 if (sec->tc == NULL) {
761 /* Sector has never been used before. Need to allocate tt and
763 vg_assert(sec->tt == NULL);
764 vg_assert(sec->tc_next == NULL);
765 vg_assert(sec->tt_n_inuse == 0);
766 for (i = 0; i < ECLASS_N; i++) {
767 vg_assert(sec->ec2tte_size[i] == 0);
768 vg_assert(sec->ec2tte_used[i] == 0);
769 vg_assert(sec->ec2tte[i] == NULL);
772 VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
/* tc holds tc_sector_szQ ULongs, hence 8 bytes each. */
774 sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
775 if (sr_isError(sres)) {
776 VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
780 sec->tc = (ULong*)(AddrH)sr_Res(sres);
782 sres = VG_(am_mmap_anon_float_valgrind)
783 ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
784 if (sr_isError(sres)) {
785 VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
786 N_TTES_PER_SECTOR * sizeof(TTEntry) );
789 sec->tt = (TTEntry*)(AddrH)sr_Res(sres);
/* Fresh tt: mark every slot empty with no eclass links. */
791 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
792 sec->tt[i].status = Empty;
793 sec->tt[i].n_tte2ec = 0;
796 /* Add an entry in the sector_search_order */
797 for (i = 0; i < N_SECTORS; i++) {
798 if (sector_search_order[i] == -1)
801 vg_assert(i >= 0 && i < N_SECTORS);
802 sector_search_order[i] = sno;
804 if (VG_(clo_verbosity) > 2)
805 VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
809 /* Sector has been used before. Dump the old contents. */
810 VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
811 vg_assert(sec->tt != NULL);
812 vg_assert(sec->tc_next != NULL);
813 n_dump_count += sec->tt_n_inuse;
815 /* Visit each just-about-to-be-abandoned translation. */
816 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
817 if (sec->tt[i].status == InUse) {
818 vg_assert(sec->tt[i].n_tte2ec >= 1);
819 vg_assert(sec->tt[i].n_tte2ec <= 3);
820 n_dump_osize += vge_osize(&sec->tt[i].vge);
821 /* Tell the tool too. */
822 if (VG_(needs).superblock_discards) {
823 VG_TDICT_CALL( tool_discard_superblock_info,
828 vg_assert(sec->tt[i].n_tte2ec == 0);
830 sec->tt[i].status = Empty;
831 sec->tt[i].n_tte2ec = 0;
834 /* Free up the eclass structures. */
835 for (i = 0; i < ECLASS_N; i++) {
836 if (sec->ec2tte_size[i] == 0) {
837 vg_assert(sec->ec2tte_used[i] == 0);
838 vg_assert(sec->ec2tte[i] == NULL);
840 vg_assert(sec->ec2tte[i] != NULL);
841 VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
842 sec->ec2tte[i] = NULL;
843 sec->ec2tte_size[i] = 0;
844 sec->ec2tte_used[i] = 0;
848 /* Sanity check: ensure it is already in
849 sector_search_order[]. */
850 for (i = 0; i < N_SECTORS; i++) {
851 if (sector_search_order[i] == sno)
854 vg_assert(i >= 0 && i < N_SECTORS);
856 if (VG_(clo_verbosity) > 2)
857 VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
/* Common tail: reset the allocation point and flush the fast cache. */
860 sec->tc_next = sec->tc;
863 invalidateFastCache();
865 { Bool sane = sanity_check_sector_search_order();
/* Ensure the instruction cache sees freshly-written translated code at
   [ptr, ptr+nbytes).  Per-architecture: explicit dcbst/icbi sweep on
   ppc, a kernel syscall on ARM, and a no-op on x86/amd64/s390x where
   hardware keeps I- and D-caches coherent. */
870 static void invalidate_icache ( void *ptr, Int nbytes )
872 # if defined(VGA_ppc32) || defined(VGA_ppc64)
873 Addr startaddr = (Addr) ptr;
874 Addr endaddr = startaddr + nbytes;
879 if (nbytes == 0) return;
880 vg_assert(nbytes > 0);
882 VG_(machine_get_VexArchInfo)( NULL, &vai );
883 cls = vai.ppc_cache_line_szB;
886 vg_assert(cls == 32 || cls == 64 || cls == 128);
/* Round the start down to a cache-line boundary, then: flush each
   dirty data line to memory (dcbst), fence, invalidate the matching
   instruction lines (icbi), fence again. */
888 startaddr &= ~(cls - 1);
889 for (addr = startaddr; addr < endaddr; addr += cls) {
890 __asm__ __volatile__("dcbst 0,%0" : : "r" (addr));
892 __asm__ __volatile__("sync");
893 for (addr = startaddr; addr < endaddr; addr += cls) {
894 __asm__ __volatile__("icbi 0,%0" : : "r" (addr));
896 __asm__ __volatile__("sync; isync");
898 # elif defined(VGA_x86)
899 /* no need to do anything, hardware provides coherence */
901 # elif defined(VGA_amd64)
902 /* no need to do anything, hardware provides coherence */
904 # elif defined(VGA_s390x)
905 /* no need to do anything, hardware provides coherence */
907 # elif defined(VGP_arm_linux)
908 /* ARM cache flushes are privileged, so we must defer to the kernel. */
909 Addr startaddr = (Addr) ptr;
910 Addr endaddr = startaddr + nbytes;
911 VG_(do_syscall2)(__NR_ARM_cacheflush, startaddr, endaddr);
914 # error "Unknown ARCH"
919 /* Add a translation of vge to TT/TC. The translation is temporarily
920 in code[0 .. code_len-1].
922 pre: youngest_sector points to a valid (although possibly full)
925 void VG_(add_to_transtab)( VexGuestExtents* vge,
929 Bool is_self_checking )
931 Int tcAvailQ, reqdQ, y, i;
932 ULong *tcptr, *tcptr2;
936 vg_assert(init_done);
937 vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
939 /* 60000: should agree with N_TMPBUF in m_translate.c. */
940 vg_assert(code_len > 0 && code_len < 60000);
943 VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
/* Bump the intake statistics. */
947 n_in_tsize += code_len;
948 n_in_osize += vge_osize(vge);
949 if (is_self_checking)
953 vg_assert(isValidSector(y));
955 if (sectors[y].tc == NULL)
958 /* Try putting the translation in this sector. */
/* Space needed, in 64-bit words (round code_len up to a multiple of 8). */
959 reqdQ = (code_len + 7) >> 3;
961 /* Will it fit in tc? */
962 tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
963 - ((ULong*)(sectors[y].tc_next));
964 vg_assert(tcAvailQ >= 0);
965 vg_assert(tcAvailQ <= tc_sector_szQ);
968 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
969 /* No. So move on to the next sector. Either it's never been
970 used before, in which case it will get its tt/tc allocated
971 now, or it has been used before, in which case it is set to be
972 empty, hence throwing out the oldest sector. */
973 vg_assert(tc_sector_szQ > 0);
974 VG_(debugLog)(1,"transtab",
975 "declare sector %d full "
976 "(TT loading %2d%%, TC loading %2d%%)\n",
978 (100 * sectors[y].tt_n_inuse)
980 (100 * (tc_sector_szQ - tcAvailQ))
/* Advance youngest_sector, wrapping to 0 at the end of the array. */
983 if (youngest_sector >= N_SECTORS)
/* Recompute available space in the (possibly new) sector; it must
   now be sufficient. */
990 tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
991 - ((ULong*)(sectors[y].tc_next));
992 vg_assert(tcAvailQ >= 0);
993 vg_assert(tcAvailQ <= tc_sector_szQ);
994 vg_assert(tcAvailQ >= reqdQ);
995 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
996 vg_assert(sectors[y].tt_n_inuse >= 0);
/* Copy the translated code into the sector's code area. */
999 tcptr = sectors[y].tc_next;
1000 vg_assert(tcptr >= &sectors[y].tc[0]);
1001 vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
1003 dstP = (UChar*)tcptr;
1004 srcP = (UChar*)code;
1005 for (i = 0; i < code_len; i++)
1007 sectors[y].tc_next += reqdQ;
1008 sectors[y].tt_n_inuse++;
/* Freshly written code: make sure the I-cache sees it. */
1010 invalidate_icache( dstP, code_len );
1013 tcptr2 = sectors[y].tc_next;
1014 vg_assert(tcptr2 >= &sectors[y].tc[0]);
1015 vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
1017 /* Find an empty tt slot, and use it. There must be such a slot
1018 since tt is never allowed to get completely full. */
1020 vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
1022 if (sectors[y].tt[i].status == Empty
1023 || sectors[y].tt[i].status == Deleted)
/* Linear probe wraps around (wrap step not visible in this listing). */
1026 if (i >= N_TTES_PER_SECTOR)
/* Fill in the chosen TT entry. */
1030 sectors[y].tt[i].status = InUse;
1031 sectors[y].tt[i].tcptr = tcptr;
1032 sectors[y].tt[i].count = 0;
1033 sectors[y].tt[i].weight = 1;
1034 sectors[y].tt[i].vge = *vge;
1035 sectors[y].tt[i].entry = entry;
1037 /* Update the fast-cache. */
1038 setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );
1040 /* Note the eclass numbers for this translation. */
1041 upd_eclasses_after_add( &sectors[y], i );
1045 /* Search for the translation of the given guest address. If
1046 requested, a successful search can also cause the fast-caches to be
1049 Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
1053 Int i, j, k, kstart, sno;
1055 vg_assert(init_done);
1056 /* Find the initial probe point just once. It will be the same in
1057 all sectors and avoids multiple expensive % operations. */
1060 kstart = HASH_TT(guest_addr);
1061 vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
1063 /* Search in all the sectors,using sector_search_order[] as a
1064 heuristic guide as to what order to visit the sectors. */
1065 for (i = 0; i < N_SECTORS; i++) {
1067 sno = sector_search_order[i];
/* -1 terminates the list of in-use sectors: nothing left to search. */
1068 if (UNLIKELY(sno == -1))
1069 return False; /* run out of sectors to search */
/* Linear-probe at most N_TTES_PER_SECTOR slots starting at kstart. */
1072 for (j = 0; j < N_TTES_PER_SECTOR; j++) {
1074 if (sectors[sno].tt[k].status == InUse
1075 && sectors[sno].tt[k].entry == guest_addr) {
/* Hit: optionally refresh the fast cache, then return the host
   code address. */
1079 guest_addr, sectors[sno].tt[k].tcptr,
1080 &sectors[sno].tt[k].count );
1082 *result = (AddrH)sectors[sno].tt[k].tcptr;
1083 /* pull this one one step closer to the front. For large
1084 apps this more or less halves the number of required
/* Move-to-front-ish heuristic: swap this sector with its
   predecessor in the search order. */
1087 Int tmp = sector_search_order[i-1];
1088 sector_search_order[i-1] = sector_search_order[i];
1089 sector_search_order[i] = tmp;
/* An Empty slot ends the probe chain for this sector. */
1093 if (sectors[sno].tt[k].status == Empty)
1094 break; /* not found in this sector */
1096 if (k == N_TTES_PER_SECTOR)
1100 /* If we fall off the end, all entries are InUse and not
1101 matching, or Deleted. In any case we did not find it in this
1105 /* Not found in any sector. */
1110 /*-------------------------------------------------------------*/
1111 /*--- Delete translations. ---*/
1112 /*-------------------------------------------------------------*/
1115 static void unredir_discard_translations( Addr64, ULong );
1117 /* Stuff for deleting translations which intersect with a given
1118 address range. Unfortunately, to make this run at a reasonable
1119 speed, it is complex. */
/* Do the closed ranges [s1, s1+r1-1] and [s2, s2+r2-1] intersect?
   They are disjoint exactly when one ends before the other begins. */
1122 Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1124 Addr64 e1 = s1 + r1 - 1ULL;
1125 Addr64 e2 = s2 + r2 - 1ULL;
1126 if (e1 < s2 || e2 < s1)
/* Does [start, start+range-1] intersect any of the (up to 3) extents
   in 'vge'?  Checks each in-use extent in turn. */
1132 Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1134 if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1136 if (vge->n_used < 2)
1138 if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1140 if (vge->n_used < 3)
1142 if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1148 /* Delete a tt entry, and update all the eclass data accordingly. */
1150 static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
1152 Int i, ec_num, ec_idx;
1155 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1156 tte = &sec->tt[tteno];
1157 vg_assert(tte->status == InUse);
1158 vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1160 /* Deal with the ec-to-tte links first. */
1161 for (i = 0; i < tte->n_tte2ec; i++) {
1162 ec_num = (Int)tte->tte2ec_ec[i];
1163 ec_idx = tte->tte2ec_ix[i];
1164 vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1165 vg_assert(ec_idx >= 0);
1166 vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1167 /* Assert that the two links point at each other. */
1168 vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
1169 /* "delete" the pointer back to here. */
/* Lazy deletion: the slot is only marked, not compacted. */
1170 sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1173 /* Now fix up this TTEntry. */
1174 tte->status = Deleted;
/* Account the discard in the stats. */
1180 n_disc_osize += vge_osize(&tte->vge);
1182 /* Tell the tool too. */
1183 if (VG_(needs).superblock_discards) {
1184 VG_TDICT_CALL( tool_discard_superblock_info,
1191 /* Delete translations from sec which intersect specified range, but
1192 only consider translations in the specified eclass. */
1195 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
1196 Addr64 guest_start, ULong range,
/* Returns whether anything was actually deleted. */
1201 Bool anyDeld = False;
1204 vg_assert(ec >= 0 && ec < ECLASS_N);
/* Walk only the tt entries registered under eclass 'ec'. */
1206 for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1208 tteno = sec->ec2tte[ec][i];
1209 if (tteno == EC2TTE_DELETED) {
1210 /* already deleted */
1214 vg_assert(tteno < N_TTES_PER_SECTOR);
1216 tte = &sec->tt[tteno];
1217 vg_assert(tte->status == InUse);
1219 if (overlaps( guest_start, range, &tte->vge )) {
1221 delete_tte( sec, (Int)tteno );
/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec.  Used when the
   range spans more than one equivalence class (e.g. munmap), where
   a full scan beats fixing up the eclass lists piecemeal. */
Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
                                     Addr64 guest_start, ULong range )
   Bool anyDeld = False;
   /* Brute force: test every tt slot in the sector. */
   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         delete_tte( sec, i );
/* Discard all translations whose guest extents intersect
   [guest_start, guest_start+range).  'who' is a tag used only in the
   debug-log message.  Chooses between a fast per-eclass scan and a
   slow whole-sector scan, then drops the fast cache and the no-redir
   cache as well. */
void VG_(discard_translations) ( Addr64 guest_start, ULong range,
   Bool anyDeleted = False;
   vg_assert(init_done);
   VG_(debugLog)(2, "transtab",
                 "discard_translations(0x%llx, %lld) req by %s\n",
                 guest_start, range, who );
   /* Pre-deletion sanity check */
   /* NOTE(review): 'VG_(clo_sanity_level >= 4)' places the comparison
      inside the VG_() name-pasting macro; it works only because token
      pasting affects the first token.  Clearer would be
      'VG_(clo_sanity_level) >= 4' -- confirm against upstream. */
   if (VG_(clo_sanity_level >= 4)) {
      Bool sane = sanity_check_all_sectors();
   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class (ECLASS_MISC).

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */
   /* First off, figure out if the range falls within a single class,
      and if so which one. */
   if (range < (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );
   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */
   if (ec != ECLASS_MISC) {
      VG_(debugLog)(2, "transtab",
                    " FAST, ec = %d\n", ec);
      vg_assert(ec >= 0 && ec < ECLASS_MISC);
      /* NOTE(review): the '§ors[sno]' occurrences in this function
         look like mojibake for '&sectors[sno]' ("&sect" rendered as
         the HTML entity '§') -- confirm against upstream source. */
      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = §ors[sno];
         if (sec->tc == NULL)
         /* Check both the range's own class and the sin-bin class. */
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, guest_start, range, ec );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, guest_start, range, ECLASS_MISC );
      /* Slow scheme: inspect every translation in every sector. */
      VG_(debugLog)(2, "transtab",
                    " SLOW, ec = %d\n", ec);
      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = §ors[sno];
         if (sec->tc == NULL)
         anyDeleted |= delete_translations_in_sector(
                          sec, guest_start, range );
   /* Deleted translations may still be reachable via the fast cache;
      flush it unconditionally. */
   invalidateFastCache();
   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );
   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level >= 4)) {
      Bool sane = sanity_check_all_sectors();
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = §ors[sno];
         if (sec->tc == NULL)
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            if (tte->status != InUse)
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
1364 /*------------------------------------------------------------*/
1365 /*--- AUXILIARY: the unredirected TT/TC ---*/
1366 /*------------------------------------------------------------*/
1368 /* A very simple translation cache which holds a small number of
1369 unredirected translations. This is completely independent of the
1370 main tt/tc structures. When unredir_tc or unredir_tt becomes full,
1371 both structures are simply dumped and we start over.
1373 Since these translations are unredirected, the search key is (by
1374 definition) the first address entry in the .vge field. */
1376 /* Sized to hold 500 translations of average size 1000 bytes. */
1378 #define UNREDIR_SZB 1000
1380 #define N_UNREDIR_TT 500
1381 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
1385 VexGuestExtents vge;
/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
/* Starts out reading as "full" (== N_UNREDIR_TCQ); presumably so a
   use before init_unredir_tt_tc has run cannot appear to have free
   space -- TODO confirm; VG_(init_tt_tc) initialises it properly. */
static Int    unredir_tc_used = N_UNREDIR_TCQ;
/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;
/* (Re)initialise the unredirected TT/TC: on first call, allocate the
   code cache from the address-space manager (fatal if that fails);
   on every call, mark the cache empty and all TT slots free.  Also
   used to dump everything when the cache fills up. */
static void init_unredir_tt_tc ( void )
   if (unredir_tc == NULL) {
      /* First use: map space for N_UNREDIR_TT translations of
         UNREDIR_SZB bytes each. */
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   /* Dump existing translations: cache empty, no slots in use,
      highwater below the first slot. */
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
/* Do a sanity check; return False on failure. */
/* NOTE(review): despite the "redir" in the name, this checks the
   UNredirected tables (unredir_tt / unredir_tc). */
static Bool sanity_check_redir_tt_tc ( void )
   /* -1 is the legal minimum ("no slot ever allocated"). */
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
   /* Nothing above the highwater mark may be in use, otherwise the
      invalidation scan in unredir_discard_translations would miss
      it. */
   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse
   /* The cache allocation point must lie within the cache. */
   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;
/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].  If either the TT or the
   TC is full, both are simply dumped and we start over.
*/
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
   vg_assert(sanity_check_redir_tt_tc());
   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);
   /* How many 8-byte (ULong) units of unredir_tc are needed.
      (Original comment said "unredir_tt slots" -- it is tc space
      that is being sized here.) */
   code_szQ = (code_len + 7) / 8;
   /* Look for an empty unredir_tt slot.  (Original comment said
      "unredir_tc" -- the loop scans the tt.) */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
   /* No free slot, or not enough cache space left: dump everything
      and start afresh. */
   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
   /* By here, slot i is free and the cache has room. */
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);
   /* Keep the highwater mark current for the invalidation scan. */
   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;
   /* Byte-copy the translation into the code cache... */
   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
   /* ...and make sure the instruction cache sees the new code. */
   invalidate_icache( dstP, code_len );
   /* Record the new entry. */
   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;
   /* Advance the allocation point by whole ULongs. */
   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   /* The copied code must not extend past the allocation point. */
   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
/* Look up an unredirected translation by guest address.  Since these
   translations are unredirected, the key is (by definition) the
   first address in the .vge field; on a hit, *result is set to the
   host code address.  Linear scan of all slots. */
Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
/* Discard any unredirected translations whose guest extents overlap
   [guest_start, guest_start+range).  Only the .inUse flags are
   cleared; the code-cache space itself is never reclaimed (we only
   ever allocate forwards in unredir_tc). */
static void unredir_discard_translations( Addr64 guest_start, ULong range )
   vg_assert(sanity_check_redir_tt_tc());
   /* Only slots up to the highwater mark can possibly be in use. */
   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
1528 /*------------------------------------------------------------*/
1529 /*--- Initialisation. ---*/
1530 /*------------------------------------------------------------*/
/* One-time startup of the code-management machinery: sanity-checks
   build-time invariants, sizes the per-sector code cache from the
   tool's declared average translation size, and empties every
   sector, the fast caches and the unredirected TT/TC. */
void VG_(init_tt_tc) ( void )
   Int i, j, avg_codeszQ;
   vg_assert(!init_done);
   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");
   /* Figure out how big each tc area should be.  Units are ULongs
      (hence the /8); one slot extra per entry for rounding. */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);
   /* Initialise the sectors; tc == NULL marks "not yet allocated". */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j] = NULL;
   /* Initialise the sector_search_order hint table; -1 == unused. */
   for (i = 0; i < N_SECTORS; i++)
      sector_search_order[i] = -1;
   /* Initialise the fast caches.  If not profiling (the usual case),
      we have to explicitly invalidate the fastN cache as
      invalidateFastCache() won't do that for us. */
   invalidateFastCache();
   if (VG_(clo_profile_flags) == 0)
      invalidateFastNCache();
   /* and the unredir tt/tc */
   init_unredir_tt_tc();
   /* Report the chosen geometry when verbose. */
   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
                   N_SECTORS, 8 * tc_sector_szQ,
                   N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
                   N_SECTORS * N_TTES_PER_SECTOR,
                   N_SECTORS * N_TTES_PER_SECTOR_USABLE,
                   SECTOR_TT_LIMIT_PERCENT );
   /* Same geometry to the debug log, unconditionally. */
   VG_(debugLog)(2, "transtab",
                 "cache: %d sectors of %d bytes each = %d total\n",
                 N_SECTORS, 8 * tc_sector_szQ,
                 N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
                 "table: %d total entries, max occupancy %d (%d%%)\n",
                 N_SECTORS * N_TTES_PER_SECTOR,
                 N_SECTORS * N_TTES_PER_SECTOR_USABLE,
                 SECTOR_TT_LIMIT_PERCENT );
1616 /*------------------------------------------------------------*/
1617 /*--- Printing out statistics. ---*/
1618 /*------------------------------------------------------------*/
/* Integer division that yields 0 instead of trapping when the
   divisor is zero; used for the ratio in the stats printing below. */
static ULong safe_idiv( ULong a, ULong b )
   return (b == 0 ? 0 : a / b);
/* Number of superblocks translated to date.  (Body not visible in
   this view; presumably returns the n_in_count statistic -- confirm
   against upstream.) */
UInt VG_(get_bbs_translated) ( void )
/* Print summary TT/TC statistics. */
void VG_(print_tt_tc_stats) ( void )
   /* Lookup and fast-cache activity counters. */
   VG_(message)(Vg_DebugMsg,
       " tt/tc: %'llu tt lookups requiring %'llu probes\n",
       n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
       " tt/tc: %'llu fast-cache updates, %'llu flushes\n",
       n_fast_updates, n_fast_flushes );
   /* Translation volume: count, guest (osize) vs host (tsize) bytes;
      the ratio is reported in tenths via safe_idiv. */
   VG_(message)(Vg_DebugMsg,
                " transtab: new %'lld "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
   /* The "?" "?" split below stops '??)' forming a trigraph. */
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );
   /* Dump per-eclass occupancy -- note: sector 0 only. */
   for (i = 0; i < ECLASS_N; i++) {
      VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
   VG_(printf)("\n\n");
1664 /*------------------------------------------------------------*/
1665 /*--- Printing out of profiling results. ---*/
1666 /*------------------------------------------------------------*/
/* Profiling score of a translation: the product of its .weight and
   .count fields, widened to ULong before multiplying so the product
   cannot overflow the narrower field types. */
static ULong score ( TTEntry* tte )
   return ((ULong)tte->weight) * ((ULong)tte->count);
/* Collect the n_tops highest-scoring translations into tops[],
   scanning every in-use tt entry in every allocated sector, and
   accumulate the total score across all entries. */
ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
   /* NOTE(review): the '§ors[...]' occurrences below look like
      mojibake for '&sectors[...]' ("&sect" rendered as the HTML
      entity '§') -- confirm against upstream source. */
   for (sno = 0; sno < N_SECTORS; sno++) {
      if (sectors[sno].tc == NULL)
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
         score_total += score(§ors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
            /* An empty slot: this entry ranks here. */
            if (tops[r].addr == 0) {
            /* Outscores the current occupant: this entry ranks here. */
            if ( score(§ors[sno].tt[i]) > tops[r].score ) {
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         for (s = n_tops-1; s > r; s--)
            tops[s] = tops[s-1];
         tops[r].addr  = sectors[sno].tt[i].entry;
         tops[r].score = score( §ors[sno].tt[i] );
1726 /*--------------------------------------------------------------------*/
1728 /*--------------------------------------------------------------------*/