/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/
/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2010 OpenWorks Ltd

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */

/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */

/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/* Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
#ifndef __HB_ZSM_H
#define __HB_ZSM_H

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)
/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */
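/* Illustrative sketch (not part of the library): how a client might
   wire up the rcinc/rcdec callbacks and obtain exact reference
   counts.  'my_rc' and 'sv_to_index' are hypothetical, for
   exposition only. */
#if 0
static UWord my_rc[1024];

static void my_rcinc ( SVal sv ) { my_rc[ sv_to_index(sv) ]++; }
static void my_rcdec ( SVal sv ) { my_rc[ sv_to_index(sv) ]--; }

static void example_client_setup ( void )
{
   zsm_init( my_rcinc, my_rcdec );
   /* ... zsm_sset_range / zsm_scopy_range traffic; my_rc is now only
      approximate, because of the internal cacheline cache ... */
   zsm_flush_cache();
   /* my_rc is exact here: entries with count 0 are genuinely
      unreferenced by the library and may be freed. */
}
#endif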
/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
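/* Worked example, for N a power of 2: ROUNDUP(13, 8) == 16,
   ROUNDUP(16, 8) == 16; ROUNDDN(13, 8) == 8, ROUNDDN(16, 8) == 16. */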
/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;
/* ------ CacheLine ------ */

#define N_LINE_BITS   6 /* must be >= 3 */
#define N_LINE_ARANGE (1 << N_LINE_BITS)
#define N_LINE_TREES  (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;
#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */
typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
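/* Worked example contrasting the two reps, given the settings above
   (N_LINE_ARANGE == 64, so a line holds 64 SVals, one per byte):

   * a line with at most 4 distinct SVals fits the Z rep: the values
     are stored once in .dict[0..3] and the line is described by 64
     two-bit dict indexes packed into .ix2s[0..15], that is, 16 bytes
     of indexes plus 32 bytes of dictionary instead of 512 bytes of
     SVals.

   * when a 5th distinct value appears, the line is parked in a LineF
     instead: .dict[0] is set to SVal_INVALID, .dict[1] holds the
     LineF's index, and .w64s[] stores all 64 SVals verbatim. */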
/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[1]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineFs have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the sum of the RCs of:

   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}
/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does, however, require us to detect, and not write back, any
   line with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;
static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;
static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts__cmp_structural_slow = 0; // # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__indexat_slow    = 0; // # calls to VTS__indexAt_SLOW
static UWord stats__vts_set__fadoa       = 0; // # calls to vts_set__find_and_dealloc__or_add
static UWord stats__vts_set__fadoa_d     = 0; // # calls to vts_set__find_and_dealloc__or_add
                                              //   that lead to a deallocation
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}
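/* Worked example, with N_SECMAP_BITS == 13 and N_LINE_BITS == 6
   (defined above): for a == 0x5432F6,
     shmem__round_to_SecMap_base(a) == a & ~0x1FFF == 0x542000
     shmem__get_SecMap_offset(a)    == a &  0x1FFF == 0x12F6
   and that offset selects LineZ number 0x12F6 >> 6 == 75 of the
   N_SECMAP_ZLINES == 128 lines in the SecMap. */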
/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;
static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;

   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}
static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}
typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}
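/* The smCache above is a tiny move-to-front cache: a hit on entry 1
   or 2 swaps that entry one slot towards the front, and a probe that
   misses the cache but hits map_shmem pushes entries 0 and 1 down
   and installs the result at the front.  For example, an alternating
   lookup stream X,Y,X,Y,... settles into hits on entries 0 and 1
   only. */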
static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}
/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(b2 < 4);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
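/* Minimal self-test sketch for the 2-bit packing above (illustrative
   only, not compiled in).  Each UChar holds 4 indexes, with the
   lowest-numbered index in the least significant bits. */
#if 0
static void twobit_array_selftest ( void )
{
   UChar arr[2] = { 0, 0 };
   UWord i;
   for (i = 0; i < 8; i++)
      write_twobit_array( arr, i, i & 3 ); /* writes 0,1,2,3,0,1,2,3 */
   /* now arr[0] == arr[1] == 0xE4 == binary 11'10'01'00 */
   for (i = 0; i < 8; i++)
      tl_assert( read_twobit_array( arr, i ) == (i & 3) );
}
#endif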
/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ*  lineZ;
   LineF*  lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}
/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation,
   in recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ*  lineZ;
   LineF*  lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}
static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   Word   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
   VG_(printf)("SM %p: expand F array from %d to %d\n",
               sm, (Int)sm->linesF_size, (Int)new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /* we didn't find a free F line even though we just expanded the
      array; that cannot happen. */
   tl_assert(0);
}
/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}
static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
      ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                   ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                   ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                   ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                   ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                   ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                   ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                   ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
      ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                  ( (bit5) << 5) | ( (bit4) << 4) | \
                  ( (bit3) << 3) | ( (bit2) << 2) | \
                  ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
   switch (descr) {
   /*
            +--------------------------------- TREE_DESCR_8_7
            |             +------------------- TREE_DESCR_8_0
            |             |  +---------------- TREE_DESCR_16_3
            |             |  | +-------------- TREE_DESCR_32_1
            |             |  | | +------------ TREE_DESCR_16_2
            |             |  | | |  +--------- TREE_DESCR_64
            |             |  | | |  |  +------ TREE_DESCR_16_1
            |             |  | | |  |  | +---- TREE_DESCR_32_0
            |             |  | | |  |  | | +-- TREE_DESCR_16_0
            |             |  | | |  |  | | |
            V             V  V V V  V  V V V

        GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8   8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16    8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8   8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16    8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8   16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16    16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8   16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16    16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32       8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32       8 8 16  */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32       16 8 8  */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32       16 16   */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8   32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16    32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
            /* INVALID - any valid descr produces at least one
               valid bit in tree[0..7]*/
   }
   /*NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}
__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
      "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
      (Int)((descr & TREE_DESCR_8_7)  ? 1 : 0),
      (Int)((descr & TREE_DESCR_8_6)  ? 1 : 0),
      (Int)((descr & TREE_DESCR_8_5)  ? 1 : 0),
      (Int)((descr & TREE_DESCR_8_4)  ? 1 : 0),
      (Int)((descr & TREE_DESCR_8_3)  ? 1 : 0),
      (Int)((descr & TREE_DESCR_8_2)  ? 1 : 0),
      (Int)((descr & TREE_DESCR_8_1)  ? 1 : 0),
      (Int)((descr & TREE_DESCR_8_0)  ? 1 : 0),
      (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
      (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
      (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
      (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
      (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
      (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
      (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}
static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}
static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}
static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}
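/* Worked example of normalisation: a tree whose 8 leaves all hold the
   same SVal V collapses layer by layer (four 16-bit merges, two
   32-bit merges, one 64-bit merge), leaving tree[0] == V,
   tree[1..7] == SVal_INVALID, and descr == TREE_DESCR_64.  A tree of
   8 pairwise-distinct leaves is left untouched, with descr ==
   TREE_DESCR_8_7 | ... | TREE_DESCR_8_0. */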
typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */
   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}
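/* Worked example: a tree with descr == TREE_DESCR_64 contributes one
   (count 8, tree[0]) pair, and a fully split tree contributes 8 pairs
   of count 1.  So a whole line sequentialises to between N_LINE_TREES
   (8) and N_LINE_ARANGE (64) CountedSVal entries, whose counts always
   sum to N_LINE_ARANGE. */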
/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
   VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );
   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}
/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
   VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal  sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}
static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}
static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}
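/* Worked example: for a == 0x1003A, get_cacheline_offset(a) == 0x3A
   (byte 58 of the 64-byte line), get_treeno(a) == 0x3A >> 3 == 7 (the
   last of the line's 8 trees), and get_tree_offset(a) == 0x3A & 7 == 2
   (leaf 2 within that tree). */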
static __attribute__((noinline))
           CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}
static __attribute__((noinline))
CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   CacheLine* cl;
   Addr*      tag_old_p;
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}
static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}
/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
#ifndef __HB_VTS_H
#define __HB_VTS_H

/* VtsIDs can't exceed 30 bits, since they have to be packed into the
   lowest 30 bits of an SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID   id;
      XArray* ts; /* XArray* ScalarTS(abstract) */
   }
   VTS;
/* Create a new, empty VTS. */
static VTS* VTS__new ( void );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS. */
static VTS* VTS__singleton ( Thr* thr, ULong tym );

/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified. */
static VTS* VTS__tick ( Thr* me, VTS* vts );

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified. */
static VTS* VTS__join ( VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   restrict ourselves to that.

   Returns NULL iff LEQ(A,B), or non-NULL if not.  In the latter case,
   the returned Thr* indicates the discovered point for which they are
   not.  There may be more than one such point, but we only care about
   seeing one of them, not all of them.  This rather strange
   convention is used because sometimes we want to know the actual
   index at which they first differ. */
static Thr* VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

#endif /* ! __HB_VTS_H */
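/* Illustrative sketch (not part of the library) of the intended usage
   of the primitives above, for two hypothetical threads thrA and
   thrB. */
#if 0
static void example_vts_usage ( Thr* thrA, Thr* thrB )
{
   VTS* a  = VTS__singleton( thrA, 1 ); /* [thrA:1]         */
   VTS* a2 = VTS__tick( thrA, a );      /* [thrA:2]         */
   VTS* b  = VTS__singleton( thrB, 5 ); /* [thrB:5]         */
   VTS* j  = VTS__join( a2, b );        /* [thrA:2, thrB:5] */
   /* a2 <= j and b <= j, so both calls return NULL ... */
   tl_assert( VTS__cmpLEQ( a2, j ) == NULL );
   tl_assert( VTS__cmpLEQ( b,  j ) == NULL );
   /* ... but j is not <= a2; the result names a witness (thrB). */
   tl_assert( VTS__cmpLEQ( j, a2 ) != NULL );
   VTS__delete( a );  VTS__delete( a2 );
   VTS__delete( b );  VTS__delete( j );
}
#endif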
/*--------------- to do with Vector Timestamps ---------------*/

/* Scalar Timestamp */
typedef
   struct {
      Thr*  thr;
      ULong tym;
   }
   ScalarTS;

static Bool is_sane_VTS ( VTS* vts )
{
   UWord    i, n;
   ScalarTS *st1, *st2;
   if (!vts) return False;
   if (!vts->ts) return False;
   n = VG_(sizeXA)( vts->ts );
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = VG_(indexXA)( vts->ts, i );
         st2 = VG_(indexXA)( vts->ts, i+1 );
         if (st1->thr >= st2->thr)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}
/* Create a new, empty VTS.
*/
VTS* VTS__new ( void )
{
   VTS* vts;
   vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
   tl_assert(vts);
   vts->id = VtsID_INVALID;
   vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
                         HG_(free), sizeof(ScalarTS) );
   tl_assert(vts->ts);
   return vts;
}
/* Delete this VTS in its entirety.
*/
void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->ts);
   VG_(deleteXA)( vts->ts );
   HG_(free)(vts);
}
/* Create a new singleton VTS.
*/
VTS* VTS__singleton ( Thr* thr, ULong tym ) {
   VTS*     vts;
   ScalarTS st;
   tl_assert(thr);
   tl_assert(tym >= 1);
   vts    = VTS__new();
   st.thr = thr;
   st.tym = tym;
   VG_(addToXA)( vts->ts, &st );
   return vts;
}
/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
VTS* VTS__tick ( Thr* me, VTS* vts )
{
   ScalarTS* here = NULL;
   ScalarTS  tmp;
   VTS*      res;
   Word      i, n;

   stats__vts__tick++;

   tl_assert(me);
   tl_assert(is_sane_VTS(vts));
   //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
   //                   (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
   res = VTS__new();
   n   = VG_(sizeXA)( vts->ts );

   /* main loop doesn't handle zero-entry case correctly, so
      special-case it. */
   if (n == 0) {
      tmp.thr = me;
      tmp.tym = 1;
      VG_(addToXA)( res->ts, &tmp );
      tl_assert(is_sane_VTS(res));
      return res;
   }

   for (i = 0; i < n; i++) {
      here = VG_(indexXA)( vts->ts, i );
      if (me < here->thr) {
         /* We just went past 'me', without seeing it. */
         tmp.thr = me;
         tmp.tym = 1;
         VG_(addToXA)( res->ts, &tmp );
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
         i++;
         break;
      }
      else if (me == here->thr) {
         tmp = *here;
         tmp.tym++;
         VG_(addToXA)( res->ts, &tmp );
         i++;
         break;
      }
      else /* me > here->thr */ {
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
      }
   }
   tl_assert(i >= 0 && i <= n);
   if (i == n && here && here->thr < me) {
      tmp.thr = me;
      tmp.tym = 1;
      VG_(addToXA)( res->ts, &tmp );
   } else {
      for (/*keepgoing*/; i < n; i++) {
         here = VG_(indexXA)( vts->ts, i );
         tmp  = *here;
         VG_(addToXA)( res->ts, &tmp );
      }
   }
   tl_assert(is_sane_VTS(res));
   //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
   //                   (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
   return res;
}
/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
VTS* VTS__join ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   Thr*  thr;
   VTS*  res;

   stats__vts__join++;

   tl_assert(a && a->ts);
   tl_assert(b && b->ts);
   useda = VG_(sizeXA)( a->ts );
   usedb = VG_(sizeXA)( b->ts );

   res = VTS__new();
   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thr, tyma, tymb) drawn
         from a and b in order, where thr is the next Thr*
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if        (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         thr  = tmpb->thr;
         tyma = 0;
         tymb = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         thr  = tmpa->thr;
         tyma = tmpa->tym;
         tymb = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-Thr*'d triple */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         if (tmpa->thr < tmpb->thr) {
            /* a has the lowest unconsidered Thr* */
            thr  = tmpa->thr;
            tyma = tmpa->tym;
            tymb = 0;
            ia++;
         } else if (tmpa->thr > tmpb->thr) {
            /* b has the lowest unconsidered Thr* */
            thr  = tmpb->thr;
            tyma = 0;
            tymb = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same Thr* */
            tl_assert(tmpa->thr == tmpb->thr);
            thr  = tmpa->thr; /* == tmpb->thr */
            tyma = tmpa->tym;
            tymb = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         ScalarTS st;
         st.thr = thr;
         st.tym = tymMax;
         VG_(addToXA)( res->ts, &st );
      }

   }

   tl_assert(is_sane_VTS( res ));

   return res;
}
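/* Worked example: VTS__join( [thrA:3], [thrA:1, thrB:5] ) ==
   [thrA:3, thrB:5], the pointwise maximum, treating a missing entry
   as an implicit zero. */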
/* Determine if 'a' <= 'b', in the partial ordering.  Returns NULL if
   they are, or the first Thr* for which they are not.  This rather
   strange convention is used because sometimes we want to know the
   actual index at which they first differ. */
static Thr* VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a && a->ts);
   tl_assert(b && b->ts);
   useda = VG_(sizeXA)( a->ts );
   usedb = VG_(sizeXA)( b->ts );

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      Thr* thr;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if        (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         thr  = tmpb->thr;
         tyma = 0;
         tymb = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         thr  = tmpa->thr;
         tyma = tmpa->tym;
         tymb = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-Thr*'d triple */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         if (tmpa->thr < tmpb->thr) {
            /* a has the lowest unconsidered Thr* */
            thr  = tmpa->thr;
            tyma = tmpa->tym;
            tymb = 0;
            ia++;
         }
         else
         if (tmpa->thr > tmpb->thr) {
            /* b has the lowest unconsidered Thr* */
            thr  = tmpb->thr;
            tyma = 0;
            tymb = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same Thr* */
            tl_assert(tmpa->thr == tmpb->thr);
            thr  = tmpa->thr; /* == tmpb->thr */
            tyma = tmpa->tym;
            tymb = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thr);
         return thr;
      }
   }

   return NULL; /* all points are LEQ */
}
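/* Worked examples: VTS__cmpLEQ( [thrA:1], [thrA:2, thrB:7] ) == NULL,
   since 1 <= 2 and, implicitly, 0 <= 7.  But
   VTS__cmpLEQ( [thrA:1, thrB:9], [thrA:2, thrB:7] ) returns thrB, a
   witness at which 9 > 7. */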
/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-) This can be
   performance critical so there is some effort expended to make it
   as fast as possible.
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which comes across likely
      differences relatively quickly. */
   Word     i;
   Word     useda = 0,    usedb = 0;
   ScalarTS *ctsa = NULL, *ctsb = NULL;

   stats__vts__cmp_structural++;

   tl_assert(a);
   tl_assert(b);

   VG_(getContentsXA_UNSAFE)( a->ts, (void**)&ctsa, &useda );
   VG_(getContentsXA_UNSAFE)( b->ts, (void**)&ctsb, &usedb );

   if (LIKELY(useda == usedb)) {
      ScalarTS *tmpa = NULL, *tmpb = NULL;
      stats__vts__cmp_structural_slow++;
      /* Same length vectors.  Find the first difference, if any, as
         fast as possible. */
      for (i = 0; i < useda; i++) {
         tmpa = &ctsa[i];
         tmpb = &ctsb[i];
         if (LIKELY(tmpa->tym == tmpb->tym && tmpa->thr == tmpb->thr))
            continue;
         else
            break;
      }
      if (UNLIKELY(i == useda)) {
         /* They're identical. */
         return 0;
      } else {
         tl_assert(i >= 0 && i < useda);
         if (tmpa->tym < tmpb->tym) return -1;
         if (tmpa->tym > tmpb->tym) return 1;
         if (tmpa->thr < tmpb->thr) return -1;
         if (tmpa->thr > tmpb->thr) return 1;
         /* we just established them as non-identical, hence: */
      }
      /*NOTREACHED*/
      tl_assert(0);
   }

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;
   /*NOTREACHED*/
   tl_assert(0);
   return 0;
}
/* Debugging only.  Display the given VTS in the buffer.
*/
void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
   ScalarTS* st;
   HChar     unit[64];
   Word      i, n;
   Int       avail = nBuf;
   tl_assert(vts && vts->ts);
   tl_assert(nBuf > 16);
   buf[0] = '[';
   buf[1] = 0;
   n = VG_(sizeXA)( vts->ts );
   for (i = 0; i < n; i++) {
      tl_assert(avail >= 40);
      st = VG_(indexXA)( vts->ts, i );
      VG_(memset)(unit, 0, sizeof(unit));
      VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
                   st->thr, st->tym);
      if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
         VG_(strcat)(buf, " ...]");
         buf[avail-1] = 0;
         return;
      }
      VG_(strcat)(buf, unit);
      avail -= VG_(strlen)(unit);
   }
   VG_(strcat)(buf, "]");
   buf[avail-1] = 0;
}
/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
   Word i, n;
   stats__vts__indexat_slow++;
   tl_assert(vts && vts->ts);
   n = VG_(sizeXA)( vts->ts );
   for (i = 0; i < n; i++) {
      ScalarTS* st = VG_(indexXA)( vts->ts, i );
      if (st->thr == idx)
         return st->tym;
   }
   return 0;
}
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END vts primitives                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN main library                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
// VTS set                                             //
//                                                     //
/////////////////////////////////////////////////////////
static WordFM* /* VTS* void void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
   tl_assert(vts_set);
}
/* Given a newly made VTS, look in vts_set to see if we already have
   an identical one.  If yes, free up this one and return instead a
   pointer to the existing one.  If no, add this one to the set and
   return the same pointer.  Caller differentiates the two cases by
   comparing returned pointer with the supplied one (although that
   does require that the supplied VTS is not already in the set).
*/
static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
{
   UWord keyW, valW;
   stats__vts_set__fadoa++;
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      stats__vts_set__fadoa_d++;
      VTS__delete(cand);
      return (VTS*)keyW;
   } else {
      /* not present.  Add and return pointer to same. */
      VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
      return cand;
   }
}
/////////////////////////////////////////////////////////
//                                                     //
// VTS table                                           //
//                                                     //
/////////////////////////////////////////////////////////

static void VtsID__invalidate_caches ( void ); /* fwds */
/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for .freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so .freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
   }
   VtsTE;
2117 /* The VTS table. */
2118 static XArray* /* of VtsTE */ vts_tab = NULL;
2120 /* An index into the VTS table, indicating the start of the list of
2121 free (available for use) entries. If the list is empty, this is VtsID_INVALID. */
2123 static VtsID vts_tab_freelist = VtsID_INVALID;
2125 /* Do a GC of vts_tab when the freelist becomes empty AND the size of
2126 vts_tab equals or exceeds this size. After GC, the value here is
2127 set appropriately so as to check for the next GC point. */
2128 static Word vts_next_GC_at = 1000;
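/* A minimal sketch (assumed trigger logic, not the actual call
   site) of how vts_next_GC_at is intended to gate collections:
   only GC when the freelist is exhausted AND the table has grown
   to the threshold. */
#if 0
if (vts_tab_freelist == VtsID_INVALID
    && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at) {
   vts_tab__do_GC( False/*show_stats*/ );
}
#endif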
2130 static void vts_tab_init ( void )
2133 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2134 HG_(free), sizeof(VtsTE) );
2140 /* Add ii to the free list, checking that it looks out-of-use. */
2141 static void add_to_free_list ( VtsID ii )
2143 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2144 tl_assert(ie->vts == NULL);
2145 tl_assert(ie->rc == 0);
2146 tl_assert(ie->freelink == VtsID_INVALID);
2147 ie->freelink = vts_tab_freelist;
2148 vts_tab_freelist = ii;
2151 /* Get an entry from the free list. This will return VtsID_INVALID if
2152 the free list is empty. */
2153 static VtsID get_from_free_list ( void )
2157 if (vts_tab_freelist == VtsID_INVALID)
2158 return VtsID_INVALID;
2159 ii = vts_tab_freelist;
2160 ie = VG_(indexXA)( vts_tab, ii );
2161 tl_assert(ie->vts == NULL);
2162 tl_assert(ie->rc == 0);
2163 vts_tab_freelist = ie->freelink; return ii;
2167 /* Produce a new VtsID that can be used, either by getting it from
2168 the freelist, or, if that is empty, by expanding vts_tab. */
2169 static VtsID get_new_VtsID ( void )
2173 ii = get_from_free_list();
2174 if (ii != VtsID_INVALID)
2178 te.freelink = VtsID_INVALID;
2179 ii = (VtsID)VG_(addToXA)( vts_tab, &te ); return ii;
2184 /* Indirect callback from lib_zsm. */
2185 static void VtsID__rcinc ( VtsID ii )
2188 /* VG_(indexXA) does a range check for us */
2189 ie = VG_(indexXA)( vts_tab, ii );
2190 tl_assert(ie->vts); /* else it's not in use */
2191 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2192 tl_assert(ie->vts->id == ii); ie->rc++;
2196 /* Indirect callback from lib_zsm. */
2197 static void VtsID__rcdec ( VtsID ii )
2200 /* VG_(indexXA) does a range check for us */
2201 ie = VG_(indexXA)( vts_tab, ii );
2202 tl_assert(ie->vts); /* else it's not in use */
2203 tl_assert(ie->rc > 0); /* else RC snafu */
2204 tl_assert(ie->vts->id == ii); ie->rc--;
2209 /* Look up 'cand' in our collection of VTSs. If present, deallocate
2210 it and return the VtsID for the pre-existing version. If not
2211 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2212 for it, and return that. */
2213 static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2216 tl_assert(cand->id == VtsID_INVALID);
2217 auld = vts_set__find_and_dealloc__or_add(cand);
2219 /* We already have an Aulde one. Use that. */
2221 tl_assert(auld->id != VtsID_INVALID);
2222 ie = VG_(indexXA)( vts_tab, auld->id );
2223 tl_assert(ie->vts == auld); return auld->id;
2226 VtsID ii = get_new_VtsID();
2227 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2230 ie->vts = cand; ie->rc = 0; ie->freelink = VtsID_INVALID; cand->id = ii; return ii;
2237 static void show_vts_stats ( HChar* caller )
2239 UWord nSet, nTab, nLive;
2242 nSet = VG_(sizeFM)( vts_set );
2243 nTab = VG_(sizeXA)( vts_tab );
2246 n = VG_(sizeXA)( vts_tab );
2247 for (i = 0; i < n; i++) {
2248 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2251 totrc += (ULong)ie->rc;
2253 tl_assert(ie->rc == 0);
2256 VG_(printf)(" show_vts_stats %s\n", caller);
2257 VG_(printf)(" vts_tab size %4lu\n", nTab);
2258 VG_(printf)(" vts_tab live %4lu\n", nLive);
2259 VG_(printf)(" vts_set size %4lu\n", nSet);
2260 VG_(printf)(" total rc %4llu\n", totrc);
2263 /* NOT TO BE CALLED FROM WITHIN libzsm. */
2264 __attribute__((noinline))
2265 static void vts_tab__do_GC ( Bool show_stats )
2267 UWord i, nTab, nLive, nFreed;
2269 /* check this is actually necessary. */
2270 tl_assert(vts_tab_freelist == VtsID_INVALID);
2272 /* empty the caches for partial order checks and binary joins. We
2273 could do better and prune out the entries to be deleted, but it
2274 ain't worth the hassle. */
2275 VtsID__invalidate_caches();
2277 /* First, make the reference counts up to date. */
2280 nTab = VG_(sizeXA)( vts_tab );
2283 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2284 show_vts_stats("before GC");
2287 /* Now we can inspect the entire vts_tab. Any entries
2288 with zero .rc fields are now no longer in use and can be
2289 put back on the free list, removed from vts_set, and deleted. */
2291 for (i = 0; i < nTab; i++) {
2293 UWord oldK = 0, oldV = 0;
2294 VtsTE* te = VG_(indexXA)( vts_tab, i );
2295 if (te->vts == NULL) {
2296 tl_assert(te->rc == 0);
2297 continue; /* already on the free list (presumably) */
2300 continue; /* in use */
2301 /* Ok, we got one we can free. */
2302 tl_assert(te->vts->id == i);
2303 /* first, remove it from vts_set. */
2304 present = VG_(delFromFM)( vts_set,
2305 &oldK, &oldV, (UWord)te->vts );
2306 tl_assert(present); /* else it isn't in vts_set ?! */
2307 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2308 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2309 /* now free the VTS itself */
2310 VTS__delete(te->vts);
2312 /* and finally put this entry on the free list */
2313 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2314 add_to_free_list( i );
2318 /* Now figure out when the next GC should be. We'll allow the
2319 number of VTSs to double before GCing again. Except of course
2320 that since we can't (or, at least, don't) shrink vts_tab, we
2321 can't set the threshold value smaller than it. */
2322 tl_assert(nFreed <= nTab);
2323 nLive = nTab - nFreed;
2324 tl_assert(nLive >= 0 && nLive <= nTab);
2325 vts_next_GC_at = 2 * nLive;
2326 if (vts_next_GC_at < nTab)
2327 vts_next_GC_at = nTab;
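/* Worked example of the update above (illustrative numbers): if
   nTab == 1000 and nFreed == 400, then nLive == 600 and
   vts_next_GC_at == 1200, which already exceeds nTab.  If instead
   nFreed == 0, vts_next_GC_at == 2000, so the table may double in
   size before the next GC attempt. */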
2330 show_vts_stats("after GC");
2331 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2334 if (VG_(clo_stats)) {
2335 static UInt ctr = 0;
2336 tl_assert(nTab > 0);
2337 VG_(message)(Vg_DebugMsg,
2338 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
2339 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
2344 /////////////////////////////////////////////////////////
2348 /////////////////////////////////////////////////////////
2350 //////////////////////////
2351 static ULong stats__cmpLEQ_queries = 0;
2352 static ULong stats__cmpLEQ_misses = 0;
2353 static ULong stats__join2_queries = 0;
2354 static ULong stats__join2_misses = 0;
2356 static inline UInt ROL32 ( UInt w, Int n ) {
2357 w = (w << n) | (w >> (32-n)); return w;
2360 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2361 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13); return hash % nTab;
2365 #define N_CMPLEQ_CACHE 1023
2367 struct { VtsID vi1; VtsID vi2; Bool leq; }
2368 cmpLEQ_cache[N_CMPLEQ_CACHE];
2370 #define N_JOIN2_CACHE 1023
2372 struct { VtsID vi1; VtsID vi2; VtsID res; }
2373 join2_cache[N_JOIN2_CACHE];
2375 static void VtsID__invalidate_caches ( void ) {
2377 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
2378 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
2379 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
2380 cmpLEQ_cache[i].leq = False;
2382 for (i = 0; i < N_JOIN2_CACHE; i++) {
2383 join2_cache[i].vi1 = VtsID_INVALID;
2384 join2_cache[i].vi2 = VtsID_INVALID;
2385 join2_cache[i].res = VtsID_INVALID;
2388 //////////////////////////
2390 //static Bool VtsID__is_valid ( VtsID vi ) {
2392 // if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2394 // ve = VG_(indexXA)( vts_tab, vi );
2397 // tl_assert(ve->vts->id == vi);
2401 static VTS* VtsID__to_VTS ( VtsID vi ) {
2402 VtsTE* te = VG_(indexXA)( vts_tab, vi ); tl_assert(te->vts); return te->vts;
2407 static void VtsID__pp ( VtsID vi ) {
2409 VTS* vts = VtsID__to_VTS(vi);
2410 VTS__show( buf, sizeof(buf)-1, vts );
2411 buf[sizeof(buf)-1] = 0;
2412 VG_(printf)("%s", buf);
2415 /* compute partial ordering relation of vi1 and vi2. */
2416 __attribute__((noinline))
2417 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
2421 //if (vi1 == vi2) return True;
2422 tl_assert(vi1 != vi2);
2424 stats__cmpLEQ_queries++;
2425 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
2426 if (cmpLEQ_cache[hash].vi1 == vi1
2427 && cmpLEQ_cache[hash].vi2 == vi2)
2428 return cmpLEQ_cache[hash].leq;
2429 stats__cmpLEQ_misses++;
2431 v1 = VtsID__to_VTS(vi1);
2432 v2 = VtsID__to_VTS(vi2);
2433 leq = VTS__cmpLEQ( v1, v2 ) == NULL;
2435 cmpLEQ_cache[hash].vi1 = vi1;
2436 cmpLEQ_cache[hash].vi2 = vi2;
2437 cmpLEQ_cache[hash].leq = leq; return leq;
2441 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
2442 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
2445 /* compute binary join */
2446 __attribute__((noinline))
2447 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2450 VTS *vts1, *vts2, *nyu;
2451 //if (vi1 == vi2) return vi1;
2452 tl_assert(vi1 != vi2);
2454 stats__join2_queries++;
2455 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2456 if (join2_cache[hash].vi1 == vi1
2457 && join2_cache[hash].vi2 == vi2)
2458 return join2_cache[hash].res;
2459 stats__join2_misses++;
2461 vts1 = VtsID__to_VTS(vi1);
2462 vts2 = VtsID__to_VTS(vi2);
2463 nyu = VTS__join(vts1,vts2);
2464 res = vts_tab__find_and_dealloc__or_add(nyu);
2466 join2_cache[hash].vi1 = vi1;
2467 join2_cache[hash].vi2 = vi2;
2468 join2_cache[hash].res = res; return res;
2472 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2473 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
2476 /* create a singleton VTS, namely [thr:1] */
2477 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2478 VTS* nyu = VTS__singleton(thr,tym);
2479 return vts_tab__find_and_dealloc__or_add(nyu);
2482 /* tick operation, creates value 1 if specified index is absent */
2483 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2484 VTS* vts = VtsID__to_VTS(vi);
2485 VTS* nyu = VTS__tick(idx,vts);
2486 return vts_tab__find_and_dealloc__or_add(nyu);
2489 /* index into a VTS (only for assertions) */
2490 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2491 VTS* vts = VtsID__to_VTS(vi);
2492 return VTS__indexAt_SLOW( vts, idx );
2495 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
2496 any, really) element in vi1 which is pointwise greater-than the
2497 corresponding element in vi2. If no such element exists, return
2498 NULL. This needs to be fairly quick since it is called every time
2499 a race is detected. */
2500 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
2504 tl_assert(vi1 != vi2);
2505 vts1 = VtsID__to_VTS(vi1);
2506 vts2 = VtsID__to_VTS(vi2);
2507 tl_assert(vts1 != vts2);
2508 diffthr = VTS__cmpLEQ(vts1, vts2);
2509 tl_assert(diffthr); /* else they are LEQ ! */ return diffthr;
2514 /////////////////////////////////////////////////////////
2518 /////////////////////////////////////////////////////////
2521 #define FI_LINE_SZB_LOG2 5
2522 #define FI_NUM_LINES_LOG2 10
2524 #define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
2525 #define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
2527 #define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
2528 #define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
2530 #define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
2531 & (Addr)(FI_NUM_LINES-1) )
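/* Worked example of the decomposition above (illustrative address):
   with FI_LINE_SZB == 32 and FI_NUM_LINES == 1024, the address
   0x0001F4A7 has tag FI_GET_TAG == 0x0001F4A0 (low 5 bits cleared)
   and FI_GET_LINENO == (0x0001F4A7 >> 5) & 1023 == 0x3A5. */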
2534 /* In the lines, each 8 bytes are treated individually, and are mapped
2535 to a UShort. Regardless of endianness of the underlying machine,
2536 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
2537 the highest address.
2539 Of each bit pair, the higher-numbered bit is set if an R has been
2540 seen, so the actual layout is:
2544 R W for addr+7 ... R W for addr+0
2546 So a mask for the R-bits is 0xAAAA and for the W-bits is 0x5555. */
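/* Illustrative sketch (hypothetical helper, never called):
   spot-checking the mask arithmetic that the handlers below rely
   on. */
#if 0
static void example_filter_masks ( void )
{
   /* both the R and W bits of byte 3: 0x3 << 6 == 0x00C0 */
   tl_assert((UShort)(0x3 << (2 * (3 & 7))) == 0x00C0);
   /* R bits only of the 2-byte chunk at offset 4: 0xA << 8 == 0x0A00 */
   tl_assert((UShort)(0xA << (2 * (4 & 6))) == 0x0A00);
}
#endif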
2549 /* tags are separated from lines. tags are Addrs and are
2550 the base address of the line. */
2553 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
2559 Addr tags[FI_NUM_LINES];
2560 FiLine lines[FI_NUM_LINES];
2564 /* Forget everything we know -- clear the filter and let everything
2565 through. This needs to be as fast as possible, since it is called
2566 every time the running thread changes, and every time a thread's
2567 vector clocks change, which can be quite frequent. The obvious
2568 fast way to do this is simply to stuff in tags which we know are
2569 not going to match anything, since they're not aligned to the start of a line. */
2571 static void Filter__clear ( Filter* fi, HChar* who )
2574 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
2575 for (i = 0; i < FI_NUM_LINES; i += 8) {
2576 fi->tags[i+0] = 1; /* impossible value -- cannot match */
2585 tl_assert(i == FI_NUM_LINES);
2588 /* Clearing an arbitrary range in the filter. Unfortunately
2589 we have to do this due to core-supplied new/die-mem events. */
2591 static void Filter__clear_1byte ( Filter* fi, Addr a )
2593 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2594 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2595 FiLine* line = &fi->lines[lineno];
2596 UWord loff = (a - atag) / 8;
2597 UShort mask = 0x3 << (2 * (a & 7));
2598 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
2599 if (LIKELY( fi->tags[lineno] == atag )) {
2600 /* hit. clear the bits. */
2601 UShort u16 = line->u16s[loff];
2602 line->u16s[loff] = u16 & ~mask; /* clear them */
2604 /* miss. The filter doesn't hold this address, so ignore. */
2608 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
2610 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2611 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2612 FiLine* line = &fi->lines[lineno];
2613 UWord loff = (a - atag) / 8;
2614 if (LIKELY( fi->tags[lineno] == atag )) {
2615 line->u16s[loff] = 0;
2617 /* miss. The filter doesn't hold this address, so ignore. */
2621 static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
2623 //VG_(printf)("%lu ", len);
2624 /* slowly do part preceding 8-alignment */
2625 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
2626 Filter__clear_1byte( fi, a );
2632 Filter__clear_8bytes_aligned( fi, a );
2636 /* slowly do tail */
2637 while (UNLIKELY(len > 0)) {
2638 Filter__clear_1byte( fi, a );
2645 /* ------ Read handlers for the filter. ------ */
2647 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
2649 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
2652 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2653 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2654 FiLine* line = &fi->lines[lineno];
2655 UWord loff = (a - atag) / 8;
2656 UShort mask = 0xAAAA;
2657 if (LIKELY( fi->tags[lineno] == atag )) {
2658 /* hit. check line and update. */
2659 UShort u16 = line->u16s[loff];
2660 Bool ok = (u16 & mask) == mask; /* all R bits set? */
2661 line->u16s[loff] = u16 | mask; /* set them */
2664 /* miss. nuke existing line and re-use it. */
2666 fi->tags[lineno] = atag;
2667 for (i = 0; i < FI_LINE_SZB / 8; i++)
2669 line->u16s[loff] = mask;
2675 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
2677 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
2680 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2681 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2682 FiLine* line = &fi->lines[lineno];
2683 UWord loff = (a - atag) / 8;
2684 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
2685 if (LIKELY( fi->tags[lineno] == atag )) {
2686 /* hit. check line and update. */
2687 UShort u16 = line->u16s[loff];
2688 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
2689 line->u16s[loff] = u16 | mask; /* set them */
2692 /* miss. nuke existing line and re-use it. */
2694 fi->tags[lineno] = atag;
2695 for (i = 0; i < FI_LINE_SZB / 8; i++)
2697 line->u16s[loff] = mask;
2703 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
2705 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
2708 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2709 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2710 FiLine* line = &fi->lines[lineno];
2711 UWord loff = (a - atag) / 8;
2712 UShort mask = 0xA << (2 * (a & 6));
2713 /* mask is A000, 0A00, 00A0 or 000A */
2714 if (LIKELY( fi->tags[lineno] == atag )) {
2715 /* hit. check line and update. */
2716 UShort u16 = line->u16s[loff];
2717 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
2718 line->u16s[loff] = u16 | mask; /* set them */
2721 /* miss. nuke existing line and re-use it. */
2723 fi->tags[lineno] = atag;
2724 for (i = 0; i < FI_LINE_SZB / 8; i++)
2726 line->u16s[loff] = mask;
2732 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
2735 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2736 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2737 FiLine* line = &fi->lines[lineno];
2738 UWord loff = (a - atag) / 8;
2739 UShort mask = 0x2 << (2 * (a & 7));
2740 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
2741 if (LIKELY( fi->tags[lineno] == atag )) {
2742 /* hit. check line and update. */
2743 UShort u16 = line->u16s[loff];
2744 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
2745 line->u16s[loff] = u16 | mask; /* set them */
2748 /* miss. nuke existing line and re-use it. */
2750 fi->tags[lineno] = atag;
2751 for (i = 0; i < FI_LINE_SZB / 8; i++)
2753 line->u16s[loff] = mask;
2760 /* ------ Write handlers for the filter. ------ */
2762 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
2764 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
2767 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2768 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2769 FiLine* line = &fi->lines[lineno];
2770 UWord loff = (a - atag) / 8;
2771 UShort mask = 0xFFFF;
2772 if (LIKELY( fi->tags[lineno] == atag )) {
2773 /* hit. check line and update. */
2774 UShort u16 = line->u16s[loff];
2775 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
2776 line->u16s[loff] = u16 | mask; /* set them */
2779 /* miss. nuke existing line and re-use it. */
2781 fi->tags[lineno] = atag;
2782 for (i = 0; i < FI_LINE_SZB / 8; i++)
2784 line->u16s[loff] = mask;
2790 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
2792 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
2795 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2796 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2797 FiLine* line = &fi->lines[lineno];
2798 UWord loff = (a - atag) / 8;
2799 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
2800 if (LIKELY( fi->tags[lineno] == atag )) {
2801 /* hit. check line and update. */
2802 UShort u16 = line->u16s[loff];
2803 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
2804 line->u16s[loff] = u16 | mask; /* set them */
2807 /* miss. nuke existing line and re-use it. */
2809 fi->tags[lineno] = atag;
2810 for (i = 0; i < FI_LINE_SZB / 8; i++)
2812 line->u16s[loff] = mask;
2818 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
2820 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
2823 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2824 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2825 FiLine* line = &fi->lines[lineno];
2826 UWord loff = (a - atag) / 8;
2827 UShort mask = 0xF << (2 * (a & 6));
2828 /* mask is F000, 0F00, 00F0 or 000F */
2829 if (LIKELY( fi->tags[lineno] == atag )) {
2830 /* hit. check line and update. */
2831 UShort u16 = line->u16s[loff];
2832 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
2833 line->u16s[loff] = u16 | mask; /* set them */
2836 /* miss. nuke existing line and re-use it. */
2838 fi->tags[lineno] = atag;
2839 for (i = 0; i < FI_LINE_SZB / 8; i++)
2841 line->u16s[loff] = mask;
2847 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
2850 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2851 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2852 FiLine* line = &fi->lines[lineno];
2853 UWord loff = (a - atag) / 8;
2854 UShort mask = 0x3 << (2 * (a & 7));
2855 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
2856 if (LIKELY( fi->tags[lineno] == atag )) {
2857 /* hit. check line and update. */
2858 UShort u16 = line->u16s[loff];
2859 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
2860 line->u16s[loff] = u16 | mask; /* set them */
2863 /* miss. nuke existing line and re-use it. */
2865 fi->tags[lineno] = atag;
2866 for (i = 0; i < FI_LINE_SZB / 8; i++)
2868 line->u16s[loff] = mask;
2875 /////////////////////////////////////////////////////////
2879 /////////////////////////////////////////////////////////
2881 // QQQ move this somewhere else
2882 typedef struct { ULong ull; ExeContext* ec; } ULong_n_EC;
2884 /* How many of the above records to collect for each thread? Older
2885 ones are dumped when we run out of space. 62.5k requires 1MB per
2886 thread, since each ULong_n_EC record is 16 bytes long. When more
2887 than N_KWs_N_STACKs_PER_THREAD are present, the older half are
2888 deleted to make space. Hence in the worst case we will be able to
2889 produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
2890 Kw transitions (segments in this thread). For the current setting
2891 that gives a guaranteed stack for at least the last 31.25k segments. */
2893 #define N_KWs_N_STACKs_PER_THREAD 62500
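/* (Checking the arithmetic above: 62500 records * 16 bytes/record
   == 1,000,000 bytes per thread, and 62500 / 2 == 31250 guaranteed
   Kw transitions after a worst-case discard.) */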
2897 /* Current VTSs for this thread. They change as we go along. viR
2898 is the VTS to be used for reads, viW for writes. Usually they
2899 are the same, but can differ when we deal with reader-writer
2900 locks. It is always the case that
2901 VtsID__cmpLEQ(viW,viR) == True
2902 that is, viW must be the same, or lagging behind, viR. */
2906 /* Is initially False, and is set to true after the thread really
2907 has done a low-level exit. */
2910 /* A filter that removes references for which we believe that
2911 msmcread/msmcwrite will not change the state, nor report a race. */
2915 /* opaque (to us) data we hold on behalf of the library's user. */
2918 /* The ULongs (scalar Kws) in this accumulate in strictly
2919 increasing order, without duplicates. This is important because
2920 we need to be able to find a given scalar Kw in this array
2921 later, by binary search. */
2922 XArray* /* ULong_n_EC */ local_Kws_n_stacks;
2925 static Thr* Thr__new ( void ) {
2926 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2927 thr->viR = VtsID_INVALID;
2928 thr->viW = VtsID_INVALID;
2929 thr->still_alive = True;
2930 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
2931 /* We only really need this at history level 1, but unfortunately
2932 this routine is called before the command line processing is
2933 done (sigh), so we can't rely on HG_(clo_history_level) at this
2934 point. Hence always allocate it. Bah. */
2935 thr->local_Kws_n_stacks
2936 = VG_(newXA)( HG_(zalloc),
2937 "libhb.Thr__new.3 (local_Kws_and_stacks)",
2938 HG_(free), sizeof(ULong_n_EC) );
2942 static void note_local_Kw_n_stack_for ( Thr* thr )
2948 // We only collect this info at history level 1 (approx)
2949 if (HG_(clo_history_level) != 1)
2952 /* This is the scalar Kw for thr. */
2953 pair.ull = VtsID__indexAt( thr->viW, thr );
2954 pair.ec = main_get_EC( thr );
2956 tl_assert(thr->local_Kws_n_stacks);
2958 /* check that we're not adding duplicates */
2959 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
2961 /* Throw away old stacks, if necessary. We can't accumulate stuff indefinitely. */
2963 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
2964 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
2965 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
2967 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
2968 thr, pair.ull, pair.ec );
2972 ULong_n_EC* prevPair
2973 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
2974 tl_assert( prevPair->ull <= pair.ull );
2980 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
2983 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
2984 thr, pair.ull, pair.ec );
2986 VG_(pp_ExeContext)(pair.ec);
2989 static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 )
2991 if (pair1->ull < pair2->ull) return -1;
2992 if (pair1->ull > pair2->ull) return 1; return 0;
2997 /////////////////////////////////////////////////////////
3001 /////////////////////////////////////////////////////////
3003 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by
3004 // hb_zsm.h. We have to do everything else here.
3006 /* SVal is a 64-bit unsigned int.
3008 <---------30---------> <---------30--------->
3009 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
3010 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
3011 11 0--------------------0 00 0--------------------0 A: SVal_INVALID */
3014 #define SVAL_TAGMASK (3ULL << 62)
3016 static inline Bool SVal__isC ( SVal s ) {
3017 return (0ULL << 62) == (s & SVAL_TAGMASK);
3019 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
3020 //tl_assert(VtsID__is_valid(rmini));
3021 //tl_assert(VtsID__is_valid(wmini));
3022 return (((ULong)rmini) << 32) | ((ULong)wmini);
3024 static inline VtsID SVal__unC_Rmin ( SVal s ) {
3025 tl_assert(SVal__isC(s));
3026 return (VtsID)(s >> 32);
3028 static inline VtsID SVal__unC_Wmin ( SVal s ) {
3029 tl_assert(SVal__isC(s));
3030 return (VtsID)(s & 0xFFFFFFFFULL);
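/* A minimal round-trip sketch (hypothetical helper) implied by the
   layout above; it assumes both VtsIDs fit in 30 bits, so the tag
   bits 63..62 stay 00. */
#if 0
static void example_SVal_roundtrip ( VtsID r, VtsID w )
{
   SVal sv = SVal__mkC( r, w );
   tl_assert(SVal__isC(sv));
   tl_assert(SVal__unC_Rmin(sv) == r);
   tl_assert(SVal__unC_Wmin(sv) == w);
}
#endif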
3033 static inline Bool SVal__isA ( SVal s ) {
3034 return (2ULL << 62) == (s & SVAL_TAGMASK);
3036 static inline SVal SVal__mkA ( void ) { return 2ULL << 62; }
3040 /* Direct callback from lib_zsm. */
3041 static void SVal__rcinc ( SVal s ) {
3043 VtsID__rcinc( SVal__unC_Rmin(s) );
3044 VtsID__rcinc( SVal__unC_Wmin(s) );
3048 /* Direct callback from lib_zsm. */
3049 static void SVal__rcdec ( SVal s ) {
3051 VtsID__rcdec( SVal__unC_Rmin(s) );
3052 VtsID__rcdec( SVal__unC_Wmin(s) );
3057 /////////////////////////////////////////////////////////
3059 // A simple group (memory) allocator //
3061 /////////////////////////////////////////////////////////
3063 //////////////// BEGIN general group allocator
3066 UWord elemSzB; /* element size */
3067 UWord nPerGroup; /* # elems per group */
3068 void* (*alloc)(HChar*, SizeT); /* group allocator */
3069 HChar* cc; /* group allocator's cc */
3070 void (*free)(void*); /* group allocator's free-er (unused) */
3071 /* XArray of void* (pointers to groups). The groups themselves.
3072 Each element is a pointer to a block of size (elemSzB *
3073 nPerGroup) bytes. */ XArray* groups;
3075 /* next free element. Is a pointer to an element in one of the
3076 groups pointed to by .groups. */ void* nextFree;
3081 static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
3084 UWord elemSzB, UWord nPerGroup, void* (*alloc)(HChar*, SizeT), HChar* cc,
3086 void (*free)(void*) )
3088 tl_assert(0 == (elemSzB % sizeof(UWord)));
3089 tl_assert(elemSzB >= sizeof(UWord));
3090 tl_assert(nPerGroup >= 100); /* let's say */
3095 VG_(memset)(ga, 0, sizeof(*ga));
3096 ga->elemSzB = elemSzB;
3097 ga->nPerGroup = nPerGroup;
3102 ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
3103 ga->nextFree = NULL;
3104 tl_assert(ga->groups);
3107 /* The freelist is empty. Allocate a new group and put all the new
3108 elements in it onto the freelist. */
3109 __attribute__((noinline))
3110 static void gal_add_new_group ( GroupAlloc* ga )
3115 tl_assert(ga->nextFree == NULL);
3116 group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
3118 /* extend the freelist through the new group. Place the freelist
3119 pointer in the first word of each element. That's why the
3120 element size must be at least one word. */
3121 for (i = ga->nPerGroup-1; i >= 0; i--) {
3122 UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
3123 UWord* elem = (UWord*)elemC;
3124 tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
3125 *elem = (UWord)ga->nextFree;
3126 ga->nextFree = elem;
3128 /* and add to our collection of groups */
3129 VG_(addToXA)( ga->groups, &group );
3132 inline static void* gal_Alloc ( GroupAlloc* ga )
3135 if (UNLIKELY(ga->nextFree == NULL)) {
3136 gal_add_new_group(ga);
3138 elem = ga->nextFree;
3139 ga->nextFree = (void*)*elem;
3140 *elem = 0; /* unnecessary, but just to be on the safe side */ return (void*)elem;
3144 inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
3146 tl_assert(n == ga->elemSzB);
3147 return gal_Alloc( ga );
3150 inline static void gal_Free ( GroupAlloc* ga, void* p )
3152 UWord* elem = (UWord*)p;
3153 *elem = (UWord)ga->nextFree;
3154 ga->nextFree = elem;
3156 //////////////// END general group allocator
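/* Illustrative usage sketch of the allocator above (all names here
   are hypothetical): */
#if 0
typedef struct { UWord w1; UWord w2; } Example2W;
static GroupAlloc example_ga;
static void example_group_alloc_usage ( void )
{
   Example2W* p;
   init_GroupAlloc( &example_ga, sizeof(Example2W),
                    1000/*per group*/, HG_(zalloc),
                    "example.ga", HG_(free) );
   p = gal_Alloc( &example_ga );
   p->w1 = p->w2 = 0;
   gal_Free( &example_ga, p ); /* back onto the freelist */
}
#endif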
3159 /////////////////////////////////////////////////////////
3161 // Change-event map2 //
3163 /////////////////////////////////////////////////////////
3165 #define EVENT_MAP_GC_DISCARD_FRACTION 0.5
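/* (Illustrative consequence: with HG_(clo_conflict_cache_size) ==
   1,000,000, the event-map GC below discards oldest generations
   until discarding one more would leave 500,000 or fewer
   entries.) */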
3167 /* This is in two parts:
3169 1. A hash table of RCECs. This is a set of reference-counted stack
3170 traces. When the reference count of a stack trace becomes zero,
3171 it is removed from the set and freed up. The intent is to have
3172 a set of stack traces which can be referred to from (2), but to
3173 only represent each one once. The set is indexed/searched by
3174 ordering on the stack trace vectors.
3176 2. A SparseWA of OldRefs. These store information about each old
3177 ref that we need to record. It is indexed by address of the
3178 location for which the information is recorded. For LRU
3179 purposes, each OldRef also contains a generation number,
3180 indicating when it was most recently accessed.
3182 The important part of an OldRef is, however, its accs[] array.
3183 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
3184 size) triples to RCECs. This allows us to collect the last
3185 access-traceback by up to N_OLDREF_ACCS different triples for
3186 this location. The accs[] array is a MTF-array. If a binding
3187 falls off the end, that's too bad -- we will lose info about
3188 that triple's access to this location.
3190 When the SparseWA becomes too big, we can throw away the OldRefs
3191 whose generation numbers are below some threshold; hence doing
3192 approximate LRU discarding. For each discarded OldRef we must
3193 of course decrement the reference count on the all RCECs it
3194 refers to, in order that entries from (1) eventually get
3197 A major improvement in reliability of this mechanism would be to
3198 have a dynamically sized OldRef.accs[] array, so no entries ever
3199 fall off the end. In investigations (Dec 08) it appears that a
3200 major cause for the non-availability of conflicting-access traces
3201 in race reports is caused by the fixed size of this array. I
3202 suspect for most OldRefs, only a few entries are used, but for a
3203 minority of cases there is an overflow, leading to info lossage.
3204 Investigations also suggest this is very workload and scheduling
3205 sensitive. Therefore a dynamic sizing would be better.
3207 However, dynamic sizing would defeat the use of a GroupAllocator
3208 for OldRef structures. And that's important for performance. So
3209 it's not straightforward to do. */
3213 static UWord stats__ctxt_rcdec1 = 0;
3214 static UWord stats__ctxt_rcdec2 = 0;
3215 static UWord stats__ctxt_rcdec3 = 0;
3216 static UWord stats__ctxt_rcdec_calls = 0;
3217 static UWord stats__ctxt_rcdec_discards = 0;
3218 static UWord stats__ctxt_rcdec1_eq = 0;
3220 static UWord stats__ctxt_tab_curr = 0;
3221 static UWord stats__ctxt_tab_max = 0;
3223 static UWord stats__ctxt_tab_qs = 0;
3224 static UWord stats__ctxt_tab_cmps = 0;
3227 ///////////////////////////////////////////////////////
3228 //// Part (1): A hash table of RCECs
3233 // (UInt) `echo "Reference Counted Execution Context" | md5sum`
3234 #define RCEC_MAGIC 0xab88abb2UL
3236 //#define N_RCEC_TAB 98317 /* prime */
3237 #define N_RCEC_TAB 196613 /* prime */
3241 UWord magic; /* sanity check only */
3244 UWord rcX; /* used for crosschecking */
3245 UWord frames_hash; /* hash of all the frames */
3246 UWord frames[N_FRAMES];
3250 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3253 /* Gives an arbitrary total order on RCEC .frames fields */
3254 static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3256 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3257 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
3258 if (ec1->frames_hash < ec2->frames_hash) return -1;
3259 if (ec1->frames_hash > ec2->frames_hash) return 1;
3260 for (i = 0; i < N_FRAMES; i++) {
3261 if (ec1->frames[i] < ec2->frames[i]) return -1;
3262 if (ec1->frames[i] > ec2->frames[i]) return 1;
3268 /* Dec the ref of this RCEC. */
3269 static void ctxt__rcdec ( RCEC* ec )
3271 stats__ctxt_rcdec_calls++;
3272 tl_assert(ec && ec->magic == RCEC_MAGIC);
3273 tl_assert(ec->rc > 0);
3277 static void ctxt__rcinc ( RCEC* ec )
3279 tl_assert(ec && ec->magic == RCEC_MAGIC);
3284 //////////// BEGIN RCEC group allocator
3285 static GroupAlloc rcec_group_allocator;
3287 static RCEC* alloc_RCEC ( void ) {
3288 return gal_Alloc ( &rcec_group_allocator );
3291 static void free_RCEC ( RCEC* rcec ) {
3292 tl_assert(rcec->magic == RCEC_MAGIC);
3293 gal_Free( &rcec_group_allocator, rcec );
3295 //////////// END RCEC group allocator
3298 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
3299 move it one step closer to the front of the list, so as to make
3300 subsequent searches for it cheaper. */
3301 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3303 RCEC *ec0, *ec1, *ec2;
3305 if (ec == *headp) tl_assert(0); /* already at head of list */
3306 tl_assert(ec != NULL);
3311 if (ec0 == NULL || ec0 == ec) break;
3316 tl_assert(ec0 == ec);
3317 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3319 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3320 predecessor. Swap ec0 and ec1, that is, move ec0 one step
3321 closer to the start of the list. */
3322 tl_assert(ec2->next == ec1);
3323 tl_assert(ec1->next == ec0);
3330 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3331 /* it's second in the list. */
3332 tl_assert(*headp == ec1);
3333 tl_assert(ec1->next == ec0);
3334 ec1->next = ec0->next;
3341 /* Find the given RCEC in the tree, and return a pointer to it. Or,
3342 if not present, add the given one to the tree (by making a copy of
3343 it, so the caller can immediately deallocate the original) and
3344 return a pointer to the copy. The caller can safely have 'example'
3345 on its stack, since we will always return a pointer to a copy of
3346 it, not to the original. Note that the inserted node will have .rc
3347 of zero and so the caller must immediately increment it. */
3348 __attribute__((noinline))
3349 static RCEC* ctxt__find_or_add ( RCEC* example )
3353 tl_assert(example && example->magic == RCEC_MAGIC);
3354 tl_assert(example->rc == 0);
3356 /* Search the hash table to see if we already have it. */
3357 stats__ctxt_tab_qs++;
3358 hent = example->frames_hash % N_RCEC_TAB;
3359 copy = contextTab[hent];
3362 tl_assert(copy->magic == RCEC_MAGIC);
3363 stats__ctxt_tab_cmps++;
3364 if (0 == RCEC__cmp_by_frames(copy, example)) break;
3369 tl_assert(copy != example);
3370 /* optimisation: if it's not at the head of its list, move 1
3371 step fwds, to make future searches cheaper */
3372 if (copy != contextTab[hent]) {
3373 move_RCEC_one_step_forward( &contextTab[hent], copy );
3376 copy = alloc_RCEC();
3377 tl_assert(copy != example);
3379 copy->next = contextTab[hent];
3380 contextTab[hent] = copy;
3381 stats__ctxt_tab_curr++;
3382 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
3383 stats__ctxt_tab_max = stats__ctxt_tab_curr;
3388 static inline UWord ROLW ( UWord w, Int n )
3390 Int bpw = 8 * sizeof(UWord);
3391 w = (w << n) | (w >> (bpw-n)); return w;
3395 __attribute__((noinline))
3396 static RCEC* get_RCEC ( Thr* thr )
3400 example.magic = RCEC_MAGIC;
3403 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
3405 for (i = 0; i < N_FRAMES; i++) {
3406 hash ^= example.frames[i];
3407 hash = ROLW(hash, 19);
3409 example.frames_hash = hash;
3410 return ctxt__find_or_add( &example );
3413 ///////////////////////////////////////////////////////
3415 /// A SparseWA guest-addr -> OldRef, that refers to (1)
3418 // (UInt) `echo "Old Reference Information" | md5sum`
3419 #define OldRef_MAGIC 0x30b1f075UL
3421 /* Records an access: a thread and a context. The size
3422 (1,2,4,8) and read-or-writeness are also encoded as
3423 follows: bottom bit of .thr is 1 if write, 0 if read
3424 bottom 2 bits of .rcec encode the size:
3425 00 = 1, 01 = 2, 10 = 4, 11 = 8 */
3427 typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
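/* Illustrative sketch (hypothetical helper) of the tagging scheme
   described above: a 4-byte write is stored as (thr|1, rcec|2). */
#if 0
static void example_acc_encoding ( Thr* thr, RCEC* rcec )
{
   Thr_n_RCEC acc;
   acc.thr  = ptr_or_UWord( thr, 1 );  /* low bit 1 == write */
   acc.rcec = ptr_or_UWord( rcec, 2 ); /* 10b == size 4 */
   tl_assert( ptr_and_UWord(acc.thr, ~3) == (void*)thr );
   tl_assert( 1 == (UWord)ptr_and_UWord(acc.thr, 1) );  /* isW */
   tl_assert( 2 == (UWord)ptr_and_UWord(acc.rcec, 3) ); /* szB==4 */
}
#endif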
3429 #define N_OLDREF_ACCS 5
3433 UWord magic; /* sanity check only */
3434 UWord gen; /* when most recently accessed */
3435 /* or free list when not in use */
3436 /* unused slots in this array have .thr == NULL */
3437 Thr_n_RCEC accs[N_OLDREF_ACCS];
3442 //////////// BEGIN OldRef group allocator
3443 static GroupAlloc oldref_group_allocator;
3445 static OldRef* alloc_OldRef ( void ) {
3446 return gal_Alloc ( &oldref_group_allocator );
3449 static void free_OldRef ( OldRef* r ) {
3450 tl_assert(r->magic == OldRef_MAGIC);
3451 gal_Free( &oldref_group_allocator, r );
3453 //////////// END OldRef group allocator
3456 static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
3457 static UWord oldrefGen = 0; /* current LRU generation # */
3458 static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
3459 static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
3461 inline static void* ptr_or_UWord ( void* p, UWord w ) {
3462 return (void*)( ((UWord)p) | ((UWord)w) );
3464 inline static void* ptr_and_UWord ( void* p, UWord w ) {
3465 return (void*)( ((UWord)p) & ((UWord)w) );
3468 inline static UInt min_UInt ( UInt a, UInt b ) {
3469 return a < b ? a : b;
3472 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
3473 first interval is lower, 1 if the first interval is higher, and 0
3474 if there is any overlap. Redundant paranoia with casting is there
3475 following what looked distinctly like a bug in gcc-4.1.2, in which
3476 some of the comparisons were done signedly instead of unsignedly. */
3478 /* Copied from exp-ptrcheck/sg_main.c */
3479 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
3480 Addr a2, SizeT n2 ) {
3481 UWord a1w = (UWord)a1;
3482 UWord n1w = (UWord)n1;
3483 UWord a2w = (UWord)a2;
3484 UWord n2w = (UWord)n2;
3485 tl_assert(n1w > 0 && n2w > 0);
3486 if (a1w + n1w <= a2w) return -1L;
3487 if (a2w + n2w <= a1w) return 1L; return 0;
3491 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
3499 rcec = get_RCEC( thr );
3502 /* encode the size and writeness of the transaction in the bottom
3503 two bits of thr and rcec. */
3504 thr = ptr_or_UWord(thr, isW ? 1 : 0);
3506 /* This doesn't look particularly branch-predictor friendly. */
3507 switch (szB) { case 1: rcec = ptr_or_UWord(rcec, 0); break;
3508 case 2: rcec = ptr_or_UWord(rcec, 1); break;
3509 case 4: rcec = ptr_or_UWord(rcec, 2); break;
3510 case 8: rcec = ptr_or_UWord(rcec, 3); break;
3511 default: tl_assert(0); }
3514 /* Look in the map to see if we already have this. */
3515 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
3519 /* We already have a record for this address. We now need to
3520 see if we have a stack trace pertaining to this (thread, R/W,
3522 tl_assert(keyW == a);
3523 ref = (OldRef*)valW;
3524 tl_assert(ref->magic == OldRef_MAGIC);
3527 for (i = 0; i < N_OLDREF_ACCS; i++) {
3528 if (ref->accs[i].thr != thr)
3530 /* since .thr encodes both the accessing thread and the
3531 read/writeness, we know now that at least those features
3532 of the access match this entry. So we just need to check
3533 the size indication. Do this by inspecting the lowest 2 bits of
3534 .rcec, which contain the encoded size info. */
3535 if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
3537 /* else we have a match, so stop looking. */
3541 if (i < N_OLDREF_ACCS) {
3542 /* thread 'thr' has an entry at index 'i'. Update it. */
3544 Thr_n_RCEC tmp = ref->accs[i-1];
3545 ref->accs[i-1] = ref->accs[i]; ref->accs[i] = tmp; i--;
3549 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
3550 stats__ctxt_rcdec1++;
3551 ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
3552 ref->accs[i].rcec = rcec;
3553 tl_assert(ref->accs[i].thr == thr);
3555 /* No entry for this (thread, R/W, size) triple. Shuffle all
3556 of them down one slot, and put the new entry at the start
3558 if (ref->accs[N_OLDREF_ACCS-1].thr) {
3559 /* the last slot is in use. We must dec the rc on the associated rcec. */
3561 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
3562 stats__ctxt_rcdec2++;
3563 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
3564 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
3565 ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
3567 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
3569 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
3570 ref->accs[j] = ref->accs[j-1];
3571 ref->accs[0].thr = thr;
3572 ref->accs[0].rcec = rcec;
3573 /* thr==NULL is used to signify an empty slot, so we can't add a NULL thr. */
3575 tl_assert(ptr_and_UWord(thr, ~3) != 0);
3578 ref->gen = oldrefGen;
3582 /* We don't have a record for this address. Create a new one. */
3583 if (oldrefTreeN >= oldrefGenIncAt) {
3585 oldrefGenIncAt = oldrefTreeN + 50000;
3586 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3587 oldrefGen, oldrefTreeN );
3590 ref = alloc_OldRef();
3591 ref->magic = OldRef_MAGIC;
3592 ref->gen = oldrefGen;
3593 ref->accs[0].rcec = rcec;
3594 ref->accs[0].thr = thr;
3595 /* thr==NULL is used to signify an empty slot, so we can't add a NULL thr. */
3597 tl_assert(ptr_and_UWord(thr, ~3) != 0);
3598 for (j = 1; j < N_OLDREF_ACCS; j++) {
3599 ref->accs[j].thr = NULL;
3600 ref->accs[j].rcec = NULL;
3602 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
3609 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
3610 /*OUT*/Thr** resThr,
3611 /*OUT*/SizeT* resSzB,
3612 /*OUT*/Bool* resIsW,
3613 Thr* thr, Addr a, SizeT szB, Bool isW )
3630 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
3632 toCheck[nToCheck++] = a;
3633 for (i = -7; i < (Word)szB; i++) {
3635 if (i != 0) toCheck[nToCheck++] = a + i;
3637 tl_assert(nToCheck <= 15);
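/* (E.g. for szB == 4 the loop above generates a-7 .. a-1 and
   a+1 .. a+3 plus 'a' itself == 11 candidates; the maximum of 15
   arises for szB == 8.) */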
3639 /* Now see if we can find a suitable matching event for
3640 any of the addresses in toCheck[0 .. nToCheck-1]. */
3641 for (j = 0; j < nToCheck; j++) {
3643 cand_a = toCheck[j];
3644 // VG_(printf)("test %ld %p\n", j, cand_a);
3646 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
3650 ref = (OldRef*)valW;
3651 tl_assert(keyW == cand_a);
3652 tl_assert(ref->magic == OldRef_MAGIC);
3653 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3660 for (i = 0; i < N_OLDREF_ACCS; i++) {
3661 Thr_n_RCEC* cand = &ref->accs[i];
3662 cand_thr = ptr_and_UWord(cand->thr, ~3);
3663 cand_rcec = ptr_and_UWord(cand->rcec, ~3);
3664 /* Decode the writeness from the bottom bit of .thr. */
3665 cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
3666 /* Decode the size from the bottom two bits of .rcec. */
3667 switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
3668 case 0: cand_szB = 1; break;
3669 case 1: cand_szB = 2; break;
3670 case 2: cand_szB = 4; break;
3671 case 3: cand_szB = 8; break;
3672 default: tl_assert(0);
3675 if (cand_thr == NULL)
3676 /* This slot isn't in use. Ignore it. */
3679 if (cand_thr == thr)
3680 /* This is an access by the same thread, but we're only
3681 interested in accesses from other threads. Ignore. */
3684 if ((!cand_isW) && (!isW))
3685 /* We don't want to report a read racing against another
3686 read; that's stupid. So in this case move on. */
3689 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
3690 /* No overlap with the access we're asking about. Ignore. */
3693 /* We have a match. Stop searching. */
3697 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
3699 if (i < N_OLDREF_ACCS) {
3701 /* return with success */
3702 tl_assert(cand_thr);
3703 tl_assert(cand_rcec);
3704 tl_assert(cand_rcec->magic == RCEC_MAGIC);
3705 tl_assert(cand_szB >= 1);
3706 /* Count how many non-zero frames we have. */
3707 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
3708 for (n = 0; n < maxNFrames; n++) {
3709 if (0 == cand_rcec->frames[n]) break;
3711 *resEC = VG_(make_ExeContext_from_StackTrace)(cand_rcec->frames, n);
3718 /* consider next address in toCheck[] */
3719 } /* for (j = 0; j < nToCheck; j++) */
3721 /* really didn't find anything. */
3725 static void event_map_init ( void )
3729 /* Context (RCEC) group allocator */
3730 init_GroupAlloc ( &rcec_group_allocator,
3732 1000 /* RCECs per group */,
3734 "libhb.event_map_init.1 (RCEC groups)",
3738 tl_assert(!contextTab);
3739 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
3740 N_RCEC_TAB * sizeof(RCEC*) );
3741 tl_assert(contextTab);
3742 for (i = 0; i < N_RCEC_TAB; i++)
3743 contextTab[i] = NULL;
3745 /* Oldref group allocator */
3746 init_GroupAlloc ( &oldref_group_allocator,
3748 1000 /* OldRefs per group */,
3750 "libhb.event_map_init.3 (OldRef groups)",
3754 tl_assert(!oldrefTree);
3755 oldrefTree = VG_(newSWA)(
3757 "libhb.event_map_init.4 (oldref tree)",
3760 tl_assert(oldrefTree);
3767 static void event_map__check_reference_counts ( Bool before )
3775 /* Set the 'check' reference counts to zero. Also, optionally
3776 check that the real reference counts are non-zero. We allow
3777 these to fall to zero before a GC, but the GC must get rid of
3778 all those that are zero, hence none should be zero after a GC. */
3780 for (i = 0; i < N_RCEC_TAB; i++) {
3781 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3784 tl_assert(rcec->magic == RCEC_MAGIC);
3786 tl_assert(rcec->rc > 0);
3791 /* check that the stats are sane */
3792 tl_assert(nEnts == stats__ctxt_tab_curr);
3793 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3795 /* visit all the referencing points, inc check ref counts */
3796 VG_(initIterSWA)( oldrefTree );
3797 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
3798 oldref = (OldRef*)valW;
3799 tl_assert(oldref->magic == OldRef_MAGIC);
3800 for (i = 0; i < N_OLDREF_ACCS; i++) {
3801 Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
3802 RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
3805 tl_assert(aRef->magic == RCEC_MAGIC);
3813 /* compare check ref counts with actual */
3814 for (i = 0; i < N_RCEC_TAB; i++) {
3815 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3816 tl_assert(rcec->rc == rcec->rcX);
3821 __attribute__((noinline))
3822 static void event_map_maybe_GC ( void )
3825 UWord keyW, valW, retained, maxGen;
3829 UWord* genMap = NULL;
3830 UWord genMap_min = 0;
3831 UWord genMap_size = 0;
3833 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
3837 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3839 /* Check for sane command line params. Limit values must match
3840 those in hg_process_cmd_line_option. */
3841 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
3842 tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
3844 /* Check our counting is sane (expensive) */
3846 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
3848 /* Check the reference counts (expensive) */
3850 event_map__check_reference_counts( True/*before*/ );
3852 /* Compute the distribution of generation values in the ref tree.
3853 There are likely only to be a few different generation numbers
3854 in the whole tree, but we don't know what they are. Hence use a
3855 dynamically resized array of counters. The array is genMap[0
3856 .. genMap_size-1], where genMap[0] is the count for the
3857 generation number genMap_min, genMap[1] is the count for
3858 genMap_min+1, etc. If a new number is seen outside the range
3859 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3860 copied into a larger array, and genMap_min and genMap_size are
3861 adjusted accordingly. */
3863 /* genMap :: generation-number -> count-of-nodes-with-that-number */
3865 VG_(initIterSWA)( oldrefTree );
3866 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
3869 oldref = (OldRef*)valW;
3872 /* BEGIN find 'ea', which is the index in genMap holding the
3873 count for generation number 'key'. */
3874 if (UNLIKELY(genMap == NULL)) {
3875 /* deal with the first key to be seen, so that the following
3876 cases don't need to handle the complexity of a NULL count
3880 genMap = HG_(zalloc)( "libhb.emmG.1a",
3881 genMap_size * sizeof(UWord) );
3883 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
3884 key, genMap_min, genMap_min+genMap_size- 1 );
3887 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3888 /* this is the expected (almost-always-happens) case: 'key'
3889 is already mapped in the array. */
3890 ea = key - genMap_min;
3893 if (key < genMap_min) {
3894 /* 'key' appears before the start of the current array.
3895 Extend the current array by allocating a larger one and
3896 copying the current one to the upper end of it. */
3899 more = genMap_min - key;
3900 tl_assert(more > 0);
3901 map2 = HG_(zalloc)( "libhb.emmG.1b",
3902 (genMap_size + more) * sizeof(UWord) );
3903 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3904 HG_(free)( genMap );
3906 genMap_size += more;
3909 tl_assert(genMap_min == key);
3910 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
3911 key, genMap_min, genMap_min+genMap_size- 1 );
3914 /* 'key' appears after the end of the current array. Extend
3915 the current array by allocating a larger one and copying
3916 the current one to the lower end of it. */
3919 tl_assert(key >= genMap_min + genMap_size);
3920 more = key - (genMap_min + genMap_size) + 1;
3921 tl_assert(more > 0);
3922 map2 = HG_(zalloc)( "libhb.emmG.1c",
3923 (genMap_size + more) * sizeof(UWord) );
3924 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3925 HG_(free)( genMap );
3927 genMap_size += more;
3928 ea = genMap_size - 1;
3929 tl_assert(genMap_min + genMap_size - 1 == key);
3930 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
3931 key, genMap_min, genMap_min+genMap_size- 1 );
3933 /* END find 'ea' from 'key' */
3935 tl_assert(ea >= 0 && ea < genMap_size);
3936 /* and the whole point of this elaborate computation of 'ea' is .. */
3941 tl_assert(genMap_size > 0);
3943 /* Sanity check what we just computed */
3945 for (i = 0; i < genMap_size; i++) {
3946 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3947 i + genMap_min, genMap[i] );
3950 tl_assert(sum == oldrefTreeN);
3953 /* Figure out how many generations to throw away */
3954 retained = oldrefTreeN;
3957 for (i = 0; i < genMap_size; i++) {
3958 keyW = i + genMap_min;
3960 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3961 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3962 tl_assert(keyW >= maxGen);
3963 tl_assert(retained >= valW);
3965 if (retained - valW > (UWord)(HG_(clo_conflict_cache_size)
3966 * EVENT_MAP_GC_DISCARD_FRACTION)) {
3976 tl_assert(retained >= 0 && retained <= oldrefTreeN);
3978 /* Now make up a big list of the oldrefTree entries we want to
3979 delete. We can't simultaneously traverse the tree and delete
3980 stuff from it, so first we need to copy them off somewhere
3982 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
3983 HG_(free), sizeof(Addr) );
3985 if (retained < oldrefTreeN) {
3987 /* This is the normal (expected) case. We discard any ref whose
3988 generation number <= maxGen. */
3989 VG_(initIterSWA)( oldrefTree );
3990 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
3991 oldref = (OldRef*)valW;
3992 tl_assert(oldref->magic == OldRef_MAGIC);
3993 if (oldref->gen <= maxGen) {
3994 VG_(addToXA)( refs2del, &keyW );
3997 if (VG_(clo_stats)) {
3998 VG_(message)(Vg_DebugMsg,
3999 "libhb: EvM GC: delete generations %lu and below, "
4000 "retaining %lu entries\n",
4006 static UInt rand_seed = 0; /* leave as static */
4008 /* Degenerate case: there's only one generation in the entire
4009 tree, so we need to have some other way of deciding which
4010 refs to throw away. Just throw out half of them randomly. */
4011 tl_assert(retained == oldrefTreeN);
4012 VG_(initIterSWA)( oldrefTree );
4013 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
4015 oldref = (OldRef*)valW;
4016 tl_assert(oldref->magic == OldRef_MAGIC);
4017 n = VG_(random)( &rand_seed );
4018 if ((n & 0xFFF) < 0x800) {
4019 VG_(addToXA)( refs2del, &keyW );
4023 if (VG_(clo_stats)) {
4024 VG_(message)(Vg_DebugMsg,
4025 "libhb: EvM GC: randomly delete half the entries, "
4026 "retaining %lu entries\n",
4032 n2del = VG_(sizeXA)( refs2del );
4033 tl_assert(n2del == (Word)(oldrefTreeN - retained));
4035 if (0) VG_(printf)("%s","deleting entries\n");
4036 for (i = 0; i < n2del; i++) {
4038 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
4039 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
4041 tl_assert(keyW == ga2del);
4042 oldref = (OldRef*)valW;
4043 for (j = 0; j < N_OLDREF_ACCS; j++) {
4044 Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
4045 RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
4048 stats__ctxt_rcdec3++;
4049 ctxt__rcdec( aRef );
4055 free_OldRef( oldref );
4058 VG_(deleteXA)( refs2del );
4060 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
4062 oldrefTreeN = retained;
4063 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4065 /* Throw away all RCECs with zero reference counts */
4066 for (i = 0; i < N_RCEC_TAB; i++) {
4067 RCEC** pp = &contextTab[i];
4074 tl_assert(stats__ctxt_tab_curr > 0);
4075 stats__ctxt_tab_curr--;
4083 /* Check the reference counts (expensive) */
4085 event_map__check_reference_counts( False/*after*/ );
4088 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4089 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4094 /////////////////////////////////////////////////////////
4098 /////////////////////////////////////////////////////////
4100 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4101 Nov 08, and again after [...]. */
4104 static ULong stats__msmcread = 0;
4105 static ULong stats__msmcread_change = 0;
4106 static ULong stats__msmcwrite = 0;
4107 static ULong stats__msmcwrite_change = 0;
4109 /* Some notes on the H1 history mechanism:
4111 Transition rules are:
4113 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4114 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4116 After any access by a thread T to a location L, L's constraint pair
4117 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4119 After a race by thread T conflicting with some previous access by
4120 some other thread U, for a location with constraint (before
4121 processing the later access) (Cr,Cw), then Cw[U] is the segment in
4122 which the previous access lies.
4124 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4125 are compared so as to find out which thread(s) this access
4126 conflicts with. Once that is established, we also require the
4127 pre-update Cw for the location, so we can index into it for those
4128 threads, to get the scalar clock values for the point at which the
4129 former accesses were made. (In fact we only bother to do any of
4130 this for an arbitrarily chosen one of the conflicting threads, as
4131 that's simpler, it avoids flooding the user with vast amounts of
4132 mostly useless information, and because the program is wrong if it
4133 contains any races at all -- so we don't really need to show all
4134 conflicting access pairs initially, so long as we only show none if none exist).
4139 That requires the auxiliary proof that
4141 (Cr `join` Kw)[T] == Kw[T]
4143 Why should that be true? Because for any thread T, Kw[T] >= the
4144 scalar clock value for T known by any other thread. In other
4145 words, because T's value for its own scalar clock is at least as up
4146 to date as the value for it known by any other thread (that is true
4147 for both the R- and W- scalar clocks). Hence no other thread will
4148 be able to feed in a value for that element (indirectly via a
4149 constraint) which will exceed Kw[T], and hence the join cannot
4150 cause that particular element to advance. */
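/* Worked example of the rules above (illustrative values): suppose
   thread T has Kw == [T:4, U:7] and a location carries
   (Cr,Cw) == ([U:2], [U:2]).  A read by T yields
   (Cr, Cr `join` Kw) == ([U:2], [T:4, U:7]), so Cw[T] == 4 ==
   Kw[T] as claimed; a write instead yields
   ([T:4, U:7], [T:4, U:7]). */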
4153 __attribute__((noinline))
4154 static void record_race_info ( Thr* acc_thr,
4155 Addr acc_addr, SizeT szB, Bool isWrite,
4160 /* Call here to report a race. We just hand it onwards to
4161 HG_(record_error_Race). If that in turn discovers that the
4162 error is going to be collected, then, at history_level 2, that
4163 queries the conflicting-event map. The alternative would be to
4164 query it right here. But that causes a lot of pointless queries
4165 for errors which will shortly be discarded as duplicates, and
4166 can become a performance overhead; so we defer the query until
4167 we know the error is not a duplicate. */
4169 /* Stacks for the bounds of the (or one of the) conflicting
4170 segment(s). These are only set at history_level 1. */
4171 ExeContext* hist1_seg_start = NULL;
4172 ExeContext* hist1_seg_end = NULL;
4173 Thread* hist1_conf_thr = NULL;
4176 tl_assert(acc_thr->opaque);
4177 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4179 if (HG_(clo_history_level) == 1) {
4181 Word firstIx, lastIx;
4184 /* At history_level 1, we must round up the relevant stack-pair
4185 for the conflicting segment right now. This is because
4186 deferring it is complex; we can't (easily) put Kfailed and
4187 Cfailed into the XError and wait for later without
4188 getting tied up in difficulties with VtsID reference
4189 counting. So just do it now. */
4192 /* Which thread are we in conflict with? There may be more than
4193 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4194 (in fact it's the one with the lowest Thr* value). */
4195 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
4196 /* This must exist, since if it were NULL then there's no
4197 conflict (semantics of the return value of
4198 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4199 called us, just checked exactly this -- that there was in fact a race. */
4203 /* Get the scalar clock value that the conflicting thread
4204 introduced into the constraint. A careful examination of the
4205 base machine rules shows that this must be the same as the
4206 conflicting thread's scalar clock when it created this
4207 constraint. Hence we know the scalar clock of the
4208 conflicting thread when the conflicting access was made. */
4209 confTym = VtsID__indexAt( Cfailed, confThr );
4211 /* Using this scalar clock, index into the conflicting thread's
4212 collection of stack traces made each time its vector clock
4213 (hence its scalar clock) changed. This gives the stack
4214 traces at the start and end of the conflicting segment (well,
4215 as per comment just above, of one of the conflicting
4216 segments, if there are more than one). */
4219 /* tl_assert(confThr); -- asserted just above */
4220 tl_assert(confThr->local_Kws_n_stacks);
4221 firstIx = lastIx = 0;
4222 found = VG_(lookupXA_UNSAFE)(
4223 confThr->local_Kws_n_stacks,
4224 &key, &firstIx, &lastIx,
4225 (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong
4227 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
4228 "confTym %llu found %d (%lu,%lu)\n",
4229 Cfailed, Kfailed, Cw,
4230 confThr, confTym, found, firstIx, lastIx);
4231 /* We can't indefinitely collect stack traces at VTS
4232 transitions, since we'd eventually run out of memory. Hence
4233 note_local_Kw_n_stack_for will eventually throw away old
4234 ones, which in turn means we might fail to find index value
4235 confTym in the array. */
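/* Worked example of the lookup (invented values): suppose confThr's
   local_Kws_n_stacks holds the (Kw, stack) pairs (1,S1), (4,S2),
   (9,S3), and confTym == 4.  The lookup selects the (4,S2) entry, so
   the conflicting segment is bounded by S2 (start) and S3 (end) in
   the code below.  If instead confTym == 9, there is no later entry:
   the segment is still open, and the best available end point is the
   thread's current context, provided it is still alive.  And if
   confTym is older than the oldest retained pair, the lookup fails
   and no bounds can be shown at all. */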
4237 ULong_n_EC *pair_start, *pair_end;
4239 pair_start = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
4240 hist1_seg_start = pair_start->ec;
4241 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
4243 pair_end = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx+1 );
4245 /* from properties of VG_(lookupXA) and the comparison fn used: */
4246 tl_assert(pair_start->ull < pair_end->ull);
4247 hist1_seg_end = pair_end->ec;
4248 /* Could do a bit better here. It may be that pair_end
4249 doesn't have a stack, but the following entries in the
4250 array have the same scalar Kw and do have a stack. So
4251 we should search a bit further along the array than
4252 lastIx+1 if hist1_seg_end is NULL. */
4254 if (confThr->still_alive)
4255 hist1_seg_end = main_get_EC( confThr );
4257 // seg_start could be NULL iff this is the first stack in the thread
4258 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4259 //if (seg_end) VG_(pp_ExeContext)(seg_end);
4260 hist1_conf_thr = confThr->opaque;
4264 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
4266 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
4269 static Bool is_sane_SVal_C ( SVal sv ) {
4271 if (!SVal__isC(sv)) return True;
4272 return VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4277 /* Compute new state following a read */
4278 static inline SVal msmcread ( SVal svOld,
4279 /* The following are only needed for
4280 creating error reports. */
4282 Addr acc_addr, SizeT szB )
4284 SVal svNew = SVal_INVALID;
4287 /* Redundant sanity check on the constraints */
4289 tl_assert(is_sane_SVal_C(svOld));
4292 if (LIKELY(SVal__isC(svOld))) {
4293 VtsID tviR = acc_thr->viR;
4294 VtsID tviW = acc_thr->viW;
4295 VtsID rmini = SVal__unC_Rmin(svOld);
4296 VtsID wmini = SVal__unC_Wmin(svOld);
4297 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4300 /* Note: RWLOCK subtlety: use tviW, not tviR */
4301 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4304 /* assert on sanity of constraints. */
4305 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4307 // same as in non-race case
4308 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4309 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
4310 rmini, /* Cfailed */
4316 if (SVal__isA(svOld)) {
4317 /* reading no-access memory (sigh); leave unchanged */
4318 /* check for no pollution */
4319 tl_assert(svOld == SVal_NOACCESS);
4320 svNew = SVal_NOACCESS;
4323 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
4328 tl_assert(is_sane_SVal_C(svNew));
4330 if (UNLIKELY(svNew != svOld)) {
4331 tl_assert(svNew != SVal_INVALID);
4332 if (HG_(clo_history_level) >= 2
4333 && SVal__isC(svOld) && SVal__isC(svNew)) {
4334 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
4335 stats__msmcread_change++;
4342 /* Compute new state following a write */
4343 static inline SVal msmcwrite ( SVal svOld,
4344 /* The following are only needed for
4345 creating error reports. */
4347 Addr acc_addr, SizeT szB )
4349 SVal svNew = SVal_INVALID;
4352 /* Redundant sanity check on the constraints */
4354 tl_assert(is_sane_SVal_C(svOld));
4357 if (LIKELY(SVal__isC(svOld))) {
4358 VtsID tviW = acc_thr->viW;
4359 VtsID wmini = SVal__unC_Wmin(svOld);
4360 Bool leq = VtsID__cmpLEQ(wmini,tviW);
4363 svNew = SVal__mkC( tviW, tviW );
4366 VtsID rmini = SVal__unC_Rmin(svOld);
4367 /* assert on sanity of constraints. */
4368 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4370 // same as in non-race case
4371 // proof: in the non-race case, we have
4372 // rmini <= wmini (invar on constraints)
4373 // tviW <= tviR (invar on thread clocks)
4374 // wmini <= tviW (from run-time check)
4375 // hence from transitivity of <= we have
4376 // rmini <= wmini <= tviW
4377 // and so join(rmini,tviW) == tviW
4378 // and join(wmini,tviW) == tviW
4380 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
4381 VtsID__join2(wmini, tviW) );
4382 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
4383 wmini, /* Cfailed */
4389 if (SVal__isA(svOld)) {
4390 /* writing no-access memory (sigh); leave unchanged */
4391 /* check for no pollution */
4392 tl_assert(svOld == SVal_NOACCESS);
4393 svNew = SVal_NOACCESS;
4396 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
4401 tl_assert(is_sane_SVal_C(svNew));
4403 if (UNLIKELY(svNew != svOld)) {
4404 tl_assert(svNew != SVal_INVALID);
4405 if (HG_(clo_history_level) >= 2
4406 && SVal__isC(svOld) && SVal__isC(svNew)) {
4407 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
4408 stats__msmcwrite_change++;
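/* Numeric instance of the proof above (values invented): let
   rmini = { T:1, U:2 }, wmini = { T:1, U:3 } and tviW = { T:4, U:3 }.
   The run-time check gives wmini <= tviW, and the invariant gives
   rmini <= wmini, so

      join(rmini, tviW) = { T:4, U:3 } = tviW
      join(wmini, tviW) = { T:4, U:3 } = tviW

   which is why the non-race case above can write
   SVal__mkC( tviW, tviW ) instead of evaluating the joins. */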
4415 /////////////////////////////////////////////////////////
4417 // Apply core MSM to specific memory locations //
4419 /////////////////////////////////////////////////////////
4421 /*------------- ZSM accesses: 8 bit sapply ------------- */
4423 static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
4425 UWord cloff, tno, toff;
4428 stats__cline_cread08s++;
4429 cl = get_cacheline(a);
4430 cloff = get_cacheline_offset(a);
4431 tno = get_treeno(a);
4432 toff = get_tree_offset(a); /* == 0 .. 7 */
4433 descr = cl->descrs[tno];
4434 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4435 SVal* tree = &cl->svals[tno << 3];
4436 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
4438 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4440 svOld = cl->svals[cloff];
4441 svNew = msmcread( svOld, thr,a,1 );
4443 tl_assert(svNew != SVal_INVALID);
4444 cl->svals[cloff] = svNew;
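/* How the tree descriptors behave here, by example (an illustration,
   not normative): each 8-SVal tree can hold valid values at 64-, 32-,
   16- or 8-bit granularity, with descr recording which nodes are
   valid.  Suppose the tree is tracked as a single 64-bit node (descr
   has TREE_DESCR_64 set) and a 1-byte access arrives at toff == 3.
   pulldown_to_8 then splits 64 -> 2x32, the relevant 32 -> 2x16 and
   the relevant 16 -> 2x8, copying the node's SVal down each level,
   leaving (roughly) descr == TREE_DESCR_32_1 | TREE_DESCR_16_0
   | TREE_DESCR_8_2 | TREE_DESCR_8_3, with cl->svals[tno*8 + 3] now
   individually addressable. */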
4447 static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
4449 UWord cloff, tno, toff;
4452 stats__cline_cwrite08s++;
4453 cl = get_cacheline(a);
4454 cloff = get_cacheline_offset(a);
4455 tno = get_treeno(a);
4456 toff = get_tree_offset(a); /* == 0 .. 7 */
4457 descr = cl->descrs[tno];
4458 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4459 SVal* tree = &cl->svals[tno << 3];
4460 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
4462 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4464 svOld = cl->svals[cloff];
4465 svNew = msmcwrite( svOld, thr,a,1 );
4467 tl_assert(svNew != SVal_INVALID);
4468 cl->svals[cloff] = svNew;
4471 /*------------- ZSM accesses: 16 bit sapply ------------- */
4473 static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
4475 UWord cloff, tno, toff;
4478 stats__cline_cread16s++;
4479 if (UNLIKELY(!aligned16(a))) goto slowcase;
4480 cl = get_cacheline(a);
4481 cloff = get_cacheline_offset(a);
4482 tno = get_treeno(a);
4483 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4484 descr = cl->descrs[tno];
4485 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4486 if (valid_value_is_below_me_16(descr, toff)) {
4489 SVal* tree = &cl->svals[tno << 3];
4490 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4493 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4495 svOld = cl->svals[cloff];
4496 svNew = msmcread( svOld, thr,a,2 );
4498 tl_assert(svNew != SVal_INVALID);
4499 cl->svals[cloff] = svNew;
4501 slowcase: /* misaligned, or must go further down the tree */
4502 stats__cline_16to8splits++;
4503 zsm_sapply08__msmcread( thr, a + 0 );
4504 zsm_sapply08__msmcread( thr, a + 1 );
4507 static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
4509 UWord cloff, tno, toff;
4512 stats__cline_cwrite16s++;
4513 if (UNLIKELY(!aligned16(a))) goto slowcase;
4514 cl = get_cacheline(a);
4515 cloff = get_cacheline_offset(a);
4516 tno = get_treeno(a);
4517 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4518 descr = cl->descrs[tno];
4519 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4520 if (valid_value_is_below_me_16(descr, toff)) {
4523 SVal* tree = &cl->svals[tno << 3];
4524 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4527 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4529 svOld = cl->svals[cloff];
4530 svNew = msmcwrite( svOld, thr,a,2 );
4532 tl_assert(svNew != SVal_INVALID);
4533 cl->svals[cloff] = svNew;
4535 slowcase: /* misaligned, or must go further down the tree */
4536 stats__cline_16to8splits++;
4537 zsm_sapply08__msmcwrite( thr, a + 0 );
4538 zsm_sapply08__msmcwrite( thr, a + 1 );
4541 /*------------- ZSM accesses: 32 bit sapply ------------- */
4543 static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
4545 UWord cloff, tno, toff;
4548 stats__cline_cread32s++;
4549 if (UNLIKELY(!aligned32(a))) goto slowcase;
4550 cl = get_cacheline(a);
4551 cloff = get_cacheline_offset(a);
4552 tno = get_treeno(a);
4553 toff = get_tree_offset(a); /* == 0 or 4 */
4554 descr = cl->descrs[tno];
4555 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4556 if (valid_value_is_above_me_32(descr, toff)) {
4557 SVal* tree = &cl->svals[tno << 3];
4558 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4563 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4565 svOld = cl->svals[cloff];
4566 svNew = msmcread( svOld, thr,a,4 );
4568 tl_assert(svNew != SVal_INVALID);
4569 cl->svals[cloff] = svNew;
4571 slowcase: /* misaligned, or must go further down the tree */
4572 stats__cline_32to16splits++;
4573 zsm_sapply16__msmcread( thr, a + 0 );
4574 zsm_sapply16__msmcread( thr, a + 2 );
4577 static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
4579 UWord cloff, tno, toff;
4582 stats__cline_cwrite32s++;
4583 if (UNLIKELY(!aligned32(a))) goto slowcase;
4584 cl = get_cacheline(a);
4585 cloff = get_cacheline_offset(a);
4586 tno = get_treeno(a);
4587 toff = get_tree_offset(a); /* == 0 or 4 */
4588 descr = cl->descrs[tno];
4589 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4590 if (valid_value_is_above_me_32(descr, toff)) {
4591 SVal* tree = &cl->svals[tno << 3];
4592 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4597 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4599 svOld = cl->svals[cloff];
4600 svNew = msmcwrite( svOld, thr,a,4 );
4602 tl_assert(svNew != SVal_INVALID);
4603 cl->svals[cloff] = svNew;
4605 slowcase: /* misaligned, or must go further down the tree */
4606 stats__cline_32to16splits++;
4607 zsm_sapply16__msmcwrite( thr, a + 0 );
4608 zsm_sapply16__msmcwrite( thr, a + 2 );
4611 /*------------- ZSM accesses: 64 bit sapply ------------- */
4613 static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
4619 stats__cline_cread64s++;
4620 if (UNLIKELY(!aligned64(a))) goto slowcase;
4621 cl = get_cacheline(a);
4622 cloff = get_cacheline_offset(a);
4623 tno = get_treeno(a);
4624 //toff = get_tree_offset(a); /* == 0, unused */
4625 descr = cl->descrs[tno];
4626 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4629 svOld = cl->svals[cloff];
4630 svNew = msmcread( svOld, thr,a,8 );
4632 tl_assert(svNew != SVal_INVALID);
4633 cl->svals[cloff] = svNew;
4635 slowcase: /* misaligned, or must go further down the tree */
4636 stats__cline_64to32splits++;
4637 zsm_sapply32__msmcread( thr, a + 0 );
4638 zsm_sapply32__msmcread( thr, a + 4 );
4641 static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
4647 stats__cline_cwrite64s++;
4648 if (UNLIKELY(!aligned64(a))) goto slowcase;
4649 cl = get_cacheline(a);
4650 cloff = get_cacheline_offset(a);
4651 tno = get_treeno(a);
4652 //toff = get_tree_offset(a); /* == 0, unused */
4653 descr = cl->descrs[tno];
4654 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4657 svOld = cl->svals[cloff];
4658 svNew = msmcwrite( svOld, thr,a,8 );
4660 tl_assert(svNew != SVal_INVALID);
4661 cl->svals[cloff] = svNew;
4663 slowcase: /* misaligned, or must go further down the tree */
4664 stats__cline_64to32splits++;
4665 zsm_sapply32__msmcwrite( thr, a + 0 );
4666 zsm_sapply32__msmcwrite( thr, a + 4 );
4669 /*--------------- ZSM accesses: 8 bit swrite --------------- */
4672 void zsm_swrite08 ( Addr a, SVal svNew ) {
4674 UWord cloff, tno, toff;
4676 stats__cline_swrite08s++;
4677 cl = get_cacheline(a);
4678 cloff = get_cacheline_offset(a);
4679 tno = get_treeno(a);
4680 toff = get_tree_offset(a); /* == 0 .. 7 */
4681 descr = cl->descrs[tno];
4682 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4683 SVal* tree = &cl->svals[tno << 3];
4684 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
4686 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4688 tl_assert(svNew != SVal_INVALID);
4689 cl->svals[cloff] = svNew;
4692 /*--------------- ZSM accesses: 16 bit swrite --------------- */
4695 void zsm_swrite16 ( Addr a, SVal svNew ) {
4697 UWord cloff, tno, toff;
4699 stats__cline_swrite16s++;
4700 if (UNLIKELY(!aligned16(a))) goto slowcase;
4701 cl = get_cacheline(a);
4702 cloff = get_cacheline_offset(a);
4703 tno = get_treeno(a);
4704 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4705 descr = cl->descrs[tno];
4706 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4707 if (valid_value_is_below_me_16(descr, toff)) {
4708 /* Writing at this level. Need to fix up 'descr'. */
4709 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
4710 /* At this point, the tree does not match cl->descrs[tno] any
4711 more. The assignments below will fix it up. */
4713 /* We can't indiscriminately write on the w16 node as in the
4714 w64 case, as that might make the node inconsistent with
4715 its parent. So first, pull down to this level. */
4716 SVal* tree = &cl->svals[tno << 3];
4717 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4719 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4722 tl_assert(svNew != SVal_INVALID);
4723 cl->svals[cloff + 0] = svNew;
4724 cl->svals[cloff + 1] = SVal_INVALID;
4726 slowcase: /* misaligned */
4727 stats__cline_16to8splits++;
4728 zsm_swrite08( a + 0, svNew );
4729 zsm_swrite08( a + 1, svNew );
4732 /*--------------- ZSM accesses: 32 bit swrite --------------- */
4735 void zsm_swrite32 ( Addr a, SVal svNew ) {
4737 UWord cloff, tno, toff;
4739 stats__cline_swrite32s++;
4740 if (UNLIKELY(!aligned32(a))) goto slowcase;
4741 cl = get_cacheline(a);
4742 cloff = get_cacheline_offset(a);
4743 tno = get_treeno(a);
4744 toff = get_tree_offset(a); /* == 0 or 4 */
4745 descr = cl->descrs[tno];
4746 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4747 if (valid_value_is_above_me_32(descr, toff)) {
4748 /* We can't indiscriminately write on the w32 node as in the
4749 w64 case, as that might make the node inconsistent with
4750 its parent. So first, pull down to this level. */
4751 SVal* tree = &cl->svals[tno << 3];
4752 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4754 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4756 /* Writing at this level. Need to fix up 'descr'. */
4757 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
4758 /* At this point, the tree does not match cl->descrs[tno] any
4759 more. The assignments below will fix it up. */
4762 tl_assert(svNew != SVal_INVALID);
4763 cl->svals[cloff + 0] = svNew;
4764 cl->svals[cloff + 1] = SVal_INVALID;
4765 cl->svals[cloff + 2] = SVal_INVALID;
4766 cl->svals[cloff + 3] = SVal_INVALID;
4768 slowcase: /* misaligned */
4769 stats__cline_32to16splits++;
4770 zsm_swrite16( a + 0, svNew );
4771 zsm_swrite16( a + 2, svNew );
4774 /*--------------- ZSM accesses: 64 bit swrite --------------- */
4777 void zsm_swrite64 ( Addr a, SVal svNew ) {
4781 stats__cline_swrite64s++;
4782 if (UNLIKELY(!aligned64(a))) goto slowcase;
4783 cl = get_cacheline(a);
4784 cloff = get_cacheline_offset(a);
4785 tno = get_treeno(a);
4786 //toff = get_tree_offset(a); /* == 0, unused */
4787 cl->descrs[tno] = TREE_DESCR_64;
4788 tl_assert(svNew != SVal_INVALID);
4789 cl->svals[cloff + 0] = svNew;
4790 cl->svals[cloff + 1] = SVal_INVALID;
4791 cl->svals[cloff + 2] = SVal_INVALID;
4792 cl->svals[cloff + 3] = SVal_INVALID;
4793 cl->svals[cloff + 4] = SVal_INVALID;
4794 cl->svals[cloff + 5] = SVal_INVALID;
4795 cl->svals[cloff + 6] = SVal_INVALID;
4796 cl->svals[cloff + 7] = SVal_INVALID;
4798 slowcase: /* misaligned */
4799 stats__cline_64to32splits++;
4800 zsm_swrite32( a + 0, svNew );
4801 zsm_swrite32( a + 4, svNew );
4804 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */
4807 SVal zsm_sread08 ( Addr a ) {
4809 UWord cloff, tno, toff;
4811 stats__cline_sread08s++;
4812 cl = get_cacheline(a);
4813 cloff = get_cacheline_offset(a);
4814 tno = get_treeno(a);
4815 toff = get_tree_offset(a); /* == 0 .. 7 */
4816 descr = cl->descrs[tno];
4817 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4818 SVal* tree = &cl->svals[tno << 3];
4819 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
4821 return cl->svals[cloff];
4824 static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
4826 stats__cline_scopy08s++;
4827 sv = zsm_sread08( src );
4828 zsm_swrite08( dst, sv );
4832 /* Block-copy states (needed for implementing realloc()). Note this
4833 doesn't change the filtering arrangements. The caller of
4834 zsm_scopy_range needs to attend to that. */
4836 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
4842 /* assert for non-overlappingness */
4843 tl_assert(src+len <= dst || dst+len <= src);
4845 /* To be simple, just copy byte by byte. But so as not to wreck
4846 performance for later accesses to dst[0 .. len-1], normalise
4847 destination lines as we finish with them, and also normalise the
4848 line containing the first and last address. */
4849 for (i = 0; i < len; i++) {
4851 Bool normalise = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
4852 || i == 0 /* first in range */
4853 || i == len-1; /* last in range */
4854 zsm_scopy08( src+i, dst+i, normalise );
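/* Hedged usage sketch (not part of this file): a tool's realloc
   handler might move shadow state with the entry points defined
   later in this file.  'thr', 'old', 'nu' and 'oldszB' are assumed
   to come from the tool's own realloc wrapper. */
#if 0
   if (nu != old) {
      /* copy the payload's shadow state, then kill the old range */
      libhb_copy_shadow_state( thr, old, nu, oldszB );
      libhb_srange_noaccess( thr, old, oldszB );
   }
#endif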
4859 /* For setting address ranges to a given value. Has considerable
4860 sophistication so as to avoid generating large numbers of pointless
4861 cache loads/writebacks for large ranges. */
4863 /* Do small ranges in-cache, in the obvious way. */
4865 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
4867 /* fast track a couple of common cases */
4868 if (len == 4 && aligned32(a)) {
4869 zsm_swrite32( a, svNew );
4872 if (len == 8 && aligned64(a)) {
4873 zsm_swrite64( a, svNew );
4877 /* be completely general (but as efficient as possible) */
4878 if (len == 0) return;
4880 if (!aligned16(a) && len >= 1) {
4881 zsm_swrite08( a, svNew );
4884 tl_assert(aligned16(a));
4886 if (len == 0) return;
4888 if (!aligned32(a) && len >= 2) {
4889 zsm_swrite16( a, svNew );
4892 tl_assert(aligned32(a));
4894 if (len == 0) return;
4896 if (!aligned64(a) && len >= 4) {
4897 zsm_swrite32( a, svNew );
4900 tl_assert(aligned64(a));
4902 if (len == 0) return;
4905 tl_assert(aligned64(a));
4907 zsm_swrite64( a, svNew );
4911 tl_assert(aligned64(a));
4913 if (len == 0) return;
4916 tl_assert(aligned32(a));
4918 zsm_swrite32( a, svNew );
4922 if (len == 0) return;
4925 tl_assert(aligned16(a));
4927 zsm_swrite16( a, svNew );
4931 if (len == 0) return;
4934 zsm_swrite08( a, svNew );
4938 tl_assert(len == 0);
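/* Example of the decomposition this performs (invented values): for
   a == 0x5003, len == 22 the ladder issues

      zsm_swrite08(0x5003)   -- 1 byte, to reach 16-alignment
      zsm_swrite32(0x5004)   -- 0x5004 is 32- but not 64-aligned
      zsm_swrite64(0x5008)
      zsm_swrite64(0x5010)   -- aligned bulk
      zsm_swrite08(0x5018)   -- 1 trailing byte

   that is, 1 + 4 + 8 + 8 + 1 == 22 bytes, with no single store ever
   crossing its own alignment boundary. */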
4942 /* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
4943 for larger ranges, try to operate directly on the out-of-cache
4944 representation, rather than dragging lines into the cache,
4945 overwriting them, and forcing them out. This turns out to be an
4946 important performance optimisation.
4948 Note that this doesn't change the filtering arrangements. The
4949 caller of zsm_sset_range needs to attend to that. */
4951 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
4953 tl_assert(svNew != SVal_INVALID);
4954 stats__cache_make_New_arange += (ULong)len;
4957 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4960 static UWord n_New_in_cache = 0;
4961 static UWord n_New_not_in_cache = 0;
4962 /* tag is 'a' with the in-line offset masked out,
4963 eg a[31]..a[4] 0000 */
4964 Addr tag = a & ~(N_LINE_ARANGE - 1);
4965 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4966 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4969 n_New_not_in_cache++;
4971 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4972 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4973 n_New_in_cache, n_New_not_in_cache );
4976 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4977 zsm_sset_range_SMALL( a, len, svNew );
4979 Addr before_start = a;
4980 Addr aligned_start = cacheline_ROUNDUP(a);
4981 Addr after_start = cacheline_ROUNDDN(a + len);
4982 UWord before_len = aligned_start - before_start;
4983 UWord aligned_len = after_start - aligned_start;
4984 UWord after_len = a + len - after_start;
4985 tl_assert(before_start <= aligned_start);
4986 tl_assert(aligned_start <= after_start);
4987 tl_assert(before_len < N_LINE_ARANGE);
4988 tl_assert(after_len < N_LINE_ARANGE);
4989 tl_assert(get_cacheline_offset(aligned_start) == 0);
4990 if (get_cacheline_offset(a) == 0) {
4991 tl_assert(before_len == 0);
4992 tl_assert(a == aligned_start);
4994 if (get_cacheline_offset(a+len) == 0) {
4995 tl_assert(after_len == 0);
4996 tl_assert(after_start == a+len);
4998 if (before_len > 0) {
4999 zsm_sset_range_SMALL( before_start, before_len, svNew );
5001 if (after_len > 0) {
5002 zsm_sset_range_SMALL( after_start, after_len, svNew );
5004 stats__cache_make_New_inZrep += (ULong)aligned_len;
5009 if (aligned_start >= after_start) break;
5011 tl_assert(get_cacheline_offset(aligned_start) == 0);
5012 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5013 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5014 if (tag == cache_shmem.tags0[wix]) {
5016 for (i = 0; i < N_LINE_ARANGE / 8; i++)
5017 zsm_swrite64( aligned_start + i * 8, svNew );
5023 /* This line is not in the cache. Do not force it in; instead
5024 modify it in-place. */
5025 /* find the Z line to write in and rcdec it or the
5026 associated F line. */
5027 find_Z_for_writing( &sm, &zix, tag );
5029 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5030 lineZ = &sm->linesZ[zix];
5031 lineZ->dict[0] = svNew;
5032 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5033 for (i = 0; i < N_LINE_ARANGE/4; i++)
5034 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5037 aligned_start += N_LINE_ARANGE;
5038 aligned_len -= N_LINE_ARANGE;
5040 tl_assert(aligned_start == after_start);
5041 tl_assert(aligned_len == 0);
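/* Example of the splitting above, assuming N_LINE_ARANGE == 64
   (invented values): a == 0x10F0, len == 0x300 gives

      before:  [0x10F0, 0x1100)  len 0x10   via zsm_sset_range_SMALL
      aligned: [0x1100, 0x13C0)  len 0x2C0  11 whole lines, written
                                            directly to Z reps when
                                            not present in the cache
      after:   [0x13C0, 0x13F0)  len 0x30   via zsm_sset_range_SMALL

   and 0x10 + 0x2C0 + 0x30 == 0x300 as required. */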
5046 /////////////////////////////////////////////////////////
5048 // Front-filtering accesses //
5050 /////////////////////////////////////////////////////////
5052 static UWord stats__f_ac = 0;
5053 static UWord stats__f_sk = 0;
5056 # define STATS__F_SHOW \
5058 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5059 VG_(printf)("filters: ac %lu sk %lu\n", \
5060 stats__f_ac, stats__f_sk); \
5063 # define STATS__F_SHOW /* */
5066 void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
5069 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
5073 zsm_sapply08__msmcwrite(thr, a);
5076 void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
5079 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
5083 zsm_sapply16__msmcwrite(thr, a);
5086 void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
5089 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
5093 zsm_sapply32__msmcwrite(thr, a);
5096 void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
5099 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
5103 zsm_sapply64__msmcwrite(thr, a);
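/* Illustrative sketch (not part of the library): the point of the
   filter is that only the first of a run of identical accesses needs
   to reach the MSM.  Assumes a valid Thr* whose filter is not
   cleared in between -- synchronisation events such as
   libhb_so_send/recv below do clear it. */
#if 0
static void example_fill ( Thr* thr, Addr buf )
{
   UWord i;
   for (i = 0; i < 1000; i++) {
      /* iteration 0 misses the filter and runs the full MSM write
         machinery; iterations 1..999 should be skipped by
         Filter__ok_to_skip_cwr64. */
      zsm_sapply64_f__msmcwrite( thr, buf );
   }
}
#endif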
5106 void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5108 /* fast track a couple of common cases */
5109 if (len == 4 && aligned32(a)) {
5110 zsm_sapply32_f__msmcwrite( thr, a );
5113 if (len == 8 && aligned64(a)) {
5114 zsm_sapply64_f__msmcwrite( thr, a );
5118 /* be completely general (but as efficient as possible) */
5119 if (len == 0) return;
5121 if (!aligned16(a) && len >= 1) {
5122 zsm_sapply08_f__msmcwrite( thr, a );
5125 tl_assert(aligned16(a));
5127 if (len == 0) return;
5129 if (!aligned32(a) && len >= 2) {
5130 zsm_sapply16_f__msmcwrite( thr, a );
5133 tl_assert(aligned32(a));
5135 if (len == 0) return;
5137 if (!aligned64(a) && len >= 4) {
5138 zsm_sapply32_f__msmcwrite( thr, a );
5141 tl_assert(aligned64(a));
5143 if (len == 0) return;
5146 tl_assert(aligned64(a));
5148 zsm_sapply64_f__msmcwrite( thr, a );
5152 tl_assert(aligned64(a));
5154 if (len == 0) return;
5157 tl_assert(aligned32(a));
5159 zsm_sapply32_f__msmcwrite( thr, a );
5163 if (len == 0) return;
5166 tl_assert(aligned16(a));
5168 zsm_sapply16_f__msmcwrite( thr, a );
5172 if (len == 0) return;
5175 zsm_sapply08_f__msmcwrite( thr, a );
5179 tl_assert(len == 0);
5182 void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
5185 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
5189 zsm_sapply08__msmcread(thr, a);
5192 void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
5195 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
5199 zsm_sapply16__msmcread(thr, a);
5202 void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
5205 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
5209 zsm_sapply32__msmcread(thr, a);
5212 void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
5215 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
5219 zsm_sapply64__msmcread(thr, a);
5222 void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
5224 /* fast track a couple of common cases */
5225 if (len == 4 && aligned32(a)) {
5226 zsm_sapply32_f__msmcread( thr, a );
5229 if (len == 8 && aligned64(a)) {
5230 zsm_sapply64_f__msmcread( thr, a );
5234 /* be completely general (but as efficient as possible) */
5235 if (len == 0) return;
5237 if (!aligned16(a) && len >= 1) {
5238 zsm_sapply08_f__msmcread( thr, a );
5241 tl_assert(aligned16(a));
5243 if (len == 0) return;
5245 if (!aligned32(a) && len >= 2) {
5246 zsm_sapply16_f__msmcread( thr, a );
5249 tl_assert(aligned32(a));
5251 if (len == 0) return;
5253 if (!aligned64(a) && len >= 4) {
5254 zsm_sapply32_f__msmcread( thr, a );
5257 tl_assert(aligned64(a));
5259 if (len == 0) return;
5262 tl_assert(aligned64(a));
5264 zsm_sapply64_f__msmcread( thr, a );
5268 tl_assert(aligned64(a));
5270 if (len == 0) return;
5273 tl_assert(aligned32(a));
5275 zsm_sapply32_f__msmcread( thr, a );
5279 if (len == 0) return;
5282 tl_assert(aligned16(a));
5284 zsm_sapply16_f__msmcread( thr, a );
5288 if (len == 0) return;
5291 zsm_sapply08_f__msmcread( thr, a );
5295 tl_assert(len == 0);
5298 void libhb_Thr_resumes ( Thr* thr )
5300 if (0) VG_(printf)("resume %p\n", thr);
5302 tl_assert(thr->still_alive);
5303 Filter__clear(thr->filter, "libhb_Thr_resumes");
5304 /* A kludge, but .. if this thread doesn't have any marker stacks
5305 at all, get one right now. This is easier than figuring out
5306 exactly when at thread startup we can and can't take a stack snapshot. */
5308 if (HG_(clo_history_level) == 1) {
5309 tl_assert(thr->local_Kws_n_stacks);
5310 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
5311 note_local_Kw_n_stack_for(thr);
5316 /////////////////////////////////////////////////////////
5318 // Synchronisation objects //
5320 /////////////////////////////////////////////////////////
5322 // (UInt) `echo "Synchronisation object" | md5sum`
5323 #define SO_MAGIC 0x56b3c5b0U
5326 VtsID viR; /* r-clock of sender */
5327 VtsID viW; /* w-clock of sender */
5331 static SO* SO__Alloc ( void ) {
5332 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
5333 so->viR = VtsID_INVALID;
5334 so->viW = VtsID_INVALID;
5335 so->magic = SO_MAGIC;
5338 static void SO__Dealloc ( SO* so ) {
5340 tl_assert(so->magic == SO_MAGIC);
5341 if (so->viR == VtsID_INVALID) {
5342 tl_assert(so->viW == VtsID_INVALID);
5344 tl_assert(so->viW != VtsID_INVALID);
5345 VtsID__rcdec(so->viR);
5346 VtsID__rcdec(so->viW);
5353 /////////////////////////////////////////////////////////
5357 /////////////////////////////////////////////////////////
5359 static void show_thread_state ( HChar* str, Thr* t )
5362 if (t->viR == t->viW) {
5363 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
5364 VtsID__pp( t->viR );
5365 VG_(printf)("%s","\n");
5367 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
5368 VtsID__pp( t->viR );
5369 VG_(printf)(" viW %u==", t->viW);
5370 VtsID__pp( t->viW );
5371 VG_(printf)("%s","\n");
5377 void (*get_stacktrace)( Thr*, Addr*, UWord ),
5378 ExeContext* (*get_EC)( Thr* )
5383 tl_assert(get_stacktrace);
5385 main_get_stacktrace = get_stacktrace;
5386 main_get_EC = get_EC;
5388 // No need to initialise hg_wordfm.
5389 // No need to initialise hg_wordset.
5394 VtsID__invalidate_caches();
5396 // initialise shadow memory
5397 zsm_init( SVal__rcinc, SVal__rcdec );
5400 vi = VtsID__mk_Singleton( thr, 1 );
5403 VtsID__rcinc(thr->viR);
5404 VtsID__rcinc(thr->viW);
5406 show_thread_state(" root", thr);
5411 Thr* libhb_create ( Thr* parent )
5413 /* The child's VTSs are copies of the parent's VTSs, but ticked at
5414 the child's index. Since the child's index is guaranteed
5415 unique, it has never been seen before, so the implicit value
5416 before the tick is zero and after that is one. */
5417 Thr* child = Thr__new();
5419 child->viR = VtsID__tick( parent->viR, child );
5420 child->viW = VtsID__tick( parent->viW, child );
5421 Filter__clear(child->filter, "libhb_create(child)");
5422 VtsID__rcinc(child->viR);
5423 VtsID__rcinc(child->viW);
5424 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
5425 early for that - it may not have a valid TId yet. So, let
5426 libhb_Thr_resumes pick it up the first time the thread runs. */
5428 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
5429 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
5431 /* and the parent has to move along too */
5432 VtsID__rcdec(parent->viR);
5433 VtsID__rcdec(parent->viW);
5434 parent->viR = VtsID__tick( parent->viR, parent );
5435 parent->viW = VtsID__tick( parent->viW, parent );
5436 Filter__clear(parent->filter, "libhb_create(parent)");
5437 VtsID__rcinc(parent->viR);
5438 VtsID__rcinc(parent->viW);
5439 note_local_Kw_n_stack_for( parent );
5441 show_thread_state(" child", child);
5442 show_thread_state("parent", parent);
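/* Worked example (invented clock values): if the parent runs with
   viR == viW == { P:3 }, then after libhb_create we have

      child->viR  == child->viW  == { P:3, C:1 }
      parent->viR == parent->viW == { P:4 }

   Neither VTS is <= the other, so subsequent accesses by parent and
   child are unordered -- and hence racy if unsynchronised -- until
   some send/recv edge joins their clocks. */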
5447 /* Shut down the library, and print stats (in fact that's _all_ it does). */
5449 void libhb_shutdown ( Bool show_stats )
5452 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
5453 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
5454 stats__secmaps_allocd,
5455 stats__secmap_ga_space_covered);
5456 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
5457 stats__secmap_linesZ_allocd,
5458 stats__secmap_linesZ_bytes);
5459 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
5460 stats__secmap_linesF_allocd,
5461 stats__secmap_linesF_bytes);
5462 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
5463 stats__secmap_iterator_steppings);
5464 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
5465 stats__secmaps_search, stats__secmaps_search_slow);
5467 VG_(printf)("%s","\n");
5468 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
5469 stats__cache_totrefs, stats__cache_totmisses );
5470 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
5471 stats__cache_Z_fetches, stats__cache_F_fetches );
5472 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
5473 stats__cache_Z_wbacks, stats__cache_F_wbacks );
5474 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
5475 stats__cache_invals, stats__cache_flushes );
5476 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
5477 stats__cache_make_New_arange,
5478 stats__cache_make_New_inZrep);
5480 VG_(printf)("%s","\n");
5481 VG_(printf)(" cline: %'10lu normalises\n",
5482 stats__cline_normalises );
5483 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
5484 stats__cline_cread64s,
5485 stats__cline_cread32s,
5486 stats__cline_cread16s,
5487 stats__cline_cread08s );
5488 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
5489 stats__cline_cwrite64s,
5490 stats__cline_cwrite32s,
5491 stats__cline_cwrite16s,
5492 stats__cline_cwrite08s );
5493 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
5494 stats__cline_swrite64s,
5495 stats__cline_swrite32s,
5496 stats__cline_swrite16s,
5497 stats__cline_swrite08s );
5498 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
5499 stats__cline_sread08s, stats__cline_scopy08s );
5500 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
5501 stats__cline_64to32splits,
5502 stats__cline_32to16splits,
5503 stats__cline_16to8splits );
5504 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
5505 stats__cline_64to32pulldown,
5506 stats__cline_32to16pulldown,
5507 stats__cline_16to8pulldown );
5509 VG_(printf)(" cline: sizeof(LineZ) %ld, covers %ld bytes of arange\n",
5510 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
5512 VG_(printf)("%s","\n");
5514 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
5515 stats__msmcread, stats__msmcread_change);
5516 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
5517 stats__msmcwrite, stats__msmcwrite_change);
5518 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
5519 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
5520 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
5521 stats__join2_queries, stats__join2_misses);
5523 VG_(printf)("%s","\n");
5524 VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
5525 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
5526 VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
5527 stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
5528 VG_(printf)( " libhb: VTSset: find_and_dealloc__or_add %'lu (%'lu deallocd)\n",
5529 stats__vts_set__fadoa, stats__vts_set__fadoa_d );
5530 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
5531 stats__vts__indexat_slow );
5533 VG_(printf)("%s","\n");
5535 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
5536 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
5538 VG_(printf)( " libhb: %lu entries in vts_set\n",
5539 VG_(sizeFM)( vts_set ) );
5541 VG_(printf)("%s","\n");
5542 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
5543 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
5545 stats__ctxt_rcdec3 );
5546 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
5547 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
5548 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
5550 stats__ctxt_tab_curr );
5551 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
5553 stats__ctxt_tab_cmps );
5555 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
5556 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
5557 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
5558 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
5559 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
5560 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
5561 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
5562 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
5563 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
5564 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
5565 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
5566 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
5567 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
5568 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
5570 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
5571 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
5572 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
5573 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
5576 VG_(printf)("%s","<<< END libhb stats >>>\n");
5577 VG_(printf)("%s","\n");
5582 void libhb_async_exit ( Thr* thr )
5585 tl_assert(thr->still_alive);
5586 thr->still_alive = False;
5588 /* free up Filter and local_Kws_n_stacks (well, actually not the latter, as discussed below). */
5590 tl_assert(thr->filter);
5591 HG_(free)(thr->filter);
5594 /* Another space-accuracy tradeoff. Do we want to be able to show
5595 H1 history for conflicts in threads which have since exited? If
5596 yes, then we better not free up thr->local_Kws_n_stacks. The
5597 downside is a potential per-thread leak of up to
5598 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
5599 XArray average overcommit factor is (1.5 I'd guess). */
5601 // VG_(deleteXA)(thr->local_Kws_n_stacks);
5602 // thr->local_Kws_n_stacks = NULL;
5605 /* Both Segs and SOs point to VTSs. However, there is no sharing, so
5606 a Seg that points at a VTS is its one-and-only owner, and ditto for
5607 a SO that points at a VTS. */
5609 SO* libhb_so_alloc ( void )
5614 void libhb_so_dealloc ( SO* so )
5617 tl_assert(so->magic == SO_MAGIC);
5621 /* See comments in libhb.h for details on the meaning of
5622 strong vs weak sends and strong vs weak receives. */
5623 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
5625 /* Copy the VTSs from 'thr' into the sync object, and then move
5626 the thread along one step. */
5629 tl_assert(so->magic == SO_MAGIC);
5631 /* stay sane .. a thread's read-clock must always lead or be the
5632 same as its write-clock */
5633 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
5637 /* since we're overwriting the VtsIDs in the SO, we need to drop
5638 any references made by the previous contents thereof */
5639 if (so->viR == VtsID_INVALID) {
5640 tl_assert(so->viW == VtsID_INVALID);
5643 VtsID__rcinc(so->viR);
5644 VtsID__rcinc(so->viW);
5646 /* In a strong send, we dump any previous VC in the SO and
5647 install the sending thread's VC instead. For a weak send we
5648 must join2 with what's already there. */
5649 tl_assert(so->viW != VtsID_INVALID);
5650 VtsID__rcdec(so->viR);
5651 VtsID__rcdec(so->viW);
5652 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
5653 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
5654 VtsID__rcinc(so->viR);
5655 VtsID__rcinc(so->viW);
5658 /* move both parent clocks along */
5659 VtsID__rcdec(thr->viR);
5660 VtsID__rcdec(thr->viW);
5661 thr->viR = VtsID__tick( thr->viR, thr );
5662 thr->viW = VtsID__tick( thr->viW, thr );
5663 if (thr->still_alive) {
5664 Filter__clear(thr->filter, "libhb_so_send");
5665 note_local_Kw_n_stack_for(thr);
5667 VtsID__rcinc(thr->viR);
5668 VtsID__rcinc(thr->viW);
5671 show_thread_state("s-send", thr);
5673 show_thread_state("w-send", thr);
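/* Hedged sketch of the intended call pattern (client side, not part
   of this file): a tool can model, say, a mutex handover by
   attaching an SO to the mutex.  'thrA' and 'thrB' stand for the
   Thr*s of the two client threads. */
#if 0
   SO* so = libhb_so_alloc();
   /* thread A, at unlock time: publish A's clocks into the SO */
   libhb_so_send( thrA, so, True/*strong_send*/ );
   /* thread B, at lock time: acquire them; this creates the
      happens-before edge  A-unlock -> B-lock  */
   libhb_so_recv( thrB, so, True/*strong_recv*/ );
   libhb_so_dealloc( so );
#endif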
5676 void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
5679 tl_assert(so->magic == SO_MAGIC);
5681 if (so->viR != VtsID_INVALID) {
5682 tl_assert(so->viW != VtsID_INVALID);
5684 /* Weak receive (basically, an R-acquisition of a R-W lock).
5685 This advances the read-clock of the receiver, but not the write-clock. */
5687 VtsID__rcdec(thr->viR);
5688 thr->viR = VtsID__join2( thr->viR, so->viR );
5689 VtsID__rcinc(thr->viR);
5691 /* At one point (r10589) it seemed safest to tick the clocks for
5692 the receiving thread after the join. But on reflection, I
5693 wonder if that might cause it to 'overtake' constraints,
5694 which could lead to missing races. So, back out that part of the change. */
5696 //VtsID__rcdec(thr->viR);
5697 //thr->viR = VtsID__tick( thr->viR, thr );
5698 //VtsID__rcinc(thr->viR);
5700 /* For a strong receive, we also advance the receiver's write
5701 clock, which means the receive as a whole is essentially
5702 equivalent to a W-acquisition of a R-W lock. */
5704 VtsID__rcdec(thr->viW);
5705 thr->viW = VtsID__join2( thr->viW, so->viW );
5706 VtsID__rcinc(thr->viW);
5708 /* See comment just above, re r10589. */
5709 //VtsID__rcdec(thr->viW);
5710 //thr->viW = VtsID__tick( thr->viW, thr );
5711 //VtsID__rcinc(thr->viW);
5714 Filter__clear(thr->filter, "libhb_so_recv");
5715 note_local_Kw_n_stack_for(thr);
5718 show_thread_state("s-recv", thr);
5720 show_thread_state("w-recv", thr);
5723 tl_assert(so->viW == VtsID_INVALID);
5724 /* Deal with degenerate case: 'so' has no vts, so there has been
5725 no message posted to it. Just ignore this case. */
5726 show_thread_state("d-recv", thr);
5730 Bool libhb_so_everSent ( SO* so )
5732 if (so->viR == VtsID_INVALID) {
5733 tl_assert(so->viW == VtsID_INVALID);
5736 tl_assert(so->viW != VtsID_INVALID);
5741 #define XXX1 0 // 0x67a106c
5744 static inline Bool TRACEME(Addr a, SizeT szB) {
5745 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
5746 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
5749 static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
5750 SVal sv = zsm_sread08(a);
5751 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
5752 show_thread_state("", thr);
5753 VG_(printf)("%s","\n");
5756 void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
5758 SVal sv = SVal__mkC(thr->viW, thr->viW);
5759 tl_assert(is_sane_SVal_C(sv));
5760 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
5761 zsm_sset_range( a, szB, sv );
5762 Filter__clear_range( thr->filter, a, szB );
5763 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
5766 void libhb_srange_noaccess ( Thr* thr, Addr a, SizeT szB )
5771 void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
5773 SVal sv = SVal_NOACCESS;
5774 tl_assert(is_sane_SVal_C(sv));
5775 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
5776 zsm_sset_range( a, szB, sv );
5777 Filter__clear_range( thr->filter, a, szB );
5778 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
5781 void* libhb_get_Thr_opaque ( Thr* thr ) {
5786 void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
5791 void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
5793 zsm_scopy_range(src, dst, len);
5794 Filter__clear_range( thr->filter, dst, len );
5797 void libhb_maybe_GC ( void )
5799 event_map_maybe_GC();
5800 /* If there are still freelist entries available, no need for a GC. */
5802 if (vts_tab_freelist != VtsID_INVALID)
5804 /* So all the table entries are full, and we're having to expand
5805 the table. But did we hit the threshold point yet? */
5806 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
5808 vts_tab__do_GC( False/*don't show stats*/ );
5812 /////////////////////////////////////////////////////////////////
5813 /////////////////////////////////////////////////////////////////
5815 // SECTION END main library //
5817 /////////////////////////////////////////////////////////////////
5818 /////////////////////////////////////////////////////////////////
5820 /*--------------------------------------------------------------------*/
5821 /*--- end libhb_main.c ---*/
5822 /*--------------------------------------------------------------------*/