/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/
/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2010 Julian Seward
   jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"
/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   Here is as good a place as any to record exactly when V bits are
   and should be checked, why, and what function is responsible.

   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could cause
      incorrect control flow, and thus cause incorrect externally-visible
      behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause externally-visible
      behaviour (eg. via incorrect control flow or an incorrect system call
      argument)  [complainIfUndefined]

   4. As the target address of a branch.  Because it could cause incorrect
      control flow.  [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
      an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
      requested it.  [in memcheck.h]

   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]

   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to solve
      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]

   In practice, 1 and 2 account for the vast majority of cases.
*/
/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/
/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}
/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      it here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp shadowV
         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = shadowV;
   }
   return ent->shadowV;
}
/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one
   regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      it here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp shadowV
         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = shadowV;
   }
}
/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporaries. */

typedef IRExpr IRAtom;
/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == Orig;
   }
   return False;
}
/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == VSh || ent->kind == BSh;
   }
   return False;
}
/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}
/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128. */

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}
/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}
/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}
/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}
/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}
/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}
/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}
static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}
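/* A worked illustration of the two primitives above.  This is a
   hypothetical scalar model, not part of the tool: at the bit level,
   DifD is AND and UifU is OR on V bits, where 0 = defined and
   1 = undefined. */
#if 0
static UInt model_DifD32 ( UInt v1, UInt v2 ) { return v1 & v2; }
static UInt model_UifU32 ( UInt v1, UInt v2 ) { return v1 | v2; }
/* e.g. model_UifU32(0x000000FF, 0x0F000000) == 0x0F0000FF: a result
   bit is undefined if either argument bit is undefined, whereas
   model_DifD32 keeps a bit defined if either argument has it defined. */
#endif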
/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
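/* Illustration (hypothetical scalar model, not compiled): VEX defines
   Iop_LeftN(x) as x | -x, so every bit at and above the lowest
   undefined (1) bit becomes undefined. */
#if 0
static UInt model_Left32 ( UInt vbits ) { return vbits | (0u - vbits); }
/* e.g. model_Left32(0x00000100) == 0xFFFFFF00: undefinedness is
   smeared upwards, matching how carries propagate leftwards. */
#endif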
/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}
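/* Illustration (hypothetical scalar model, not compiled): how the AND
   improvement term is used.  The final V bits for a 32-bit AND, as
   assembled later in expr2vbits_Binop, are the UifU of the args DifD'd
   with the improvement terms of both args: */
#if 0
static UInt model_And32_vbits ( UInt a, UInt va, UInt b, UInt vb )
{
   UInt uifu = va | vb;   // undefined if either arg is undefined ..
   UInt impA = a | va;    // .. unless 'a' has a defined 0 bit there ..
   UInt impB = b | vb;    // .. or 'b' has a defined 0 bit there.
   return uifu & impA & impB;
}
#endif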
/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}
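/* Illustration (hypothetical scalar model, not compiled): the dual
   rule for OR.  An output bit of OR is definitely 1 wherever either
   arg has a defined 1 bit, so ~data | vbits is 0 (defined) exactly at
   the defined 1s of 'data': */
#if 0
static UInt model_Or32_vbits ( UInt a, UInt va, UInt b, UInt vb )
{
   return (va | vb) & (~a | va) & (~b | vb);
}
#endif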
/* --------- Pessimising casts. --------- */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   /* Else do it the slow way .. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits; break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits)); break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits)); break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits)); break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits)); break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1 = assignNew('V', mce, Ity_I1,
                               unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);

   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
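/* Illustration (hypothetical scalar model, not compiled): a
   pessimising cast collapses "any bit undefined" into "all bits
   undefined" at the destination size.  For src and dst both I32: */
#if 0
static UInt model_PCast32 ( UInt vbits )
{
   return vbits == 0 ? 0x0 : 0xFFFFFFFF;   // all-defined stays all-defined
}
#endif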
/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */

/* Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        if vec == 1...1 then 1...1 else 0...0
     =
        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opOR  = Iop_Or32;  opDIFD = Iop_And32; opUIFU = Iop_Or32;
         opNOT = Iop_Not32; opXOR  = Iop_Xor32; opCMP  = Iop_CmpEQ32;
         top = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR  = Iop_Or64;  opDIFD = Iop_And64; opUIFU = Iop_Or64;
         opNOT = Iop_Not64; opXOR  = Iop_Xor64; opCMP  = Iop_CmpEQ64;
         top = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
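/* Illustration (hypothetical scalar model, not compiled): the scheme
   above for 32-bit args, following the comment's definitions.  The
   result is 0 iff the CmpEQ/CmpNE outcome is known to be defined: */
#if 0
static UInt model_expensiveCmpEQ32 ( UInt vxx, UInt vyy, UInt xx, UInt yy )
{
   UInt naive = (vxx | vyy) == 0 ? 0 : 0xFFFFFFFF;   // PCast(UifU(vxx,vyy))
   UInt vec   = vxx | vyy | ~(xx ^ yy);    // 0 iff some bit pair is
                                           // defined-and-different
   UInt improvement = (vec == 0xFFFFFFFF) ? 0xFFFFFFFF : 0;
   return naive & improvement;             // DifD of the two terms
}
#endif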
/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3      -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx,     IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64 : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
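/* Illustration (hypothetical scalar model, not compiled): the two
   interpretations above for CmpORD32S, with 0 = defined: */
#if 0
static UInt model_CmpORD32S_vbits ( UInt vx, UInt vy, Bool y_is_zero )
{
   if (!y_is_zero) {
      /* standard: only result bits 3,2,1 can be undefined */
      return ((vx | vy) == 0 ? 0 : 0xFFFFFFFF) & (7<<1);
   }
   /* y == 0: bit 3 (LT) is a copy of x's sign bit, so its
      definedness is exactly vx[31] */
   return ((vx == 0 ? 0 : 0xFFFFFFFF) & (3<<1))
          | ((vx >> 31) << 3);
}
#endif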
/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}
/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin );
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}
/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}
/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}
/* Generate into bb suitable actions to shadow this PutI; the
   indexed-array analogue of do_shadow_PUT above.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr,
                      IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty      = descr->elemTy;
   tyS     = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}
/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}
/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty      = descr->elemTy;
   IRType tyS     = shadowTypeV(ty);
   Int    arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}
/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/
/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1); VG_(printf)("_");
      ppIRType(t2); VG_(printf)("_");
      ppIRType(finalVty); VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
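/* Illustration (hypothetical scalar model, not compiled): the general
   case of mkLazy2, with both args and the result modelled at 32 bits.
   Each arg is collapsed by a pessimising cast and merged with UifU;
   the final PCast to the result type is then the identity: */
#if 0
static UInt model_mkLazy2_32 ( UInt va1, UInt va2 )
{
   UInt p1 = va1 == 0 ? 0 : 0xFFFFFFFF;   // PCastTo(I32, va1)
   UInt p2 = va2 == 0 ? 0 : 0xFFFFFFFF;   // PCastTo(I32, va2)
   return p1 | p2;                        // UifU32
}
#endif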
/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1); VG_(printf)(" x ");
      ppIRType(t2); VG_(printf)(" x ");
      ppIRType(t3); VG_(printf)(" -> ");
      ppIRType(finalVty); VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than four times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1); VG_(printf)(" x ");
      ppIRType(t2); VG_(printf)(" x ");
      ppIRType(t3); VG_(printf)(" x ");
      ppIRType(t4); VG_(printf)(" -> ");
      ppIRType(finalVty); VG_(printf)("\n");
   }
   tl_assert(0);
}
/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32; opOR  = Iop_Or32;  opXOR = Iop_Xor32;
         opNOT = Iop_Not32; opADD = Iop_Add32; opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64; opOR  = Iop_Or64;  opXOR = Iop_Xor64;
         opNOT = Iop_Not64; opADD = Iop_Add64; opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }
}
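/* Worked example (hypothetical scalar model, not compiled): a_min and
   b_min assume every undefined bit is 0, a_max and b_max assume it is
   1; result bits where the two sums agree cannot be affected by the
   undefined inputs: */
#if 0
static UInt model_expensiveAdd32 ( UInt aa, UInt qaa, UInt bb, UInt qbb )
{
   UInt a_min = aa & ~qaa, b_min = bb & ~qbb;
   UInt a_max = aa | qaa,  b_max = bb | qbb;
   return (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max));
}
/* model_expensiveAdd32(0x10, 0x01, 0x03, 0x00) == 0x07: a carry out of
   the undefined bit 0 could ripple up through bits 1 and 2, so those
   three bits are undefined, but nothing above them is. */
#endif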
/*------------------------------------------------------------*/
/*--- Scalar shifts.                                       ---*/
/*------------------------------------------------------------*/

/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   idea is to shift the definedness bits by the original shift amount.
   This introduces 0s ("defined") in new positions for left shifts and
   unsigned right shifts, and copies the top definedness bit for
   signed right shifts.  So, conveniently, applying the original shift
   operator to the definedness bits for the left arg is exactly the
   right thing to do:

      (qaa << bb)

   However if the shift amount is undefined then the whole result
   is undefined.  Hence need:

      (qaa << bb) `UifU` PCast(qbb)

   If the shift amount bb is a literal then qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.
*/
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   return
      assignNew(
         'V', mce, ty,
         mkUifU( mce, ty,
                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
                 mkPCastTo(mce, ty, qbb)
         )
      );
}
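/* Illustration (hypothetical scalar model, not compiled): the scheme
   above for a 32-bit left shift by a (possibly undefined) amount: */
#if 0
static UInt model_shlShadow32 ( UInt qaa, UInt bb, UInt qbb )
{
   UInt shifted = qaa << bb;                  // shift the V bits as well
   UInt pq      = qbb == 0 ? 0 : 0xFFFFFFFF;  // PCast of the amount's V bits
   return shifted | pq;                       // UifU
}
#endif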
/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}
/* Here's a simple scheme capable of handling ops derived from SSE1
   code, while only generating ops that can be efficiently implemented
   in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/
static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}
/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}
/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args,
   vatom1 or 2, doesn't matter.

   After the PCast, that lane is all 0s (defined) or all
   1s (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So: In short, pessimise the args, then apply the original narrowing
   op.
*/
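/* Worked example (hypothetical scalar model, not compiled): one
   unsigned lane narrowed from 16 to 8 bits.  After the PCast a lane
   is 0x0000 or 0xFFFF; saturating narrowing maps 0x0000 to 0x00 and
   0xFFFF to 0xFF (clamped), so defined/undefined lanes survive: */
#if 0
static UChar model_QNarrow16Ux1 ( UShort lane )
{
   return lane > 0xFF ? 0xFF : (UChar)lane;
}
#endif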
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}
static
IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
                         IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
      case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
      case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrow64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2));
   return at3;
}
/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */

/* --- V128-bit versions --- */

static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}

/* --- 64-bit versions --- */

static
IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast8x8(mce, at);
   return at;
}

static
IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast16x4(mce, at);
   return at;
}

static
IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast32x2(mce, at);
   return at;
}
/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));

   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));

   switch (op) {
      case Iop_AddF64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
         /* I32(rm) x F64 x F64 -> F64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2088 IRAtom* expr2vbits_Binop ( MCEnv* mce,
2090 IRAtom* atom1, IRAtom* atom2 )
2093 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2094 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2095 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2097 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2098 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2100 tl_assert(isOriginalAtom(mce,atom1));
2101 tl_assert(isOriginalAtom(mce,atom2));
2102 tl_assert(isShadowAtom(mce,vatom1));
2103 tl_assert(isShadowAtom(mce,vatom2));
2104 tl_assert(sameKindedAtoms(atom1,vatom1));
2105 tl_assert(sameKindedAtoms(atom2,vatom2));
2118 /* Same scheme as with all other shifts. */
2119 complainIfUndefined(mce, atom2);
2120 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2122 case Iop_QNarrow32Sx2:
2123 case Iop_QNarrow16Sx4:
2124 case Iop_QNarrow16Ux4:
2125 return vectorNarrow64(mce, op, vatom1, vatom2);
2138 return binary8Ix8(mce, vatom1, vatom2);
2147 case Iop_MulHi16Sx4:
2148 case Iop_MulHi16Ux4:
2149 case Iop_CmpGT16Sx4:
2154 return binary16Ix4(mce, vatom1, vatom2);
2158 case Iop_CmpGT32Sx2:
2161 return binary32Ix2(mce, vatom1, vatom2);
2163 /* 64-bit data-steering */
2164 case Iop_InterleaveLO32x2:
2165 case Iop_InterleaveLO16x4:
2166 case Iop_InterleaveLO8x8:
2167 case Iop_InterleaveHI32x2:
2168 case Iop_InterleaveHI16x4:
2169 case Iop_InterleaveHI8x8:
2170 case Iop_CatOddLanes16x4:
2171 case Iop_CatEvenLanes16x4:
2172 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2174 /* Perm8x8: rearrange values in left arg using steering values
2175 from right arg. So rearrange the vbits in the same way but
2176 pessimise wrt steering values. */
2180 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2181 mkPCast8x8(mce, vatom2)
2196 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2197 this is wrong now: scalar shifts are done properly lazily.
2198 Vector shifts should be fixed too. */
2199 complainIfUndefined(mce, atom2);
2200 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2202 /* V x V shifts/rotates are done using the standard lazy scheme. */
2207 return mkUifUV128(mce,
2208 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2209 mkPCast8x16(mce,vatom2)
2216 return mkUifUV128(mce,
2217 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2218 mkPCast16x8(mce,vatom2)
2225 return mkUifUV128(mce,
2226 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2227 mkPCast32x4(mce,vatom2)
2237 case Iop_CmpGT8Sx16:
2238 case Iop_CmpGT8Ux16:
2245 return binary8Ix16(mce, vatom1, vatom2);
2251 case Iop_MulHi16Sx8:
2252 case Iop_MulHi16Ux8:
2257 case Iop_CmpGT16Sx8:
2258 case Iop_CmpGT16Ux8:
2265 return binary16Ix8(mce, vatom1, vatom2);
2268 case Iop_CmpGT32Sx4:
2269 case Iop_CmpGT32Ux4:
2282 return binary32Ix4(mce, vatom1, vatom2);
2286 return binary64Ix2(mce, vatom1, vatom2);
2288 case Iop_QNarrow32Sx4:
2289 case Iop_QNarrow32Ux4:
2290 case Iop_QNarrow16Sx8:
2291 case Iop_QNarrow16Ux8:
2292 return vectorNarrowV128(mce, op, vatom1, vatom2);
2299 case Iop_CmpLT64Fx2:
2300 case Iop_CmpLE64Fx2:
2301 case Iop_CmpEQ64Fx2:
2302 case Iop_CmpUN64Fx2:
2304 return binary64Fx2(mce, vatom1, vatom2);
2311 case Iop_CmpLT64F0x2:
2312 case Iop_CmpLE64F0x2:
2313 case Iop_CmpEQ64F0x2:
2314 case Iop_CmpUN64F0x2:
2316 return binary64F0x2(mce, vatom1, vatom2);
2323 case Iop_CmpLT32Fx4:
2324 case Iop_CmpLE32Fx4:
2325 case Iop_CmpEQ32Fx4:
2326 case Iop_CmpUN32Fx4:
2327 case Iop_CmpGT32Fx4:
2328 case Iop_CmpGE32Fx4:
2330 return binary32Fx4(mce, vatom1, vatom2);
2337 case Iop_CmpLT32F0x4:
2338 case Iop_CmpLE32F0x4:
2339 case Iop_CmpEQ32F0x4:
2340 case Iop_CmpUN32F0x4:
2342 return binary32F0x4(mce, vatom1, vatom2);
2344 /* V128-bit data-steering */
2345 case Iop_SetV128lo32:
2346 case Iop_SetV128lo64:
2347 case Iop_64HLtoV128:
2348 case Iop_InterleaveLO64x2:
2349 case Iop_InterleaveLO32x4:
2350 case Iop_InterleaveLO16x8:
2351 case Iop_InterleaveLO8x16:
2352 case Iop_InterleaveHI64x2:
2353 case Iop_InterleaveHI32x4:
2354 case Iop_InterleaveHI16x8:
2355 case Iop_InterleaveHI8x16:
2356 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
2358 /* Perm8x16: rearrange values in left arg using steering values
2359 from right arg. So rearrange the vbits in the same way but
2360 pessimise wrt steering values. */
2364 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2365 mkPCast8x16(mce, vatom2)
2368 /* These two take the lower half of each 16-bit lane, sign/zero
2369 extend it to 32, and multiply them together, producing a 32x4
2370 result (and implicitly ignoring half the operand bits). So
2371 treat it as a bunch of independent 16x8 operations, but then
2372 do 32-bit shifts left-right to copy the lower half results
2373 (which are all 0s or all 1s due to PCasting in binary16Ix8)
2374 into the upper half of each result lane. */
2375 case Iop_MullEven16Ux8:
2376 case Iop_MullEven16Sx8: {
2378 at = binary16Ix8(mce,vatom1,vatom2);
2379 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2380 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
2384 /* Same deal as Iop_MullEven16{S,U}x8 */
2385 case Iop_MullEven8Ux16:
2386 case Iop_MullEven8Sx16: {
2388 at = binary8Ix16(mce,vatom1,vatom2);
2389 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2390 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
2394 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
2395 32x4 -> 16x8 laneage, discarding the upper half of each lane.
2396 Simply apply the same op to the V bits, since this is really no more
2397 than a data steering operation. */
2398 case Iop_Narrow32x4:
2399 case Iop_Narrow16x8:
2400 return assignNew('V', mce, Ity_V128,
2401 binop(op, vatom1, vatom2));
2405 /* Same scheme as with all other shifts. Note: 10 Nov 05:
2406 this is wrong now: scalar shifts are done properly lazily.
2407 Vector shifts should be fixed too. */
2408 complainIfUndefined(mce, atom2);
2409 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2411 /* I128-bit data-steering */
2413 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
2415 /* Scalar floating point */
2417 case Iop_RoundF64toInt:
2418 case Iop_RoundF64toF32:
2426 /* I32(rm) x I64/F64 -> I64/F64 */
2427 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2430 /* I32(rm) x I32/F32 -> I32/F32 */
2431 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2436 /* First arg is I32 (rounding mode), second is F64 (data). */
2437 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2440 /* First arg is I32 (rounding mode), second is F64 (data). */
2441 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
2444 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2446 /* non-FP after here */
2448 case Iop_DivModU64to32:
2449 case Iop_DivModS64to32:
2450 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2452 case Iop_DivModU128to64:
2453 case Iop_DivModS128to64:
2454 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
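/* Illustrative note (a sketch of the assumed mkLazy2 behaviour):
   each argument's shadow is PCast to the result type and the two are
   UifU'd together, so a single undefined bit anywhere in either
   argument marks the entire result undefined.  Maximally
   pessimistic, but cheap, which suits ops like DivMod where exact
   bit-level propagation is not worth modelling. */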
2457 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
2459 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2463 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2464 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
2465 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
2470 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2471 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
2472 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
2477 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2478 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
2479 return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
2484 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2485 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
2486 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
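/* Illustrative note (a sketch of the widening-multiply scheme used
   above): for an N x N -> 2N multiply, the low half gets the usual
   left-smeared approximation (mkLeft of the UifU of the args), and
   since input undefinedness can propagate arbitrarily far into the
   high half, the high half is conservatively taken as the PCast of
   the low half's V bits. */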
2491 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2495 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2498 if (mce->bogusLiterals)
2499 return expensiveAddSub(mce,True,Ity_I32,
2500 vatom1,vatom2, atom1,atom2);
2502 goto cheap_AddSub32;
2504 if (mce->bogusLiterals)
2505 return expensiveAddSub(mce,False,Ity_I32,
2506 vatom1,vatom2, atom1,atom2);
2508 goto cheap_AddSub32;
2512 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2518 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
2521 if (mce->bogusLiterals)
2522 return expensiveAddSub(mce,True,Ity_I64,
2523 vatom1,vatom2, atom1,atom2);
2525 goto cheap_AddSub64;
2527 if (mce->bogusLiterals)
2528 return expensiveAddSub(mce,False,Ity_I64,
2529 vatom1,vatom2, atom1,atom2);
2531 goto cheap_AddSub64;
2535 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2540 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2544 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
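/* Worked example (illustrative; assumes mkLeft(v) = v | -v): for
   add/sub, an undefined bit can corrupt its own position and, via
   carries, every position to its left.  mkLeft therefore smears
   undefinedness from the lowest undefined bit upwards:

      v      = 00010100   (bits 2 and 4 undefined)
      -v     = 11101100
      v | -v = 11111100   (bits 2..7 all undefined) */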
2548 if (mce->bogusLiterals)
2549 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
2553 case Iop_CmpLE64S: case Iop_CmpLE64U:
2554 case Iop_CmpLT64U: case Iop_CmpLT64S:
2555 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
2559 if (mce->bogusLiterals)
2560 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
2564 case Iop_CmpLE32S: case Iop_CmpLE32U:
2565 case Iop_CmpLT32U: case Iop_CmpLT32S:
2566 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
2568 case Iop_CmpEQ16: case Iop_CmpNE16:
2569 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
2571 case Iop_CmpEQ8: case Iop_CmpNE8:
2572 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
2574 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
2575 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
2576 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
2577 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
2578 /* Just say these all produce a defined result, regardless
2579 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
2580 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
2582 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
2583 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
2585 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
2586 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
2588 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
2589 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
2591 case Iop_Shl8: case Iop_Shr8:
2592 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
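/* Illustrative note (a sketch of what scalarShift is assumed to
   compute; this is the lazy scheme the dated vector-shift notes
   above refer to):

      t# = UifU( binop(op, vatom1, atom2), PCastTo(ty, vatom2) )

   i.e. shift the V bits by the original (unshadowed) shift amount,
   then trash the entire result if the shift amount itself is
   undefined. */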
2595 uifu = mkUifUV128; difd = mkDifDV128;
2596 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
2598 uifu = mkUifU64; difd = mkDifD64;
2599 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
2601 uifu = mkUifU32; difd = mkDifD32;
2602 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
2604 uifu = mkUifU16; difd = mkDifD16;
2605 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
2607 uifu = mkUifU8; difd = mkDifD8;
2608 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
2611 uifu = mkUifUV128; difd = mkDifDV128;
2612 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
2614 uifu = mkUifU64; difd = mkDifD64;
2615 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
2617 uifu = mkUifU32; difd = mkDifD32;
2618 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
2620 uifu = mkUifU16; difd = mkDifD16;
2621 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
2623 uifu = mkUifU8; difd = mkDifD8;
2624 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
2631 difd(mce, uifu(mce, vatom1, vatom2),
2632 difd(mce, improve(mce, atom1, vatom1),
2633 improve(mce, atom2, vatom2) ) ) );
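/* Worked example (illustrative, assuming the standard Memcheck
   And/Or rule): the UifU baseline says "undefined if either arg is
   undefined".  The improvement terms then rescue bits: for And, a
   defined 0 in either operand makes that result bit a defined 0
   (improvement = atom | vatom, zero exactly at defined-0 bits); for
   Or, a defined 1 plays the same role (improvement = ~atom | vatom).
   DifD is "defined if either is defined", i.e. bitwise And of V
   bits, giving overall

      v_result = (v1 | v2) & imp(a1,v1) & imp(a2,v2)
*/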
2636 return mkUifU8(mce, vatom1, vatom2);
2638 return mkUifU16(mce, vatom1, vatom2);
2640 return mkUifU32(mce, vatom1, vatom2);
2642 return mkUifU64(mce, vatom1, vatom2);
2644 return mkUifUV128(mce, vatom1, vatom2);
2648 VG_(tool_panic)("memcheck:expr2vbits_Binop");
2654 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
2656 IRAtom* vatom = expr2vbits( mce, atom );
2657 tl_assert(isOriginalAtom(mce,atom));
2661 return unary64Fx2(mce, vatom);
2663 case Iop_Sqrt64F0x2:
2664 return unary64F0x2(mce, vatom);
2667 case Iop_RSqrt32Fx4:
2668 case Iop_Recip32Fx4:
2671 case Iop_QFtoI32Ux4_RZ:
2672 case Iop_QFtoI32Sx4_RZ:
2673 case Iop_RoundF32x4_RM:
2674 case Iop_RoundF32x4_RP:
2675 case Iop_RoundF32x4_RN:
2676 case Iop_RoundF32x4_RZ:
2677 return unary32Fx4(mce, vatom);
2679 case Iop_Sqrt32F0x4:
2680 case Iop_RSqrt32F0x4:
2681 case Iop_Recip32F0x4:
2682 return unary32F0x4(mce, vatom);
2689 return assignNew('V', mce, Ity_V128, unop(op, vatom));
2696 case Iop_Est5FRSqrt:
2697 case Iop_RoundF64toF64_NEAREST:
2698 case Iop_RoundF64toF64_NegINF:
2699 case Iop_RoundF64toF64_PosINF:
2700 case Iop_RoundF64toF64_ZERO:
2703 return mkPCastTo(mce, Ity_I64, vatom);
2707 case Iop_TruncF64asF32:
2710 return mkPCastTo(mce, Ity_I32, vatom);
2720 case Iop_V128HIto64:
2723 return assignNew('V', mce, Ity_I64, unop(op, vatom));
2734 return assignNew('V', mce, Ity_I32, unop(op, vatom));
2741 return assignNew('V', mce, Ity_I16, unop(op, vatom));
2748 return assignNew('V', mce, Ity_I8, unop(op, vatom));
2751 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
2754 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
2756 case Iop_ReinterpF64asI64:
2757 case Iop_ReinterpI64asF64:
2758 case Iop_ReinterpI32asF32:
2759 case Iop_ReinterpF32asI32:
2770 VG_(tool_panic)("memcheck:expr2vbits_Unop");
2775 /* Worker function; do not call directly. */
2777 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
2778 IREndness end, IRType ty,
2779 IRAtom* addr, UInt bias )
2787 tl_assert(isOriginalAtom(mce,addr));
2788 tl_assert(end == Iend_LE || end == Iend_BE);
2790 /* First, emit a definedness test for the address. This also sets
2791 the address (shadow) to 'defined' following the test. */
2792 complainIfUndefined( mce, addr );
2794 /* Now cook up a call to the relevant helper function, to read the
2795 data V bits from shadow memory. */
2796 ty = shadowTypeV(ty);
2798 if (end == Iend_LE) {
2800 case Ity_I64: helper = &MC_(helperc_LOADV64le);
2801 hname = "MC_(helperc_LOADV64le)";
2803 case Ity_I32: helper = &MC_(helperc_LOADV32le);
2804 hname = "MC_(helperc_LOADV32le)";
2806 case Ity_I16: helper = &MC_(helperc_LOADV16le);
2807 hname = "MC_(helperc_LOADV16le)";
2809 case Ity_I8: helper = &MC_(helperc_LOADV8);
2810 hname = "MC_(helperc_LOADV8)";
2812 default: ppIRType(ty);
2813 VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
2817 case Ity_I64: helper = &MC_(helperc_LOADV64be);
2818 hname = "MC_(helperc_LOADV64be)";
2820 case Ity_I32: helper = &MC_(helperc_LOADV32be);
2821 hname = "MC_(helperc_LOADV32be)";
2823 case Ity_I16: helper = &MC_(helperc_LOADV16be);
2824 hname = "MC_(helperc_LOADV16be)";
2826 case Ity_I8: helper = &MC_(helperc_LOADV8);
2827 hname = "MC_(helperc_LOADV8)";
2829 default: ppIRType(ty);
2830 VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
2834 /* Generate the actual address into addrAct. */
2840 IRType tyAddr = mce->hWordTy;
2841 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2842 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2843 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2844 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
2847 /* We need to have a place to park the V bits we're just about to
2848 read. */
2849 datavbits = newTemp(mce, ty, VSh);
2850 di = unsafeIRDirty_1_N( datavbits,
2852 hname, VG_(fnptr_to_fnentry)( helper ),
2853 mkIRExprVec_1( addrAct ));
2854 setHelperAnns( mce, di );
2855 stmt( 'V', mce, IRStmt_Dirty(di) );
2857 return mkexpr(datavbits);
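/* Illustrative example (an assumed shape of the emitted IR, not a
   verbatim dump): for a 32-bit little-endian load t = LDle:I32(a),
   the code above produces roughly

      <complain if a# is undefined; a# set to defined>
      t# = DIRTY MC_(helperc_LOADV32le)(a)

   i.e. the data V bits come from shadow memory via a helper call,
   rather than being computed inline. */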
2862 IRAtom* expr2vbits_Load ( MCEnv* mce,
2863 IREndness end, IRType ty,
2864 IRAtom* addr, UInt bias )
2866 IRAtom *v64hi, *v64lo;
2867 tl_assert(end == Iend_LE || end == Iend_BE);
2868 switch (shadowTypeV(ty)) {
2873 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
2875 if (end == Iend_LE) {
2876 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
2877 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
2879 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
2880 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
2882 return assignNew( 'V', mce,
2884 binop(Iop_64HLtoV128, v64hi, v64lo));
2886 VG_(tool_panic)("expr2vbits_Load");
2892 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
2893 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
2895 IRAtom *vbitsC, *vbits0, *vbitsX;
2897 /* Given Mux0X(cond,expr0,exprX), generate
2898 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
2899 That is, steer the V bits like the originals, but trash the
2900 result if the steering value is undefined. This gives
2901 lazy propagation. */
2902 tl_assert(isOriginalAtom(mce, cond));
2903 tl_assert(isOriginalAtom(mce, expr0));
2904 tl_assert(isOriginalAtom(mce, exprX));
2906 vbitsC = expr2vbits(mce, cond);
2907 vbits0 = expr2vbits(mce, expr0);
2908 vbitsX = expr2vbits(mce, exprX);
2909 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
2912 mkUifU(mce, ty, assignNew('V', mce, ty,
2913 IRExpr_Mux0X(cond, vbits0, vbitsX)),
2914 mkPCastTo(mce, ty, vbitsC) );
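/* Worked example (illustrative): if cond is undefined, PCast(cond#)
   is all-ones at type ty, and UifU with all-ones yields all-ones,
   i.e. the whole result is undefined regardless of which arm was
   selected.  If cond is defined, PCast(cond#) is all-zeroes and the
   result shadow is exactly the selected arm's shadow. */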
2917 /* --------- This is the main expression-handling function. --------- */
2920 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2925 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2928 return shadow_GETI( mce, e->Iex.GetI.descr,
2929 e->Iex.GetI.ix, e->Iex.GetI.bias );
2932 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
2935 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
2938 return expr2vbits_Qop(
2941 e->Iex.Qop.arg1, e->Iex.Qop.arg2,
2942 e->Iex.Qop.arg3, e->Iex.Qop.arg4
2946 return expr2vbits_Triop(
2949 e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
2953 return expr2vbits_Binop(
2956 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2960 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2963 return expr2vbits_Load( mce, e->Iex.Load.end,
2965 e->Iex.Load.addr, 0/*addr bias*/ );
2968 return mkLazyN( mce, e->Iex.CCall.args,
2973 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2974 e->Iex.Mux0X.exprX);
2980 VG_(tool_panic)("memcheck: expr2vbits");
2984 /*------------------------------------------------------------*/
2985 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
2986 /*------------------------------------------------------------*/
2988 /* Widen a value to the host word size. */
2991 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
2995 /* vatom is a vbits-value and as such can only have a shadow type. */
2996 tl_assert(isShadowAtom(mce,vatom));
2998 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
3001 if (tyH == Ity_I32) {
3006 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
3008 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
3013 if (tyH == Ity_I64) {
3016 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
3018 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3019 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
3021 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3022 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
3030 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
3031 VG_(tool_panic)("zwidenToHostWord");
3035 /* Generate a shadow store. addr is always the original address atom.
3036 You can pass in either originals or V-bits for the data atom, but
3037 obviously not both. guard :: Ity_I1 controls whether the store
3038 really happens; NULL means it unconditionally does. Note that
3039 guard itself is not checked for definedness; the caller of this
3040 function must do that if necessary. */
3043 void do_shadow_Store ( MCEnv* mce,
3045 IRAtom* addr, UInt bias,
3046 IRAtom* data, IRAtom* vdata,
3051 void* helper = NULL;
3055 tyAddr = mce->hWordTy;
3056 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3057 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3058 tl_assert( end == Iend_LE || end == Iend_BE );
3062 tl_assert(isOriginalAtom(mce, data));
3063 tl_assert(bias == 0);
3064 vdata = expr2vbits( mce, data );
3069 tl_assert(isOriginalAtom(mce,addr));
3070 tl_assert(isShadowAtom(mce,vdata));
3073 tl_assert(isOriginalAtom(mce, guard));
3074 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
3077 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
3079 // If we're not doing undefined value checking, pretend that this value
3080 // is "all valid". That lets Vex's optimiser remove some of the V bit
3081 // shadow computation ops that precede it.
3082 if (MC_(clo_mc_level) == 1) {
3084 case Ity_V128: // V128 constants take a 16-bit mask, one bit per byte
3085 c = IRConst_V128(V_BITS16_DEFINED); break;
3086 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
3087 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
3088 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
3089 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
3090 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3092 vdata = IRExpr_Const( c );
3095 /* First, emit a definedness test for the address. This also sets
3096 the address (shadow) to 'defined' following the test. */
3097 complainIfUndefined( mce, addr );
3099 /* Now decide which helper function to call to write the data V
3100 bits into shadow memory. */
3101 if (end == Iend_LE) {
3103 case Ity_V128: /* we'll use the helper twice */
3104 case Ity_I64: helper = &MC_(helperc_STOREV64le);
3105 hname = "MC_(helperc_STOREV64le)";
3107 case Ity_I32: helper = &MC_(helperc_STOREV32le);
3108 hname = "MC_(helperc_STOREV32le)";
3110 case Ity_I16: helper = &MC_(helperc_STOREV16le);
3111 hname = "MC_(helperc_STOREV16le)";
3113 case Ity_I8: helper = &MC_(helperc_STOREV8);
3114 hname = "MC_(helperc_STOREV8)";
3116 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3120 case Ity_V128: /* we'll use the helper twice */
3121 case Ity_I64: helper = &MC_(helperc_STOREV64be);
3122 hname = "MC_(helperc_STOREV64be)";
3124 case Ity_I32: helper = &MC_(helperc_STOREV32be);
3125 hname = "MC_(helperc_STOREV32be)";
3127 case Ity_I16: helper = &MC_(helperc_STOREV16be);
3128 hname = "MC_(helperc_STOREV16be)";
3130 case Ity_I8: helper = &MC_(helperc_STOREV8);
3131 hname = "MC_(helperc_STOREV8)";
3133 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
3137 if (ty == Ity_V128) {
3140 /* See comment in next clause re 64-bit regparms */
3141 /* also, need to be careful about endianness */
3143 Int offLo64, offHi64;
3144 IRDirty *diLo64, *diHi64;
3145 IRAtom *addrLo64, *addrHi64;
3146 IRAtom *vdataLo64, *vdataHi64;
3147 IRAtom *eBiasLo64, *eBiasHi64;
3149 if (end == Iend_LE) {
3157 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
3158 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
3159 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
3160 diLo64 = unsafeIRDirty_0_N(
3162 hname, VG_(fnptr_to_fnentry)( helper ),
3163 mkIRExprVec_2( addrLo64, vdataLo64 )
3165 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
3166 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
3167 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
3168 diHi64 = unsafeIRDirty_0_N(
3170 hname, VG_(fnptr_to_fnentry)( helper ),
3171 mkIRExprVec_2( addrHi64, vdataHi64 )
3173 if (guard) diLo64->guard = guard;
3174 if (guard) diHi64->guard = guard;
3175 setHelperAnns( mce, diLo64 );
3176 setHelperAnns( mce, diHi64 );
3177 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
3178 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
3185 /* 8/16/32/64-bit cases */
3186 /* Generate the actual address into addrAct. */
3190 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3191 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
3194 if (ty == Ity_I64) {
3195 /* We can't do this with regparm 2 on 32-bit platforms, since
3196 the back ends aren't clever enough to handle 64-bit
3197 regparm args. Therefore do it differently. */
3198 di = unsafeIRDirty_0_N(
3200 hname, VG_(fnptr_to_fnentry)( helper ),
3201 mkIRExprVec_2( addrAct, vdata )
3204 di = unsafeIRDirty_0_N(
3206 hname, VG_(fnptr_to_fnentry)( helper ),
3207 mkIRExprVec_2( addrAct,
3208 zwidenToHostWord( mce, vdata ))
3211 if (guard) di->guard = guard;
3212 setHelperAnns( mce, di );
3213 stmt( 'V', mce, IRStmt_Dirty(di) );
3219 /* Do lazy pessimistic propagation through a dirty helper call, by
3220 looking at the annotations on it. This is the most complex part of
3221 Memcheck. */
3223 static IRType szToITy ( Int n )
3226 case 1: return Ity_I8;
3227 case 2: return Ity_I16;
3228 case 4: return Ity_I32;
3229 case 8: return Ity_I64;
3230 default: VG_(tool_panic)("szToITy(memcheck)");
3235 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
3237 Int i, n, toDo, gSz, gOff;
3238 IRAtom *src, *here, *curr;
3239 IRType tySrc, tyDst;
3243 /* What's the native endianness? We need to know this. */
3244 # if defined(VG_BIGENDIAN)
3246 # elif defined(VG_LITTLEENDIAN)
3249 # error "Unknown endianness"
3252 /* First check the guard. */
3253 complainIfUndefined(mce, d->guard);
3255 /* Now round up all inputs and PCast over them. */
3256 curr = definedOfType(Ity_I32);
3258 /* Inputs: unmasked args */
3259 for (i = 0; d->args[i]; i++) {
3260 if (d->cee->mcx_mask & (1<<i)) {
3261 /* ignore this arg */
3263 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
3264 curr = mkUifU32(mce, here, curr);
3268 /* Inputs: guest state that we read. */
3269 for (i = 0; i < d->nFxState; i++) {
3270 tl_assert(d->fxState[i].fx != Ifx_None);
3271 if (d->fxState[i].fx == Ifx_Write)
3274 /* Ignore any sections marked as 'always defined'. */
3275 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
3277 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
3278 d->fxState[i].offset, d->fxState[i].size );
3282 /* This state element is read or modified. So we need to
3283 consider it. If larger than 8 bytes, deal with it in 8-byte
3284 chunks. */
3285 gSz = d->fxState[i].size;
3286 gOff = d->fxState[i].offset;
3289 if (gSz == 0) break;
3290 n = gSz <= 8 ? gSz : 8;
3291 /* update 'curr' with UifU of the state slice
3292 gOff .. gOff+n-1 */
3293 tySrc = szToITy( n );
3294 src = assignNew( 'V', mce, tySrc,
3295 shadow_GET(mce, gOff, tySrc ) );
3296 here = mkPCastTo( mce, Ity_I32, src );
3297 curr = mkUifU32(mce, here, curr);
3304 /* Inputs: memory. First set up some info needed regardless of
3305 whether we're doing reads or writes. */
3307 if (d->mFx != Ifx_None) {
3308 /* Because we may do multiple shadow loads/stores from the same
3309 base address, it's best to do a single test of its
3310 definedness right now. Post-instrumentation optimisation
3311 should remove all but this test. */
3313 tl_assert(d->mAddr);
3314 complainIfUndefined(mce, d->mAddr);
3316 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
3317 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
3318 tl_assert(tyAddr == mce->hWordTy); /* not really right */
3321 /* Deal with memory inputs (reads or modifies) */
3322 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
3324 /* chew off 32-bit chunks. We don't care about the endianness
3325 since it's all going to be condensed down to a single bit,
3326 but nevertheless choose an endianness which is hopefully
3327 native to the platform. */
3331 expr2vbits_Load ( mce, end, Ity_I32,
3332 d->mAddr, d->mSize - toDo )
3334 curr = mkUifU32(mce, here, curr);
3337 /* chew off 16-bit chunks */
3341 expr2vbits_Load ( mce, end, Ity_I16,
3342 d->mAddr, d->mSize - toDo )
3344 curr = mkUifU32(mce, here, curr);
3347 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3350 /* Whew! So curr is a 32-bit V-value summarising pessimistically
3351 all the inputs to the helper. Now we need to re-distribute the
3352 results to all destinations. */
3354 /* Outputs: the destination temporary, if there is one. */
3355 if (d->tmp != IRTemp_INVALID) {
3356 dst = findShadowTmpV(mce, d->tmp);
3357 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
3358 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
3361 /* Outputs: guest state that we write or modify. */
3362 for (i = 0; i < d->nFxState; i++) {
3363 tl_assert(d->fxState[i].fx != Ifx_None);
3364 if (d->fxState[i].fx == Ifx_Read)
3366 /* Ignore any sections marked as 'always defined'. */
3367 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
3369 /* This state element is written or modified. So we need to
3370 consider it. If larger than 8 bytes, deal with it in 8-byte
3371 chunks. */
3372 gSz = d->fxState[i].size;
3373 gOff = d->fxState[i].offset;
3376 if (gSz == 0) break;
3377 n = gSz <= 8 ? gSz : 8;
3378 /* Write suitably-casted 'curr' to the state slice
3379 gOff .. gOff+n-1 */
3380 tyDst = szToITy( n );
3381 do_shadow_PUT( mce, gOff,
3382 NULL, /* original atom */
3383 mkPCastTo( mce, tyDst, curr ) );
3389 /* Outputs: memory that we write or modify. Same comments about
3390 endianness as above apply. */
3391 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
3393 /* chew off 32-bit chunks */
3395 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3396 NULL, /* original data */
3397 mkPCastTo( mce, Ity_I32, curr ),
3401 /* chew off 16-bit chunks */
3403 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3404 NULL, /* original data */
3405 mkPCastTo( mce, Ity_I16, curr ),
3409 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3415 /* We have an ABI hint telling us that [base .. base+len-1] is to
3416 become undefined ("writable"). Generate code to call a helper to
3417 notify the A/V bit machinery of this fact.
3419 We call
3420 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
3421 Addr nia );
3422 */
3424 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
3427 /* Minor optimisation: if not doing origin tracking, ignore the
3428 supplied nia and pass zero instead. This is on the basis that
3429 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
3430 almost always generate a shorter instruction to put zero into a
3431 register than any other value. */
3432 if (MC_(clo_mc_level) < 3)
3433 nia = mkIRExpr_HWord(0);
3435 di = unsafeIRDirty_0_N(
3437 "MC_(helperc_MAKE_STACK_UNINIT)",
3438 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
3439 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
3441 stmt( 'V', mce, IRStmt_Dirty(di) );
3445 /* ------ Dealing with IRCAS (big and complex) ------ */
3448 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
3449 IRAtom* baseaddr, Int offset );
3450 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
3451 static void gen_store_b ( MCEnv* mce, Int szB,
3452 IRAtom* baseaddr, Int offset, IRAtom* dataB,
3455 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
3456 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
3459 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
3460 IRExpr.Consts, else this asserts. If they are both Consts, it
3461 doesn't do anything. So that just leaves the RdTmp case.
3463 In which case: this assigns the shadow value SHADOW to the IR
3464 shadow temporary associated with ORIG. That is, ORIG, being an
3465 original temporary, will have a shadow temporary associated with
3466 it. However, in the case envisaged here, there will so far have
3467 been no IR emitted to actually write a shadow value into that
3468 temporary. What this routine does is to (emit IR to) copy the
3469 value in SHADOW into said temporary, so that after this call,
3470 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
3473 Point is to allow callers to compute "by hand" a shadow value for
3474 ORIG, and force it to be associated with ORIG.
3476 How do we know that the shadow associated with ORIG has not so far
3477 been assigned to? Well, we don't per se know that, but supposing
3478 it had. Then this routine would create a second assignment to it,
3479 and later the IR sanity checker would barf. But that never
3480 happens. */
3482 static void bind_shadow_tmp_to_orig ( UChar how,
3484 IRAtom* orig, IRAtom* shadow )
3486 tl_assert(isOriginalAtom(mce, orig));
3487 tl_assert(isShadowAtom(mce, shadow));
3488 switch (orig->tag) {
3490 tl_assert(shadow->tag == Iex_Const);
3493 tl_assert(shadow->tag == Iex_RdTmp);
3495 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
3498 tl_assert(how == 'B');
3499 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
3510 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
3512 /* Scheme is (both single- and double- cases):
3514 1. fetch data#,dataB (the proposed new value)
3516 2. fetch expd#,expdB (what we expect to see at the address)
3518 3. check definedness of address
3520 4. load old#,oldB from shadow memory; this also checks
3521 addressability of the address
3525 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
3527 7. if "expected == old" (as computed by (6))
3528 store data#,dataB to shadow memory
3530 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
3531 'data' but 7 stores 'data#'. Hence it is possible for the
3532 shadow data to be incorrectly checked and/or updated:
3534 * 7 is at least gated correctly, since the 'expected == old'
3535 condition is derived from outputs of 5. However, the shadow
3536 write could happen too late: imagine after 5 we are
3537 descheduled, a different thread runs, writes a different
3538 (shadow) value at the address, and then we resume, hence
3539 overwriting the shadow value written by the other thread.
3541 Because the original memory access is atomic, there's no way to
3542 make both the original and shadow accesses into a single atomic
3543 thing, hence this is unavoidable.
3545 At least as Valgrind stands, I don't think it's a problem, since
3546 we're single threaded *and* we guarantee that there are no
3547 context switches during the execution of any specific superblock
3548 -- context switches can only happen at superblock boundaries.
3550 If Valgrind ever becomes MT in the future, then it might be more
3551 of a problem. A possible kludge would be to artificially
3552 associate a lock with the location, which we must acquire and
3553 release around the transaction as a whole. Hmm, that probably
3554 wouldn't work properly since it only guards us against other
3555 threads doing CASs on the same location, not against other
3556 threads doing normal reads and writes.
3558 ------------------------------------------------------------
3560 COMMENT_ON_CasCmpEQ:
3562 Note two things. Firstly, in the sequence above, we compute
3563 "expected == old", but we don't check definedness of it. Why
3564 not? Also, the x86 and amd64 front ends use
3565 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
3566 determination (expected == old ?) for themselves, and we also
3567 don't check definedness for those primops; we just say that the
3568 result is defined. Why? Details follow.
3570 x86/amd64 contains various forms of locked insns:
3571 * lock prefix before all basic arithmetic insns;
3572 eg lock xorl %reg1,(%reg2)
3573 * atomic exchange reg-mem
3574 * compare-and-swaps
3576 Rather than attempt to represent them all, which would be a
3577 royal PITA, I used a result from Maurice Herlihy
3578 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
3579 demonstrates that compare-and-swap is a primitive more general
3580 than the other two, and so can be used to represent all of them.
3581 So the translation scheme for (eg) lock incl (%reg) is as
3582 follows:
3583 again:
3584 old = * %reg
3585 new = old + 1
3587 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
3589 The "atomically" is the CAS bit. The scheme is always the same:
3590 get old value from memory, compute new value, atomically stuff
3591 new value back in memory iff the old value has not changed (iow,
3592 no other thread modified it in the meantime). If it has changed
3593 then we've been out-raced and we have to start over.
3595 Now that's all very neat, but it has the bad side effect of
3596 introducing an explicit equality test into the translation.
3597 Consider the behaviour of said code on a memory location which
3598 is uninitialised. We will wind up doing a comparison on
3599 uninitialised data, and mc duly complains.
3601 What's difficult about this is, the common case is that the
3602 location is uncontended, and so we're usually comparing the same
3603 value (* %reg) with itself. So we shouldn't complain even if it
3604 is undefined. But mc doesn't know that.
3606 My solution is to mark the == in the IR specially, so as to tell
3607 mc that it almost certainly compares a value with itself, and we
3608 should just regard the result as always defined. Rather than
3609 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
3610 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
3612 So there's always the question of, can this give a false
3613 negative? eg, imagine that initially, * %reg is defined; and we
3614 read that; but then in the gap between the read and the CAS, a
3615 different thread writes an undefined (and different) value at
3616 the location. Then the CAS in this thread will fail and we will
3617 go back to "again:", but without knowing that the trip back
3618 there was based on an undefined comparison. No matter; at least
3619 the other thread won the race and the location is correctly
3620 marked as undefined. What if it wrote an uninitialised version
3621 of the same value that was there originally, though?
3623 etc etc. Seems like there's a small corner case in which we
3624 might lose the fact that something's defined -- we're out-raced
3625 in between the "old = * reg" and the "atomically {", _and_ the
3626 other thread is writing in an undefined version of what's
3627 already there. Well, that seems pretty unlikely.
3631 If we ever need to reinstate it .. code which generates a
3632 definedness test for "expected == old" was removed at r10432 of
3633 this file. */
3635 if (cas->oldHi == IRTemp_INVALID) {
3636 do_shadow_CAS_single( mce, cas );
3638 do_shadow_CAS_double( mce, cas );
3643 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
3645 IRAtom *vdataLo = NULL, *bdataLo = NULL;
3646 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
3647 IRAtom *voldLo = NULL, *boldLo = NULL;
3648 IRAtom *expd_eq_old = NULL;
3652 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
3655 tl_assert(cas->oldHi == IRTemp_INVALID);
3656 tl_assert(cas->expdHi == NULL);
3657 tl_assert(cas->dataHi == NULL);
3659 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
3661 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break;
3662 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
3663 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
3664 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
3665 default: tl_assert(0); /* IR defn disallows any other types */
3668 /* 1. fetch data# (the proposed new value) */
3669 tl_assert(isOriginalAtom(mce, cas->dataLo));
3671 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
3672 tl_assert(isShadowAtom(mce, vdataLo));
3675 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
3676 tl_assert(isShadowAtom(mce, bdataLo));
3679 /* 2. fetch expected# (what we expect to see at the address) */
3680 tl_assert(isOriginalAtom(mce, cas->expdLo));
3682 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
3683 tl_assert(isShadowAtom(mce, vexpdLo));
3686 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
3687 tl_assert(isShadowAtom(mce, bexpdLo));
3690 /* 3. check definedness of address */
3691 /* 4. fetch old# from shadow memory; this also checks
3692 addressability of the address */
3698 cas->end, elemTy, cas->addr, 0/*Addr bias*/
3700 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
3703 = assignNew('B', mce, Ity_I32,
3704 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
3705 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
3708 /* 5. the CAS itself */
3709 stmt( 'C', mce, IRStmt_CAS(cas) );
3711 /* 6. compute "expected == old" */
3712 /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
3713 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
3714 tree, but it's not copied from the input block. */
3716 = assignNew('C', mce, Ity_I1,
3717 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
3719 /* 7. if "expected == old"
3720 store data# to shadow memory */
3721 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
3722 NULL/*data*/, vdataLo/*vdata*/,
3723 expd_eq_old/*guard for store*/ );
3725 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
3727 expd_eq_old/*guard for store*/ );
3732 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
3734 IRAtom *vdataHi = NULL, *bdataHi = NULL;
3735 IRAtom *vdataLo = NULL, *bdataLo = NULL;
3736 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
3737 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
3738 IRAtom *voldHi = NULL, *boldHi = NULL;
3739 IRAtom *voldLo = NULL, *boldLo = NULL;
3740 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
3741 IRAtom *expd_eq_old = NULL, *zero = NULL;
3742 IROp opCasCmpEQ, opOr, opXor;
3743 Int elemSzB, memOffsLo, memOffsHi;
3745 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
3748 tl_assert(cas->oldHi != IRTemp_INVALID);
3749 tl_assert(cas->expdHi != NULL);
3750 tl_assert(cas->dataHi != NULL);
3752 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
3755 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
3756 elemSzB = 1; zero = mkU8(0);
3759 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
3760 elemSzB = 2; zero = mkU16(0);
3763 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
3764 elemSzB = 4; zero = mkU32(0);
3767 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
3768 elemSzB = 8; zero = mkU64(0);
3771 tl_assert(0); /* IR defn disallows any other types */
3774 /* 1. fetch data# (the proposed new value) */
3775 tl_assert(isOriginalAtom(mce, cas->dataHi));
3776 tl_assert(isOriginalAtom(mce, cas->dataLo));
3778 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
3780 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
3781 tl_assert(isShadowAtom(mce, vdataHi));
3782 tl_assert(isShadowAtom(mce, vdataLo));
3785 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
3787 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
3788 tl_assert(isShadowAtom(mce, bdataHi));
3789 tl_assert(isShadowAtom(mce, bdataLo));
3792 /* 2. fetch expected# (what we expect to see at the address) */
3793 tl_assert(isOriginalAtom(mce, cas->expdHi));
3794 tl_assert(isOriginalAtom(mce, cas->expdLo));
3796 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
3798 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
3799 tl_assert(isShadowAtom(mce, vexpdHi));
3800 tl_assert(isShadowAtom(mce, vexpdLo));
3803 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
3805 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
3806 tl_assert(isShadowAtom(mce, bexpdHi));
3807 tl_assert(isShadowAtom(mce, bexpdLo));
3810 /* 3. check definedness of address */
3811 /* 4. fetch old# from shadow memory; this also checks
3812 addressability of the address */
3813 if (cas->end == Iend_LE) {
3815 memOffsHi = elemSzB;
3817 tl_assert(cas->end == Iend_BE);
3818 memOffsLo = elemSzB;
3826 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
3833 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
3835 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
3836 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
3839 = assignNew('B', mce, Ity_I32,
3840 gen_load_b(mce, elemSzB, cas->addr,
3841 memOffsHi/*addr bias*/));
3843 = assignNew('B', mce, Ity_I32,
3844 gen_load_b(mce, elemSzB, cas->addr,
3845 memOffsLo/*addr bias*/));
3846 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
3847 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
3850 /* 5. the CAS itself */
3851 stmt( 'C', mce, IRStmt_CAS(cas) );
3853 /* 6. compute "expected == old" */
3854 /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
3855 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
3856 tree, but it's not copied from the input block. */
3858 xHi = oldHi ^ expdHi;
3859 xLo = oldLo ^ expdLo;
3860 xHL = xHi | xLo;
3861 expd_eq_old = xHL == 0;
3863 xHi = assignNew('C', mce, elemTy,
3864 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
3865 xLo = assignNew('C', mce, elemTy,
3866 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
3867 xHL = assignNew('C', mce, elemTy,
3868 binop(opOr, xHi, xLo));
3870 = assignNew('C', mce, Ity_I1,
3871 binop(opCasCmpEQ, xHL, zero));
3873 /* 7. if "expected == old"
3874 store data# to shadow memory */
3875 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
3876 NULL/*data*/, vdataHi/*vdata*/,
3877 expd_eq_old/*guard for store*/ );
3878 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
3879 NULL/*data*/, vdataLo/*vdata*/,
3880 expd_eq_old/*guard for store*/ );
3882 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
3884 expd_eq_old/*guard for store*/ );
3885 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
3887 expd_eq_old/*guard for store*/ );
3892 /* ------ Dealing with LL/SC (not difficult) ------ */
3894 static void do_shadow_LLSC ( MCEnv* mce,
3898 IRExpr* stStoredata )
3900 /* In short: treat a load-linked like a normal load followed by an
3901 assignment of the loaded (shadow) data to the result temporary.
3902 Treat a store-conditional like a normal store, and mark the
3903 result temporary as defined. */
3904 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
3905 IRTemp resTmp = findShadowTmpV(mce, stResult);
3907 tl_assert(isIRAtom(stAddr));
3909 tl_assert(isIRAtom(stStoredata));
3911 if (stStoredata == NULL) {
3913 /* Just treat this as a normal load, followed by an assignment of
3914 the value to .result. */
3916 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
3917 || resTy == Ity_I16 || resTy == Ity_I8);
3918 assign( 'V', mce, resTmp,
3920 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
3922 /* Store Conditional */
3924 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
3925 stStoredata);
3926 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
3927 || dataTy == Ity_I16 || dataTy == Ity_I8);
3928 do_shadow_Store( mce, stEnd,
3929 stAddr, 0/* addr bias */,
3931 NULL /* shadow data */,
3933 /* This is a store conditional, so it writes to .result a value
3934 indicating whether or not the store succeeded. Just claim
3935 this value is always defined. In the PowerPC interpretation
3936 of store-conditional, definedness of the success indication
3937 depends on whether the address of the store matches the
3938 reservation address. But we can't tell that here (and
3939 anyway, we're not being PowerPC-specific). At least we are
3940 guaranteed that the definedness of the store address, and its
3941 addressability, will be checked as per normal. So it seems
3942 pretty safe to just say that the success indication is always
3943 defined.
3945 In schemeS, for origin tracking, we must correspondingly set
3946 a no-origin value for the origin shadow of .result.
3948 tl_assert(resTy == Ity_I1);
3949 assign( 'V', mce, resTmp, definedOfType(resTy) );
3954 /*------------------------------------------------------------*/
3955 /*--- Memcheck main ---*/
3956 /*------------------------------------------------------------*/
3958 static void schemeS ( MCEnv* mce, IRStmt* st );
3960 static Bool isBogusAtom ( IRAtom* at )
3964 tl_assert(isIRAtom(at));
3965 if (at->tag == Iex_RdTmp)
3967 tl_assert(at->tag == Iex_Const);
3968 con = at->Iex.Const.con;
3970 case Ico_U1: return False;
3971 case Ico_U8: n = (ULong)con->Ico.U8; break;
3972 case Ico_U16: n = (ULong)con->Ico.U16; break;
3973 case Ico_U32: n = (ULong)con->Ico.U32; break;
3974 case Ico_U64: n = (ULong)con->Ico.U64; break;
3975 case Ico_F64: return False;
3976 case Ico_F64i: return False;
3977 case Ico_V128: return False;
3978 default: ppIRExpr(at); tl_assert(0);
3980 /* VG_(printf)("%llx\n", n); */
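/* Illustrative note (the provenance claim is an assumption): magic
   values like 0x80808080, 0x7F7F7F7F and 0xFEFEFEFF are
   characteristic of word-at-a-time string code (eg optimised
   strlen), which legitimately computes on partially-defined words.
   When such a literal is spotted, MC_(instrument) sets
   mce.bogusLiterals, and the more expensive exact schemes
   (expensiveAddSub, expensiveCmpEQorNE) are used instead of the
   cheap pessimistic ones. */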
3981 return (/*32*/ n == 0xFEFEFEFFULL
3982 /*32*/ || n == 0x80808080ULL
3983 /*32*/ || n == 0x7F7F7F7FULL
3984 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
3985 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
3986 /*64*/ || n == 0x0000000000008080ULL
3987 /*64*/ || n == 0x8080808080808080ULL
3988 /*64*/ || n == 0x0101010101010101ULL
3992 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
4000 e = st->Ist.WrTmp.data;
4006 return isBogusAtom(e);
4008 return isBogusAtom(e->Iex.Unop.arg);
4010 return isBogusAtom(e->Iex.GetI.ix);
4012 return isBogusAtom(e->Iex.Binop.arg1)
4013 || isBogusAtom(e->Iex.Binop.arg2);
4015 return isBogusAtom(e->Iex.Triop.arg1)
4016 || isBogusAtom(e->Iex.Triop.arg2)
4017 || isBogusAtom(e->Iex.Triop.arg3);
4019 return isBogusAtom(e->Iex.Qop.arg1)
4020 || isBogusAtom(e->Iex.Qop.arg2)
4021 || isBogusAtom(e->Iex.Qop.arg3)
4022 || isBogusAtom(e->Iex.Qop.arg4);
4024 return isBogusAtom(e->Iex.Mux0X.cond)
4025 || isBogusAtom(e->Iex.Mux0X.expr0)
4026 || isBogusAtom(e->Iex.Mux0X.exprX);
4028 return isBogusAtom(e->Iex.Load.addr);
4030 for (i = 0; e->Iex.CCall.args[i]; i++)
4031 if (isBogusAtom(e->Iex.CCall.args[i]))
4038 d = st->Ist.Dirty.details;
4039 for (i = 0; d->args[i]; i++)
4040 if (isBogusAtom(d->args[i]))
4042 if (d->guard && isBogusAtom(d->guard))
4044 if (d->mAddr && isBogusAtom(d->mAddr))
4048 return isBogusAtom(st->Ist.Put.data);
4050 return isBogusAtom(st->Ist.PutI.ix)
4051 || isBogusAtom(st->Ist.PutI.data);
4053 return isBogusAtom(st->Ist.Store.addr)
4054 || isBogusAtom(st->Ist.Store.data);
4056 return isBogusAtom(st->Ist.Exit.guard);
4058 return isBogusAtom(st->Ist.AbiHint.base)
4059 || isBogusAtom(st->Ist.AbiHint.nia);
4065 cas = st->Ist.CAS.details;
4066 return isBogusAtom(cas->addr)
4067 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
4068 || isBogusAtom(cas->expdLo)
4069 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
4070 || isBogusAtom(cas->dataLo);
4072 return isBogusAtom(st->Ist.LLSC.addr)
4073 || (st->Ist.LLSC.storedata
4074 ? isBogusAtom(st->Ist.LLSC.storedata)
4079 VG_(tool_panic)("checkForBogusLiterals");
4084 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
4086 VexGuestLayout* layout,
4087 VexGuestExtents* vge,
4088 IRType gWordTy, IRType hWordTy )
4090 Bool verboze = 0||False;
4092 Int i, j, first_stmt;
4097 if (gWordTy != hWordTy) {
4098 /* We don't currently support this case. */
4099 VG_(tool_panic)("host/guest word size mismatch");
4102 /* Check we're not completely nuts */
4103 tl_assert(sizeof(UWord) == sizeof(void*));
4104 tl_assert(sizeof(Word) == sizeof(void*));
4105 tl_assert(sizeof(Addr) == sizeof(void*));
4106 tl_assert(sizeof(ULong) == 8);
4107 tl_assert(sizeof(Long) == 8);
4108 tl_assert(sizeof(Addr64) == 8);
4109 tl_assert(sizeof(UInt) == 4);
4110 tl_assert(sizeof(Int) == 4);
4112 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
4115 sb_out = deepCopyIRSBExceptStmts(sb_in);
4117 /* Set up the running environment. Both .sb and .tmpMap are
4118 modified as we go along. Note that tmps are added to both
4119 .sb->tyenv and .tmpMap together, so the valid index-set for
4120 those two arrays should always be identical. */
4121 VG_(memset)(&mce, 0, sizeof(mce));
4123 mce.trace = verboze;
4124 mce.layout = layout;
4125 mce.hWordTy = hWordTy;
4126 mce.bogusLiterals = False;
4128 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
4129 sizeof(TempMapEnt));
4130 for (i = 0; i < sb_in->tyenv->types_used; i++) {
4133 ent.shadowV = IRTemp_INVALID;
4134 ent.shadowB = IRTemp_INVALID;
4135 VG_(addToXA)( mce.tmpMap, &ent );
4137 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
4139 /* Make a preliminary inspection of the statements, to see if there
4140 are any dodgy-looking literals. If there are, we generate
4141 extra-detailed (hence extra-expensive) instrumentation in
4142 places. Scan the whole bb even if dodginess is found earlier,
4143 so that the flatness assertion is applied to all stmts. */
4147 for (i = 0; i < sb_in->stmts_used; i++) {
4149 st = sb_in->stmts[i];
4151 tl_assert(isFlatIRStmt(st));
4154 bogus = checkForBogusLiterals(st);
4156 VG_(printf)("bogus: ");
4164 mce.bogusLiterals = bogus;
4166 /* Copy verbatim any IR preamble preceding the first IMark */
4168 tl_assert(mce.sb == sb_out);
4169 tl_assert(mce.sb != sb_in);
4172 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
4174 st = sb_in->stmts[i];
4176 tl_assert(isFlatIRStmt(st));
4178 stmt( 'C', &mce, sb_in->stmts[i] );
4182 /* Nasty problem. IR optimisation of the pre-instrumented IR may
4183 cause the IR following the preamble to contain references to IR
4184 temporaries defined in the preamble. Because the preamble isn't
4185 instrumented, these temporaries don't have any shadows.
4186 Nevertheless uses of them following the preamble will cause
4187 memcheck to generate references to their shadows. End effect is
4188 to cause IR sanity check failures, due to references to
4189 non-existent shadows. This is only evident for the complex
4190 preambles used for function wrapping on TOC-afflicted platforms
4191 (ppc64-linux, ppc32-aix5, ppc64-aix5).
4193 The following loop therefore scans the preamble looking for
4194 assignments to temporaries. For each one found it creates an
4195 assignment to the corresponding (V) shadow temp, marking it as
4196 'defined'. This is the same resulting IR as if the main
4197 instrumentation loop before had been applied to the statement
4198 'tmp = CONSTANT'.
4200 Similarly, if origin tracking is enabled, we must generate an
4201 assignment for the corresponding origin (B) shadow, claiming
4202 no-origin, as appropriate for a defined value.
4203 */
4204 for (j = 0; j < i; j++) {
4205 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
4206 /* findShadowTmpV checks its arg is an original tmp;
4207 no need to assert that here. */
4208 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
4209 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
4210 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
4211 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
4212 if (MC_(clo_mc_level) == 3) {
4213 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
4214 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
4215 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
4218 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
4225 /* Iterate over the remaining stmts to generate instrumentation. */
4227 tl_assert(sb_in->stmts_used > 0);
4229 tl_assert(i < sb_in->stmts_used);
4230 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
4232 for (/* use current i*/; i < sb_in->stmts_used; i++) {
4234 st = sb_in->stmts[i];
4235 first_stmt = sb_out->stmts_used;
4243 if (MC_(clo_mc_level) == 3) {
4244 /* See comments on case Ist_CAS below. */
4245 if (st->tag != Ist_CAS)
4246 schemeS( &mce, st );
4249 /* Generate instrumentation code for each stmt ... */
4254 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
4255 expr2vbits( &mce, st->Ist.WrTmp.data) );
4259 do_shadow_PUT( &mce,
4262 NULL /* shadow atom */ );
4266 do_shadow_PUTI( &mce,
4270 st->Ist.PutI.data );
4274 do_shadow_Store( &mce, st->Ist.Store.end,
4275 st->Ist.Store.addr, 0/* addr bias */,
4277 NULL /* shadow data */,
4282 complainIfUndefined( &mce, st->Ist.Exit.guard );
4293 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
4297 do_AbiHint( &mce, st->Ist.AbiHint.base,
4298 st->Ist.AbiHint.len,
4299 st->Ist.AbiHint.nia );
4303 do_shadow_CAS( &mce, st->Ist.CAS.details );
4304 /* Note, do_shadow_CAS copies the CAS itself to the output
4305 block, because it needs to add instrumentation both
4306 before and after it. Hence skip the copy below. Also
4307 skip the origin-tracking stuff (call to schemeS) above,
4308 since that's all tangled up with it too; do_shadow_CAS
4313 do_shadow_LLSC( &mce,
4315 st->Ist.LLSC.result,
4317 st->Ist.LLSC.storedata );
4324 VG_(tool_panic)("memcheck: unhandled IRStmt");
4326 } /* switch (st->tag) */
4329 for (j = first_stmt; j < sb_out->stmts_used; j++) {
4331 ppIRStmt(sb_out->stmts[j]);
4337 /* ... and finally copy the stmt itself to the output. Except,
4338 skip the copy of IRCASs; see comments on case Ist_CAS
4339 above. */
4340 if (st->tag != Ist_CAS)
4341 stmt('C', &mce, st);
4344 /* Now we need to complain if the jump target is undefined. */
4345 first_stmt = sb_out->stmts_used;
4348 VG_(printf)("sb_in->next = ");
4349 ppIRExpr(sb_in->next);
4350 VG_(printf)("\n\n");
4353 complainIfUndefined( &mce, sb_in->next );
4356 for (j = first_stmt; j < sb_out->stmts_used; j++) {
4358 ppIRStmt(sb_out->stmts[j]);
4364 /* If this fails, there's been some serious snafu with tmp management
4365 that should be investigated. */
4366 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
4367 VG_(deleteXA)( mce.tmpMap );
4369 tl_assert(mce.sb == sb_out);
/*------------------------------------------------------------*/
/*--- Post-tree-build final tidying                        ---*/
/*------------------------------------------------------------*/
/* This exploits the observation that Memcheck often produces
   repeated conditional calls of the form

   Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)

   with the same guard expression G guarding the same helper call.
   The second and subsequent calls are redundant.  This usually
   results from instrumentation of guest code containing multiple
   memory references at different constant offsets from the same base
   register.  After optimisation of the instrumentation, you get a
   test for the definedness of the base register for each memory
   reference, which is kinda pointless.  MC_(final_tidy) therefore
   looks for such repeated calls and removes all but the first. */
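
/* As an illustration (assumed IR shapes, not verbatim tool output):
   after instrumenting two loads at constant offsets from the same
   base register, the optimised IR may contain

      t1 = CmpNE64(t_base_vbits,0x0:I64)
      if (t1) DIRTY ::: MC_(helperc_value_check8_fail_no_o)()
      ...
      if (t1) DIRTY ::: MC_(helperc_value_check8_fail_no_o)()

   Both calls invoke the same helper under the same guard t1, so the
   second conveys nothing new; MC_(final_tidy) rewrites it to a
   no-op. */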
/* A struct for recording which (helper, guard) pairs we have already
   seen. */
typedef
   struct { void* entry; IRExpr* guard; }
   Pair;
/* Return True if e1 and e2 definitely denote the same value (used to
   compare guards).  Return False if unknown; False is the safe
   answer.  Since guest registers and guest memory do not have the
   SSA property we must return False if any Gets or Loads appear in
   the expression. */

static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag)
      return False;
   switch (e1->tag) {
      case Iex_Const:
         return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
      case Iex_Binop:
         return e1->Iex.Binop.op == e2->Iex.Binop.op
                && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
                && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
      case Iex_Unop:
         return e1->Iex.Unop.op == e2->Iex.Unop.op
                && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
      case Iex_RdTmp:
         return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
      case Iex_Mux0X:
         return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
                && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
                && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
      case Iex_Qop:
      case Iex_Triop:
      case Iex_CCall:
         /* be lazy.  Could define equality for these, but they never
            appear to be used. */
         return False;
      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* be conservative - these may not give the same value each
            time */
         return False;
      case Iex_Binder:
         /* should never see this */
         /* fallthrough */
      default:
         VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
         ppIRExpr(e1);
         VG_(tool_panic)("memcheck:sameIRValue");
         return False;
   }
}
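
/* For example (illustrative only): two guards that are structurally
   identical and built purely from temporaries and constants, such as
   CmpEQ32(t5,0x0:I32) in both places, compare equal here.  Anything
   containing a Get, GetI or Load compares unequal, since guest state
   or memory may have changed between the points at which the two
   guards are evaluated. */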
/* See if 'pairs' already has an entry for (entry, guard).  Return
   True if so.  If not, add an entry. */

static
Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
{
   Pair  p;
   Pair* pp;
   Int   i, n = VG_(sizeXA)( pairs );
   for (i = 0; i < n; i++) {
      pp = VG_(indexXA)( pairs, i );
      if (pp->entry == entry && sameIRValue(pp->guard, guard))
         return True;
   }
   p.guard = guard;
   p.entry = entry;
   VG_(addToXA)( pairs, &p );
   return False;
}
static Bool is_helperc_value_checkN_fail ( HChar* name )
{
   return
      0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
      || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
}
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int       i;
   IRStmt*   st;
   IRDirty*  di;
   IRExpr*   guard;
   IRCallee* cee;
   Bool      alreadyPresent;
   XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                                 VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      if (!guard)
         continue;
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
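
/* Note that MC_(final_tidy) is not called from this file: it is
   expected to be handed to the core (from mc_main.c, via
   VG_(needs_final_IR_tidy_pass)) so that it runs over the
   instrumented superblock only after all other IR optimisation has
   been done, which is what exposes the redundant calls in the first
   place. */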
/*------------------------------------------------------------*/
/*--- Origin tracking stuff                                ---*/
/*------------------------------------------------------------*/
/* Almost identical to findShadowTmpV. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check it
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      IRTemp tmpB
         = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}
static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
{
   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
}
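
/* B-values are 32-bit origin tags (otags), where 0 means "no origin
   known"; combining two of them therefore reduces to an unsigned max.
   A value-level sketch of what Iop_Max32U computes here (illustrative
   only, not part of the tool):

      maxU32(0,     0    ) == 0        neither operand carries an origin
      maxU32(otag1, 0    ) == otag1    keep the one known origin
      maxU32(otag1, otag2) == the numerically larger tag -- an
                              arbitrary but cheap way to pick one      */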
static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
                            IRAtom* baseaddr, Int offset )
{
   void*    hFun;
   HChar*   hName;
   IRTemp   bTmp;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   switch (szB) {
      case 1:  hFun  = (void*)&MC_(helperc_b_load1);
               hName = "MC_(helperc_b_load1)";
               break;
      case 2:  hFun  = (void*)&MC_(helperc_b_load2);
               hName = "MC_(helperc_b_load2)";
               break;
      case 4:  hFun  = (void*)&MC_(helperc_b_load4);
               hName = "MC_(helperc_b_load4)";
               break;
      case 8:  hFun  = (void*)&MC_(helperc_b_load8);
               hName = "MC_(helperc_b_load8)";
               break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host */
      return mkexpr(bTmp);
   }
}
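
/* As an illustration (assumed IR shape, not verbatim output): on a
   64-bit host, gen_load_b(mce, 4, tAddr, 8) emits roughly

      tEA  = Add64(tAddr,0x8:I64)
      tB64 = DIRTY ::: MC_(helperc_b_load4)(tEA)
      tB32 = 64to32(tB64)

   and returns tB32, the 32-bit origin tag covering the 4 bytes at
   tAddr+8. */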
/* Generate a shadow store.  guard :: Ity_I1 controls whether the
   store really happens; NULL means it unconditionally does. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard )
{
   void*    hFun;
   HChar*   hName;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   if (mce->hWordTy == Ity_I64)
      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));

   switch (szB) {
      case 1:  hFun  = (void*)&MC_(helperc_b_store1);
               hName = "MC_(helperc_b_store1)";
               break;
      case 2:  hFun  = (void*)&MC_(helperc_b_store2);
               hName = "MC_(helperc_b_store2)";
               break;
      case 4:  hFun  = (void*)&MC_(helperc_b_store4);
               hName = "MC_(helperc_b_store4)";
               break;
      case 8:  hFun  = (void*)&MC_(helperc_b_store8);
               hName = "MC_(helperc_b_store8)";
               break;
      case 16: hFun  = (void*)&MC_(helperc_b_store16);
               hName = "MC_(helperc_b_store16)";
               break;
      default:
         tl_assert(0);
   }
   di = unsafeIRDirty_0_N( 2/*regparms*/,
           hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_2( ea, dataB )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   if (guard) di->guard = guard;
   stmt( 'B', mce, IRStmt_Dirty(di) );
}
static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   if (eTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
   if (eTy == Ity_I32)
      return e;
   tl_assert(0);
}
static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(eTy == Ity_I32);
   if (dstTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
   tl_assert(0);
}
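
/* narrowTo32 and zWidenFrom32 exist because B-values are uniformly
   32 bits wide, whereas the shadow guest-state arrays used for
   GetI/PutI (see schemeE and schemeS below) have the width of the
   array's element type, which may be 64 bits.  So a B-value read
   from such an array must be narrowed on the way out, and
   zero-widened on the way back in. */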
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom      *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                         IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                      e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom*  here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom*  curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_Mux0X: {
         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
      }
      case Iex_Unop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr( e );
         VG_(tool_panic)("memcheck:schemeE");
   }
}
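
/* A worked example (illustrative only): at the top level of
   'tDst = Add32(t1,t2)', schemeS below hands Add32(t1,t2) to schemeE,
   which yields Max32U(b_t1,b_t2), where b_tN is tN's B shadow; that
   becomes the B shadow of tDst.  A 'Load:I32(t3)' instead becomes a
   call to MC_(helperc_b_load4)(t3), and a constant contributes
   0x0:I32, meaning "no origin". */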
static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int       i, n, toDo, gSz, gOff;
   IRAtom    *here, *curr;
   IRTemp    dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, d->args[i] );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 4 bytes, deal with it in 4-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;

      while (True) {
         Int b_offset;
         if (gSz == 0) break;
         n = gSz <= 4 ? gSz : 4;
         /* update 'curr' with maxU32 of the state slice
            gOff .. gOff+n-1 */
         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
         if (b_offset != -1) {
            here = assignNew( 'B', mce,
                              Ity_I32,
                              IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
                                         Ity_I32));
            curr = gen_maxU32( mce, curr, here );
         }
         gSz  -= n;
         gOff += n;
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }
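
   /* Worked example (illustrative): for mSize == 10, the two loops
      above read origin tags for bytes [0..3] and [4..7] with 4-byte
      loads (toDo: 10 -> 6 -> 2), then bytes [8..9] with a 2-byte load
      (toDo: 2 -> 0).  So the assertion holds for any even mSize; an
      odd mSize would leave toDo == 1, hence the note about 1-byte
      excess. */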
   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpB(mce, d->tmp);
      assign( 'B', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;

      /* This state element is written or modified.  So we need to
         consider it.  If larger than 4 bytes, deal with it in 4-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;

      while (True) {
         Int b_offset;
         if (gSz == 0) break;
         n = gSz <= 4 ? gSz : 4;
         /* Write 'curr' to the state slice gOff .. gOff+n-1 */
         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
         if (b_offset != -1) {
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       curr ));
         }
         gSz  -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      NULL/*guard*/ );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      NULL/*guard*/ );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }
}
static void do_origins_Store ( MCEnv* mce,
                               IREndness stEnd,
                               IRExpr* stAddr,
                               IRExpr* stData )
{
   Int     dszB;
   IRAtom* dataB;
   /* assert that the B value for the address is already available
      (somewhere), since the call to schemeE will want to see it.
      XXXX how does this actually ensure that?? */
   tl_assert(isIRAtom(stAddr));
   tl_assert(isIRAtom(stData));
   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   dataB = schemeE( mce, stData );
   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
                NULL/*guard*/ );
}
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRRegArray* descr_b;
         IRAtom      *t1, *t2, *t3, *t4;
         IRRegArray* descr = st->Ist.PutI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, st->Ist.PutI.data );
         t2 = schemeE( mce, st->Ist.PutI.ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
                                      st->Ist.PutI.bias, t4 ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store( mce, st->Ist.Store.end,
                                st->Ist.Store.addr,
                                st->Ist.Store.data );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data to the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store( mce, st->Ist.LLSC.end,
                                   st->Ist.LLSC.addr,
                                   st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt( st );
         VG_(tool_panic)("memcheck:schemeS");
   }
}
/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/