2 /*---------------------------------------------------------------*/
3 /*--- begin test_main.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
41 #include "libvex_basictypes.h"
44 #include "test_main.h"
47 /*---------------------------------------------------------------*/
49 /*---------------------------------------------------------------*/
/* LibVEX callback: invoked on any unrecoverable VEX failure.  Prints a
   message; in the full source it then exits the process (hence the
   noreturn attribute).  NOTE(review): this extract has stray original
   line numbers prepended and lines missing (braces, exit call) --
   confirm against the complete file. */
52 __attribute__ ((noreturn))
54 void failure_exit ( void )
56 fprintf(stdout, "VEX did failure_exit. Bye.\n");
/* LibVEX callback: sink for VEX's diagnostic output.  Forwards the raw
   byte run to stdout unbuffered-style via fwrite. */
61 void log_bytes ( HChar* bytes, Int nbytes )
63 fwrite ( bytes, 1, nbytes, stdout );
/* Scratch buffers for the test driver: one text line of the .org input
   file, the guest (original) machine code bytes, and the host
   (translated) code emitted by LibVEX_Translate. */
66 #define N_LINEBUF 10000
67 static HChar linebuf[N_LINEBUF];
69 #define N_ORIGBUF 10000
70 #define N_TRANSBUF 5000
72 static UChar origbuf[N_ORIGBUF];
73 static UChar transbuf[N_TRANSBUF];
75 static Bool verbose = True;
/* Forward decls of the two instrumenters defined later in this file;
   one of them is plugged into VexTranslateArgs.instrument1 in main(). */
79 static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
81 IRSB* mc_instrument ( void* closureV,
82 IRSB* bb_in, VexGuestLayout* layout,
84 IRType gWordTy, IRType hWordTy );
87 static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) { return False; }
/* Test driver: reads a `.org` file of basic blocks (bb header line plus
   a line of hex bytes each), translates every block with
   LibVEX_Translate, and prints an expansion ratio and a checksum of the
   emitted host code.  NOTE(review): this extract is missing many lines
   (declarations of f/i/u/sum/orig_addr/vcon/vbi/vge, braces, #endif
   lines) -- confirm all hedged notes against the complete file. */
89 int main ( int argc, char** argv )
95 Int bb_number, n_bbs_done = 0;
96 Int orig_nbytes, trans_used;
97 VexTranslateResult tres;
100 VexArchInfo vai_x86, vai_amd64, vai_ppc32;
102 VexTranslateArgs vta;
/* Exactly one argument expected: the .org input file. */
105 fprintf(stderr, "usage: vex file.org\n");
108 f = fopen(argv[1], "r");
110 fprintf(stderr, "can't open `%s'\n", argv[1]);
114 /* Run with default params. However, we can't allow bb chasing
115 since that causes the front end to get segfaults when it tries
116 to read code outside the initial BB we hand it. So when calling
117 LibVEX_Translate, send in a chase-into predicate that always
119 LibVEX_default_VexControl ( &vcon );
120 vcon.iropt_level = 2;
121 vcon.guest_max_insns = 50;
123 LibVEX_Init ( &failure_exit, &log_bytes,
124 1, /* debug_paranoia */
125 TEST_VSUPPORT, /* valgrind support */
/* Main loop: one iteration per input line; only lines starting with
   '.' describe basic blocks, everything else is skipped. */
131 fgets(linebuf, N_LINEBUF,f);
132 if (linebuf[0] == 0) continue;
133 if (linebuf[0] != '.') continue;
135 if (n_bbs_done == TEST_N_BBS) break;
138 /* first line is: . bb-number bb-addr n-bytes */
139 assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
141 & orig_addr, & orig_nbytes ));
142 assert(orig_nbytes >= 1);
144 fgets(linebuf, N_LINEBUF,f);
145 assert(linebuf[0] == '.');
147 /* second line is: . byte byte byte etc */
149 printf("============ Basic Block %d, Done %d, "
150 "Start %x, nbytes %2d ============",
151 bb_number, n_bbs_done-1, orig_addr, orig_nbytes);
/* Parse the hex byte list into origbuf; each byte occupies 3 chars
   ("xx ") starting at column 2 of the line. */
153 assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
154 for (i = 0; i < orig_nbytes; i++) {
155 assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
156 origbuf[i] = (UChar)u;
159 /* FIXME: put sensible values into the .hwcaps fields */
160 LibVEX_default_VexArchInfo(&vai_x86);
161 vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
162 | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
164 LibVEX_default_VexArchInfo(&vai_amd64);
165 vai_amd64.hwcaps = 0;
167 LibVEX_default_VexArchInfo(&vai_ppc32);
168 vai_ppc32.hwcaps = 0;
169 vai_ppc32.ppc_cache_line_szB = 128;
171 LibVEX_default_VexAbiInfo(&vbi);
173 /* ----- Set up args for LibVEX_Translate ----- */
/* Guest/host pair is chosen at compile time via these #if blocks;
   only the x86 -> x86 branch is currently enabled (#if 1). */
174 #if 0 /* ppc32 -> ppc32 */
175 vta.arch_guest = VexArchPPC32;
176 vta.archinfo_guest = vai_ppc32;
177 vta.arch_host = VexArchPPC32;
178 vta.archinfo_host = vai_ppc32;
180 #if 0 /* amd64 -> amd64 */
181 vta.arch_guest = VexArchAMD64;
182 vta.archinfo_guest = vai_amd64;
183 vta.arch_host = VexArchAMD64;
184 vta.archinfo_host = vai_amd64;
186 #if 1 /* x86 -> x86 */
187 vta.arch_guest = VexArchX86;
188 vta.archinfo_guest = vai_x86;
189 vta.arch_host = VexArchX86;
190 vta.archinfo_host = vai_x86;
192 vta.abiinfo_both = vbi;
193 vta.guest_bytes = origbuf;
194 vta.guest_bytes_addr = (Addr64)orig_addr;
195 vta.callback_opaque = NULL;
196 vta.chase_into_ok = chase_into_not_ok;
197 vta.guest_extents = &vge;
198 vta.host_bytes = transbuf;
199 vta.host_bytes_size = N_TRANSBUF;
200 vta.host_bytes_used = &trans_used;
/* Instrumenter selection, also compile-time: none / addrcheck /
   memcheck (the enabled branch plugs in mc_instrument). */
201 #if 0 /* no instrumentation */
202 vta.instrument1 = NULL;
203 vta.instrument2 = NULL;
205 #if 0 /* addrcheck */
206 vta.instrument1 = ac_instrument;
207 vta.instrument2 = NULL;
210 vta.instrument1 = mc_instrument;
211 vta.instrument2 = NULL;
213 vta.do_self_check = False;
214 vta.preamble_function = NULL;
215 vta.traceflags = TEST_FLAGS;
/* Dummy dispatcher address -- the generated code is never run, only
   inspected, so any non-NULL value will do here. */
216 #if 1 /* x86, amd64 hosts */
217 vta.dispatch = (void*)0x12345678;
218 #else /* ppc32, ppc64 hosts */
222 vta.finaltidy = NULL;
/* Translate the same block TEST_N_ITERS times (for timing runs). */
224 for (i = 0; i < TEST_N_ITERS; i++)
225 tres = LibVEX_Translate ( &vta );
227 if (tres != VexTransOK)
228 printf("\ntres = %d\n", (Int)tres);
229 assert(tres == VexTransOK);
230 assert(vge.n_used == 1);
231 assert((UInt)(vge.len[0]) == orig_nbytes);
/* Report translated/original size ratio and a byte-sum checksum of the
   emitted host code. */
234 for (i = 0; i < trans_used; i++)
235 sum += (UInt)transbuf[i];
236 printf ( " %6.2f ... %u\n",
237 (double)trans_used / (double)vge.len[0], sum );
242 LibVEX_ShowAllocStats();
247 //////////////////////////////////////////////////////////////////////
248 //////////////////////////////////////////////////////////////////////
249 //////////////////////////////////////////////////////////////////////
250 //////////////////////////////////////////////////////////////////////
251 //////////////////////////////////////////////////////////////////////
252 //////////////////////////////////////////////////////////////////////
253 //////////////////////////////////////////////////////////////////////
254 //////////////////////////////////////////////////////////////////////
/* Abort helper for the addrcheck instrumenter below: prints the message
   and (in the full source) terminates the process. */
259 __attribute((noreturn))
260 void panic ( HChar* s )
262 printf("\npanic: %s\n", s);
/* Addrcheck-style instrumenter: builds a fresh IRSB that copies bb_in
   statement by statement, inserting a dirty helper call before every
   load and store so the (fake, address-only) helpers see each memory
   access.  Helper addresses are dummy constants (0x123456xx) -- the
   generated code is never executed by this test driver.  NOTE(review):
   this extract is missing the enclosing switch over st->tag, several
   braces and the final return; confirm details against the full file. */
267 IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
269 /* Use this rather than eg. -1 because it's a UInt. */
270 #define INVALID_DATA_SIZE 999999
/* Start with an empty IRSB sharing bb_in's type env, next and jumpkind. */
281 IRSB* bb = emptyIRSB();
282 bb->tyenv = dopyIRTypeEnv(bb_in->tyenv);
283 bb->next = dopyIRExpr(bb_in->next);
284 bb->jumpkind = bb_in->jumpkind;
286 /* No loads to consider in ->next. */
287 assert(isIRAtom(bb_in->next));
289 for (i = 0; i < bb_in->stmts_used; i++) {
290 st = bb_in->stmts[i];
/* Tmp assignment whose rhs is a little-endian load: call the matching
   ac_helperc_LOADn helper; sizes other than 4/2/1 use LOADN and also
   pass the size explicitly (needSz). */
296 data = st->Ist.Tmp.data;
297 if (data->tag == Iex_LDle) {
298 addr = data->Iex.LDle.addr;
299 sz = sizeofIRType(data->Iex.LDle.ty);
302 case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
303 (void*)0x12345601); break;
304 case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
305 (void*)0x12345602); break;
306 case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
307 (void*)0x12345603); break;
308 default: helper = mkIRCallee(0, "ac_helperc_LOADN",
310 needSz = True; break;
316 unsafeIRDirty_0_N( helper->regparms,
317 helper->name, helper->addr,
318 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
324 unsafeIRDirty_0_N( helper->regparms,
325 helper->name, helper->addr,
326 mkIRExprVec_1(addr) )
/* Little-endian store: same scheme with the STOREn helpers. */
333 data = st->Ist.STle.data;
334 addr = st->Ist.STle.addr;
335 assert(isIRAtom(data));
336 assert(isIRAtom(addr));
337 sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
340 case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
341 (void*)0x12345605); break;
342 case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
343 (void*)0x12345606); break;
344 case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
345 (void*)0x12345607); break;
346 default: helper = mkIRCallee(0, "ac_helperc_STOREN",
348 needSz = True; break;
354 unsafeIRDirty_0_N( helper->regparms,
355 helper->name, helper->addr,
356 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
362 unsafeIRDirty_0_N( helper->regparms,
363 helper->name, helper->addr,
364 mkIRExprVec_1(addr) )
/* Remaining statement kinds only get sanity-checked for flatness. */
370 assert(isIRAtom(st->Ist.Put.data));
374 assert(isIRAtom(st->Ist.PutI.ix));
375 assert(isIRAtom(st->Ist.PutI.data));
379 assert(isIRAtom(st->Ist.Exit.guard));
383 /* If the call doesn't interact with memory, we ain't
385 if (st->Ist.Dirty.details->mFx == Ifx_None)
394 panic("addrcheck: unhandled IRStmt");
/* Finally, copy the (possibly just instrumented-around) statement. */
397 addStmtToIRSB( bb, dopyIRStmt(st));
404 //////////////////////////////////////////////////////////////////////
405 //////////////////////////////////////////////////////////////////////
406 //////////////////////////////////////////////////////////////////////
407 //////////////////////////////////////////////////////////////////////
408 //////////////////////////////////////////////////////////////////////
409 //////////////////////////////////////////////////////////////////////
410 //////////////////////////////////////////////////////////////////////
411 //////////////////////////////////////////////////////////////////////
/* Second copy of the abort helper, for the memcheck section (the full
   file presumably #if-guards one of the two copies -- TODO confirm). */
416 __attribute((noreturn))
417 void panic ( HChar* s )
419 printf("\npanic: %s\n", s);
/* Shims that let the pasted-in memcheck code below compile standalone:
   Valgrind-isms (tl_assert, VG_(), tool_panic, MC_()/TL_() name
   manglers) are mapped onto plain libc / local equivalents. */
423 #define tl_assert(xxx) assert(xxx)
424 #define VG_(xxxx) xxxx
425 #define tool_panic(zzz) panic(zzz)
426 #define MC_(zzzz) MC_##zzzz
427 #define TL_(zzzz) SK_##zzzz
/* Do-nothing stand-ins for memcheck's runtime helpers.  Only their
   addresses are needed (they are baked into the generated IR as call
   targets); the translated code is never executed by this driver. */
430 static void MC_helperc_complain_undef ( void );
431 static void MC_helperc_LOADV8 ( void );
432 static void MC_helperc_LOADV4 ( void );
433 static void MC_helperc_LOADV2 ( void );
434 static void MC_helperc_LOADV1 ( void );
435 static void MC_helperc_STOREV8( void );
436 static void MC_helperc_STOREV4( void );
437 static void MC_helperc_STOREV2( void );
438 static void MC_helperc_STOREV1( void );
439 static void MC_helperc_value_check0_fail( void );
440 static void MC_helperc_value_check1_fail( void );
441 static void MC_helperc_value_check4_fail( void );
443 static void MC_helperc_complain_undef ( void ) { }
444 static void MC_helperc_LOADV8 ( void ) { }
445 static void MC_helperc_LOADV4 ( void ) { }
446 static void MC_helperc_LOADV2 ( void ) { }
447 static void MC_helperc_LOADV1 ( void ) { }
448 static void MC_helperc_STOREV8( void ) { }
449 static void MC_helperc_STOREV4( void ) { }
450 static void MC_helperc_STOREV2( void ) { }
451 static void MC_helperc_STOREV1( void ) { }
452 static void MC_helperc_value_check0_fail( void ) { }
453 static void MC_helperc_value_check1_fail( void ) { }
454 static void MC_helperc_value_check4_fail( void ) { }
457 /*--------------------------------------------------------------------*/
458 /*--- Instrument IR to perform memory checking operations. ---*/
459 /*--- mc_translate.c ---*/
460 /*--------------------------------------------------------------------*/
463 This file is part of MemCheck, a heavyweight Valgrind tool for
464 detecting memory errors.
466 Copyright (C) 2000-2010 Julian Seward
469 This program is free software; you can redistribute it and/or
470 modify it under the terms of the GNU General Public License as
471 published by the Free Software Foundation; either version 2 of the
472 License, or (at your option) any later version.
474 This program is distributed in the hope that it will be useful, but
475 WITHOUT ANY WARRANTY; without even the implied warranty of
476 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
477 General Public License for more details.
479 You should have received a copy of the GNU General Public License
480 along with this program; if not, write to the Free Software
481 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
484 The GNU General Public License is contained in the file COPYING.
487 //#include "mc_include.h"
490 /*------------------------------------------------------------*/
491 /*--- Forward decls ---*/
492 /*------------------------------------------------------------*/
/* Mutually-used routines of the memcheck instrumenter, declared up
   front: type shadowing and expression -> V-bits translation. */
496 static IRType shadowType ( IRType ty );
497 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
500 /*------------------------------------------------------------*/
501 /*--- Memcheck running state, and tmp management. ---*/
502 /*------------------------------------------------------------*/
504 /* Carries around state during memcheck instrumentation. */
507 /* MODIFIED: the bb being constructed. IRStmts are added. */
510 /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
511 original temps to their current shadow temp.
512 Initially all entries are IRTemp_INVALID. Entries are added
513 lazily since many original temps are not used due to
514 optimisation prior to instrumentation. Note that floating
515 point original tmps are shadowed by integer tmps of the same
516 size, and Bit-typed original tmps are shadowed by the type
517 Ity_I8. See comment below. */
519 Int n_originalTmps; /* for range checking */
521 /* READONLY: the guest layout. This indicates which parts of
522 the guest state should be regarded as 'always defined'. */
523 VexGuestLayout* layout;
524 /* READONLY: the host word type. Needed for constructing
525 arguments of type 'HWord' to be passed to helper functions.
526 Ity_I32 or Ity_I64 only. */
531 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
532 demand), as they are encountered. This is for two reasons.
534 (1) (less important reason): Many original tmps are unused due to
535 initial IR optimisation, and we do not want to waste space in tables
538 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
539 table indexed [0 .. n_types-1], which gives the current shadow for
540 each original tmp, or INVALID_IRTEMP if none is so far assigned.
541 It is necessary to support making multiple assignments to a shadow
542 -- specifically, after testing a shadow for definedness, it needs
543 to be made defined. But IR's SSA property disallows this.
545 (2) (more important reason): Therefore, when a shadow needs to get
546 a new value, a new temporary is created, the value is assigned to
547 that, and the tmpMap is updated to reflect the new binding.
549 A corollary is that if the tmpMap maps a given tmp to
550 INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
551 there's a read-before-write error in the original tmps. The IR
552 sanity checker should catch all such anomalies, however.
555 /* Find the tmp currently shadowing the given original tmp. If none
556 so far exists, allocate one. */
557 static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
559 tl_assert(orig < mce->n_originalTmps);
/* Lazy allocation: first use of this original tmp creates its shadow,
   whose type is the integer shadow of the original's type. */
560 if (mce->tmpMap[orig] == IRTemp_INVALID) {
562 = newIRTemp(mce->bb->tyenv,
563 shadowType(mce->bb->tyenv->types[orig]));
565 return mce->tmpMap[orig];
568 /* Allocate a new shadow for the given original tmp. This means any
569 previous shadow is abandoned. This is needed because it is
570 necessary to give a new value to a shadow once it has been tested
571 for undefinedness, but unfortunately IR's SSA property disallows
572 this. Instead we must abandon the old shadow, allocate a new one
573 and use that instead. */
574 static void newShadowTmp ( MCEnv* mce, IRTemp orig )
576 tl_assert(orig < mce->n_originalTmps);
/* Unconditionally rebind tmpMap[orig] to a fresh shadow tmp. */
578 = newIRTemp(mce->bb->tyenv,
579 shadowType(mce->bb->tyenv->types[orig]));
583 /*------------------------------------------------------------*/
584 /*--- IRAtoms -- a subset of IRExprs ---*/
585 /*------------------------------------------------------------*/
587 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
588 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
589 input, most of this code deals in atoms. Usefully, a value atom
590 always has a V-value which is also an atom: constants are shadowed
591 by constants, and temps are shadowed by the corresponding shadow
/* Alias used purely for documentation: an IRAtom* is an IRExpr* that
   the code promises is atomic (Const or RdTmp). */
594 typedef IRExpr IRAtom;
596 /* (used for sanity checks only): is this an atom which looks
597 like it's from original code? */
598 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
/* Original tmps occupy indices [0 .. n_originalTmps); anything below
   that bound is original, anything at or above is a shadow. */
600 if (a1->tag == Iex_Const)
602 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
607 /* (used for sanity checks only): is this an atom which looks
608 like it's from shadow code? */
609 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
/* Mirror of isOriginalAtom: shadow tmps live at or above
   n_originalTmps; constants can appear in shadow code too. */
611 if (a1->tag == Iex_Const)
613 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
618 /* (used for sanity checks only): check that both args are atoms and
619 are identically-kinded. */
620 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
622 if (a1->tag == Iex_RdTmp && a1->tag == Iex_RdTmp)
624 if (a1->tag == Iex_Const && a1->tag == Iex_Const)
630 /*------------------------------------------------------------*/
631 /*--- Type management ---*/
632 /*------------------------------------------------------------*/
634 /* Shadow state is always accessed using integer types. This returns
635 an integer type with the same size (as per sizeofIRType) as the
636 given type. The only valid shadow types are Bit, I8, I16, I32,
639 static IRType shadowType ( IRType ty )
/* Integer types shadow themselves; floats map to same-sized ints;
   V128 stays V128.  Anything else is a usage error -> panic. */
646 case Ity_I64: return ty;
647 case Ity_F32: return Ity_I32;
648 case Ity_F64: return Ity_I64;
649 case Ity_V128: return Ity_V128;
650 default: ppIRType(ty);
651 VG_(tool_panic)("memcheck:shadowType");
655 /* Produce a 'defined' value of the given shadow type. Should only be
656 supplied shadow types (Bit/I8/I16/I32/UI64). */
657 static IRExpr* definedOfType ( IRType ty ) {
/* All-zero V bits mean "fully defined" in memcheck's encoding. */
659 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
660 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
661 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
662 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
663 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
664 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
665 default: VG_(tool_panic)("memcheck:definedOfType");
670 /*------------------------------------------------------------*/
671 /*--- Constructing IR fragments ---*/
672 /*------------------------------------------------------------*/
674 /* assign value to tmp */
675 #define assign(_bb,_tmp,_expr) \
676 addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))
678 /* add stmt to a bb */
679 #define stmt(_bb,_stmt) \
680 addStmtToIRSB((_bb), (_stmt))
682 /* build various kinds of expressions */
683 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
684 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
685 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
686 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
687 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
688 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
689 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
690 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
692 /* bind the given expression to a new temporary, and return the
693 temporary. This effectively converts an arbitrary expression into
/* an atom, keeping the IR flat.  (Returns mkexpr(t) in the full
   source; the return line is missing from this extract.) */
695 static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
696 IRTemp t = newIRTemp(mce->bb->tyenv, ty);
697 assign(mce->bb, t, e);
702 /*------------------------------------------------------------*/
703 /*--- Constructing definedness primitive ops ---*/
704 /*------------------------------------------------------------*/
706 /* --------- Defined-if-either-defined --------- */
/* DifD: result bit is defined (0) if either operand bit is defined,
   i.e. bitwise AND of the V-bit vectors, at each width. */
708 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
709 tl_assert(isShadowAtom(mce,a1));
710 tl_assert(isShadowAtom(mce,a2));
711 return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
714 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
715 tl_assert(isShadowAtom(mce,a1));
716 tl_assert(isShadowAtom(mce,a2));
717 return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
720 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
721 tl_assert(isShadowAtom(mce,a1));
722 tl_assert(isShadowAtom(mce,a2));
723 return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
726 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
727 tl_assert(isShadowAtom(mce,a1));
728 tl_assert(isShadowAtom(mce,a2));
729 return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
732 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
733 tl_assert(isShadowAtom(mce,a1));
734 tl_assert(isShadowAtom(mce,a2));
735 return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
738 /* --------- Undefined-if-either-undefined --------- */
/* UifU: result bit is undefined (1) if either operand bit is
   undefined, i.e. bitwise OR of the V-bit vectors, at each width. */
740 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
741 tl_assert(isShadowAtom(mce,a1));
742 tl_assert(isShadowAtom(mce,a2));
743 return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
746 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
747 tl_assert(isShadowAtom(mce,a1));
748 tl_assert(isShadowAtom(mce,a2));
749 return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
752 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
753 tl_assert(isShadowAtom(mce,a1));
754 tl_assert(isShadowAtom(mce,a2));
755 return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
758 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
759 tl_assert(isShadowAtom(mce,a1));
760 tl_assert(isShadowAtom(mce,a2));
761 return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
764 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
765 tl_assert(isShadowAtom(mce,a1));
766 tl_assert(isShadowAtom(mce,a2));
767 return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
/* Width-dispatching wrapper over the mkUifU* family above. */
770 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
772 case Ity_I8: return mkUifU8(mce, a1, a2);
773 case Ity_I16: return mkUifU16(mce, a1, a2);
774 case Ity_I32: return mkUifU32(mce, a1, a2);
775 case Ity_I64: return mkUifU64(mce, a1, a2);
776 case Ity_V128: return mkUifUV128(mce, a1, a2);
778 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
779 VG_(tool_panic)("memcheck:mkUifU");
783 /* --------- The Left-family of operations. --------- */
/* Left(x) = x | -x : smears the lowest set (undefined) bit leftwards
   through all higher bit positions.  Neg is spelled as 0 - x because
   (per the commented-out lines) a direct Neg op was not available. */
785 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
786 tl_assert(isShadowAtom(mce,a1));
787 /* It's safe to duplicate a1 since it's only an atom */
788 return assignNew(mce, Ity_I8,
790 assignNew(mce, Ity_I8,
791 /* unop(Iop_Neg8, a1)))); */
792 binop(Iop_Sub8, mkU8(0), a1) )));
795 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
796 tl_assert(isShadowAtom(mce,a1));
797 /* It's safe to duplicate a1 since it's only an atom */
798 return assignNew(mce, Ity_I16,
800 assignNew(mce, Ity_I16,
801 /* unop(Iop_Neg16, a1)))); */
802 binop(Iop_Sub16, mkU16(0), a1) )));
805 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
806 tl_assert(isShadowAtom(mce,a1));
807 /* It's safe to duplicate a1 since it's only an atom */
808 return assignNew(mce, Ity_I32,
810 assignNew(mce, Ity_I32,
811 /* unop(Iop_Neg32, a1)))); */
812 binop(Iop_Sub32, mkU32(0), a1) )));
815 /* --------- 'Improvement' functions for AND/OR. --------- */
817 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
818 defined (0); all other -> undefined (1).
/* A known-zero AND operand forces the result bit to 0 regardless of
   the other operand's definedness -- these helpers exploit that. */
820 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
822 tl_assert(isOriginalAtom(mce, data));
823 tl_assert(isShadowAtom(mce, vbits));
824 tl_assert(sameKindedAtoms(data, vbits));
825 return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
828 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
830 tl_assert(isOriginalAtom(mce, data));
831 tl_assert(isShadowAtom(mce, vbits));
832 tl_assert(sameKindedAtoms(data, vbits));
833 return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
836 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
838 tl_assert(isOriginalAtom(mce, data));
839 tl_assert(isShadowAtom(mce, vbits));
840 tl_assert(sameKindedAtoms(data, vbits));
841 return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
844 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
846 tl_assert(isOriginalAtom(mce, data));
847 tl_assert(isShadowAtom(mce, vbits));
848 tl_assert(sameKindedAtoms(data, vbits));
849 return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
852 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
854 tl_assert(isOriginalAtom(mce, data));
855 tl_assert(isShadowAtom(mce, vbits));
856 tl_assert(sameKindedAtoms(data, vbits));
857 return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
860 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
861 defined (0); all other -> undefined (1).
/* Dual of ImproveAND: a known-one OR operand forces the result bit to
   1, so NOT(data) is OR'd into the vbits. */
863 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
865 tl_assert(isOriginalAtom(mce, data));
866 tl_assert(isShadowAtom(mce, vbits));
867 tl_assert(sameKindedAtoms(data, vbits));
871 assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
875 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
877 tl_assert(isOriginalAtom(mce, data));
878 tl_assert(isShadowAtom(mce, vbits));
879 tl_assert(sameKindedAtoms(data, vbits));
883 assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
887 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
889 tl_assert(isOriginalAtom(mce, data));
890 tl_assert(isShadowAtom(mce, vbits));
891 tl_assert(sameKindedAtoms(data, vbits));
895 assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
899 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
901 tl_assert(isOriginalAtom(mce, data));
902 tl_assert(isShadowAtom(mce, vbits));
903 tl_assert(sameKindedAtoms(data, vbits));
907 assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
911 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
913 tl_assert(isOriginalAtom(mce, data));
914 tl_assert(isShadowAtom(mce, vbits));
915 tl_assert(sameKindedAtoms(data, vbits));
919 assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
923 /* --------- Pessimising casts. --------- */
/* Pessimising cast: collapse vbits to a single "any bit undefined?"
   bit (CmpNE against zero), then sign-extend that bit out to dst_ty,
   so one undefined input bit poisons the whole result. */
925 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
929 /* Note, dst_ty is a shadow type, not an original type. */
930 /* First of all, collapse vbits down to a single bit. */
931 tl_assert(isShadowAtom(mce,vbits));
932 ty = typeOfIRExpr(mce->bb->tyenv, vbits);
939 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
942 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
945 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
948 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
951 VG_(tool_panic)("mkPCastTo(1)");
954 /* Now widen up to the dst type. */
959 return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
961 return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
963 return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
965 return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
/* V128: widen to I64, then duplicate into both halves. */
967 tmp1 = assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
968 tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
972 VG_(tool_panic)("mkPCastTo(2)");
977 /*------------------------------------------------------------*/
978 /*--- Emit a test and complaint if something is undefined. ---*/
979 /*------------------------------------------------------------*/
981 /* Set the annotations on a dirty helper to indicate that the stack
982 pointer and instruction pointers might be read. This is the
983 behaviour of all 'emit-a-complaint' style functions we might
/* fxState[0] = SP slice, fxState[1] = IP slice, both Ifx_Read;
   offsets/sizes come from the guest layout. */
986 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
988 di->fxState[0].fx = Ifx_Read;
989 di->fxState[0].offset = mce->layout->offset_SP;
990 di->fxState[0].size = mce->layout->sizeof_SP;
991 di->fxState[1].fx = Ifx_Read;
992 di->fxState[1].offset = mce->layout->offset_IP;
993 di->fxState[1].size = mce->layout->sizeof_IP;
997 /* Check the supplied **original** atom for undefinedness, and emit a
998 complaint if so. Once that happens, mark it as defined. This is
999 possible because the atom is either a tmp or literal. If it's a
1000 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1001 be defined. In fact as mentioned above, we will have to allocate a
1002 new tmp to carry the new 'defined' shadow value, and update the
1003 original->tmp mapping accordingly; we cannot simply assign a new
1004 value to an existing shadow tmp as this breaks SSAness -- resulting
1005 in the post-instrumentation sanity checker spluttering in disapproval.
1007 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
1015 /* Since the original expression is atomic, there's no duplicated
1016 work generated by making multiple V-expressions for it. So we
1017 don't really care about the possibility that someone else may
1018 also create a V-interpretation for it. */
1019 tl_assert(isOriginalAtom(mce, atom));
1020 vatom = expr2vbits( mce, atom );
1021 tl_assert(isShadowAtom(mce, vatom));
1022 tl_assert(sameKindedAtoms(atom, vatom));
1024 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1026 /* sz is only used for constructing the error message */
1027 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1029 cond = mkPCastTo( mce, Ity_I1, vatom );
1030 /* cond will be 0 if all defined, and 1 if any not defined. */
/* Pick the complaint helper by size: dedicated no-arg helpers for
   sz 0/1/4, generic helper taking the size for everything else.
   NOTE(review): the switch head and IRStmt_Exit/guard wiring are
   missing from this extract -- confirm against the full file. */
1034 di = unsafeIRDirty_0_N( 0/*regparms*/,
1035 "MC_(helperc_value_check0_fail)",
1036 &MC_(helperc_value_check0_fail),
1041 di = unsafeIRDirty_0_N( 0/*regparms*/,
1042 "MC_(helperc_value_check1_fail)",
1043 &MC_(helperc_value_check1_fail),
1048 di = unsafeIRDirty_0_N( 0/*regparms*/,
1049 "MC_(helperc_value_check4_fail)",
1050 &MC_(helperc_value_check4_fail),
1055 di = unsafeIRDirty_0_N( 1/*regparms*/,
1056 "MC_(helperc_complain_undef)",
1057 &MC_(helperc_complain_undef),
1058 mkIRExprVec_1( mkIRExpr_HWord( sz ))
1063 setHelperAnns( mce, di );
1064 stmt( mce->bb, IRStmt_Dirty(di));
1066 /* Set the shadow tmp to be defined. First, update the
1067 orig->shadow tmp mapping to reflect the fact that this shadow is
1068 getting a new value. */
1069 tl_assert(isIRAtom(vatom));
1070 /* sameKindedAtoms ... */
1071 if (vatom->tag == Iex_RdTmp) {
1072 tl_assert(atom->tag == Iex_RdTmp);
1073 newShadowTmp(mce, atom->Iex.RdTmp.tmp);
1074 assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
1080 /*------------------------------------------------------------*/
1081 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1082 /*------------------------------------------------------------*/
1084 /* Examine the always-defined sections declared in layout to see if
1085 the (offset,size) section is within one. Note, it is an error to
1086 partially fall into such a region: (offset,size) should either be
1087 completely in such a region or completely not-in such a region.
1089 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1091 Int minoffD, maxoffD, i;
1092 Int minoff = offset;
1093 Int maxoff = minoff + size - 1;
/* Guest-state offsets are assumed to fit in 16 bits. */
1094 tl_assert((minoff & ~0xFFFF) == 0);
1095 tl_assert((maxoff & ~0xFFFF) == 0);
1097 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1098 minoffD = mce->layout->alwaysDefd[i].offset;
1099 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1100 tl_assert((minoffD & ~0xFFFF) == 0);
1101 tl_assert((maxoffD & ~0xFFFF) == 0);
1103 if (maxoff < minoffD || maxoffD < minoff)
1104 continue; /* no overlap */
1105 if (minoff >= minoffD && maxoff <= maxoffD)
1106 return True; /* completely contained in an always-defd section */
1108 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1110 return False; /* could not find any containing section */
1114 /* Generate into bb suitable actions to shadow this Put. If the state
1115 slice is marked 'always defined', do nothing. Otherwise, write the
1116 supplied V bits to the shadow state. We can pass in either an
1117 original atom or a V-atom, but not both. In the former case the
1118 relevant V-bits are then generated from the original.
1121 void do_shadow_PUT ( MCEnv* mce, Int offset,
1122 IRAtom* atom, IRAtom* vatom )
/* Exactly one of atom/vatom is non-NULL; when atom is given, its
   V-bits are derived here via expr2vbits. */
1127 tl_assert(isOriginalAtom(mce, atom));
1128 vatom = expr2vbits( mce, atom );
1131 tl_assert(isShadowAtom(mce, vatom));
1134 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1135 tl_assert(ty != Ity_I1);
1136 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1138 /* emit code to emit a complaint if any of the vbits are 1. */
1139 /* complainIfUndefined(mce, atom); */
1141 /* Do a plain shadow Put. */
/* Shadow state lives at original offset + total_sizeB. */
1142 stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
1147 /* Generate into bb suitable actions to shadow this PutI (indexed
1148 state write). If the array is 'always defined', do nothing.
/* Shadow handling for PutI (indexed guest-state write): mirror the
   write into the shadow area, unless the array is always-defined. */
1151 void do_shadow_PUTI ( MCEnv* mce,
1152 IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
1158 tl_assert(isOriginalAtom(mce,atom));
1159 vatom = expr2vbits( mce, atom );
1160 tl_assert(sameKindedAtoms(atom, vatom))
1162 tyS = shadowType(ty);
1163 arrSize = descr->nElems * sizeofIRType(ty);
1164 tl_assert(ty != Ity_I1);
1165 tl_assert(isOriginalAtom(mce,ix));
/* The array index itself must be fully defined. */
1166 complainIfUndefined(mce,ix);
1167 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1169 /* emit code to emit a complaint if any of the vbits are 1. */
1170 /* complainIfUndefined(mce, atom); */
1172 /* Do a cloned version of the Put that refers to the shadow
/* Clone the descriptor, rebased into the shadow state area. */
1174 IRRegArray* new_descr
1175 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1176 tyS, descr->nElems);
1177 stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
1182 /* Return an expression which contains the V bits corresponding to the
1183 given GET (passed in in pieces).
/* V bits for a GET: all-zeroes (defined) if the slice is always-defd,
   otherwise a Get from the corresponding shadow-state location. */
1186 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1188 IRType tyS = shadowType(ty);
1189 tl_assert(ty != Ity_I1);
1190 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1191 /* Always defined, return all zeroes of the relevant type */
1192 return definedOfType(tyS);
1194 /* return a cloned version of the Get that refers to the shadow
1196 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1201 /* Return an expression which contains the V bits corresponding to the
1202 given GETI (passed in in pieces).
/* V bits for a GetI (indexed state read): all-zeroes if the whole
   array is always-defd, otherwise a GetI from a shadow-rebased clone
   of the descriptor.  The index must itself be defined. */
1205 IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
1207 IRType ty = descr->elemTy;
1208 IRType tyS = shadowType(ty);
1209 Int arrSize = descr->nElems * sizeofIRType(ty);
1210 tl_assert(ty != Ity_I1);
1211 tl_assert(isOriginalAtom(mce,ix));
1212 complainIfUndefined(mce,ix);
1213 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1214 /* Always defined, return all zeroes of the relevant type */
1215 return definedOfType(tyS);
1217 /* return a cloned version of the Get that refers to the shadow
1219 IRRegArray* new_descr
1220 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1221 tyS, descr->nElems);
1222 return IRExpr_GetI( new_descr, ix, bias );
1227 /*------------------------------------------------------------*/
1228 /*--- Generating approximations for unknown operations, ---*/
1229 /*--- using lazy-propagate semantics ---*/
1230 /*------------------------------------------------------------*/
1232 /* Lazy propagation of undefinedness from two values, resulting in the
1233 specified shadow type.
/* Pessimistic 2-arg merge: PCast both shadow values to I32, UifU them,
   then PCast the union to the requested final shadow type. */
1236 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1238 /* force everything via 32-bit intermediaries. */
1240 tl_assert(isShadowAtom(mce,va1));
1241 tl_assert(isShadowAtom(mce,va2));
1242 at = mkPCastTo(mce, Ity_I32, va1);
1243 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1244 at = mkPCastTo(mce, finalVty, at);
1249 /* Do the lazy propagation game from a null-terminated vector of
1250 atoms. This is presumably the arguments to a helper call, so the
1251 IRCallee info is also supplied in order that we can know which
1252 arguments should be ignored (via the .mcx_mask field).
/* Pessimistic N-arg merge over a NULL-terminated argument vector,
   skipping args excluded by the callee's mcx_mask. */
1255 IRAtom* mkLazyN ( MCEnv* mce,
1256 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
/* Start from 'fully defined' and UifU each considered arg into it. */
1260 IRAtom* curr = definedOfType(Ity_I32);
1261 for (i = 0; exprvec[i]; i++) {
1263 tl_assert(isOriginalAtom(mce, exprvec[i]));
1264 /* Only take notice of this arg if the callee's mc-exclusion
1265 mask does not say it is to be excluded. */
1266 if (cee->mcx_mask & (1<<i)) {
1267 /* the arg is to be excluded from definedness checking. Do
1269 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1271 /* calculate the arg's definedness, and pessimistically merge
1273 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
1274 curr = mkUifU32(mce, here, curr);
/* Widen/narrow the accumulated I32 summary to the final type. */
1277 return mkPCastTo(mce, finalVtype, curr );
1281 /*------------------------------------------------------------*/
1282 /*--- Generating expensive sequences for exact carry-chain ---*/
1283 /*--- propagation in add/sub and related operations. ---*/
1284 /*------------------------------------------------------------*/
/* Exact (carry-chain-accurate) V-bit computation for 32-bit add.
   a_min/b_min treat undefined bits as 0, a_max/b_max treat them as 1;
   result bits are undefined where the two extreme sums disagree, or
   where either input bit was already undefined. */
1287 IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
1288 IRAtom* aa, IRAtom* bb )
1290 IRAtom *a_min, *b_min, *a_max, *b_max;
1292 IROp opAND, opOR, opXOR, opNOT, opADD;
1294 tl_assert(isShadowAtom(mce,qaa));
1295 tl_assert(isShadowAtom(mce,qbb));
1296 tl_assert(isOriginalAtom(mce,aa));
1297 tl_assert(isOriginalAtom(mce,bb));
1298 tl_assert(sameKindedAtoms(qaa,aa));
1299 tl_assert(sameKindedAtoms(qbb,bb));
1308 // a_min = aa & ~qaa
1309 a_min = assignNew(mce,ty,
1311 assignNew(mce,ty, unop(opNOT, qaa))));
1313 // b_min = bb & ~qbb
1314 b_min = assignNew(mce,ty,
1316 assignNew(mce,ty, unop(opNOT, qbb))));
/* a_max/b_max: force every undefined bit to 1. */
1319 a_max = assignNew(mce,ty, binop(opOR, aa, qaa));
1322 b_max = assignNew(mce,ty, binop(opOR, bb, qbb));
1324 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1328 assignNew(mce,ty, binop(opOR, qaa, qbb)),
1330 binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
1331 assignNew(mce,ty, binop(opADD, a_max, b_max))
1339 /*------------------------------------------------------------*/
1340 /*--- Helpers for dealing with vector primops. ---*/
1341 /*------------------------------------------------------------*/
1343 /* Vector pessimisation -- pessimise within each lane individually. */
/* Lane-wise pessimisation: each 8-bit lane becomes all-0s or all-1s. */
1345 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1347 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
/* Lane-wise pessimisation for 16-bit lanes. */
1350 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1352 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
/* Lane-wise pessimisation for 32-bit lanes. */
1355 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1357 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
/* Lane-wise pessimisation for 64-bit lanes. */
1360 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1362 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1366 /* Here's a simple scheme capable of handling ops derived from SSE1
1367 code and while only generating ops that can be efficiently
1368 implemented in SSE1. */
1370 /* All-lanes versions are straightforward:
1372 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
1374 unary32Fx4(x,y) ==> PCast32x4(x#)
1376 Lowest-lane-only versions are more complex:
1378 binary32F0x4(x,y) ==> SetV128lo32(
1380 PCast32(V128to32(UifUV128(x#,y#)))
1383 This is perhaps not so obvious. In particular, it's faster to
1384 do a V128-bit UifU and then take the bottom 32 bits than the more
1385 obvious scheme of taking the bottom 32 bits of each operand
1386 and doing a 32-bit UifU. Basically since UifU is fast and
1387 chopping lanes off vector values is slow.
1391 unary32F0x4(x) ==> SetV128lo32(
1393 PCast32(V128to32(x#))
1398 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1399 PCast32x4(v#) = CmpNEZ32x4(v#)
/* All-lanes F32x4 binop: PCast32x4(UifUV128(x#,y#)), per scheme above. */
1403 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1406 tl_assert(isShadowAtom(mce, vatomX));
1407 tl_assert(isShadowAtom(mce, vatomY));
1408 at = mkUifUV128(mce, vatomX, vatomY);
1409 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
/* All-lanes F32x4 unop: PCast32x4(x#). */
1414 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1417 tl_assert(isShadowAtom(mce, vatomX));
1418 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
/* Lowest-lane-only F32 binop: UifU the whole vectors, narrow to the
   bottom 32 bits, PCast, then splice back into x#'s low lane. */
1423 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1426 tl_assert(isShadowAtom(mce, vatomX));
1427 tl_assert(isShadowAtom(mce, vatomY));
1428 at = mkUifUV128(mce, vatomX, vatomY);
1429 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
1430 at = mkPCastTo(mce, Ity_I32, at);
1431 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
/* Lowest-lane-only F32 unop: PCast the bottom 32 bits of x#, splice
   back into x#'s low lane. */
1436 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1439 tl_assert(isShadowAtom(mce, vatomX));
1440 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
1441 at = mkPCastTo(mce, Ity_I32, at);
1442 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1446 /* --- ... and ... 64Fx2 versions of the same ... --- */
/* All-lanes F64x2 binop: PCast64x2(UifUV128(x#,y#)). */
1449 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1452 tl_assert(isShadowAtom(mce, vatomX));
1453 tl_assert(isShadowAtom(mce, vatomY));
1454 at = mkUifUV128(mce, vatomX, vatomY);
1455 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
/* All-lanes F64x2 unop: PCast64x2(x#). */
1460 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1463 tl_assert(isShadowAtom(mce, vatomX));
1464 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
/* Lowest-lane-only F64 binop: UifU the whole vectors, narrow to the
   bottom 64 bits, PCast, then splice back into x#'s low lane. */
1469 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1472 tl_assert(isShadowAtom(mce, vatomX));
1473 tl_assert(isShadowAtom(mce, vatomY));
1474 at = mkUifUV128(mce, vatomX, vatomY);
1475 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
1476 at = mkPCastTo(mce, Ity_I64, at);
1477 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
/* Lowest-lane-only F64 unop: PCast the bottom 64 bits of x#, splice
   back into x#'s low lane. */
1482 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1485 tl_assert(isShadowAtom(mce, vatomX));
1486 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
1487 at = mkPCastTo(mce, Ity_I64, at);
1488 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1492 /* --- --- Vector saturated narrowing --- --- */
1494 /* This is quite subtle. What to do is simple:
1496 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1498 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1500 Why this is right is not so simple. Consider a lane in the args,
1501 vatom1 or 2, doesn't matter.
1503 After the PCast, that lane is all 0s (defined) or all
1506 Both signed and unsigned saturating narrowing of all 0s produces
1507 all 0s, which is what we want.
1509 The all-1s case is more complex. Unsigned narrowing interprets an
1510 all-1s input as the largest unsigned integer, and so produces all
1511 1s as a result since that is the largest unsigned value at the
1514 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1515 to -1, so we still wind up with all 1s at the smaller width.
1517 So: In short, pessimise the args, then apply the original narrowing
/* Saturated-narrowing V bits: pessimise each arg lane-wise, then apply
   the original narrowing op to the pessimised values (see the long
   justification comment above). */
1521 IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
1522 IRAtom* vatom1, IRAtom* vatom2)
1524 IRAtom *at1, *at2, *at3;
1525 IRAtom* (*pcast)( MCEnv*, IRAtom* );
/* Select the per-lane pessimiser matching the op's lane width. */
1526 switch (narrow_op) {
1527 case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
1528 case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
1529 case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
1530 default: VG_(tool_panic)("vectorNarrowV128");
1532 tl_assert(isShadowAtom(mce,vatom1));
1533 tl_assert(isShadowAtom(mce,vatom2));
1534 at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1535 at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1536 at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1541 /* --- --- Vector integer arithmetic --- --- */
1543 /* Simple ... UifU the args and per-lane pessimise the results. */
/* Integer V128 binop, 8-bit lanes: UifU the args, pessimise per lane. */
1545 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1548 at = mkUifUV128(mce, vatom1, vatom2);
1549 at = mkPCast8x16(mce, at);
/* Integer V128 binop, 16-bit lanes: UifU then per-lane pessimise. */
1554 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1557 at = mkUifUV128(mce, vatom1, vatom2);
1558 at = mkPCast16x8(mce, at);
/* Integer V128 binop, 32-bit lanes: UifU then per-lane pessimise. */
1563 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1566 at = mkUifUV128(mce, vatom1, vatom2);
1567 at = mkPCast32x4(mce, at);
/* Integer V128 binop, 64-bit lanes: UifU then per-lane pessimise. */
1572 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1575 at = mkUifUV128(mce, vatom1, vatom2);
1576 at = mkPCast64x2(mce, at);
1581 /*------------------------------------------------------------*/
1582 /*--- Generate shadow values from all kinds of IRExprs. ---*/
1583 /*------------------------------------------------------------*/
/* Compute shadow (V-bit) value for a binary operation: dispatch on the
   IROp to the appropriate exact, lane-wise, or lazy approximation. */
1586 IRAtom* expr2vbits_Binop ( MCEnv* mce,
1588 IRAtom* atom1, IRAtom* atom2 )
/* Function pointers for the shared And/Or scheme (do_And_Or below). */
1591 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
1592 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
1593 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1595 IRAtom* vatom1 = expr2vbits( mce, atom1 );
1596 IRAtom* vatom2 = expr2vbits( mce, atom2 );
1598 tl_assert(isOriginalAtom(mce,atom1));
1599 tl_assert(isOriginalAtom(mce,atom2));
1600 tl_assert(isShadowAtom(mce,vatom1));
1601 tl_assert(isShadowAtom(mce,vatom2));
1602 tl_assert(sameKindedAtoms(atom1,vatom1));
1603 tl_assert(sameKindedAtoms(atom2,vatom2));
1606 /* V128-bit SIMD (SSE2-esque) */
1616 /* Same scheme as with all other shifts. */
1617 complainIfUndefined(mce, atom2);
1618 return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1625 case Iop_CmpGT8Sx16:
1631 return binary8Ix16(mce, vatom1, vatom2);
1637 case Iop_MulHi16Sx8:
1638 case Iop_MulHi16Ux8:
1641 case Iop_CmpGT16Sx8:
1647 return binary16Ix8(mce, vatom1, vatom2);
1652 case Iop_CmpGT32Sx4:
1657 return binary32Ix4(mce, vatom1, vatom2);
1665 return binary64Ix2(mce, vatom1, vatom2);
1667 case Iop_QNarrow32Sx4:
1668 case Iop_QNarrow16Sx8:
1669 case Iop_QNarrow16Ux8:
1670 return vectorNarrowV128(mce, op, vatom1, vatom2);
1677 case Iop_CmpLT64Fx2:
1678 case Iop_CmpLE64Fx2:
1679 case Iop_CmpEQ64Fx2:
1681 return binary64Fx2(mce, vatom1, vatom2);
1688 case Iop_CmpLT64F0x2:
1689 case Iop_CmpLE64F0x2:
1690 case Iop_CmpEQ64F0x2:
1692 return binary64F0x2(mce, vatom1, vatom2);
1694 /* V128-bit SIMD (SSE1-esque) */
1701 case Iop_CmpLT32Fx4:
1702 case Iop_CmpLE32Fx4:
1703 case Iop_CmpEQ32Fx4:
1705 return binary32Fx4(mce, vatom1, vatom2);
1712 case Iop_CmpLT32F0x4:
1713 case Iop_CmpLE32F0x4:
1714 case Iop_CmpEQ32F0x4:
1716 return binary32F0x4(mce, vatom1, vatom2);
1718 /* V128-bit data-steering */
/* Data-steering ops move bits without computing; apply the same op
   to the shadow values. */
1719 case Iop_SetV128lo32:
1720 case Iop_SetV128lo64:
1721 case Iop_64HLtoV128:
1722 case Iop_InterleaveLO64x2:
1723 case Iop_InterleaveLO32x4:
1724 case Iop_InterleaveLO16x8:
1725 case Iop_InterleaveLO8x16:
1726 case Iop_InterleaveHI64x2:
1727 case Iop_InterleaveHI32x4:
1728 case Iop_InterleaveHI16x8:
1729 case Iop_InterleaveHI8x16:
1730 return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
1732 /* Scalar floating point */
1734 // case Iop_RoundF64:
1737 /* First arg is I32 (rounding mode), second is F64 or I64
1739 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1741 case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1742 /* Takes two F64 args. */
1745 /* First arg is I32 (rounding mode), second is F64 (data). */
1746 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1749 /* First arg is I32 (rounding mode), second is F64 (data). */
1750 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1761 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1764 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1766 /* non-FP after here */
1768 case Iop_DivModU64to32:
1769 case Iop_DivModS64to32:
1770 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1773 return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
1775 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
/* Widening multiplies: left-propagate the low half's undefinedness,
   PCast it for the high half. */
1779 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1780 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1781 return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1786 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1787 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1788 return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1793 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1794 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1795 return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
/* Exact carry-chain treatment for 32-bit add. */
1800 return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
1804 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1809 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1813 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
/* Comparisons: pessimise the union of the args down to one bit. */
1815 case Iop_CmpLE32S: case Iop_CmpLE32U:
1816 case Iop_CmpLT32U: case Iop_CmpLT32S:
1817 case Iop_CmpEQ32: case Iop_CmpNE32:
1818 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
1820 case Iop_CmpEQ16: case Iop_CmpNE16:
1821 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
1823 case Iop_CmpEQ8: case Iop_CmpNE8:
1824 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
1826 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
1827 /* Complain if the shift amount is undefined. Then simply
1828 shift the first arg's V bits by the real shift amount. */
1829 complainIfUndefined(mce, atom2);
1830 return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
1832 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
1833 /* Same scheme as with 32-bit shifts. */
1834 complainIfUndefined(mce, atom2);
1835 return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
1837 case Iop_Shl8: case Iop_Shr8:
1838 /* Same scheme as with 32-bit shifts. */
1839 complainIfUndefined(mce, atom2);
1840 return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
1842 case Iop_Shl64: case Iop_Shr64:
1843 /* Same scheme as with 32-bit shifts. */
1844 complainIfUndefined(mce, atom2);
1845 return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
/* And/Or: select size-specific uifu/difd/improve helpers, then fall
   into the common do_And_Or computation. */
1848 uifu = mkUifUV128; difd = mkDifDV128;
1849 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
1851 uifu = mkUifU64; difd = mkDifD64;
1852 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
1854 uifu = mkUifU32; difd = mkDifD32;
1855 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
1857 uifu = mkUifU16; difd = mkDifD16;
1858 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
1860 uifu = mkUifU8; difd = mkDifD8;
1861 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
1864 uifu = mkUifUV128; difd = mkDifDV128;
1865 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
1867 uifu = mkUifU64; difd = mkDifD64;
1868 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
1870 uifu = mkUifU32; difd = mkDifD32;
1871 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
1873 uifu = mkUifU16; difd = mkDifD16;
1874 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
1876 uifu = mkUifU8; difd = mkDifD8;
1877 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
1884 difd(mce, uifu(mce, vatom1, vatom2),
1885 difd(mce, improve(mce, atom1, vatom1),
1886 improve(mce, atom2, vatom2) ) ) );
/* Xor and friends: plain UifU of the shadow values. */
1889 return mkUifU8(mce, vatom1, vatom2);
1891 return mkUifU16(mce, vatom1, vatom2);
1893 return mkUifU32(mce, vatom1, vatom2);
1895 return mkUifU64(mce, vatom1, vatom2);
1897 return mkUifUV128(mce, vatom1, vatom2);
1901 VG_(tool_panic)("memcheck:expr2vbits_Binop");
/* Compute shadow (V-bit) value for a unary operation. */
1907 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
1909 IRAtom* vatom = expr2vbits( mce, atom );
1910 tl_assert(isOriginalAtom(mce,atom));
1914 return unary64Fx2(mce, vatom);
1916 case Iop_Sqrt64F0x2:
1917 return unary64F0x2(mce, vatom);
1920 case Iop_RSqrt32Fx4:
1921 case Iop_Recip32Fx4:
1922 return unary32Fx4(mce, vatom);
1924 case Iop_Sqrt32F0x4:
1925 case Iop_RSqrt32F0x4:
1926 case Iop_Recip32F0x4:
1927 return unary32F0x4(mce, vatom);
1931 return assignNew(mce, Ity_V128, unop(op, vatom));
/* Lazy cases: pessimise the whole input to the result width. */
1942 return mkPCastTo(mce, Ity_I64, vatom);
1946 return mkPCastTo(mce, Ity_I32, vatom);
/* Bit-moving cases: apply the same op to the shadow value. */
1951 case Iop_V128HIto64:
1952 return assignNew(mce, Ity_I64, unop(op, vatom));
1961 return assignNew(mce, Ity_I32, unop(op, vatom));
1967 return assignNew(mce, Ity_I16, unop(op, vatom));
1972 return assignNew(mce, Ity_I8, unop(op, vatom));
1975 return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
1977 case Iop_ReinterpF64asI64:
1978 case Iop_ReinterpI64asF64:
1979 case Iop_ReinterpI32asF32:
1990 VG_(tool_panic)("memcheck:expr2vbits_Unop");
1995 /* Worker function; do not call directly. */
/* Worker: emit a dirty helper call that loads the V bits for a little-
   endian load of 'ty' at addr+bias, and return the result temp. */
1997 IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2005 tl_assert(isOriginalAtom(mce,addr));
2007 /* First, emit a definedness test for the address. This also sets
2008 the address (shadow) to 'defined' following the test. */
2009 complainIfUndefined( mce, addr );
2011 /* Now cook up a call to the relevant helper function, to read the
2012 data V bits from shadow memory. */
2013 ty = shadowType(ty);
/* Pick the LOADV helper matching the access width. */
2015 case Ity_I64: helper = &MC_(helperc_LOADV8);
2016 hname = "MC_(helperc_LOADV8)";
2018 case Ity_I32: helper = &MC_(helperc_LOADV4);
2019 hname = "MC_(helperc_LOADV4)";
2021 case Ity_I16: helper = &MC_(helperc_LOADV2);
2022 hname = "MC_(helperc_LOADV2)";
2024 case Ity_I8: helper = &MC_(helperc_LOADV1);
2025 hname = "MC_(helperc_LOADV1)";
2027 default: ppIRType(ty);
2028 VG_(tool_panic)("memcheck:do_shadow_LDle");
2031 /* Generate the actual address into addrAct. */
2037 IRType tyAddr = mce->hWordTy;
2038 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2039 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2040 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2041 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2044 /* We need to have a place to park the V bits we're just about to
2046 datavbits = newIRTemp(mce->bb->tyenv, ty);
2047 di = unsafeIRDirty_1_N( datavbits,
2048 1/*regparms*/, hname, helper,
2049 mkIRExprVec_1( addrAct ));
2050 setHelperAnns( mce, di );
2051 stmt( mce->bb, IRStmt_Dirty(di) );
2053 return mkexpr(datavbits);
/* V bits for a little-endian load: V128 loads are split into two
   64-bit worker loads and recombined; smaller sizes go straight
   through to the worker. */
2058 IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2060 IRAtom *v64hi, *v64lo;
2061 switch (shadowType(ty)) {
2066 return expr2vbits_LDle_WRK(mce, ty, addr, bias);
/* V128: low 64 bits at bias, high 64 bits at bias+8 (little-endian). */
2068 v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
2069 v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
2070 return assignNew( mce,
2072 binop(Iop_64HLtoV128, v64hi, v64lo));
2074 VG_(tool_panic)("expr2vbits_LDle");
/* V bits for Mux0X: steer the operand V bits like the original mux,
   then union in the PCast of the condition's V bits. */
2080 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
2081 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
2083 IRAtom *vbitsC, *vbits0, *vbitsX;
2085 /* Given Mux0X(cond,expr0,exprX), generate
2086 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
2087 That is, steer the V bits like the originals, but trash the
2088 result if the steering value is undefined. This gives
2089 lazy propagation. */
2090 tl_assert(isOriginalAtom(mce, cond));
2091 tl_assert(isOriginalAtom(mce, expr0));
2092 tl_assert(isOriginalAtom(mce, exprX));
2094 vbitsC = expr2vbits(mce, cond);
2095 vbits0 = expr2vbits(mce, expr0);
2096 vbitsX = expr2vbits(mce, exprX);
2097 ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
2100 mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
2101 mkPCastTo(mce, ty, vbitsC) );
2104 /* --------- This is the main expression-handling function. --------- */
/* Main dispatcher: compute the shadow (V-bit) expression for any
   flat IR expression. */
2107 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2112 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2115 return shadow_GETI( mce, e->Iex.GetI.descr,
2116 e->Iex.GetI.ix, e->Iex.GetI.bias );
/* A temp's shadow is its paired shadow temp. */
2119 return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );
/* Constants are fully defined. */
2122 return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2125 return expr2vbits_Binop(
2128 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2132 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2135 return expr2vbits_LDle( mce, e->Iex.Load.ty,
2136 e->Iex.Load.addr, 0/*addr bias*/ );
2139 return mkLazyN( mce, e->Iex.CCall.args,
2144 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2145 e->Iex.Mux0X.exprX);
2151 VG_(tool_panic)("memcheck: expr2vbits");
2155 /*------------------------------------------------------------*/
2156 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
2157 /*------------------------------------------------------------*/
2159 /* Widen a value to the host word size. */
/* Zero-widen a shadow value to the host word type (needed before
   passing it as a regparm helper argument). */
2162 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
2166 /* vatom is vbits-value and as such can only have a shadow type. */
2167 tl_assert(isShadowAtom(mce,vatom));
2169 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
/* Only the 32-bit host case is handled here. */
2172 if (tyH == Ity_I32) {
2174 case Ity_I32: return vatom;
2175 case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2176 case Ity_I8: return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2177 default: goto unhandled;
2183 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2184 VG_(tool_panic)("zwidenToHostWord");
2188 /* Generate a shadow store. addr is always the original address atom.
2189 You can pass in either originals or V-bits for the data atom, but
2190 obviously not both. */
/* Emit a shadow store: call the size-appropriate STOREV helper to
   write vdata's V bits to shadow memory for a little-endian store at
   addr+bias.  V128 data is split into two 64-bit helper calls. */
2193 void do_shadow_STle ( MCEnv* mce,
2194 IRAtom* addr, UInt bias,
2195 IRAtom* data, IRAtom* vdata )
2199 IRDirty *di, *diLo64, *diHi64;
2200 IRAtom *addrAct, *addrLo64, *addrHi64;
2201 IRAtom *vdataLo64, *vdataHi64;
2202 IRAtom *eBias, *eBias0, *eBias8;
2203 void* helper = NULL;
2204 HChar* hname = NULL;
2206 tyAddr = mce->hWordTy;
2207 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2208 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2210 di = diLo64 = diHi64 = NULL;
2211 eBias = eBias0 = eBias8 = NULL;
2212 addrAct = addrLo64 = addrHi64 = NULL;
2213 vdataLo64 = vdataHi64 = NULL;
/* If given an original data atom, derive its V bits here. */
2217 tl_assert(isOriginalAtom(mce, data));
2218 tl_assert(bias == 0);
2219 vdata = expr2vbits( mce, data );
2224 tl_assert(isOriginalAtom(mce,addr));
2225 tl_assert(isShadowAtom(mce,vdata));
2227 ty = typeOfIRExpr(mce->bb->tyenv, vdata);
2229 /* First, emit a definedness test for the address. This also sets
2230 the address (shadow) to 'defined' following the test. */
2231 complainIfUndefined( mce, addr );
2233 /* Now decide which helper function to call to write the data V
2234 bits into shadow memory. */
2236 case Ity_V128: /* we'll use the helper twice */
2237 case Ity_I64: helper = &MC_(helperc_STOREV8);
2238 hname = "MC_(helperc_STOREV8)";
2240 case Ity_I32: helper = &MC_(helperc_STOREV4);
2241 hname = "MC_(helperc_STOREV4)";
2243 case Ity_I16: helper = &MC_(helperc_STOREV2);
2244 hname = "MC_(helperc_STOREV2)";
2246 case Ity_I8: helper = &MC_(helperc_STOREV1);
2247 hname = "MC_(helperc_STOREV1)";
2249 default: VG_(tool_panic)("memcheck:do_shadow_STle");
2252 if (ty == Ity_V128) {
/* V128: store low 64 bits at bias, high 64 bits at bias+8. */
2255 /* See comment in next clause re 64-bit regparms */
2256 eBias0 = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2257 addrLo64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
2258 vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
2259 diLo64 = unsafeIRDirty_0_N(
2260 1/*regparms*/, hname, helper,
2261 mkIRExprVec_2( addrLo64, vdataLo64 ));
2263 eBias8 = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
2264 addrHi64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
2265 vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
2266 diHi64 = unsafeIRDirty_0_N(
2267 1/*regparms*/, hname, helper,
2268 mkIRExprVec_2( addrHi64, vdataHi64 ));
2270 setHelperAnns( mce, diLo64 );
2271 setHelperAnns( mce, diHi64 );
2272 stmt( mce->bb, IRStmt_Dirty(diLo64) );
2273 stmt( mce->bb, IRStmt_Dirty(diHi64) );
2277 /* 8/16/32/64-bit cases */
2278 /* Generate the actual address into addrAct. */
2282 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2283 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2286 if (ty == Ity_I64) {
2287 /* We can't do this with regparm 2 on 32-bit platforms, since
2288 the back ends aren't clever enough to handle 64-bit
2289 regparm args. Therefore be different. */
2290 di = unsafeIRDirty_0_N(
2291 1/*regparms*/, hname, helper,
2292 mkIRExprVec_2( addrAct, vdata ));
2294 di = unsafeIRDirty_0_N(
2295 2/*regparms*/, hname, helper,
2296 mkIRExprVec_2( addrAct,
2297 zwidenToHostWord( mce, vdata )));
2299 setHelperAnns( mce, di );
2300 stmt( mce->bb, IRStmt_Dirty(di) );
2306 /* Do lazy pessimistic propagation through a dirty helper call, by
2307 looking at the annotations on it. This is the most complex part of
/* Map a byte size (1/2/4/8) to the corresponding integer IRType. */
2310 static IRType szToITy ( Int n )
2313 case 1: return Ity_I8;
2314 case 2: return Ity_I16;
2315 case 4: return Ity_I32;
2316 case 8: return Ity_I64;
2317 default: VG_(tool_panic)("szToITy(memcheck)");
/* Lazy pessimistic propagation through a dirty helper call, driven by
   the call's effect annotations: summarise all inputs (args, read
   guest state, read memory) into one 32-bit V-value 'curr', then fan
   the PCast of 'curr' out to all outputs (dest temp, written guest
   state, written memory). */
2322 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
2324 Int i, n, offset, toDo, gSz, gOff;
2325 IRAtom *src, *here, *curr;
2326 IRType tyAddr, tySrc, tyDst;
2329 /* First check the guard. */
2330 complainIfUndefined(mce, d->guard);
2332 /* Now round up all inputs and PCast over them. */
2333 curr = definedOfType(Ity_I32);
2335 /* Inputs: unmasked args */
2336 for (i = 0; d->args[i]; i++) {
2337 if (d->cee->mcx_mask & (1<<i)) {
2338 /* ignore this arg */
2340 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
2341 curr = mkUifU32(mce, here, curr);
2345 /* Inputs: guest state that we read. */
2346 for (i = 0; i < d->nFxState; i++) {
2347 tl_assert(d->fxState[i].fx != Ifx_None);
2348 if (d->fxState[i].fx == Ifx_Write)
2351 /* Ignore any sections marked as 'always defined'. */
2352 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
2354 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
2355 d->fxState[i].offset, d->fxState[i].size );
2359 /* This state element is read or modified. So we need to
2360 consider it. If larger than 8 bytes, deal with it in 8-byte
2362 gSz = d->fxState[i].size;
2363 gOff = d->fxState[i].offset;
2366 if (gSz == 0) break;
2367 n = gSz <= 8 ? gSz : 8;
2368 /* update 'curr' with UifU of the state slice
2370 tySrc = szToITy( n );
2371 src = assignNew( mce, tySrc,
2372 shadow_GET(mce, gOff, tySrc ) );
2373 here = mkPCastTo( mce, Ity_I32, src );
2374 curr = mkUifU32(mce, here, curr);
2381 /* Inputs: memory. First set up some info needed regardless of
2382 whether we're doing reads or writes. */
2383 tyAddr = Ity_INVALID;
2385 if (d->mFx != Ifx_None) {
2386 /* Because we may do multiple shadow loads/stores from the same
2387 base address, it's best to do a single test of its
2388 definedness right now. Post-instrumentation optimisation
2389 should remove all but this test. */
2390 tl_assert(d->mAddr);
2391 complainIfUndefined(mce, d->mAddr);
2393 tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
2394 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2395 tl_assert(tyAddr == mce->hWordTy); /* not really right */
2398 /* Deal with memory inputs (reads or modifies) */
2399 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2402 /* chew off 32-bit chunks */
2406 expr2vbits_LDle ( mce, Ity_I32,
2407 d->mAddr, d->mSize - toDo )
2409 curr = mkUifU32(mce, here, curr);
2412 /* chew off 16-bit chunks */
2416 expr2vbits_LDle ( mce, Ity_I16,
2417 d->mAddr, d->mSize - toDo )
2419 curr = mkUifU32(mce, here, curr);
2422 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2425 /* Whew! So curr is a 32-bit V-value summarising pessimistically
2426 all the inputs to the helper. Now we need to re-distribute the
2427 results to all destinations. */
2429 /* Outputs: the destination temporary, if there is one. */
2430 if (d->tmp != IRTemp_INVALID) {
2431 dst = findShadowTmp(mce, d->tmp);
2432 tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
2433 assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
2436 /* Outputs: guest state that we write or modify. */
2437 for (i = 0; i < d->nFxState; i++) {
2438 tl_assert(d->fxState[i].fx != Ifx_None);
2439 if (d->fxState[i].fx == Ifx_Read)
2441 /* Ignore any sections marked as 'always defined'. */
2442 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
2444 /* This state element is written or modified. So we need to
2445 consider it. If larger than 8 bytes, deal with it in 8-byte
2447 gSz = d->fxState[i].size;
2448 gOff = d->fxState[i].offset;
2451 if (gSz == 0) break;
2452 n = gSz <= 8 ? gSz : 8;
2453 /* Write suitably-casted 'curr' to the state slice
2455 tyDst = szToITy( n );
2456 do_shadow_PUT( mce, gOff,
2457 NULL, /* original atom */
2458 mkPCastTo( mce, tyDst, curr ) );
2464 /* Outputs: memory that we write or modify. */
2465 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2468 /* chew off 32-bit chunks */
2470 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2471 NULL, /* original data */
2472 mkPCastTo( mce, Ity_I32, curr ) );
2475 /* chew off 16-bit chunks */
2477 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2478 NULL, /* original data */
2479 mkPCastTo( mce, Ity_I16, curr ) );
2482 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2488 /*------------------------------------------------------------*/
2489 /*--- Memcheck main ---*/
2490 /*------------------------------------------------------------*/
2492 static Bool isBogusAtom ( IRAtom* at )
/* Decide whether a flat IR atom is a suspicious "bogus" literal.
   An atom is either a temporary read (never bogus) or a constant;
   the constant's payload is widened to a ULong and compared against
   magic fill value(s) — 0xFEFEFEFF is visible below; the rest of the
   comparison chain is outside this excerpt.  Presumably these are
   poison/uninitialised-memory fill patterns — TODO confirm against
   the full comparison list. */
2496 tl_assert(isIRAtom(at));
/* RdTmp atoms carry no literal value, so they cannot be bogus. */
2497 if (at->tag == Iex_RdTmp)
/* From here on the atom must be a constant. */
2499 tl_assert(at->tag == Iex_Const);
2500 con = at->Iex.Const.con;
/* Widen every supported constant size to ULong for a uniform compare. */
2502 case Ico_U8: n = (ULong)con->Ico.U8; break;
2503 case Ico_U16: n = (ULong)con->Ico.U16; break;
2504 case Ico_U32: n = (ULong)con->Ico.U32; break;
2505 case Ico_U64: n = (ULong)con->Ico.U64; break;
/* Any other constant kind (e.g. float vectors) is unexpected here:
   dump the expression and abort. */
2506 default: ppIRExpr(at); tl_assert(0);
2508 /* VG_(printf)("%llx\n", n); */
2509 return (n == 0xFEFEFEFF
2515 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
/* Return True iff any atom operand of the given *flat* statement is a
   bogus literal (per isBogusAtom).  Dispatches on the statement tag
   and, for WrTmp, on the RHS expression tag, checking every atom
   position the expression form can carry. */
2521 e = st->Ist.WrTmp.data;
2527 return isBogusAtom(e->Iex.Unop.arg);
2529 return isBogusAtom(e->Iex.Binop.arg1)
2530 || isBogusAtom(e->Iex.Binop.arg2);
2532 return isBogusAtom(e->Iex.Mux0X.cond)
2533 || isBogusAtom(e->Iex.Mux0X.expr0)
2534 || isBogusAtom(e->Iex.Mux0X.exprX);
2536 return isBogusAtom(e->Iex.Load.addr);
/* CCall: argument vector is NULL-terminated; scan every argument. */
2538 for (i = 0; e->Iex.CCall.args[i]; i++)
2539 if (isBogusAtom(e->Iex.CCall.args[i]))
2546 return isBogusAtom(st->Ist.Put.data);
2548 return isBogusAtom(st->Ist.Store.addr)
2549 || isBogusAtom(st->Ist.Store.data);
2551 return isBogusAtom(st->Ist.Exit.guard);
/* Unhandled statement form: abort.  NOTE(review): the panic string
   says "hasBogusLiterals" but this function is named
   checkForBogusLiterals — looks like a stale copy of an old name;
   consider updating the message to match. */
2555 VG_(tool_panic)("hasBogusLiterals");
/* Memcheck's main instrumentation pass (test-harness variant).
   Builds and returns a new IRSB that mirrors bb_in with shadow
   ("V-bit") computation statements interleaved: each original
   statement is preceded by statements that propagate definedness,
   then the original statement itself is appended.  closureV and vge
   are unused in the visible portion; gWordTy/hWordTy give the guest
   and host word types. */
2559 IRSB* mc_instrument ( void* closureV,
2560 IRSB* bb_in, VexGuestLayout* layout,
2561 VexGuestExtents* vge,
2562 IRType gWordTy, IRType hWordTy )
2564 Bool verboze = False; //True;
2566 /* Bool hasBogusLiterals = False; */
2568 Int i, j, first_stmt;
/* Start with an empty output superblock, copying over the type
   environment, the block's next-address expression and jump kind. */
2573 IRSB* bb = emptyIRSB();
2574 bb->tyenv = deepCopyIRTypeEnv(bb_in->tyenv);
2575 bb->next = deepCopyIRExpr(bb_in->next);
2576 bb->jumpkind = bb_in->jumpkind;
2578 /* Set up the running environment. Only .bb is modified as we go
2581 mce.layout = layout;
2582 mce.n_originalTmps = bb->tyenv->types_used;
2583 mce.hWordTy = hWordTy;
/* One shadow-temp slot per original temp; allocated lazily, so all
   slots start out invalid. */
2584 mce.tmpMap = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
2585 for (i = 0; i < mce.n_originalTmps; i++)
2586 mce.tmpMap[i] = IRTemp_INVALID;
2588 /* Iterate over the stmts. */
2590 for (i = 0; i < bb_in->stmts_used; i++) {
2591 st = bb_in->stmts[i];
/* Instrumentation requires flat (atom-only-operand) statements. */
2594 tl_assert(isFlatIRStmt(st));
/* Latch-once scan: after the first bogus literal is seen, stop
   re-checking subsequent statements. */
2597 if (!hasBogusLiterals) {
2598 hasBogusLiterals = checkForBogusLiterals(st);
2599 if (hasBogusLiterals) {
2600 VG_(printf)("bogus: ");
/* Remember where this statement's shadow code starts, so the
   verbose dump below can print just the newly added statements. */
2606 first_stmt = bb->stmts_used;
2610 VG_(printf)("\n\n");
/* Dispatch on statement kind; each arm emits the shadow-state
   updates for that kind (most case labels elided in this excerpt). */
2616 assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
2617 expr2vbits( &mce, st->Ist.WrTmp.data) );
2621 do_shadow_PUT( &mce,
2624 NULL /* shadow atom */ );
2628 do_shadow_PUTI( &mce,
2632 st->Ist.PutI.data );
2636 do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
2638 NULL /* shadow data */ );
2642 /* if (!hasBogusLiterals) */
/* Conditional exits: flag the branch if the guard is undefined. */
2643 complainIfUndefined( &mce, st->Ist.Exit.guard );
2647 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
2658 VG_(tool_panic)("memcheck: unhandled IRStmt");
2660 } /* switch (st->tag) */
/* Verbose mode: print the shadow statements just generated. */
2663 for (j = first_stmt; j < bb->stmts_used; j++) {
2665 ppIRStmt(bb->stmts[j]);
/* Finally append the original statement after its shadow code. */
2671 addStmtToIRSB(bb, st);
2675 /* Now we need to complain if the jump target is undefined. */
2676 first_stmt = bb->stmts_used;
2679 VG_(printf)("bb->next = ");
2681 VG_(printf)("\n\n");
2684 complainIfUndefined( &mce, bb->next );
2687 for (j = first_stmt; j < bb->stmts_used; j++) {
2689 ppIRStmt(bb->stmts[j]);
2699 /*--------------------------------------------------------------------*/
2700 /*--- end test_main.c ---*/
2701 /*--------------------------------------------------------------------*/