2 /*---------------------------------------------------------------*/
3 /*--- begin test_main.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
41 #include "libvex_basictypes.h"
44 #include "test_main.h"
47 /*---------------------------------------------------------------*/
49 /*---------------------------------------------------------------*/
52 __attribute__ ((noreturn))
54 void failure_exit ( void )
56 fprintf(stdout, "VEX did failure_exit. Bye.\n");
61 void log_bytes ( HChar* bytes, Int nbytes )
63 fwrite ( bytes, 1, nbytes, stdout );
66 #define N_LINEBUF 10000
67 static HChar linebuf[N_LINEBUF];
69 #define N_ORIGBUF 10000
70 #define N_TRANSBUF 5000
72 static UChar origbuf[N_ORIGBUF];
73 static UChar transbuf[N_TRANSBUF];
75 static Bool verbose = True;
79 static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
81 IRSB* mc_instrument ( void* closureV,
82 IRSB* bb_in, VexGuestLayout* layout,
84 IRType gWordTy, IRType hWordTy );
87 static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) { return False; }
89 int main ( int argc, char** argv )
95 Int bb_number, n_bbs_done = 0;
96 Int orig_nbytes, trans_used;
97 VexTranslateResult tres;
100 VexArchInfo vai_x86, vai_amd64, vai_ppc32;
102 VexTranslateArgs vta;
105 fprintf(stderr, "usage: vex file.org\n");
108 f = fopen(argv[1], "r");
110 fprintf(stderr, "can't open `%s'\n", argv[1]);
114 /* Run with default params. However, we can't allow bb chasing
115 since that causes the front end to get segfaults when it tries
116 to read code outside the initial BB we hand it. So when calling
117 LibVEX_Translate, send in a chase-into predicate that always
119 LibVEX_default_VexControl ( &vcon );
120 vcon.iropt_level = 2;
121 vcon.guest_max_insns = 50;
123 LibVEX_Init ( &failure_exit, &log_bytes,
124 1, /* debug_paranoia */
125 TEST_VSUPPORT, /* valgrind support */
131 fgets(linebuf, N_LINEBUF,f);
132 if (linebuf[0] == 0) continue;
133 if (linebuf[0] != '.') continue;
135 if (n_bbs_done == TEST_N_BBS) break;
138 /* first line is: . bb-number bb-addr n-bytes */
139 assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
141 & orig_addr, & orig_nbytes ));
142 assert(orig_nbytes >= 1);
144 fgets(linebuf, N_LINEBUF,f);
145 assert(linebuf[0] == '.');
147 /* second line is: . byte byte byte etc */
149 printf("============ Basic Block %d, Done %d, "
150 "Start %x, nbytes %2d ============",
151 bb_number, n_bbs_done-1, orig_addr, orig_nbytes);
153 assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
154 for (i = 0; i < orig_nbytes; i++) {
155 assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
156 origbuf[i] = (UChar)u;
159 /* FIXME: put sensible values into the .hwcaps fields */
160 LibVEX_default_VexArchInfo(&vai_x86);
161 vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
162 | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
164 LibVEX_default_VexArchInfo(&vai_amd64);
165 vai_amd64.hwcaps = 0;
167 LibVEX_default_VexArchInfo(&vai_ppc32);
168 vai_ppc32.hwcaps = 0;
169 vai_ppc32.ppc_cache_line_szB = 128;
171 LibVEX_default_VexAbiInfo(&vbi);
173 /* ----- Set up args for LibVEX_Translate ----- */
174 #if 0 /* ppc32 -> ppc32 */
175 vta.arch_guest = VexArchPPC32;
176 vta.archinfo_guest = vai_ppc32;
177 vta.arch_host = VexArchPPC32;
178 vta.archinfo_host = vai_ppc32;
180 #if 0 /* amd64 -> amd64 */
181 vta.arch_guest = VexArchAMD64;
182 vta.archinfo_guest = vai_amd64;
183 vta.arch_host = VexArchAMD64;
184 vta.archinfo_host = vai_amd64;
186 #if 1 /* x86 -> x86 */
187 vta.arch_guest = VexArchX86;
188 vta.archinfo_guest = vai_x86;
189 vta.arch_host = VexArchX86;
190 vta.archinfo_host = vai_x86;
192 vta.abiinfo_both = vbi;
193 vta.guest_bytes = origbuf;
194 vta.guest_bytes_addr = (Addr64)orig_addr;
195 vta.callback_opaque = NULL;
196 vta.chase_into_ok = chase_into_not_ok;
197 vta.guest_extents = &vge;
198 vta.host_bytes = transbuf;
199 vta.host_bytes_size = N_TRANSBUF;
200 vta.host_bytes_used = &trans_used;
201 #if 0 /* no instrumentation */
202 vta.instrument1 = NULL;
203 vta.instrument2 = NULL;
205 #if 0 /* addrcheck */
206 vta.instrument1 = ac_instrument;
207 vta.instrument2 = NULL;
210 vta.instrument1 = mc_instrument;
211 vta.instrument2 = NULL;
213 vta.do_self_check = False;
214 vta.preamble_function = NULL;
215 vta.traceflags = TEST_FLAGS;
216 #if 1 /* x86, amd64 hosts */
217 vta.dispatch = (void*)0x12345678;
218 #else /* ppc32, ppc64 hosts */
222 vta.finaltidy = NULL;
224 for (i = 0; i < TEST_N_ITERS; i++)
225 tres = LibVEX_Translate ( &vta );
227 if (tres != VexTransOK)
228 printf("\ntres = %d\n", (Int)tres);
229 assert(tres == VexTransOK);
230 assert(vge.n_used == 1);
231 assert((UInt)(vge.len[0]) == orig_nbytes);
234 for (i = 0; i < trans_used; i++)
235 sum += (UInt)transbuf[i];
236 printf ( " %6.2f ... %u\n",
237 (double)trans_used / (double)vge.len[0], sum );
242 LibVEX_ShowAllocStats();
247 //////////////////////////////////////////////////////////////////////
248 //////////////////////////////////////////////////////////////////////
249 //////////////////////////////////////////////////////////////////////
250 //////////////////////////////////////////////////////////////////////
251 //////////////////////////////////////////////////////////////////////
252 //////////////////////////////////////////////////////////////////////
253 //////////////////////////////////////////////////////////////////////
254 //////////////////////////////////////////////////////////////////////
259 __attribute((noreturn))
260 void panic ( HChar* s )
262 printf("\npanic: %s\n", s);
267 IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
269 /* Use this rather than eg. -1 because it's a UInt. */
270 #define INVALID_DATA_SIZE 999999
281 IRSB* bb = emptyIRSB();
282 bb->tyenv = dopyIRTypeEnv(bb_in->tyenv);
283 bb->next = dopyIRExpr(bb_in->next);
284 bb->jumpkind = bb_in->jumpkind;
286 /* No loads to consider in ->next. */
287 assert(isIRAtom(bb_in->next));
289 for (i = 0; i < bb_in->stmts_used; i++) {
290 st = bb_in->stmts[i];
296 data = st->Ist.Tmp.data;
297 if (data->tag == Iex_LDle) {
298 addr = data->Iex.LDle.addr;
299 sz = sizeofIRType(data->Iex.LDle.ty);
302 case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
303 (void*)0x12345601); break;
304 case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
305 (void*)0x12345602); break;
306 case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
307 (void*)0x12345603); break;
308 default: helper = mkIRCallee(0, "ac_helperc_LOADN",
310 needSz = True; break;
316 unsafeIRDirty_0_N( helper->regparms,
317 helper->name, helper->addr,
318 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
324 unsafeIRDirty_0_N( helper->regparms,
325 helper->name, helper->addr,
326 mkIRExprVec_1(addr) )
333 data = st->Ist.STle.data;
334 addr = st->Ist.STle.addr;
335 assert(isIRAtom(data));
336 assert(isIRAtom(addr));
337 sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
340 case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
341 (void*)0x12345605); break;
342 case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
343 (void*)0x12345606); break;
344 case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
345 (void*)0x12345607); break;
346 default: helper = mkIRCallee(0, "ac_helperc_STOREN",
348 needSz = True; break;
354 unsafeIRDirty_0_N( helper->regparms,
355 helper->name, helper->addr,
356 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
362 unsafeIRDirty_0_N( helper->regparms,
363 helper->name, helper->addr,
364 mkIRExprVec_1(addr) )
370 assert(isIRAtom(st->Ist.Put.data));
374 assert(isIRAtom(st->Ist.PutI.ix));
375 assert(isIRAtom(st->Ist.PutI.data));
379 assert(isIRAtom(st->Ist.Exit.guard));
383 /* If the call doesn't interact with memory, we ain't
385 if (st->Ist.Dirty.details->mFx == Ifx_None)
394 panic("addrcheck: unhandled IRStmt");
397 addStmtToIRSB( bb, dopyIRStmt(st));
404 //////////////////////////////////////////////////////////////////////
405 //////////////////////////////////////////////////////////////////////
406 //////////////////////////////////////////////////////////////////////
407 //////////////////////////////////////////////////////////////////////
408 //////////////////////////////////////////////////////////////////////
409 //////////////////////////////////////////////////////////////////////
410 //////////////////////////////////////////////////////////////////////
411 //////////////////////////////////////////////////////////////////////
416 __attribute((noreturn))
417 void panic ( HChar* s )
419 printf("\npanic: %s\n", s);
423 #define tl_assert(xxx) assert(xxx)
424 #define VG_(xxxx) xxxx
425 #define tool_panic(zzz) panic(zzz)
426 #define MC_(zzzz) MC_##zzzz
427 #define TL_(zzzz) SK_##zzzz
430 static void MC_helperc_complain_undef ( void );
431 static void MC_helperc_LOADV8 ( void );
432 static void MC_helperc_LOADV4 ( void );
433 static void MC_helperc_LOADV2 ( void );
434 static void MC_helperc_LOADV1 ( void );
435 static void MC_helperc_STOREV8( void );
436 static void MC_helperc_STOREV4( void );
437 static void MC_helperc_STOREV2( void );
438 static void MC_helperc_STOREV1( void );
439 static void MC_helperc_value_check0_fail( void );
440 static void MC_helperc_value_check1_fail( void );
441 static void MC_helperc_value_check4_fail( void );
443 static void MC_helperc_complain_undef ( void ) { }
444 static void MC_helperc_LOADV8 ( void ) { }
445 static void MC_helperc_LOADV4 ( void ) { }
446 static void MC_helperc_LOADV2 ( void ) { }
447 static void MC_helperc_LOADV1 ( void ) { }
448 static void MC_helperc_STOREV8( void ) { }
449 static void MC_helperc_STOREV4( void ) { }
450 static void MC_helperc_STOREV2( void ) { }
451 static void MC_helperc_STOREV1( void ) { }
452 static void MC_helperc_value_check0_fail( void ) { }
453 static void MC_helperc_value_check1_fail( void ) { }
454 static void MC_helperc_value_check4_fail( void ) { }
457 /*--------------------------------------------------------------------*/
458 /*--- Instrument IR to perform memory checking operations. ---*/
459 /*--- mc_translate.c ---*/
460 /*--------------------------------------------------------------------*/
463 This file is part of MemCheck, a heavyweight Valgrind tool for
464 detecting memory errors.
466 Copyright (C) 2000-2010 Julian Seward
469 This program is free software; you can redistribute it and/or
470 modify it under the terms of the GNU General Public License as
471 published by the Free Software Foundation; either version 2 of the
472 License, or (at your option) any later version.
474 This program is distributed in the hope that it will be useful, but
475 WITHOUT ANY WARRANTY; without even the implied warranty of
476 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
477 General Public License for more details.
479 You should have received a copy of the GNU General Public License
480 along with this program; if not, write to the Free Software
481 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
484 The GNU General Public License is contained in the file COPYING.
487 //#include "mc_include.h"
490 /*------------------------------------------------------------*/
491 /*--- Forward decls ---*/
492 /*------------------------------------------------------------*/
496 static IRType shadowType ( IRType ty );
497 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
500 /*------------------------------------------------------------*/
501 /*--- Memcheck running state, and tmp management. ---*/
502 /*------------------------------------------------------------*/
504 /* Carries around state during memcheck instrumentation. */
507 /* MODIFIED: the bb being constructed. IRStmts are added. */
510 /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
511 original temps to their current their current shadow temp.
512 Initially all entries are IRTemp_INVALID. Entries are added
513 lazily since many original temps are not used due to
514 optimisation prior to instrumentation. Note that floating
515 point original tmps are shadowed by integer tmps of the same
516 size, and Bit-typed original tmps are shadowed by the type
517 Ity_I8. See comment below. */
519 Int n_originalTmps; /* for range checking */
521 /* READONLY: the guest layout. This indicates which parts of
522 the guest state should be regarded as 'always defined'. */
523 VexGuestLayout* layout;
524 /* READONLY: the host word type. Needed for constructing
525 arguments of type 'HWord' to be passed to helper functions.
526 Ity_I32 or Ity_I64 only. */
531 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
532 demand), as they are encountered. This is for two reasons.
534 (1) (less important reason): Many original tmps are unused due to
535 initial IR optimisation, and we do not want to spaces in tables
538 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
539 table indexed [0 .. n_types-1], which gives the current shadow for
540 each original tmp, or INVALID_IRTEMP if none is so far assigned.
541 It is necessary to support making multiple assignments to a shadow
542 -- specifically, after testing a shadow for definedness, it needs
543 to be made defined. But IR's SSA property disallows this.
545 (2) (more important reason): Therefore, when a shadow needs to get
546 a new value, a new temporary is created, the value is assigned to
547 that, and the tmpMap is updated to reflect the new binding.
549 A corollary is that if the tmpMap maps a given tmp to
550 INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
551 there's a read-before-write error in the original tmps. The IR
552 sanity checker should catch all such anomalies, however.
555 /* Find the tmp currently shadowing the given original tmp. If none
556 so far exists, allocate one. */
557 static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
559 tl_assert(orig < mce->n_originalTmps);
560 if (mce->tmpMap[orig] == IRTemp_INVALID) {
562 = newIRTemp(mce->bb->tyenv,
563 shadowType(mce->bb->tyenv->types[orig]));
565 return mce->tmpMap[orig];
568 /* Allocate a new shadow for the given original tmp. This means any
569 previous shadow is abandoned. This is needed because it is
570 necessary to give a new value to a shadow once it has been tested
571 for undefinedness, but unfortunately IR's SSA property disallows
572 this. Instead we must abandon the old shadow, allocate a new one
573 and use that instead. */
574 static void newShadowTmp ( MCEnv* mce, IRTemp orig )
576 tl_assert(orig < mce->n_originalTmps);
578 = newIRTemp(mce->bb->tyenv,
579 shadowType(mce->bb->tyenv->types[orig]));
583 /*------------------------------------------------------------*/
584 /*--- IRAtoms -- a subset of IRExprs ---*/
585 /*------------------------------------------------------------*/
587 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
588 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
589 input, most of this code deals in atoms. Usefully, a value atom
590 always has a V-value which is also an atom: constants are shadowed
591 by constants, and temps are shadowed by the corresponding shadow
594 typedef IRExpr IRAtom;
596 /* (used for sanity checks only): is this an atom which looks
597 like it's from original code? */
598 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
600 if (a1->tag == Iex_Const)
602 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
607 /* (used for sanity checks only): is this an atom which looks
608 like it's from shadow code? */
609 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
611 if (a1->tag == Iex_Const)
613 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
618 /* (used for sanity checks only): check that both args are atoms and
619 are identically-kinded. */
620 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
622 if (a1->tag == Iex_RdTmp && a1->tag == Iex_RdTmp)
624 if (a1->tag == Iex_Const && a1->tag == Iex_Const)
630 /*------------------------------------------------------------*/
631 /*--- Type management ---*/
632 /*------------------------------------------------------------*/
634 /* Shadow state is always accessed using integer types. This returns
635 an integer type with the same size (as per sizeofIRType) as the
636 given type. The only valid shadow types are Bit, I8, I16, I32,
639 static IRType shadowType ( IRType ty )
646 case Ity_I64: return ty;
647 case Ity_F32: return Ity_I32;
648 case Ity_F64: return Ity_I64;
649 case Ity_V128: return Ity_V128;
650 default: ppIRType(ty);
651 VG_(tool_panic)("memcheck:shadowType");
655 /* Produce a 'defined' value of the given shadow type. Should only be
656 supplied shadow types (Bit/I8/I16/I32/UI64). */
657 static IRExpr* definedOfType ( IRType ty ) {
659 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
660 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
661 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
662 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
663 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
664 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
665 default: VG_(tool_panic)("memcheck:definedOfType");
670 /*------------------------------------------------------------*/
671 /*--- Constructing IR fragments ---*/
672 /*------------------------------------------------------------*/
674 /* assign value to tmp */
675 #define assign(_bb,_tmp,_expr) \
676 addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))
678 /* add stmt to a bb */
679 #define stmt(_bb,_stmt) \
680 addStmtToIRSB((_bb), (_stmt))
682 /* build various kinds of expressions */
683 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
684 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
685 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
686 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
687 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
688 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
689 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
690 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
692 /* bind the given expression to a new temporary, and return the
693 temporary. This effectively converts an arbitrary expression into
695 static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
696 IRTemp t = newIRTemp(mce->bb->tyenv, ty);
697 assign(mce->bb, t, e);
702 /*------------------------------------------------------------*/
703 /*--- Constructing definedness primitive ops ---*/
704 /*------------------------------------------------------------*/
706 /* --------- Defined-if-either-defined --------- */
708 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
709 tl_assert(isShadowAtom(mce,a1));
710 tl_assert(isShadowAtom(mce,a2));
711 return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
714 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
715 tl_assert(isShadowAtom(mce,a1));
716 tl_assert(isShadowAtom(mce,a2));
717 return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
720 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
721 tl_assert(isShadowAtom(mce,a1));
722 tl_assert(isShadowAtom(mce,a2));
723 return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
726 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
727 tl_assert(isShadowAtom(mce,a1));
728 tl_assert(isShadowAtom(mce,a2));
729 return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
732 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
733 tl_assert(isShadowAtom(mce,a1));
734 tl_assert(isShadowAtom(mce,a2));
735 return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
738 /* --------- Undefined-if-either-undefined --------- */
740 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
741 tl_assert(isShadowAtom(mce,a1));
742 tl_assert(isShadowAtom(mce,a2));
743 return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
746 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
747 tl_assert(isShadowAtom(mce,a1));
748 tl_assert(isShadowAtom(mce,a2));
749 return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
752 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
753 tl_assert(isShadowAtom(mce,a1));
754 tl_assert(isShadowAtom(mce,a2));
755 return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
758 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
759 tl_assert(isShadowAtom(mce,a1));
760 tl_assert(isShadowAtom(mce,a2));
761 return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
764 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
765 tl_assert(isShadowAtom(mce,a1));
766 tl_assert(isShadowAtom(mce,a2));
767 return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
770 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
772 case Ity_I8: return mkUifU8(mce, a1, a2);
773 case Ity_I16: return mkUifU16(mce, a1, a2);
774 case Ity_I32: return mkUifU32(mce, a1, a2);
775 case Ity_I64: return mkUifU64(mce, a1, a2);
776 case Ity_V128: return mkUifUV128(mce, a1, a2);
778 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
779 VG_(tool_panic)("memcheck:mkUifU");
783 /* --------- The Left-family of operations. --------- */
785 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
786 tl_assert(isShadowAtom(mce,a1));
787 /* It's safe to duplicate a1 since it's only an atom */
788 return assignNew(mce, Ity_I8,
790 assignNew(mce, Ity_I8,
791 /* unop(Iop_Neg8, a1)))); */
792 binop(Iop_Sub8, mkU8(0), a1) )));
795 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
796 tl_assert(isShadowAtom(mce,a1));
797 /* It's safe to duplicate a1 since it's only an atom */
798 return assignNew(mce, Ity_I16,
800 assignNew(mce, Ity_I16,
801 /* unop(Iop_Neg16, a1)))); */
802 binop(Iop_Sub16, mkU16(0), a1) )));
805 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
806 tl_assert(isShadowAtom(mce,a1));
807 /* It's safe to duplicate a1 since it's only an atom */
808 return assignNew(mce, Ity_I32,
810 assignNew(mce, Ity_I32,
811 /* unop(Iop_Neg32, a1)))); */
812 binop(Iop_Sub32, mkU32(0), a1) )));
815 /* --------- 'Improvement' functions for AND/OR. --------- */
817 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
818 defined (0); all other -> undefined (1).
820 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
822 tl_assert(isOriginalAtom(mce, data));
823 tl_assert(isShadowAtom(mce, vbits));
824 tl_assert(sameKindedAtoms(data, vbits));
825 return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
828 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
830 tl_assert(isOriginalAtom(mce, data));
831 tl_assert(isShadowAtom(mce, vbits));
832 tl_assert(sameKindedAtoms(data, vbits));
833 return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
836 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
838 tl_assert(isOriginalAtom(mce, data));
839 tl_assert(isShadowAtom(mce, vbits));
840 tl_assert(sameKindedAtoms(data, vbits));
841 return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
844 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
846 tl_assert(isOriginalAtom(mce, data));
847 tl_assert(isShadowAtom(mce, vbits));
848 tl_assert(sameKindedAtoms(data, vbits));
849 return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
852 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
854 tl_assert(isOriginalAtom(mce, data));
855 tl_assert(isShadowAtom(mce, vbits));
856 tl_assert(sameKindedAtoms(data, vbits));
857 return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
860 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
861 defined (0); all other -> undefined (1).
863 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
865 tl_assert(isOriginalAtom(mce, data));
866 tl_assert(isShadowAtom(mce, vbits));
867 tl_assert(sameKindedAtoms(data, vbits));
871 assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
875 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
877 tl_assert(isOriginalAtom(mce, data));
878 tl_assert(isShadowAtom(mce, vbits));
879 tl_assert(sameKindedAtoms(data, vbits));
883 assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
887 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
889 tl_assert(isOriginalAtom(mce, data));
890 tl_assert(isShadowAtom(mce, vbits));
891 tl_assert(sameKindedAtoms(data, vbits));
895 assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
899 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
901 tl_assert(isOriginalAtom(mce, data));
902 tl_assert(isShadowAtom(mce, vbits));
903 tl_assert(sameKindedAtoms(data, vbits));
907 assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
911 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
913 tl_assert(isOriginalAtom(mce, data));
914 tl_assert(isShadowAtom(mce, vbits));
915 tl_assert(sameKindedAtoms(data, vbits));
919 assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
923 /* --------- Pessimising casts. --------- */
925 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
929 /* Note, dst_ty is a shadow type, not an original type. */
930 /* First of all, collapse vbits down to a single bit. */
931 tl_assert(isShadowAtom(mce,vbits));
932 ty = typeOfIRExpr(mce->bb->tyenv, vbits);
939 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
942 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
945 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
948 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
951 VG_(tool_panic)("mkPCastTo(1)");
954 /* Now widen up to the dst type. */
959 return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
961 return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
963 return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
965 return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
967 tmp1 = assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
968 tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
972 VG_(tool_panic)("mkPCastTo(2)");
977 /*------------------------------------------------------------*/
978 /*--- Emit a test and complaint if something is undefined. ---*/
979 /*------------------------------------------------------------*/
981 /* Set the annotations on a dirty helper to indicate that the stack
982 pointer and instruction pointers might be read. This is the
983 behaviour of all 'emit-a-complaint' style functions we might
986 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
988 di->fxState[0].fx = Ifx_Read;
989 di->fxState[0].offset = mce->layout->offset_SP;
990 di->fxState[0].size = mce->layout->sizeof_SP;
991 di->fxState[1].fx = Ifx_Read;
992 di->fxState[1].offset = mce->layout->offset_IP;
993 di->fxState[1].size = mce->layout->sizeof_IP;
997 /* Check the supplied **original** atom for undefinedness, and emit a
998 complaint if so. Once that happens, mark it as defined. This is
999 possible because the atom is either a tmp or literal. If it's a
1000 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1001 be defined. In fact as mentioned above, we will have to allocate a
1002 new tmp to carry the new 'defined' shadow value, and update the
1003 original->tmp mapping accordingly; we cannot simply assign a new
1004 value to an existing shadow tmp as this breaks SSAness -- resulting
1005 in the post-instrumentation sanity checker spluttering in disapproval.
1007 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
1015 /* Since the original expression is atomic, there's no duplicated
1016 work generated by making multiple V-expressions for it. So we
1017 don't really care about the possibility that someone else may
1018 also create a V-interpretion for it. */
1019 tl_assert(isOriginalAtom(mce, atom));
1020 vatom = expr2vbits( mce, atom );
1021 tl_assert(isShadowAtom(mce, vatom));
1022 tl_assert(sameKindedAtoms(atom, vatom));
1024 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1026 /* sz is only used for constructing the error message */
1027 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1029 cond = mkPCastTo( mce, Ity_I1, vatom );
1030 /* cond will be 0 if all defined, and 1 if any not defined. */
1034 di = unsafeIRDirty_0_N( 0/*regparms*/,
1035 "MC_(helperc_value_check0_fail)",
1036 &MC_(helperc_value_check0_fail),
1041 di = unsafeIRDirty_0_N( 0/*regparms*/,
1042 "MC_(helperc_value_check1_fail)",
1043 &MC_(helperc_value_check1_fail),
1048 di = unsafeIRDirty_0_N( 0/*regparms*/,
1049 "MC_(helperc_value_check4_fail)",
1050 &MC_(helperc_value_check4_fail),
1055 di = unsafeIRDirty_0_N( 1/*regparms*/,
1056 "MC_(helperc_complain_undef)",
1057 &MC_(helperc_complain_undef),
1058 mkIRExprVec_1( mkIRExpr_HWord( sz ))
1063 setHelperAnns( mce, di );
1064 stmt( mce->bb, IRStmt_Dirty(di));
1066 /* Set the shadow tmp to be defined. First, update the
1067 orig->shadow tmp mapping to reflect the fact that this shadow is
1068 getting a new value. */
1069 tl_assert(isIRAtom(vatom));
1070 /* sameKindedAtoms ... */
1071 if (vatom->tag == Iex_RdTmp) {
1072 tl_assert(atom->tag == Iex_RdTmp);
1073 newShadowTmp(mce, atom->Iex.RdTmp.tmp);
1074 assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
1080 /*------------------------------------------------------------*/
1081 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1082 /*------------------------------------------------------------*/
1084 /* Examine the always-defined sections declared in layout to see if
1085 the (offset,size) section is within one. Note, is is an error to
1086 partially fall into such a region: (offset,size) should either be
1087 completely in such a region or completely not-in such a region.
1089 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1091 Int minoffD, maxoffD, i;
1092 Int minoff = offset;
1093 Int maxoff = minoff + size - 1;
1094 tl_assert((minoff & ~0xFFFF) == 0);
1095 tl_assert((maxoff & ~0xFFFF) == 0);
1097 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1098 minoffD = mce->layout->alwaysDefd[i].offset;
1099 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1100 tl_assert((minoffD & ~0xFFFF) == 0);
1101 tl_assert((maxoffD & ~0xFFFF) == 0);
1103 if (maxoff < minoffD || maxoffD < minoff)
1104 continue; /* no overlap */
1105 if (minoff >= minoffD && maxoff <= maxoffD)
1106 return True; /* completely contained in an always-defd section */
1108 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1110 return False; /* could not find any containing section */
1114 /* Generate into bb suitable actions to shadow this Put. If the state
1115 slice is marked 'always defined', do nothing. Otherwise, write the
1116 supplied V bits to the shadow state. We can pass in either an
1117 original atom or a V-atom, but not both. In the former case the
1118 relevant V-bits are then generated from the original.
1121 void do_shadow_PUT ( MCEnv* mce, Int offset,
1122 IRAtom* atom, IRAtom* vatom )
1127 tl_assert(isOriginalAtom(mce, atom));
1128 vatom = expr2vbits( mce, atom );
1131 tl_assert(isShadowAtom(mce, vatom));
1134 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1135 tl_assert(ty != Ity_I1);
1136 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1138 /* emit code to emit a complaint if any of the vbits are 1. */
1139 /* complainIfUndefined(mce, atom); */
1141 /* Do a plain shadow Put. */
1142 stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
1147 /* Return an expression which contains the V bits corresponding to the
1148 given GETI (passed in in pieces).
1151 void do_shadow_PUTI ( MCEnv* mce,
1152 IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
1158 tl_assert(isOriginalAtom(mce,atom));
1159 vatom = expr2vbits( mce, atom );
1160 tl_assert(sameKindedAtoms(atom, vatom));
1162 tyS = shadowType(ty);
1163 arrSize = descr->nElems * sizeofIRType(ty);
1164 tl_assert(ty != Ity_I1);
1165 tl_assert(isOriginalAtom(mce,ix));
1166 complainIfUndefined(mce,ix);
1167 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1169 /* emit code to emit a complaint if any of the vbits are 1. */
1170 /* complainIfUndefined(mce, atom); */
1172 /* Do a cloned version of the Put that refers to the shadow
1174 IRRegArray* new_descr
1175 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1176 tyS, descr->nElems);
1177 stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
1182 /* Return an expression which contains the V bits corresponding to the
1183 given GET (passed in in pieces).
1186 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1188 IRType tyS = shadowType(ty);
1189 tl_assert(ty != Ity_I1);
1190 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1191 /* Always defined, return all zeroes of the relevant type */
1192 return definedOfType(tyS);
1194 /* return a cloned version of the Get that refers to the shadow
1196 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1201 /* Return an expression which contains the V bits corresponding to the
1202 given GETI (passed in in pieces).
1205 IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
1207 IRType ty = descr->elemTy;
1208 IRType tyS = shadowType(ty);
1209 Int arrSize = descr->nElems * sizeofIRType(ty);
1210 tl_assert(ty != Ity_I1);
1211 tl_assert(isOriginalAtom(mce,ix));
1212 complainIfUndefined(mce,ix);
1213 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1214 /* Always defined, return all zeroes of the relevant type */
1215 return definedOfType(tyS);
1217 /* return a cloned version of the Get that refers to the shadow
1219 IRRegArray* new_descr
1220 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1221 tyS, descr->nElems);
1222 return IRExpr_GetI( new_descr, ix, bias );
1227 /*------------------------------------------------------------*/
1228 /*--- Generating approximations for unknown operations, ---*/
1229 /*--- using lazy-propagate semantics ---*/
1230 /*------------------------------------------------------------*/
1232 /* Lazy propagation of undefinedness from two values, resulting in the
1233 specified shadow type.
1236 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1238 /* force everything via 32-bit intermediaries. */
1240 tl_assert(isShadowAtom(mce,va1));
1241 tl_assert(isShadowAtom(mce,va2));
1242 at = mkPCastTo(mce, Ity_I32, va1);
1243 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1244 at = mkPCastTo(mce, finalVty, at);
1249 /* Do the lazy propagation game from a null-terminated vector of
1250 atoms. This is presumably the arguments to a helper call, so the
1251 IRCallee info is also supplied in order that we can know which
1252 arguments should be ignored (via the .mcx_mask field).
1255 IRAtom* mkLazyN ( MCEnv* mce,
1256 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1260 IRAtom* curr = definedOfType(Ity_I32);
1261 for (i = 0; exprvec[i]; i++) {
1263 tl_assert(isOriginalAtom(mce, exprvec[i]));
1264 /* Only take notice of this arg if the callee's mc-exclusion
1265 mask does not say it is to be excluded. */
1266 if (cee->mcx_mask & (1<<i)) {
1267 /* the arg is to be excluded from definedness checking. Do
1269 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1271 /* calculate the arg's definedness, and pessimistically merge
1273 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
1274 curr = mkUifU32(mce, here, curr);
1277 return mkPCastTo(mce, finalVtype, curr );
1281 /*------------------------------------------------------------*/
1282 /*--- Generating expensive sequences for exact carry-chain ---*/
1283 /*--- propagation in add/sub and related operations. ---*/
1284 /*------------------------------------------------------------*/
1287 IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
1288 IRAtom* aa, IRAtom* bb )
1290 IRAtom *a_min, *b_min, *a_max, *b_max;
1292 IROp opAND, opOR, opXOR, opNOT, opADD;
1294 tl_assert(isShadowAtom(mce,qaa));
1295 tl_assert(isShadowAtom(mce,qbb));
1296 tl_assert(isOriginalAtom(mce,aa));
1297 tl_assert(isOriginalAtom(mce,bb));
1298 tl_assert(sameKindedAtoms(qaa,aa));
1299 tl_assert(sameKindedAtoms(qbb,bb));
1308 // a_min = aa & ~qaa
1309 a_min = assignNew(mce,ty,
1311 assignNew(mce,ty, unop(opNOT, qaa))));
1313 // b_min = bb & ~qbb
1314 b_min = assignNew(mce,ty,
1316 assignNew(mce,ty, unop(opNOT, qbb))));
1319 a_max = assignNew(mce,ty, binop(opOR, aa, qaa));
1322 b_max = assignNew(mce,ty, binop(opOR, bb, qbb));
1324 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1328 assignNew(mce,ty, binop(opOR, qaa, qbb)),
1330 binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
1331 assignNew(mce,ty, binop(opADD, a_max, b_max))
1339 /*------------------------------------------------------------*/
1340 /*--- Helpers for dealing with vector primops. ---*/
1341 /*------------------------------------------------------------*/
1343 /* Vector pessimisation -- pessimise within each lane individually. */
1345 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1347 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1350 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1352 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1355 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1357 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1360 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1362 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1366 /* Here's a simple scheme capable of handling ops derived from SSE1
1367 code and while only generating ops that can be efficiently
1368 implemented in SSE1. */
1370 /* All-lanes versions are straightforward:
1372 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
1374 unary32Fx4(x,y) ==> PCast32x4(x#)
1376 Lowest-lane-only versions are more complex:
1378 binary32F0x4(x,y) ==> SetV128lo32(
1380 PCast32(V128to32(UifUV128(x#,y#)))
1383 This is perhaps not so obvious. In particular, it's faster to
1384 do a V128-bit UifU and then take the bottom 32 bits than the more
1385 obvious scheme of taking the bottom 32 bits of each operand
1386 and doing a 32-bit UifU. Basically since UifU is fast and
1387 chopping lanes off vector values is slow.
1391 unary32F0x4(x) ==> SetV128lo32(
1393 PCast32(V128to32(x#))
1398 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1399 PCast32x4(v#) = CmpNEZ32x4(v#)
1403 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1406 tl_assert(isShadowAtom(mce, vatomX));
1407 tl_assert(isShadowAtom(mce, vatomY));
1408 at = mkUifUV128(mce, vatomX, vatomY);
1409 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
1414 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1417 tl_assert(isShadowAtom(mce, vatomX));
1418 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
1423 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1426 tl_assert(isShadowAtom(mce, vatomX));
1427 tl_assert(isShadowAtom(mce, vatomY));
1428 at = mkUifUV128(mce, vatomX, vatomY);
1429 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
1430 at = mkPCastTo(mce, Ity_I32, at);
1431 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1436 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1439 tl_assert(isShadowAtom(mce, vatomX));
1440 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
1441 at = mkPCastTo(mce, Ity_I32, at);
1442 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1446 /* --- ... and ... 64Fx2 versions of the same ... --- */
1449 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1452 tl_assert(isShadowAtom(mce, vatomX));
1453 tl_assert(isShadowAtom(mce, vatomY));
1454 at = mkUifUV128(mce, vatomX, vatomY);
1455 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
1460 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1463 tl_assert(isShadowAtom(mce, vatomX));
1464 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
1469 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1472 tl_assert(isShadowAtom(mce, vatomX));
1473 tl_assert(isShadowAtom(mce, vatomY));
1474 at = mkUifUV128(mce, vatomX, vatomY);
1475 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
1476 at = mkPCastTo(mce, Ity_I64, at);
1477 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1482 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1485 tl_assert(isShadowAtom(mce, vatomX));
1486 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
1487 at = mkPCastTo(mce, Ity_I64, at);
1488 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1492 /* --- --- Vector saturated narrowing --- --- */
1494 /* This is quite subtle. What to do is simple:
1496 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1498 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1500 Why this is right is not so simple. Consider a lane in the args,
1501 vatom1 or 2, doesn't matter.
1503 After the PCast, that lane is all 0s (defined) or all
1506 Both signed and unsigned saturating narrowing of all 0s produces
1507 all 0s, which is what we want.
1509 The all-1s case is more complex. Unsigned narrowing interprets an
1510 all-1s input as the largest unsigned integer, and so produces all
1511 1s as a result since that is the largest unsigned value at the
1514 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1515 to -1, so we still wind up with all 1s at the smaller width.
1517 So: In short, pessimise the args, then apply the original narrowing
1521 IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
1522 IRAtom* vatom1, IRAtom* vatom2)
1524 IRAtom *at1, *at2, *at3;
1525 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1526 switch (narrow_op) {
1527 case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
1528 case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
1529 case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
1530 default: VG_(tool_panic)("vectorNarrowV128");
1532 tl_assert(isShadowAtom(mce,vatom1));
1533 tl_assert(isShadowAtom(mce,vatom2));
1534 at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1535 at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1536 at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1541 /* --- --- Vector integer arithmetic --- --- */
1543 /* Simple ... UifU the args and per-lane pessimise the results. */
1545 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1548 at = mkUifUV128(mce, vatom1, vatom2);
1549 at = mkPCast8x16(mce, at);
1554 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1557 at = mkUifUV128(mce, vatom1, vatom2);
1558 at = mkPCast16x8(mce, at);
1563 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1566 at = mkUifUV128(mce, vatom1, vatom2);
1567 at = mkPCast32x4(mce, at);
1572 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1575 at = mkUifUV128(mce, vatom1, vatom2);
1576 at = mkPCast64x2(mce, at);
1581 /*------------------------------------------------------------*/
1582 /*--- Generate shadow values from all kinds of IRExprs. ---*/
1583 /*------------------------------------------------------------*/
1586 IRAtom* expr2vbits_Binop ( MCEnv* mce,
1588 IRAtom* atom1, IRAtom* atom2 )
1591 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
1592 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
1593 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1595 IRAtom* vatom1 = expr2vbits( mce, atom1 );
1596 IRAtom* vatom2 = expr2vbits( mce, atom2 );
1598 tl_assert(isOriginalAtom(mce,atom1));
1599 tl_assert(isOriginalAtom(mce,atom2));
1600 tl_assert(isShadowAtom(mce,vatom1));
1601 tl_assert(isShadowAtom(mce,vatom2));
1602 tl_assert(sameKindedAtoms(atom1,vatom1));
1603 tl_assert(sameKindedAtoms(atom2,vatom2));
1606 /* V128-bit SIMD (SSE2-esque) */
1616 /* Same scheme as with all other shifts. */
1617 complainIfUndefined(mce, atom2);
1618 return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1625 case Iop_CmpGT8Sx16:
1631 return binary8Ix16(mce, vatom1, vatom2);
1637 case Iop_MulHi16Sx8:
1638 case Iop_MulHi16Ux8:
1641 case Iop_CmpGT16Sx8:
1647 return binary16Ix8(mce, vatom1, vatom2);
1650 case Iop_CmpGT32Sx4:
1653 return binary32Ix4(mce, vatom1, vatom2);
1657 return binary64Ix2(mce, vatom1, vatom2);
1659 case Iop_QNarrow32Sx4:
1660 case Iop_QNarrow16Sx8:
1661 case Iop_QNarrow16Ux8:
1662 return vectorNarrowV128(mce, op, vatom1, vatom2);
1669 case Iop_CmpLT64Fx2:
1670 case Iop_CmpLE64Fx2:
1671 case Iop_CmpEQ64Fx2:
1673 return binary64Fx2(mce, vatom1, vatom2);
1680 case Iop_CmpLT64F0x2:
1681 case Iop_CmpLE64F0x2:
1682 case Iop_CmpEQ64F0x2:
1684 return binary64F0x2(mce, vatom1, vatom2);
1686 /* V128-bit SIMD (SSE1-esque) */
1693 case Iop_CmpLT32Fx4:
1694 case Iop_CmpLE32Fx4:
1695 case Iop_CmpEQ32Fx4:
1697 return binary32Fx4(mce, vatom1, vatom2);
1704 case Iop_CmpLT32F0x4:
1705 case Iop_CmpLE32F0x4:
1706 case Iop_CmpEQ32F0x4:
1708 return binary32F0x4(mce, vatom1, vatom2);
1710 /* V128-bit data-steering */
1711 case Iop_SetV128lo32:
1712 case Iop_SetV128lo64:
1713 case Iop_64HLtoV128:
1714 case Iop_InterleaveLO64x2:
1715 case Iop_InterleaveLO32x4:
1716 case Iop_InterleaveLO16x8:
1717 case Iop_InterleaveLO8x16:
1718 case Iop_InterleaveHI64x2:
1719 case Iop_InterleaveHI32x4:
1720 case Iop_InterleaveHI16x8:
1721 case Iop_InterleaveHI8x16:
1722 return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
1724 /* Scalar floating point */
1726 // case Iop_RoundF64:
1729 /* First arg is I32 (rounding mode), second is F64 or I64
1731 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1733 case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1734 /* Takes two F64 args. */
1737 /* First arg is I32 (rounding mode), second is F64 (data). */
1738 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1741 /* First arg is I32 (rounding mode), second is F64 (data). */
1742 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1753 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1756 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1758 /* non-FP after here */
1760 case Iop_DivModU64to32:
1761 case Iop_DivModS64to32:
1762 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1765 return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
1767 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1771 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1772 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1773 return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1778 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1779 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1780 return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1785 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1786 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1787 return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1792 return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
1796 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1801 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1805 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1807 case Iop_CmpLE32S: case Iop_CmpLE32U:
1808 case Iop_CmpLT32U: case Iop_CmpLT32S:
1809 case Iop_CmpEQ32: case Iop_CmpNE32:
1810 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
1812 case Iop_CmpEQ16: case Iop_CmpNE16:
1813 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
1815 case Iop_CmpEQ8: case Iop_CmpNE8:
1816 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
1818 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
1819 /* Complain if the shift amount is undefined. Then simply
1820 shift the first arg's V bits by the real shift amount. */
1821 complainIfUndefined(mce, atom2);
1822 return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
1824 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
1825 /* Same scheme as with 32-bit shifts. */
1826 complainIfUndefined(mce, atom2);
1827 return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
1829 case Iop_Shl8: case Iop_Shr8:
1830 /* Same scheme as with 32-bit shifts. */
1831 complainIfUndefined(mce, atom2);
1832 return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
1834 case Iop_Shl64: case Iop_Shr64:
1835 /* Same scheme as with 32-bit shifts. */
1836 complainIfUndefined(mce, atom2);
1837 return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1840 uifu = mkUifUV128; difd = mkDifDV128;
1841 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
1843 uifu = mkUifU64; difd = mkDifD64;
1844 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
1846 uifu = mkUifU32; difd = mkDifD32;
1847 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
1849 uifu = mkUifU16; difd = mkDifD16;
1850 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
1852 uifu = mkUifU8; difd = mkDifD8;
1853 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
1856 uifu = mkUifUV128; difd = mkDifDV128;
1857 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
1859 uifu = mkUifU64; difd = mkDifD64;
1860 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
1862 uifu = mkUifU32; difd = mkDifD32;
1863 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
1865 uifu = mkUifU16; difd = mkDifD16;
1866 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
1868 uifu = mkUifU8; difd = mkDifD8;
1869 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
1876 difd(mce, uifu(mce, vatom1, vatom2),
1877 difd(mce, improve(mce, atom1, vatom1),
1878 improve(mce, atom2, vatom2) ) ) );
1881 return mkUifU8(mce, vatom1, vatom2);
1883 return mkUifU16(mce, vatom1, vatom2);
1885 return mkUifU32(mce, vatom1, vatom2);
1887 return mkUifU64(mce, vatom1, vatom2);
1889 return mkUifUV128(mce, vatom1, vatom2);
1893 VG_(tool_panic)("memcheck:expr2vbits_Binop");
1899 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
1901 IRAtom* vatom = expr2vbits( mce, atom );
1902 tl_assert(isOriginalAtom(mce,atom));
1906 return unary64Fx2(mce, vatom);
1908 case Iop_Sqrt64F0x2:
1909 return unary64F0x2(mce, vatom);
1912 case Iop_RSqrt32Fx4:
1913 case Iop_Recip32Fx4:
1914 return unary32Fx4(mce, vatom);
1916 case Iop_Sqrt32F0x4:
1917 case Iop_RSqrt32F0x4:
1918 case Iop_Recip32F0x4:
1919 return unary32F0x4(mce, vatom);
1923 return assignNew(mce, Ity_V128, unop(op, vatom));
1934 return mkPCastTo(mce, Ity_I64, vatom);
1938 return mkPCastTo(mce, Ity_I32, vatom);
1943 case Iop_V128HIto64:
1944 return assignNew(mce, Ity_I64, unop(op, vatom));
1953 return assignNew(mce, Ity_I32, unop(op, vatom));
1959 return assignNew(mce, Ity_I16, unop(op, vatom));
1964 return assignNew(mce, Ity_I8, unop(op, vatom));
1967 return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
1969 case Iop_ReinterpF64asI64:
1970 case Iop_ReinterpI64asF64:
1971 case Iop_ReinterpI32asF32:
1982 VG_(tool_panic)("memcheck:expr2vbits_Unop");
1987 /* Worker function; do not call directly. */
1989 IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
1997 tl_assert(isOriginalAtom(mce,addr));
1999 /* First, emit a definedness test for the address. This also sets
2000 the address (shadow) to 'defined' following the test. */
2001 complainIfUndefined( mce, addr );
2003 /* Now cook up a call to the relevant helper function, to read the
2004 data V bits from shadow memory. */
2005 ty = shadowType(ty);
2007 case Ity_I64: helper = &MC_(helperc_LOADV8);
2008 hname = "MC_(helperc_LOADV8)";
2010 case Ity_I32: helper = &MC_(helperc_LOADV4);
2011 hname = "MC_(helperc_LOADV4)";
2013 case Ity_I16: helper = &MC_(helperc_LOADV2);
2014 hname = "MC_(helperc_LOADV2)";
2016 case Ity_I8: helper = &MC_(helperc_LOADV1);
2017 hname = "MC_(helperc_LOADV1)";
2019 default: ppIRType(ty);
2020 VG_(tool_panic)("memcheck:do_shadow_LDle");
2023 /* Generate the actual address into addrAct. */
2029 IRType tyAddr = mce->hWordTy;
2030 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2031 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2032 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2033 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2036 /* We need to have a place to park the V bits we're just about to
2038 datavbits = newIRTemp(mce->bb->tyenv, ty);
2039 di = unsafeIRDirty_1_N( datavbits,
2040 1/*regparms*/, hname, helper,
2041 mkIRExprVec_1( addrAct ));
2042 setHelperAnns( mce, di );
2043 stmt( mce->bb, IRStmt_Dirty(di) );
2045 return mkexpr(datavbits);
2050 IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2052 IRAtom *v64hi, *v64lo;
2053 switch (shadowType(ty)) {
2058 return expr2vbits_LDle_WRK(mce, ty, addr, bias);
2060 v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
2061 v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
2062 return assignNew( mce,
2064 binop(Iop_64HLtoV128, v64hi, v64lo));
2066 VG_(tool_panic)("expr2vbits_LDle");
2072 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
2073 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
2075 IRAtom *vbitsC, *vbits0, *vbitsX;
2077 /* Given Mux0X(cond,expr0,exprX), generate
2078 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
2079 That is, steer the V bits like the originals, but trash the
2080 result if the steering value is undefined. This gives
2081 lazy propagation. */
2082 tl_assert(isOriginalAtom(mce, cond));
2083 tl_assert(isOriginalAtom(mce, expr0));
2084 tl_assert(isOriginalAtom(mce, exprX));
2086 vbitsC = expr2vbits(mce, cond);
2087 vbits0 = expr2vbits(mce, expr0);
2088 vbitsX = expr2vbits(mce, exprX);
2089 ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
2092 mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
2093 mkPCastTo(mce, ty, vbitsC) );
2096 /* --------- This is the main expression-handling function. --------- */
2099 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2104 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2107 return shadow_GETI( mce, e->Iex.GetI.descr,
2108 e->Iex.GetI.ix, e->Iex.GetI.bias );
2111 return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );
2114 return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2117 return expr2vbits_Binop(
2120 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2124 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2127 return expr2vbits_LDle( mce, e->Iex.Load.ty,
2128 e->Iex.Load.addr, 0/*addr bias*/ );
2131 return mkLazyN( mce, e->Iex.CCall.args,
2136 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2137 e->Iex.Mux0X.exprX);
2143 VG_(tool_panic)("memcheck: expr2vbits");
2147 /*------------------------------------------------------------*/
2148 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
2149 /*------------------------------------------------------------*/
2151 /* Widen a value to the host word size. */
2154 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
2158 /* vatom is vbits-value and as such can only have a shadow type. */
2159 tl_assert(isShadowAtom(mce,vatom));
2161 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
2164 if (tyH == Ity_I32) {
2166 case Ity_I32: return vatom;
2167 case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2168 case Ity_I8: return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2169 default: goto unhandled;
2175 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2176 VG_(tool_panic)("zwidenToHostWord");
2180 /* Generate a shadow store. addr is always the original address atom.
2181 You can pass in either originals or V-bits for the data atom, but
2182 obviously not both. */
static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   HChar*   hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }
}
/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part
   of Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}
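/* szToITy is used below when chunking guest state into slices of at
   most 8 bytes; the chunking loop clamps n to 8, so sizes other than
   1/2/4/8 (which would panic here) are not expected. */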
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz  -= n;
         gOff += n;
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;
   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz  -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }
}
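/* Worked example of the scheme above (names illustrative).  For a
   dirty call  t = foo(x,y)  which also reads 4 bytes at mAddr:
      curr = UifU32( PCast32(y#), UifU32( PCast32(x#), Defined32 ))
      curr = UifU32( PCast32(shadow load of 4 bytes at mAddr), curr )
      t#   = PCastTo(type of t, curr)
   One 32-bit value pessimistically summarises every input, and is
   then fanned back out to every output (tmp, guest state, memory),
   so any undefined input taints all outputs. */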
/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_RdTmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U8:  n = (ULong)con->Ico.U8;  break;
      case Ico_U16: n = (ULong)con->Ico.U16; break;
      case Ico_U32: n = (ULong)con->Ico.U32; break;
      case Ico_U64: n = (ULong)con->Ico.U64; break;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
   return (n == 0xFEFEFEFF
           || n == 0x80808080
           || n == 0x1010101
           || n == 1010100);
}
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;

   switch (st->tag) {
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("hasBogusLiterals");
   }
}
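/* The magic numbers in isBogusAtom look like the constants used by
   word-at-a-time string routines (e.g. 0x80808080 / 0x01010101 in
   optimised strlen), which deliberately inspect partially-undefined
   words; this detector, left disabled in mc_instrument below, was
   apparently for experimenting with suppressing such reports. */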
IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   Bool verboze = False; //True;

   /* Bool hasBogusLiterals = False; */

   Int i, j, first_stmt;
   IRStmt* st;
   MCEnv mce;

   /* Set up BB */
   IRSB* bb = emptyIRSB();
   bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
   bb->next     = deepCopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;
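   /* Every entry starts out invalid; findShadowTmp then allocates a
      shadow temp lazily on first use, so only temps whose V bits are
      actually needed acquire shadows. */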
   /* Iterate over the stmts. */

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      tl_assert(isFlatIRStmt(st));

      /*
      if (!hasBogusLiterals) {
         hasBogusLiterals = checkForBogusLiterals(st);
         if (hasBogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }
      */

      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      switch (st->tag) {

         case Ist_WrTmp:
            assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
                        expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
                                  st->Ist.Store.data,
                                  NULL /* shadow data */ );
            break;

         case Ist_Exit:
            /* if (!hasBogusLiterals) */
               complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      addStmtToIRSB(bb, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}
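/* End-to-end example: instrumenting the flat statement
      t3 = Add32(t1,t2)
   appends first the shadow computation
      t3# = <V-bit calculation from t1#, t2#>
   and then, via addStmtToIRSB(bb, st), the original statement itself,
   so each original statement is always preceded by the statements
   that keep its shadow state up to date. */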
/*--------------------------------------------------------------------*/
/*--- end                                            test_main.c ---*/
/*--------------------------------------------------------------------*/