l4/pkg/valgrind/src/valgrind-3.6.0-svn/VEX/test_main.c

   1
   2 /*---------------------------------------------------------------*/
   3 /*--- begin                                       test_main.c ---*/
   4 /*---------------------------------------------------------------*/
   5
   6 /*
   7    This file is part of Valgrind, a dynamic binary instrumentation
   8    framework.
   9
  10    Copyright (C) 2004-2010 OpenWorks LLP
  11       info@open-works.net
  12
  13    This program is free software; you can redistribute it and/or
  14    modify it under the terms of the GNU General Public License as
  15    published by the Free Software Foundation; either version 2 of the
  16    License, or (at your option) any later version.
  17
  18    This program is distributed in the hope that it will be useful, but
  19    WITHOUT ANY WARRANTY; without even the implied warranty of
  20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21    General Public License for more details.
  22
  23    You should have received a copy of the GNU General Public License
  24    along with this program; if not, write to the Free Software
  25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  26    02110-1301, USA.
  27
  28    The GNU General Public License is contained in the file COPYING.
  29
  30    Neither the names of the U.S. Department of Energy nor the
  31    University of California nor the names of its contributors may be
  32    used to endorse or promote products derived from this software
  33    without prior written permission.
  34 */
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <assert.h>
  39 #include <string.h>
  40
  41 #include "libvex_basictypes.h"
  42 #include "libvex.h"
  43
  44 #include "test_main.h"
  45
  46
  47 /*---------------------------------------------------------------*/
  48 /*--- Test                                                    ---*/
  49 /*---------------------------------------------------------------*/
  50
  51
  52 __attribute__ ((noreturn))
  53 static
  54 void failure_exit ( void )
  55 {
  56    fprintf(stdout, "VEX did failure_exit.  Bye.\n");
  57    exit(1);
  58 }
  59
  60 static
  61 void log_bytes ( HChar* bytes, Int nbytes )
  62 {
  63    fwrite ( bytes, 1, nbytes, stdout );
  64 }
  65
  66 #define N_LINEBUF 10000
  67 static HChar linebuf[N_LINEBUF];
  68
  69 #define N_ORIGBUF 10000
  70 #define N_TRANSBUF 5000
  71
  72 static UChar origbuf[N_ORIGBUF];
  73 static UChar transbuf[N_TRANSBUF];
  74
  75 static Bool verbose = True;
  76
  77 /* Forwards */
  78 #if 1 /* UNUSED */
  79 static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
  80 static
  81 IRSB* mc_instrument ( void* closureV,
  82                       IRSB* bb_in, VexGuestLayout* layout,
  83                       VexGuestExtents* vge,
  84                       IRType gWordTy, IRType hWordTy );
  85 #endif
  86
  87 static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) { return False; }
  88
  89 int main ( int argc, char** argv )
  90 {
  91    FILE* f;
  92    Int i;
  93    UInt u, sum;
  94    Addr32 orig_addr;
  95    Int bb_number, n_bbs_done = 0;
  96    Int orig_nbytes, trans_used;
  97    VexTranslateResult tres;
  98    VexControl vcon;
  99    VexGuestExtents vge;
 100    VexArchInfo vai_x86, vai_amd64, vai_ppc32;
 101    VexAbiInfo vbi;
 102    VexTranslateArgs vta;
 103
 104    if (argc != 2) {
 105       fprintf(stderr, "usage: vex file.org\n");
 106       exit(1);
 107    }
 108    f = fopen(argv[1], "r");
 109    if (!f) {
 110       fprintf(stderr, "can't open `%s'\n", argv[1]);
 111       exit(1);
 112    }
 113
 114    /* Run with default params.  However, we can't allow bb chasing
 115       since that causes the front end to get segfaults when it tries
 116       to read code outside the initial BB we hand it.  So when calling
 117       LibVEX_Translate, send in a chase-into predicate that always
 118       returns False. */
 119    LibVEX_default_VexControl ( &vcon );
 120    vcon.iropt_level = 2;
 121    vcon.guest_max_insns = 50;
 122
 123    LibVEX_Init ( &failure_exit, &log_bytes,
 124                  1,  /* debug_paranoia */
 125                  TEST_VSUPPORT, /* valgrind support */
 126                  &vcon );
 127
 128
 129    while (!feof(f)) {
 130
 131       fgets(linebuf, N_LINEBUF,f);
 132       if (linebuf[0] == 0) continue;
 133       if (linebuf[0] != '.') continue;
 134
 135       if (n_bbs_done == TEST_N_BBS) break;
 136       n_bbs_done++;
 137
 138       /* first line is:   . bb-number bb-addr n-bytes */
 139       assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
 140                                  & bb_number,
 141                                  & orig_addr, & orig_nbytes ));
 142       assert(orig_nbytes >= 1);
 143       assert(!feof(f));
 144       fgets(linebuf, N_LINEBUF,f);
 145       assert(linebuf[0] == '.');
 146
 147       /* second line is:   . byte byte byte etc */
 148       if (verbose)
 149          printf("============ Basic Block %d, Done %d, "
 150                 "Start %x, nbytes %2d ============",
 151                 bb_number, n_bbs_done-1, orig_addr, orig_nbytes);
 152
 153       assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
 154       for (i = 0; i < orig_nbytes; i++) {
 155          assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
 156          origbuf[i] = (UChar)u;
 157       }
 158
 159       /* FIXME: put sensible values into the .hwcaps fields */
 160       LibVEX_default_VexArchInfo(&vai_x86);
 161       vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
 162                        | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
 163
 164       LibVEX_default_VexArchInfo(&vai_amd64);
 165       vai_amd64.hwcaps = 0;
 166
 167       LibVEX_default_VexArchInfo(&vai_ppc32);
 168       vai_ppc32.hwcaps = 0;
 169       vai_ppc32.ppc_cache_line_szB = 128;
 170
 171       LibVEX_default_VexAbiInfo(&vbi);
 172
 173       /* ----- Set up args for LibVEX_Translate ----- */
 174 #if 0 /* ppc32 -> ppc32 */
 175       vta.arch_guest     = VexArchPPC32;
 176       vta.archinfo_guest = vai_ppc32;
 177       vta.arch_host      = VexArchPPC32;
 178       vta.archinfo_host  = vai_ppc32;
 179 #endif
 180 #if 0 /* amd64 -> amd64 */
 181       vta.arch_guest     = VexArchAMD64;
 182       vta.archinfo_guest = vai_amd64;
 183       vta.arch_host      = VexArchAMD64;
 184       vta.archinfo_host  = vai_amd64;
 185 #endif
 186 #if 1 /* x86 -> x86 */
 187       vta.arch_guest     = VexArchX86;
 188       vta.archinfo_guest = vai_x86;
 189       vta.arch_host      = VexArchX86;
 190       vta.archinfo_host  = vai_x86;
 191 #endif
 192       vta.abiinfo_both    = vbi;
 193       vta.guest_bytes     = origbuf;
 194       vta.guest_bytes_addr = (Addr64)orig_addr;
 195       vta.callback_opaque = NULL;
 196       vta.chase_into_ok   = chase_into_not_ok;
 197       vta.guest_extents   = &vge;
 198       vta.host_bytes      = transbuf;
 199       vta.host_bytes_size = N_TRANSBUF;
 200       vta.host_bytes_used = &trans_used;
 201 #if 0 /* no instrumentation */
 202       vta.instrument1     = NULL;
 203       vta.instrument2     = NULL;
 204 #endif
 205 #if 0 /* addrcheck */
 206       vta.instrument1     = ac_instrument;
 207       vta.instrument2     = NULL;
 208 #endif
 209 #if 1 /* memcheck */
 210       vta.instrument1     = mc_instrument;
 211       vta.instrument2     = NULL;
 212 #endif
 213       vta.do_self_check   = False;
 214       vta.preamble_function = NULL;
 215       vta.traceflags      = TEST_FLAGS;
 216 #if 1 /* x86, amd64 hosts */
 217       vta.dispatch        = (void*)0x12345678;
 218 #else /* ppc32, ppc64 hosts */
 219       vta.dispatch        = NULL;
 220 #endif
 221
 222       vta.finaltidy = NULL;
 223
 224       for (i = 0; i < TEST_N_ITERS; i++)
 225          tres = LibVEX_Translate ( &vta );
 226
 227       if (tres != VexTransOK)
 228          printf("\ntres = %d\n", (Int)tres);
 229       assert(tres == VexTransOK);
 230       assert(vge.n_used == 1);
 231       assert((UInt)(vge.len[0]) == orig_nbytes);
 232
 233       sum = 0;
 234       for (i = 0; i < trans_used; i++)
 235          sum += (UInt)transbuf[i];
 236       printf ( " %6.2f ... %u\n",
 237                (double)trans_used / (double)vge.len[0], sum );
 238    }
 239
 240    fclose(f);
 241    printf("\n");
 242    LibVEX_ShowAllocStats();
 243
 244    return 0;
 245 }
 246
 247 //////////////////////////////////////////////////////////////////////
 248 //////////////////////////////////////////////////////////////////////
 249 //////////////////////////////////////////////////////////////////////
 250 //////////////////////////////////////////////////////////////////////
 251 //////////////////////////////////////////////////////////////////////
 252 //////////////////////////////////////////////////////////////////////
 253 //////////////////////////////////////////////////////////////////////
 254 //////////////////////////////////////////////////////////////////////
 255
 256 #if 0 /* UNUSED */
 257
 258 static
 259 __attribute((noreturn))
 260 void panic ( HChar* s )
 261 {
 262   printf("\npanic: %s\n", s);
 263   failure_exit();
 264 }
 265
 266 static
 267 IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
 268 {
 269 /* Use this rather than eg. -1 because it's a UInt. */
 270 #define INVALID_DATA_SIZE   999999
 271
 272    Int         i;
 273    Int         sz;
 274    IRCallee*   helper;
 275    IRStmt*    st;
 276    IRExpr* data;
 277    IRExpr* addr;
 278    Bool needSz;
 279
 280    /* Set up BB */
 281    IRSB* bb     = emptyIRSB();
 282    bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
 283    bb->next     = dopyIRExpr(bb_in->next);
 284    bb->jumpkind = bb_in->jumpkind;
 285
 286    /* No loads to consider in ->next. */
 287    assert(isIRAtom(bb_in->next));
 288
 289    for (i = 0; i <  bb_in->stmts_used; i++) {
 290       st = bb_in->stmts[i];
 291       if (!st) continue;
 292
 293       switch (st->tag) {
 294
 295          case Ist_Tmp:
 296             data = st->Ist.Tmp.data;
 297             if (data->tag == Iex_LDle) {
 298                addr = data->Iex.LDle.addr;
 299                sz = sizeofIRType(data->Iex.LDle.ty);
 300                needSz = False;
 301                switch (sz) {
 302                   case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
 303                                                  (void*)0x12345601); break;
 304                   case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
 305                                                  (void*)0x12345602); break;
 306                   case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
 307                                                  (void*)0x12345603); break;
 308                   default: helper = mkIRCallee(0, "ac_helperc_LOADN",
 309                                                   (void*)0x12345604);
 310                                                   needSz = True; break;
 311                }
 312                if (needSz) {
 313                   addStmtToIRSB(
 314                      bb,
 315                      IRStmt_Dirty(
 316                         unsafeIRDirty_0_N( helper->regparms,
 317                                            helper->name, helper->addr,
 318                                            mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
 319                   ));
 320                } else {
 321                   addStmtToIRSB(
 322                      bb,
 323                      IRStmt_Dirty(
 324                         unsafeIRDirty_0_N( helper->regparms,
 325                                            helper->name, helper->addr,
 326                                            mkIRExprVec_1(addr) )
 327                   ));
 328                }
 329             }
 330             break;
 331
 332          case Ist_STle:
 333             data = st->Ist.STle.data;
 334             addr = st->Ist.STle.addr;
 335             assert(isIRAtom(data));
 336             assert(isIRAtom(addr));
 337             sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
 338             needSz = False;
 339             switch (sz) {
 340                case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
 341                                               (void*)0x12345605); break;
 342                case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
 343                                               (void*)0x12345606); break;
 344                case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
 345                                               (void*)0x12345607); break;
 346                default: helper = mkIRCallee(0, "ac_helperc_STOREN",
 347                                                (void*)0x12345608);
 348                                                needSz = True; break;
 349             }
 350             if (needSz) {
 351                addStmtToIRSB(
 352                   bb,
 353                   IRStmt_Dirty(
 354                      unsafeIRDirty_0_N( helper->regparms,
 355                                         helper->name, helper->addr,
 356                                         mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
 357                ));
 358             } else {
 359                addStmtToIRSB(
 360                   bb,
 361                   IRStmt_Dirty(
 362                      unsafeIRDirty_0_N( helper->regparms,
 363                                         helper->name, helper->addr,
 364                                         mkIRExprVec_1(addr) )
 365                ));
 366             }
 367             break;
 368
 369          case Ist_Put:
 370             assert(isIRAtom(st->Ist.Put.data));
 371             break;
 372
 373          case Ist_PutI:
 374             assert(isIRAtom(st->Ist.PutI.ix));
 375             assert(isIRAtom(st->Ist.PutI.data));
 376             break;
 377
 378          case Ist_Exit:
 379             assert(isIRAtom(st->Ist.Exit.guard));
 380             break;
 381
 382          case Ist_Dirty:
 383             /* If the call doesn't interact with memory, we ain't
 384                interested. */
 385             if (st->Ist.Dirty.details->mFx == Ifx_None)
 386                break;
 387             goto unhandled;
 388
 389          default:
 390          unhandled:
 391             printf("\n");
 392             ppIRStmt(st);
 393             printf("\n");
 394             panic("addrcheck: unhandled IRStmt");
 395       }
 396
 397       addStmtToIRSB( bb, dopyIRStmt(st));
 398    }
 399
 400    return bb;
 401 }
 402 #endif /* UNUSED */
 403
 404 //////////////////////////////////////////////////////////////////////
 405 //////////////////////////////////////////////////////////////////////
 406 //////////////////////////////////////////////////////////////////////
 407 //////////////////////////////////////////////////////////////////////
 408 //////////////////////////////////////////////////////////////////////
 409 //////////////////////////////////////////////////////////////////////
 410 //////////////////////////////////////////////////////////////////////
 411 //////////////////////////////////////////////////////////////////////
 412
 413 #if 1 /* UNUSED */
 414
 415 static
 416 __attribute((noreturn))
 417 void panic ( HChar* s )
 418 {
 419   printf("\npanic: %s\n", s);
 420   failure_exit();
 421 }
 422
 423 #define tl_assert(xxx) assert(xxx)
 424 #define VG_(xxxx) xxxx
 425 #define tool_panic(zzz) panic(zzz)
 426 #define MC_(zzzz) MC_##zzzz
 427 #define TL_(zzzz) SK_##zzzz
 428
 429
 430 static void MC_helperc_complain_undef ( void );
 431 static void MC_helperc_LOADV8 ( void );
 432 static void MC_helperc_LOADV4 ( void );
 433 static void MC_helperc_LOADV2 ( void );
 434 static void MC_helperc_LOADV1 ( void );
 435 static void MC_helperc_STOREV8( void );
 436 static void MC_helperc_STOREV4( void );
 437 static void MC_helperc_STOREV2( void );
 438 static void MC_helperc_STOREV1( void );
 439 static void MC_helperc_value_check0_fail( void );
 440 static void MC_helperc_value_check1_fail( void );
 441 static void MC_helperc_value_check4_fail( void );
 442
 443 static void MC_helperc_complain_undef ( void ) { }
 444 static void MC_helperc_LOADV8 ( void ) { }
 445 static void MC_helperc_LOADV4 ( void ) { }
 446 static void MC_helperc_LOADV2 ( void ) { }
 447 static void MC_helperc_LOADV1 ( void ) { }
 448 static void MC_helperc_STOREV8( void ) { }
 449 static void MC_helperc_STOREV4( void ) { }
 450 static void MC_helperc_STOREV2( void ) { }
 451 static void MC_helperc_STOREV1( void ) { }
 452 static void MC_helperc_value_check0_fail( void ) { }
 453 static void MC_helperc_value_check1_fail( void ) { }
 454 static void MC_helperc_value_check4_fail( void ) { }
 455
 456
 457 /*--------------------------------------------------------------------*/
 458 /*--- Instrument IR to perform memory checking operations.         ---*/
 459 /*---                                               mc_translate.c ---*/
 460 /*--------------------------------------------------------------------*/
 461
 462 /*
 463    This file is part of MemCheck, a heavyweight Valgrind tool for
 464    detecting memory errors.
 465
 466    Copyright (C) 2000-2010 Julian Seward
 467       jseward@acm.org
 468
 469    This program is free software; you can redistribute it and/or
 470    modify it under the terms of the GNU General Public License as
 471    published by the Free Software Foundation; either version 2 of the
 472    License, or (at your option) any later version.
 473
 474    This program is distributed in the hope that it will be useful, but
 475    WITHOUT ANY WARRANTY; without even the implied warranty of
 476    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 477    General Public License for more details.
 478
 479    You should have received a copy of the GNU General Public License
 480    along with this program; if not, write to the Free Software
 481    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 482    02111-1307, USA.
 483
 484    The GNU General Public License is contained in the file COPYING.
 485 */
 486
 487 //#include "mc_include.h"
 488
 489
 490 /*------------------------------------------------------------*/
 491 /*--- Forward decls                                        ---*/
 492 /*------------------------------------------------------------*/
 493
 494 struct _MCEnv;
 495
 496 static IRType  shadowType ( IRType ty );
 497 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
 498
 499
 500 /*------------------------------------------------------------*/
 501 /*--- Memcheck running state, and tmp management.          ---*/
 502 /*------------------------------------------------------------*/
 503
 504 /* Carries around state during memcheck instrumentation. */
 505 typedef
 506    struct _MCEnv {
 507       /* MODIFIED: the bb being constructed.  IRStmts are added. */
 508       IRSB* bb;
 509
 510       /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
 511          original temps to their current their current shadow temp.
 512          Initially all entries are IRTemp_INVALID.  Entries are added
 513          lazily since many original temps are not used due to
 514          optimisation prior to instrumentation.  Note that floating
 515          point original tmps are shadowed by integer tmps of the same
 516          size, and Bit-typed original tmps are shadowed by the type
 517          Ity_I8.  See comment below. */
 518       IRTemp* tmpMap;
 519       Int     n_originalTmps; /* for range checking */
 520
 521       /* READONLY: the guest layout.  This indicates which parts of
 522          the guest state should be regarded as 'always defined'. */
 523       VexGuestLayout* layout;
 524       /* READONLY: the host word type.  Needed for constructing
 525          arguments of type 'HWord' to be passed to helper functions.
 526          Ity_I32 or Ity_I64 only. */
 527       IRType hWordTy;
 528    }
 529    MCEnv;
 530
 531 /* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
 532    demand), as they are encountered.  This is for two reasons.
 533
 534    (1) (less important reason): Many original tmps are unused due to
 535    initial IR optimisation, and we do not want to spaces in tables
 536    tracking them.
 537
 538    Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
 539    table indexed [0 .. n_types-1], which gives the current shadow for
 540    each original tmp, or INVALID_IRTEMP if none is so far assigned.
 541    It is necessary to support making multiple assignments to a shadow
 542    -- specifically, after testing a shadow for definedness, it needs
 543    to be made defined.  But IR's SSA property disallows this.
 544
 545    (2) (more important reason): Therefore, when a shadow needs to get
 546    a new value, a new temporary is created, the value is assigned to
 547    that, and the tmpMap is updated to reflect the new binding.
 548
 549    A corollary is that if the tmpMap maps a given tmp to
 550    INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
 551    there's a read-before-write error in the original tmps.  The IR
 552    sanity checker should catch all such anomalies, however.
 553 */
 554
 555 /* Find the tmp currently shadowing the given original tmp.  If none
 556    so far exists, allocate one.  */
 557 static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
 558 {
 559    tl_assert(orig < mce->n_originalTmps);
 560    if (mce->tmpMap[orig] == IRTemp_INVALID) {
 561       mce->tmpMap[orig]
 562          = newIRTemp(mce->bb->tyenv,
 563                      shadowType(mce->bb->tyenv->types[orig]));
 564    }
 565    return mce->tmpMap[orig];
 566 }
 567
 568 /* Allocate a new shadow for the given original tmp.  This means any
 569    previous shadow is abandoned.  This is needed because it is
 570    necessary to give a new value to a shadow once it has been tested
 571    for undefinedness, but unfortunately IR's SSA property disallows
 572    this.  Instead we must abandon the old shadow, allocate a new one
 573    and use that instead. */
 574 static void newShadowTmp ( MCEnv* mce, IRTemp orig )
 575 {
 576    tl_assert(orig < mce->n_originalTmps);
 577    mce->tmpMap[orig]
 578       = newIRTemp(mce->bb->tyenv,
 579                   shadowType(mce->bb->tyenv->types[orig]));
 580 }
 581
 582
 583 /*------------------------------------------------------------*/
 584 /*--- IRAtoms -- a subset of IRExprs                       ---*/
 585 /*------------------------------------------------------------*/
 586
 587 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
 588    isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
 589    input, most of this code deals in atoms.  Usefully, a value atom
 590    always has a V-value which is also an atom: constants are shadowed
 591    by constants, and temps are shadowed by the corresponding shadow
 592    temporary. */
 593
 594 typedef  IRExpr  IRAtom;
 595
 596 /* (used for sanity checks only): is this an atom which looks
 597    like it's from original code? */
 598 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
 599 {
 600    if (a1->tag == Iex_Const)
 601       return True;
 602    if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
 603       return True;
 604    return False;
 605 }
 606
 607 /* (used for sanity checks only): is this an atom which looks
 608    like it's from shadow code? */
 609 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
 610 {
 611    if (a1->tag == Iex_Const)
 612       return True;
 613    if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
 614       return True;
 615    return False;
 616 }
 617
 618 /* (used for sanity checks only): check that both args are atoms and
 619    are identically-kinded. */
 620 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
 621 {
 622    if (a1->tag == Iex_RdTmp && a1->tag == Iex_RdTmp)
 623       return True;
 624    if (a1->tag == Iex_Const && a1->tag == Iex_Const)
 625       return True;
 626    return False;
 627 }
 628
 629
 630 /*------------------------------------------------------------*/
 631 /*--- Type management                                      ---*/
 632 /*------------------------------------------------------------*/
 633
 634 /* Shadow state is always accessed using integer types.  This returns
 635    an integer type with the same size (as per sizeofIRType) as the
 636    given type.  The only valid shadow types are Bit, I8, I16, I32,
 637    I64, V128. */
 638
 639 static IRType shadowType ( IRType ty )
 640 {
 641    switch (ty) {
 642       case Ity_I1:
 643       case Ity_I8:
 644       case Ity_I16:
 645       case Ity_I32:
 646       case Ity_I64:  return ty;
 647       case Ity_F32:  return Ity_I32;
 648       case Ity_F64:  return Ity_I64;
 649       case Ity_V128: return Ity_V128;
 650       default: ppIRType(ty);
 651                VG_(tool_panic)("memcheck:shadowType");
 652    }
 653 }
 654
 655 /* Produce a 'defined' value of the given shadow type.  Should only be
 656    supplied shadow types (Bit/I8/I16/I32/UI64). */
 657 static IRExpr* definedOfType ( IRType ty ) {
 658    switch (ty) {
 659       case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
 660       case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
 661       case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
 662       case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
 663       case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
 664       case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
 665       default:      VG_(tool_panic)("memcheck:definedOfType");
 666    }
 667 }
 668
 669
 670 /*------------------------------------------------------------*/
 671 /*--- Constructing IR fragments                            ---*/
 672 /*------------------------------------------------------------*/
 673
 674 /* assign value to tmp */
 675 #define assign(_bb,_tmp,_expr)   \
 676    addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))
 677
 678 /* add stmt to a bb */
 679 #define stmt(_bb,_stmt)    \
 680    addStmtToIRSB((_bb), (_stmt))
 681
 682 /* build various kinds of expressions */
 683 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
 684 #define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
 685 #define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
 686 #define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
 687 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
 688 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
 689 #define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
 690 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
 691
 692 /* bind the given expression to a new temporary, and return the
 693    temporary.  This effectively converts an arbitrary expression into
 694    an atom. */
 695 static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
 696    IRTemp t = newIRTemp(mce->bb->tyenv, ty);
 697    assign(mce->bb, t, e);
 698    return mkexpr(t);
 699 }
 700
 701
 702 /*------------------------------------------------------------*/
 703 /*--- Constructing definedness primitive ops               ---*/
 704 /*------------------------------------------------------------*/
 705
 706 /* --------- Defined-if-either-defined --------- */
 707
 708 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 709    tl_assert(isShadowAtom(mce,a1));
 710    tl_assert(isShadowAtom(mce,a2));
 711    return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
 712 }
 713
 714 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 715    tl_assert(isShadowAtom(mce,a1));
 716    tl_assert(isShadowAtom(mce,a2));
 717    return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
 718 }
 719
 720 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 721    tl_assert(isShadowAtom(mce,a1));
 722    tl_assert(isShadowAtom(mce,a2));
 723    return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
 724 }
 725
 726 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 727    tl_assert(isShadowAtom(mce,a1));
 728    tl_assert(isShadowAtom(mce,a2));
 729    return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
 730 }
 731
 732 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 733    tl_assert(isShadowAtom(mce,a1));
 734    tl_assert(isShadowAtom(mce,a2));
 735    return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
 736 }
 737
 738 /* --------- Undefined-if-either-undefined --------- */
 739
 740 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 741    tl_assert(isShadowAtom(mce,a1));
 742    tl_assert(isShadowAtom(mce,a2));
 743    return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
 744 }
 745
 746 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 747    tl_assert(isShadowAtom(mce,a1));
 748    tl_assert(isShadowAtom(mce,a2));
 749    return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
 750 }
 751
 752 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 753    tl_assert(isShadowAtom(mce,a1));
 754    tl_assert(isShadowAtom(mce,a2));
 755    return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
 756 }
 757
 758 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 759    tl_assert(isShadowAtom(mce,a1));
 760    tl_assert(isShadowAtom(mce,a2));
 761    return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
 762 }
 763
 764 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
 765    tl_assert(isShadowAtom(mce,a1));
 766    tl_assert(isShadowAtom(mce,a2));
 767    return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
 768 }
 769
 770 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
 771    switch (vty) {
 772       case Ity_I8:   return mkUifU8(mce, a1, a2);
 773       case Ity_I16:  return mkUifU16(mce, a1, a2);
 774       case Ity_I32:  return mkUifU32(mce, a1, a2);
 775       case Ity_I64:  return mkUifU64(mce, a1, a2);
 776       case Ity_V128: return mkUifUV128(mce, a1, a2);
 777       default:
 778          VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
 779          VG_(tool_panic)("memcheck:mkUifU");
 780    }
 781 }
 782
 783 /* --------- The Left-family of operations. --------- */
 784
 785 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
 786    tl_assert(isShadowAtom(mce,a1));
 787    /* It's safe to duplicate a1 since it's only an atom */
 788    return assignNew(mce, Ity_I8,
 789                     binop(Iop_Or8, a1,
 790                           assignNew(mce, Ity_I8,
 791                                     /* unop(Iop_Neg8, a1)))); */
 792                                     binop(Iop_Sub8, mkU8(0), a1) )));
 793 }
 794
 795 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
 796    tl_assert(isShadowAtom(mce,a1));
 797    /* It's safe to duplicate a1 since it's only an atom */
 798    return assignNew(mce, Ity_I16,
 799                     binop(Iop_Or16, a1,
 800                           assignNew(mce, Ity_I16,
 801                                     /* unop(Iop_Neg16, a1)))); */
 802                                     binop(Iop_Sub16, mkU16(0), a1) )));
 803 }
 804
 805 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
 806    tl_assert(isShadowAtom(mce,a1));
 807    /* It's safe to duplicate a1 since it's only an atom */
 808    return assignNew(mce, Ity_I32,
 809                     binop(Iop_Or32, a1,
 810                           assignNew(mce, Ity_I32,
 811                                     /* unop(Iop_Neg32, a1)))); */
 812                                     binop(Iop_Sub32, mkU32(0), a1) )));
 813 }
 814
 815 /* --------- 'Improvement' functions for AND/OR. --------- */
 816
 817 /* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
 818    defined (0); all other -> undefined (1).
 819 */
 820 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 821 {
 822    tl_assert(isOriginalAtom(mce, data));
 823    tl_assert(isShadowAtom(mce, vbits));
 824    tl_assert(sameKindedAtoms(data, vbits));
 825    return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
 826 }
 827
 828 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 829 {
 830    tl_assert(isOriginalAtom(mce, data));
 831    tl_assert(isShadowAtom(mce, vbits));
 832    tl_assert(sameKindedAtoms(data, vbits));
 833    return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
 834 }
 835
 836 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 837 {
 838    tl_assert(isOriginalAtom(mce, data));
 839    tl_assert(isShadowAtom(mce, vbits));
 840    tl_assert(sameKindedAtoms(data, vbits));
 841    return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
 842 }
 843
 844 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 845 {
 846    tl_assert(isOriginalAtom(mce, data));
 847    tl_assert(isShadowAtom(mce, vbits));
 848    tl_assert(sameKindedAtoms(data, vbits));
 849    return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
 850 }
 851
 852 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 853 {
 854    tl_assert(isOriginalAtom(mce, data));
 855    tl_assert(isShadowAtom(mce, vbits));
 856    tl_assert(sameKindedAtoms(data, vbits));
 857    return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
 858 }
 859
 860 /* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
 861    defined (0); all other -> undefined (1).
 862 */
 863 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 864 {
 865    tl_assert(isOriginalAtom(mce, data));
 866    tl_assert(isShadowAtom(mce, vbits));
 867    tl_assert(sameKindedAtoms(data, vbits));
 868    return assignNew(
 869              mce, Ity_I8,
 870              binop(Iop_Or8,
 871                    assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
 872                    vbits) );
 873 }
 874
 875 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 876 {
 877    tl_assert(isOriginalAtom(mce, data));
 878    tl_assert(isShadowAtom(mce, vbits));
 879    tl_assert(sameKindedAtoms(data, vbits));
 880    return assignNew(
 881              mce, Ity_I16,
 882              binop(Iop_Or16,
 883                    assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
 884                    vbits) );
 885 }
 886
 887 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 888 {
 889    tl_assert(isOriginalAtom(mce, data));
 890    tl_assert(isShadowAtom(mce, vbits));
 891    tl_assert(sameKindedAtoms(data, vbits));
 892    return assignNew(
 893              mce, Ity_I32,
 894              binop(Iop_Or32,
 895                    assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
 896                    vbits) );
 897 }
 898
 899 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 900 {
 901    tl_assert(isOriginalAtom(mce, data));
 902    tl_assert(isShadowAtom(mce, vbits));
 903    tl_assert(sameKindedAtoms(data, vbits));
 904    return assignNew(
 905              mce, Ity_I64,
 906              binop(Iop_Or64,
 907                    assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
 908                    vbits) );
 909 }
 910
 911 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
 912 {
 913    tl_assert(isOriginalAtom(mce, data));
 914    tl_assert(isShadowAtom(mce, vbits));
 915    tl_assert(sameKindedAtoms(data, vbits));
 916    return assignNew(
 917              mce, Ity_V128,
 918              binop(Iop_OrV128,
 919                    assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
 920                    vbits) );
 921 }
 922
 923 /* --------- Pessimising casts. --------- */
 924
 925 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
 926 {
 927    IRType  ty;
 928    IRAtom* tmp1;
 929    /* Note, dst_ty is a shadow type, not an original type. */
 930    /* First of all, collapse vbits down to a single bit. */
 931    tl_assert(isShadowAtom(mce,vbits));
 932    ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
 933    tmp1 = NULL;
 934    switch (ty) {
 935       case Ity_I1:
 936          tmp1 = vbits;
 937          break;
 938       case Ity_I8:
 939          tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
 940          break;
 941       case Ity_I16:
 942          tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
 943          break;
 944       case Ity_I32:
 945          tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
 946          break;
 947       case Ity_I64:
 948          tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
 949          break;
 950       default:
 951          VG_(tool_panic)("mkPCastTo(1)");
 952    }
 953    tl_assert(tmp1);
 954    /* Now widen up to the dst type. */
 955    switch (dst_ty) {
 956       case Ity_I1:
 957          return tmp1;
 958       case Ity_I8:
 959          return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
 960       case Ity_I16:
 961          return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
 962       case Ity_I32:
 963          return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
 964       case Ity_I64:
 965          return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
 966       case Ity_V128:
 967          tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
 968          tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
 969          return tmp1;
 970       default:
 971          ppIRType(dst_ty);
 972          VG_(tool_panic)("mkPCastTo(2)");
 973    }
 974 }
 975
 976
 977 /*------------------------------------------------------------*/
 978 /*--- Emit a test and complaint if something is undefined. ---*/
 979 /*------------------------------------------------------------*/
 980
 981 /* Set the annotations on a dirty helper to indicate that the stack
 982    pointer and instruction pointers might be read.  This is the
 983    behaviour of all 'emit-a-complaint' style functions we might
 984    call. */
 985
 986 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
 987    di->nFxState = 2;
 988    di->fxState[0].fx     = Ifx_Read;
 989    di->fxState[0].offset = mce->layout->offset_SP;
 990    di->fxState[0].size   = mce->layout->sizeof_SP;
 991    di->fxState[1].fx     = Ifx_Read;
 992    di->fxState[1].offset = mce->layout->offset_IP;
 993    di->fxState[1].size   = mce->layout->sizeof_IP;
 994 }
 995
 996
 997 /* Check the supplied **original** atom for undefinedness, and emit a
 998    complaint if so.  Once that happens, mark it as defined.  This is
 999    possible because the atom is either a tmp or literal.  If it's a
1000    tmp, it will be shadowed by a tmp, and so we can set the shadow to
1001    be defined.  In fact as mentioned above, we will have to allocate a
1002    new tmp to carry the new 'defined' shadow value, and update the
1003    original->tmp mapping accordingly; we cannot simply assign a new
1004    value to an existing shadow tmp as this breaks SSAness -- resulting
1005    in the post-instrumentation sanity checker spluttering in disapproval.
1006 */
1007 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
1008 {
1009    IRAtom*  vatom;
1010    IRType   ty;
1011    Int      sz;
1012    IRDirty* di;
1013    IRAtom*  cond;
1014
1015    /* Since the original expression is atomic, there's no duplicated
1016       work generated by making multiple V-expressions for it.  So we
1017       don't really care about the possibility that someone else may
1018       also create a V-interpretion for it. */
1019    tl_assert(isOriginalAtom(mce, atom));
1020    vatom = expr2vbits( mce, atom );
1021    tl_assert(isShadowAtom(mce, vatom));
1022    tl_assert(sameKindedAtoms(atom, vatom));
1023
1024    ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1025
1026    /* sz is only used for constructing the error message */
1027    sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1028
1029    cond = mkPCastTo( mce, Ity_I1, vatom );
1030    /* cond will be 0 if all defined, and 1 if any not defined. */
1031
1032    switch (sz) {
1033       case 0:
1034          di = unsafeIRDirty_0_N( 0/*regparms*/,
1035                                  "MC_(helperc_value_check0_fail)",
1036                                  &MC_(helperc_value_check0_fail),
1037                                  mkIRExprVec_0()
1038                                );
1039          break;
1040       case 1:
1041          di = unsafeIRDirty_0_N( 0/*regparms*/,
1042                                  "MC_(helperc_value_check1_fail)",
1043                                  &MC_(helperc_value_check1_fail),
1044                                  mkIRExprVec_0()
1045                                );
1046          break;
1047       case 4:
1048          di = unsafeIRDirty_0_N( 0/*regparms*/,
1049                                  "MC_(helperc_value_check4_fail)",
1050                                  &MC_(helperc_value_check4_fail),
1051                                  mkIRExprVec_0()
1052                                );
1053          break;
1054       default:
1055          di = unsafeIRDirty_0_N( 1/*regparms*/,
1056                                  "MC_(helperc_complain_undef)",
1057                                  &MC_(helperc_complain_undef),
1058                                  mkIRExprVec_1( mkIRExpr_HWord( sz ))
1059                                );
1060          break;
1061    }
1062    di->guard = cond;
1063    setHelperAnns( mce, di );
1064    stmt( mce->bb, IRStmt_Dirty(di));
1065
1066    /* Set the shadow tmp to be defined.  First, update the
1067       orig->shadow tmp mapping to reflect the fact that this shadow is
1068       getting a new value. */
1069    tl_assert(isIRAtom(vatom));
1070    /* sameKindedAtoms ... */
1071    if (vatom->tag == Iex_RdTmp) {
1072       tl_assert(atom->tag == Iex_RdTmp);
1073       newShadowTmp(mce, atom->Iex.RdTmp.tmp);
1074       assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
1075                       definedOfType(ty));
1076    }
1077 }
1078
1079
1080 /*------------------------------------------------------------*/
1081 /*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
1082 /*------------------------------------------------------------*/
1083
1084 /* Examine the always-defined sections declared in layout to see if
1085    the (offset,size) section is within one.  Note, is is an error to
1086    partially fall into such a region: (offset,size) should either be
1087    completely in such a region or completely not-in such a region.
1088 */
1089 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1090 {
1091    Int minoffD, maxoffD, i;
1092    Int minoff = offset;
1093    Int maxoff = minoff + size - 1;
1094    tl_assert((minoff & ~0xFFFF) == 0);
1095    tl_assert((maxoff & ~0xFFFF) == 0);
1096
1097    for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1098       minoffD = mce->layout->alwaysDefd[i].offset;
1099       maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1100       tl_assert((minoffD & ~0xFFFF) == 0);
1101       tl_assert((maxoffD & ~0xFFFF) == 0);
1102
1103       if (maxoff < minoffD || maxoffD < minoff)
1104          continue; /* no overlap */
1105       if (minoff >= minoffD && maxoff <= maxoffD)
1106          return True; /* completely contained in an always-defd section */
1107
1108       VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1109    }
1110    return False; /* could not find any containing section */
1111 }
1112
1113
1114 /* Generate into bb suitable actions to shadow this Put.  If the state
1115    slice is marked 'always defined', do nothing.  Otherwise, write the
1116    supplied V bits to the shadow state.  We can pass in either an
1117    original atom or a V-atom, but not both.  In the former case the
1118    relevant V-bits are then generated from the original.
1119 */
1120 static
1121 void do_shadow_PUT ( MCEnv* mce,  Int offset,
1122                      IRAtom* atom, IRAtom* vatom )
1123 {
1124    IRType ty;
1125    if (atom) {
1126       tl_assert(!vatom);
1127       tl_assert(isOriginalAtom(mce, atom));
1128       vatom = expr2vbits( mce, atom );
1129    } else {
1130       tl_assert(vatom);
1131       tl_assert(isShadowAtom(mce, vatom));
1132    }
1133
1134    ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1135    tl_assert(ty != Ity_I1);
1136    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1137       /* later: no ... */
1138       /* emit code to emit a complaint if any of the vbits are 1. */
1139       /* complainIfUndefined(mce, atom); */
1140    } else {
1141       /* Do a plain shadow Put. */
1142       stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
1143    }
1144 }
1145
1146
1147 /* Return an expression which contains the V bits corresponding to the
1148    given GETI (passed in in pieces).
1149 */
1150 static
1151 void do_shadow_PUTI ( MCEnv* mce,
1152                       IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
1153 {
1154    IRAtom* vatom;
1155    IRType  ty, tyS;
1156    Int     arrSize;;
1157
1158    tl_assert(isOriginalAtom(mce,atom));
1159    vatom = expr2vbits( mce, atom );
1160    tl_assert(sameKindedAtoms(atom, vatom));
1161    ty   = descr->elemTy;
1162    tyS  = shadowType(ty);
1163    arrSize = descr->nElems * sizeofIRType(ty);
1164    tl_assert(ty != Ity_I1);
1165    tl_assert(isOriginalAtom(mce,ix));
1166    complainIfUndefined(mce,ix);
1167    if (isAlwaysDefd(mce, descr->base, arrSize)) {
1168       /* later: no ... */
1169       /* emit code to emit a complaint if any of the vbits are 1. */
1170       /* complainIfUndefined(mce, atom); */
1171    } else {
1172       /* Do a cloned version of the Put that refers to the shadow
1173          area. */
1174       IRRegArray* new_descr
1175          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1176                       tyS, descr->nElems);
1177       stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
1178    }
1179 }
1180
1181
1182 /* Return an expression which contains the V bits corresponding to the
1183    given GET (passed in in pieces).
1184 */
1185 static
1186 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1187 {
1188    IRType tyS = shadowType(ty);
1189    tl_assert(ty != Ity_I1);
1190    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1191       /* Always defined, return all zeroes of the relevant type */
1192       return definedOfType(tyS);
1193    } else {
1194       /* return a cloned version of the Get that refers to the shadow
1195          area. */
1196       return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1197    }
1198 }
1199
1200
1201 /* Return an expression which contains the V bits corresponding to the
1202    given GETI (passed in in pieces).
1203 */
1204 static
1205 IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
1206 {
1207    IRType ty   = descr->elemTy;
1208    IRType tyS  = shadowType(ty);
1209    Int arrSize = descr->nElems * sizeofIRType(ty);
1210    tl_assert(ty != Ity_I1);
1211    tl_assert(isOriginalAtom(mce,ix));
1212    complainIfUndefined(mce,ix);
1213    if (isAlwaysDefd(mce, descr->base, arrSize)) {
1214       /* Always defined, return all zeroes of the relevant type */
1215       return definedOfType(tyS);
1216    } else {
1217       /* return a cloned version of the Get that refers to the shadow
1218          area. */
1219       IRRegArray* new_descr
1220          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1221                       tyS, descr->nElems);
1222       return IRExpr_GetI( new_descr, ix, bias );
1223    }
1224 }
1225
1226
1227 /*------------------------------------------------------------*/
1228 /*--- Generating approximations for unknown operations,    ---*/
1229 /*--- using lazy-propagate semantics                       ---*/
1230 /*------------------------------------------------------------*/
1231
1232 /* Lazy propagation of undefinedness from two values, resulting in the
1233    specified shadow type.
1234 */
1235 static
1236 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1237 {
1238    /* force everything via 32-bit intermediaries. */
1239    IRAtom* at;
1240    tl_assert(isShadowAtom(mce,va1));
1241    tl_assert(isShadowAtom(mce,va2));
1242    at = mkPCastTo(mce, Ity_I32, va1);
1243    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1244    at = mkPCastTo(mce, finalVty, at);
1245    return at;
1246 }
1247
1248
1249 /* Do the lazy propagation game from a null-terminated vector of
1250    atoms.  This is presumably the arguments to a helper call, so the
1251    IRCallee info is also supplied in order that we can know which
1252    arguments should be ignored (via the .mcx_mask field).
1253 */
1254 static
1255 IRAtom* mkLazyN ( MCEnv* mce,
1256                   IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1257 {
1258    Int i;
1259    IRAtom* here;
1260    IRAtom* curr = definedOfType(Ity_I32);
1261    for (i = 0; exprvec[i]; i++) {
1262       tl_assert(i < 32);
1263       tl_assert(isOriginalAtom(mce, exprvec[i]));
1264       /* Only take notice of this arg if the callee's mc-exclusion
1265          mask does not say it is to be excluded. */
1266       if (cee->mcx_mask & (1<<i)) {
1267          /* the arg is to be excluded from definedness checking.  Do
1268             nothing. */
1269          if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1270       } else {
1271          /* calculate the arg's definedness, and pessimistically merge
1272             it in. */
1273          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
1274          curr = mkUifU32(mce, here, curr);
1275       }
1276    }
1277    return mkPCastTo(mce, finalVtype, curr );
1278 }
1279
1280
1281 /*------------------------------------------------------------*/
1282 /*--- Generating expensive sequences for exact carry-chain ---*/
1283 /*--- propagation in add/sub and related operations.       ---*/
1284 /*------------------------------------------------------------*/
1285
1286 static
1287 IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
1288                                      IRAtom* aa,  IRAtom* bb )
1289 {
1290    IRAtom *a_min, *b_min, *a_max, *b_max;
1291    IRType ty;
1292    IROp   opAND, opOR, opXOR, opNOT, opADD;
1293
1294    tl_assert(isShadowAtom(mce,qaa));
1295    tl_assert(isShadowAtom(mce,qbb));
1296    tl_assert(isOriginalAtom(mce,aa));
1297    tl_assert(isOriginalAtom(mce,bb));
1298    tl_assert(sameKindedAtoms(qaa,aa));
1299    tl_assert(sameKindedAtoms(qbb,bb));
1300
1301    ty    = Ity_I32;
1302    opAND = Iop_And32;
1303    opOR  = Iop_Or32;
1304    opXOR = Iop_Xor32;
1305    opNOT = Iop_Not32;
1306    opADD = Iop_Add32;
1307
1308    // a_min = aa & ~qaa
1309    a_min = assignNew(mce,ty,
1310                      binop(opAND, aa,
1311                                   assignNew(mce,ty, unop(opNOT, qaa))));
1312
1313    // b_min = bb & ~qbb
1314    b_min = assignNew(mce,ty,
1315                      binop(opAND, bb,
1316                                   assignNew(mce,ty, unop(opNOT, qbb))));
1317
1318    // a_max = aa | qaa
1319    a_max = assignNew(mce,ty, binop(opOR, aa, qaa));
1320
1321    // b_max = bb | qbb
1322    b_max = assignNew(mce,ty, binop(opOR, bb, qbb));
1323
1324    // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1325    return
1326    assignNew(mce,ty,
1327       binop( opOR,
1328              assignNew(mce,ty, binop(opOR, qaa, qbb)),
1329              assignNew(mce,ty,
1330                 binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
1331                              assignNew(mce,ty, binop(opADD, a_max, b_max))
1332                 )
1333              )
1334       )
1335    );
1336 }
1337
1338
1339 /*------------------------------------------------------------*/
1340 /*--- Helpers for dealing with vector primops.            ---*/
1341 /*------------------------------------------------------------*/
1342
1343 /* Vector pessimisation -- pessimise within each lane individually. */
1344
1345 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1346 {
1347    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1348 }
1349
1350 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1351 {
1352    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1353 }
1354
1355 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1356 {
1357    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1358 }
1359
1360 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1361 {
1362    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1363 }
1364
1365
1366 /* Here's a simple scheme capable of handling ops derived from SSE1
1367    code and while only generating ops that can be efficiently
1368    implemented in SSE1. */
1369
1370 /* All-lanes versions are straightforward:
1371
1372    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1373
1374    unary32Fx4(x,y)    ==> PCast32x4(x#)
1375
1376    Lowest-lane-only versions are more complex:
1377
1378    binary32F0x4(x,y)  ==> SetV128lo32(
1379                              x#,
1380                              PCast32(V128to32(UifUV128(x#,y#)))
1381                           )
1382
1383    This is perhaps not so obvious.  In particular, it's faster to
1384    do a V128-bit UifU and then take the bottom 32 bits than the more
1385    obvious scheme of taking the bottom 32 bits of each operand
1386    and doing a 32-bit UifU.  Basically since UifU is fast and
1387    chopping lanes off vector values is slow.
1388
1389    Finally:
1390
1391    unary32F0x4(x)     ==> SetV128lo32(
1392                              x#,
1393                              PCast32(V128to32(x#))
1394                           )
1395
1396    Where:
1397
1398    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
1399    PCast32x4(v#) = CmpNEZ32x4(v#)
1400 */
1401
1402 static
1403 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1404 {
1405    IRAtom* at;
1406    tl_assert(isShadowAtom(mce, vatomX));
1407    tl_assert(isShadowAtom(mce, vatomY));
1408    at = mkUifUV128(mce, vatomX, vatomY);
1409    at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
1410    return at;
1411 }
1412
1413 static
1414 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1415 {
1416    IRAtom* at;
1417    tl_assert(isShadowAtom(mce, vatomX));
1418    at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
1419    return at;
1420 }
1421
1422 static
1423 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1424 {
1425    IRAtom* at;
1426    tl_assert(isShadowAtom(mce, vatomX));
1427    tl_assert(isShadowAtom(mce, vatomY));
1428    at = mkUifUV128(mce, vatomX, vatomY);
1429    at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
1430    at = mkPCastTo(mce, Ity_I32, at);
1431    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1432    return at;
1433 }
1434
1435 static
1436 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1437 {
1438    IRAtom* at;
1439    tl_assert(isShadowAtom(mce, vatomX));
1440    at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
1441    at = mkPCastTo(mce, Ity_I32, at);
1442    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1443    return at;
1444 }
1445
1446 /* --- ... and ... 64Fx2 versions of the same ... --- */
1447
1448 static
1449 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1450 {
1451    IRAtom* at;
1452    tl_assert(isShadowAtom(mce, vatomX));
1453    tl_assert(isShadowAtom(mce, vatomY));
1454    at = mkUifUV128(mce, vatomX, vatomY);
1455    at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
1456    return at;
1457 }
1458
1459 static
1460 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1461 {
1462    IRAtom* at;
1463    tl_assert(isShadowAtom(mce, vatomX));
1464    at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
1465    return at;
1466 }
1467
1468 static
1469 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1470 {
1471    IRAtom* at;
1472    tl_assert(isShadowAtom(mce, vatomX));
1473    tl_assert(isShadowAtom(mce, vatomY));
1474    at = mkUifUV128(mce, vatomX, vatomY);
1475    at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
1476    at = mkPCastTo(mce, Ity_I64, at);
1477    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1478    return at;
1479 }
1480
1481 static
1482 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1483 {
1484    IRAtom* at;
1485    tl_assert(isShadowAtom(mce, vatomX));
1486    at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
1487    at = mkPCastTo(mce, Ity_I64, at);
1488    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1489    return at;
1490 }
1491
1492 /* --- --- Vector saturated narrowing --- --- */
1493
1494 /* This is quite subtle.  What to do is simple:
1495
1496    Let the original narrowing op be QNarrowW{S,U}xN.  Produce:
1497
1498       the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1499
1500    Why this is right is not so simple.  Consider a lane in the args,
1501    vatom1 or 2, doesn't matter.
1502
1503    After the PCast, that lane is all 0s (defined) or all
1504    1s(undefined).
1505
1506    Both signed and unsigned saturating narrowing of all 0s produces
1507    all 0s, which is what we want.
1508
1509    The all-1s case is more complex.  Unsigned narrowing interprets an
1510    all-1s input as the largest unsigned integer, and so produces all
1511    1s as a result since that is the largest unsigned value at the
1512    smaller width.
1513
1514    Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
1515    to -1, so we still wind up with all 1s at the smaller width.
1516
1517    So: In short, pessimise the args, then apply the original narrowing
1518    op.
1519 */
1520 static
1521 IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
1522                           IRAtom* vatom1, IRAtom* vatom2)
1523 {
1524    IRAtom *at1, *at2, *at3;
1525    IRAtom* (*pcast)( MCEnv*, IRAtom* );
1526    switch (narrow_op) {
1527       case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
1528       case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
1529       case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
1530       default: VG_(tool_panic)("vectorNarrowV128");
1531    }
1532    tl_assert(isShadowAtom(mce,vatom1));
1533    tl_assert(isShadowAtom(mce,vatom2));
1534    at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1535    at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1536    at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1537    return at3;
1538 }
1539
1540
1541 /* --- --- Vector integer arithmetic --- --- */
1542
1543 /* Simple ... UifU the args and per-lane pessimise the results. */
1544 static
1545 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1546 {
1547    IRAtom* at;
1548    at = mkUifUV128(mce, vatom1, vatom2);
1549    at = mkPCast8x16(mce, at);
1550    return at;
1551 }
1552
1553 static
1554 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1555 {
1556    IRAtom* at;
1557    at = mkUifUV128(mce, vatom1, vatom2);
1558    at = mkPCast16x8(mce, at);
1559    return at;
1560 }
1561
1562 static
1563 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1564 {
1565    IRAtom* at;
1566    at = mkUifUV128(mce, vatom1, vatom2);
1567    at = mkPCast32x4(mce, at);
1568    return at;
1569 }
1570
1571 static
1572 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1573 {
1574    IRAtom* at;
1575    at = mkUifUV128(mce, vatom1, vatom2);
1576    at = mkPCast64x2(mce, at);
1577    return at;
1578 }
1579
1580
1581 /*------------------------------------------------------------*/
1582 /*--- Generate shadow values from all kinds of IRExprs.    ---*/
1583 /*------------------------------------------------------------*/
1584
1585 static
1586 IRAtom* expr2vbits_Binop ( MCEnv* mce,
1587                            IROp op,
1588                            IRAtom* atom1, IRAtom* atom2 )
1589 {
1590    IRType  and_or_ty;
1591    IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
1592    IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
1593    IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1594
1595    IRAtom* vatom1 = expr2vbits( mce, atom1 );
1596    IRAtom* vatom2 = expr2vbits( mce, atom2 );
1597
1598    tl_assert(isOriginalAtom(mce,atom1));
1599    tl_assert(isOriginalAtom(mce,atom2));
1600    tl_assert(isShadowAtom(mce,vatom1));
1601    tl_assert(isShadowAtom(mce,vatom2));
1602    tl_assert(sameKindedAtoms(atom1,vatom1));
1603    tl_assert(sameKindedAtoms(atom2,vatom2));
1604    switch (op) {
1605
1606       /* V128-bit SIMD (SSE2-esque) */
1607
1608       case Iop_ShrN16x8:
1609       case Iop_ShrN32x4:
1610       case Iop_ShrN64x2:
1611       case Iop_SarN16x8:
1612       case Iop_SarN32x4:
1613       case Iop_ShlN16x8:
1614       case Iop_ShlN32x4:
1615       case Iop_ShlN64x2:
1616          /* Same scheme as with all other shifts. */
1617          complainIfUndefined(mce, atom2);
1618          return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1619
1620       case Iop_QSub8Ux16:
1621       case Iop_QSub8Sx16:
1622       case Iop_Sub8x16:
1623       case Iop_Min8Ux16:
1624       case Iop_Max8Ux16:
1625       case Iop_CmpGT8Sx16:
1626       case Iop_CmpEQ8x16:
1627       case Iop_Avg8Ux16:
1628       case Iop_QAdd8Ux16:
1629       case Iop_QAdd8Sx16:
1630       case Iop_Add8x16:
1631          return binary8Ix16(mce, vatom1, vatom2);
1632
1633       case Iop_QSub16Ux8:
1634       case Iop_QSub16Sx8:
1635       case Iop_Sub16x8:
1636       case Iop_Mul16x8:
1637       case Iop_MulHi16Sx8:
1638       case Iop_MulHi16Ux8:
1639       case Iop_Min16Sx8:
1640       case Iop_Max16Sx8:
1641       case Iop_CmpGT16Sx8:
1642       case Iop_CmpEQ16x8:
1643       case Iop_Avg16Ux8:
1644       case Iop_QAdd16Ux8:
1645       case Iop_QAdd16Sx8:
1646       case Iop_Add16x8:
1647          return binary16Ix8(mce, vatom1, vatom2);
1648
1649       case Iop_Sub32x4:
1650       case Iop_QSub32Sx4:
1651       case Iop_QSub32Ux4:
1652       case Iop_CmpGT32Sx4:
1653       case Iop_CmpEQ32x4:
1654       case Iop_Add32x4:
1655       case Iop_QAdd32Ux4:
1656       case Iop_QAdd32Sx4:
1657          return binary32Ix4(mce, vatom1, vatom2);
1658
1659       case Iop_Sub64x2:
1660       case Iop_QSub64Ux2:
1661       case Iop_QSub64Sx2:
1662       case Iop_Add64x2:
1663       case Iop_QAdd64Ux2:
1664       case Iop_QAdd64Sx2:
1665          return binary64Ix2(mce, vatom1, vatom2);
1666
1667       case Iop_QNarrow32Sx4:
1668       case Iop_QNarrow16Sx8:
1669       case Iop_QNarrow16Ux8:
1670          return vectorNarrowV128(mce, op, vatom1, vatom2);
1671
1672       case Iop_Sub64Fx2:
1673       case Iop_Mul64Fx2:
1674       case Iop_Min64Fx2:
1675       case Iop_Max64Fx2:
1676       case Iop_Div64Fx2:
1677       case Iop_CmpLT64Fx2:
1678       case Iop_CmpLE64Fx2:
1679       case Iop_CmpEQ64Fx2:
1680       case Iop_Add64Fx2:
1681          return binary64Fx2(mce, vatom1, vatom2);
1682
1683       case Iop_Sub64F0x2:
1684       case Iop_Mul64F0x2:
1685       case Iop_Min64F0x2:
1686       case Iop_Max64F0x2:
1687       case Iop_Div64F0x2:
1688       case Iop_CmpLT64F0x2:
1689       case Iop_CmpLE64F0x2:
1690       case Iop_CmpEQ64F0x2:
1691       case Iop_Add64F0x2:
1692          return binary64F0x2(mce, vatom1, vatom2);
1693
1694       /* V128-bit SIMD (SSE1-esque) */
1695
1696       case Iop_Sub32Fx4:
1697       case Iop_Mul32Fx4:
1698       case Iop_Min32Fx4:
1699       case Iop_Max32Fx4:
1700       case Iop_Div32Fx4:
1701       case Iop_CmpLT32Fx4:
1702       case Iop_CmpLE32Fx4:
1703       case Iop_CmpEQ32Fx4:
1704       case Iop_Add32Fx4:
1705          return binary32Fx4(mce, vatom1, vatom2);
1706
1707       case Iop_Sub32F0x4:
1708       case Iop_Mul32F0x4:
1709       case Iop_Min32F0x4:
1710       case Iop_Max32F0x4:
1711       case Iop_Div32F0x4:
1712       case Iop_CmpLT32F0x4:
1713       case Iop_CmpLE32F0x4:
1714       case Iop_CmpEQ32F0x4:
1715       case Iop_Add32F0x4:
1716          return binary32F0x4(mce, vatom1, vatom2);
1717
1718       /* V128-bit data-steering */
1719       case Iop_SetV128lo32:
1720       case Iop_SetV128lo64:
1721       case Iop_64HLtoV128:
1722       case Iop_InterleaveLO64x2:
1723       case Iop_InterleaveLO32x4:
1724       case Iop_InterleaveLO16x8:
1725       case Iop_InterleaveLO8x16:
1726       case Iop_InterleaveHI64x2:
1727       case Iop_InterleaveHI32x4:
1728       case Iop_InterleaveHI16x8:
1729       case Iop_InterleaveHI8x16:
1730          return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
1731
1732       /* Scalar floating point */
1733
1734          //      case Iop_RoundF64:
1735       case Iop_F64toI64S:
1736       case Iop_I64StoF64:
1737          /* First arg is I32 (rounding mode), second is F64 or I64
1738             (data). */
1739          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1740
1741       case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1742          /* Takes two F64 args. */
1743       case Iop_F64toI32S:
1744       case Iop_F64toF32:
1745          /* First arg is I32 (rounding mode), second is F64 (data). */
1746          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1747
1748       case Iop_F64toI16S:
1749          /* First arg is I32 (rounding mode), second is F64 (data). */
1750          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1751
1752       case Iop_ScaleF64:
1753       case Iop_Yl2xF64:
1754       case Iop_Yl2xp1F64:
1755       case Iop_PRemF64:
1756       case Iop_AtanF64:
1757       case Iop_AddF64:
1758       case Iop_DivF64:
1759       case Iop_SubF64:
1760       case Iop_MulF64:
1761          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1762
1763       case Iop_CmpF64:
1764          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1765
1766       /* non-FP after here */
1767
1768       case Iop_DivModU64to32:
1769       case Iop_DivModS64to32:
1770          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1771
1772       case Iop_16HLto32:
1773          return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
1774       case Iop_32HLto64:
1775          return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1776
1777       case Iop_MullS32:
1778       case Iop_MullU32: {
1779          IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1780          IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1781          return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1782       }
1783
1784       case Iop_MullS16:
1785       case Iop_MullU16: {
1786          IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1787          IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1788          return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1789       }
1790
1791       case Iop_MullS8:
1792       case Iop_MullU8: {
1793          IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1794          IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1795          return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1796       }
1797
1798       case Iop_Add32:
1799 #        if 0
1800          return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
1801 #        endif
1802       case Iop_Sub32:
1803       case Iop_Mul32:
1804          return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1805
1806       case Iop_Mul16:
1807       case Iop_Add16:
1808       case Iop_Sub16:
1809          return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1810
1811       case Iop_Sub8:
1812       case Iop_Add8:
1813          return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1814
1815       case Iop_CmpLE32S: case Iop_CmpLE32U:
1816       case Iop_CmpLT32U: case Iop_CmpLT32S:
1817       case Iop_CmpEQ32: case Iop_CmpNE32:
1818          return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
1819
1820       case Iop_CmpEQ16: case Iop_CmpNE16:
1821          return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
1822
1823       case Iop_CmpEQ8: case Iop_CmpNE8:
1824          return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
1825
1826       case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
1827          /* Complain if the shift amount is undefined.  Then simply
1828             shift the first arg's V bits by the real shift amount. */
1829          complainIfUndefined(mce, atom2);
1830          return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
1831
1832       case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
1833          /* Same scheme as with 32-bit shifts. */
1834          complainIfUndefined(mce, atom2);
1835          return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
1836
1837       case Iop_Shl8: case Iop_Shr8:
1838          /* Same scheme as with 32-bit shifts. */
1839          complainIfUndefined(mce, atom2);
1840          return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
1841
1842       case Iop_Shl64: case Iop_Shr64:
1843          /* Same scheme as with 32-bit shifts. */
1844          complainIfUndefined(mce, atom2);
1845          return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1846
1847       case Iop_AndV128:
1848          uifu = mkUifUV128; difd = mkDifDV128;
1849          and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
1850       case Iop_And64:
1851          uifu = mkUifU64; difd = mkDifD64;
1852          and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
1853       case Iop_And32:
1854          uifu = mkUifU32; difd = mkDifD32;
1855          and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
1856       case Iop_And16:
1857          uifu = mkUifU16; difd = mkDifD16;
1858          and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
1859       case Iop_And8:
1860          uifu = mkUifU8; difd = mkDifD8;
1861          and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
1862
1863       case Iop_OrV128:
1864          uifu = mkUifUV128; difd = mkDifDV128;
1865          and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
1866       case Iop_Or64:
1867          uifu = mkUifU64; difd = mkDifD64;
1868          and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
1869       case Iop_Or32:
1870          uifu = mkUifU32; difd = mkDifD32;
1871          and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
1872       case Iop_Or16:
1873          uifu = mkUifU16; difd = mkDifD16;
1874          and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
1875       case Iop_Or8:
1876          uifu = mkUifU8; difd = mkDifD8;
1877          and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
1878
1879       do_And_Or:
1880          return
1881          assignNew(
1882             mce,
1883             and_or_ty,
1884             difd(mce, uifu(mce, vatom1, vatom2),
1885                       difd(mce, improve(mce, atom1, vatom1),
1886                                 improve(mce, atom2, vatom2) ) ) );
1887
1888       case Iop_Xor8:
1889          return mkUifU8(mce, vatom1, vatom2);
1890       case Iop_Xor16:
1891          return mkUifU16(mce, vatom1, vatom2);
1892       case Iop_Xor32:
1893          return mkUifU32(mce, vatom1, vatom2);
1894       case Iop_Xor64:
1895          return mkUifU64(mce, vatom1, vatom2);
1896       case Iop_XorV128:
1897          return mkUifUV128(mce, vatom1, vatom2);
1898
1899       default:
1900          ppIROp(op);
1901          VG_(tool_panic)("memcheck:expr2vbits_Binop");
1902    }
1903 }
1904
1905
1906 static
1907 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
1908 {
1909    IRAtom* vatom = expr2vbits( mce, atom );
1910    tl_assert(isOriginalAtom(mce,atom));
1911    switch (op) {
1912
1913       case Iop_Sqrt64Fx2:
1914          return unary64Fx2(mce, vatom);
1915
1916       case Iop_Sqrt64F0x2:
1917          return unary64F0x2(mce, vatom);
1918
1919       case Iop_Sqrt32Fx4:
1920       case Iop_RSqrt32Fx4:
1921       case Iop_Recip32Fx4:
1922          return unary32Fx4(mce, vatom);
1923
1924       case Iop_Sqrt32F0x4:
1925       case Iop_RSqrt32F0x4:
1926       case Iop_Recip32F0x4:
1927          return unary32F0x4(mce, vatom);
1928
1929       case Iop_32UtoV128:
1930       case Iop_64UtoV128:
1931          return assignNew(mce, Ity_V128, unop(op, vatom));
1932
1933       case Iop_F32toF64:
1934       case Iop_I32StoF64:
1935       case Iop_NegF64:
1936       case Iop_SinF64:
1937       case Iop_CosF64:
1938       case Iop_TanF64:
1939       case Iop_SqrtF64:
1940       case Iop_AbsF64:
1941       case Iop_2xm1F64:
1942          return mkPCastTo(mce, Ity_I64, vatom);
1943
1944       case Iop_Clz32:
1945       case Iop_Ctz32:
1946          return mkPCastTo(mce, Ity_I32, vatom);
1947
1948       case Iop_32Sto64:
1949       case Iop_32Uto64:
1950       case Iop_V128to64:
1951       case Iop_V128HIto64:
1952          return assignNew(mce, Ity_I64, unop(op, vatom));
1953
1954       case Iop_64to32:
1955       case Iop_64HIto32:
1956       case Iop_1Uto32:
1957       case Iop_8Uto32:
1958       case Iop_16Uto32:
1959       case Iop_16Sto32:
1960       case Iop_8Sto32:
1961          return assignNew(mce, Ity_I32, unop(op, vatom));
1962
1963       case Iop_8Sto16:
1964       case Iop_8Uto16:
1965       case Iop_32to16:
1966       case Iop_32HIto16:
1967          return assignNew(mce, Ity_I16, unop(op, vatom));
1968
1969       case Iop_1Uto8:
1970       case Iop_16to8:
1971       case Iop_32to8:
1972          return assignNew(mce, Ity_I8, unop(op, vatom));
1973
1974       case Iop_32to1:
1975          return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
1976
1977       case Iop_ReinterpF64asI64:
1978       case Iop_ReinterpI64asF64:
1979       case Iop_ReinterpI32asF32:
1980       case Iop_NotV128:
1981       case Iop_Not64:
1982       case Iop_Not32:
1983       case Iop_Not16:
1984       case Iop_Not8:
1985       case Iop_Not1:
1986          return vatom;
1987
1988       default:
1989          ppIROp(op);
1990          VG_(tool_panic)("memcheck:expr2vbits_Unop");
1991    }
1992 }
1993
1994
1995 /* Worker function; do not call directly. */
1996 static
1997 IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
1998 {
1999    void*    helper;
2000    HChar*   hname;
2001    IRDirty* di;
2002    IRTemp   datavbits;
2003    IRAtom*  addrAct;
2004
2005    tl_assert(isOriginalAtom(mce,addr));
2006
2007    /* First, emit a definedness test for the address.  This also sets
2008       the address (shadow) to 'defined' following the test. */
2009    complainIfUndefined( mce, addr );
2010
2011    /* Now cook up a call to the relevant helper function, to read the
2012       data V bits from shadow memory. */
2013    ty = shadowType(ty);
2014    switch (ty) {
2015       case Ity_I64: helper = &MC_(helperc_LOADV8);
2016                     hname = "MC_(helperc_LOADV8)";
2017                     break;
2018       case Ity_I32: helper = &MC_(helperc_LOADV4);
2019                     hname = "MC_(helperc_LOADV4)";
2020                     break;
2021       case Ity_I16: helper = &MC_(helperc_LOADV2);
2022                     hname = "MC_(helperc_LOADV2)";
2023                     break;
2024       case Ity_I8:  helper = &MC_(helperc_LOADV1);
2025                     hname = "MC_(helperc_LOADV1)";
2026                     break;
2027       default:      ppIRType(ty);
2028                     VG_(tool_panic)("memcheck:do_shadow_LDle");
2029    }
2030
2031    /* Generate the actual address into addrAct. */
2032    if (bias == 0) {
2033       addrAct = addr;
2034    } else {
2035       IROp    mkAdd;
2036       IRAtom* eBias;
2037       IRType  tyAddr  = mce->hWordTy;
2038       tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2039       mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2040       eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2041       addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2042    }
2043
2044    /* We need to have a place to park the V bits we're just about to
2045       read. */
2046    datavbits = newIRTemp(mce->bb->tyenv, ty);
2047    di = unsafeIRDirty_1_N( datavbits,
2048                            1/*regparms*/, hname, helper,
2049                            mkIRExprVec_1( addrAct ));
2050    setHelperAnns( mce, di );
2051    stmt( mce->bb, IRStmt_Dirty(di) );
2052
2053    return mkexpr(datavbits);
2054 }
2055
2056
2057 static
2058 IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2059 {
2060    IRAtom *v64hi, *v64lo;
2061    switch (shadowType(ty)) {
2062       case Ity_I8:
2063       case Ity_I16:
2064       case Ity_I32:
2065       case Ity_I64:
2066          return expr2vbits_LDle_WRK(mce, ty, addr, bias);
2067       case Ity_V128:
2068          v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
2069          v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
2070          return assignNew( mce,
2071                            Ity_V128,
2072                            binop(Iop_64HLtoV128, v64hi, v64lo));
2073       default:
2074          VG_(tool_panic)("expr2vbits_LDle");
2075    }
2076 }
2077
2078
2079 static
2080 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
2081                            IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
2082 {
2083    IRAtom *vbitsC, *vbits0, *vbitsX;
2084    IRType ty;
2085    /* Given Mux0X(cond,expr0,exprX), generate
2086          Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
2087       That is, steer the V bits like the originals, but trash the
2088       result if the steering value is undefined.  This gives
2089       lazy propagation. */
2090    tl_assert(isOriginalAtom(mce, cond));
2091    tl_assert(isOriginalAtom(mce, expr0));
2092    tl_assert(isOriginalAtom(mce, exprX));
2093
2094    vbitsC = expr2vbits(mce, cond);
2095    vbits0 = expr2vbits(mce, expr0);
2096    vbitsX = expr2vbits(mce, exprX);
2097    ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
2098
2099    return
2100       mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
2101                       mkPCastTo(mce, ty, vbitsC) );
2102 }
2103
2104 /* --------- This is the main expression-handling function. --------- */
2105
2106 static
2107 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2108 {
2109    switch (e->tag) {
2110
2111       case Iex_Get:
2112          return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2113
2114       case Iex_GetI:
2115          return shadow_GETI( mce, e->Iex.GetI.descr,
2116                                   e->Iex.GetI.ix, e->Iex.GetI.bias );
2117
2118       case Iex_RdTmp:
2119          return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );
2120
2121       case Iex_Const:
2122          return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2123
2124       case Iex_Binop:
2125          return expr2vbits_Binop(
2126                    mce,
2127                    e->Iex.Binop.op,
2128                    e->Iex.Binop.arg1, e->Iex.Binop.arg2
2129                 );
2130
2131       case Iex_Unop:
2132          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2133
2134       case Iex_Load:
2135          return expr2vbits_LDle( mce, e->Iex.Load.ty,
2136                                       e->Iex.Load.addr, 0/*addr bias*/ );
2137
2138       case Iex_CCall:
2139          return mkLazyN( mce, e->Iex.CCall.args,
2140                               e->Iex.CCall.retty,
2141                               e->Iex.CCall.cee );
2142
2143       case Iex_Mux0X:
2144          return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2145                                        e->Iex.Mux0X.exprX);
2146
2147       default:
2148          VG_(printf)("\n");
2149          ppIRExpr(e);
2150          VG_(printf)("\n");
2151          VG_(tool_panic)("memcheck: expr2vbits");
2152    }
2153 }
2154
2155 /*------------------------------------------------------------*/
2156 /*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
2157 /*------------------------------------------------------------*/
2158
2159 /* Widen a value to the host word size. */
2160
2161 static
2162 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
2163 {
2164    IRType ty, tyH;
2165
2166    /* vatom is vbits-value and as such can only have a shadow type. */
2167    tl_assert(isShadowAtom(mce,vatom));
2168
2169    ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
2170    tyH = mce->hWordTy;
2171
2172    if (tyH == Ity_I32) {
2173       switch (ty) {
2174          case Ity_I32: return vatom;
2175          case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2176          case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2177          default:      goto unhandled;
2178       }
2179    } else {
2180       goto unhandled;
2181    }
2182   unhandled:
2183    VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2184    VG_(tool_panic)("zwidenToHostWord");
2185 }
2186
2187
2188 /* Generate a shadow store.  addr is always the original address atom.
2189    You can pass in either originals or V-bits for the data atom, but
2190    obviously not both.  */
2191
2192 static
2193 void do_shadow_STle ( MCEnv* mce,
2194                       IRAtom* addr, UInt bias,
2195                       IRAtom* data, IRAtom* vdata )
2196 {
2197    IROp     mkAdd;
2198    IRType   ty, tyAddr;
2199    IRDirty  *di, *diLo64, *diHi64;
2200    IRAtom   *addrAct, *addrLo64, *addrHi64;
2201    IRAtom   *vdataLo64, *vdataHi64;
2202    IRAtom   *eBias, *eBias0, *eBias8;
2203    void*    helper = NULL;
2204    HChar*   hname = NULL;
2205
2206    tyAddr = mce->hWordTy;
2207    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2208    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2209
2210    di = diLo64 = diHi64 = NULL;
2211    eBias = eBias0 = eBias8 = NULL;
2212    addrAct = addrLo64 = addrHi64 = NULL;
2213    vdataLo64 = vdataHi64 = NULL;
2214
2215    if (data) {
2216       tl_assert(!vdata);
2217       tl_assert(isOriginalAtom(mce, data));
2218       tl_assert(bias == 0);
2219       vdata = expr2vbits( mce, data );
2220    } else {
2221       tl_assert(vdata);
2222    }
2223
2224    tl_assert(isOriginalAtom(mce,addr));
2225    tl_assert(isShadowAtom(mce,vdata));
2226
2227    ty = typeOfIRExpr(mce->bb->tyenv, vdata);
2228
2229    /* First, emit a definedness test for the address.  This also sets
2230       the address (shadow) to 'defined' following the test. */
2231    complainIfUndefined( mce, addr );
2232
2233    /* Now decide which helper function to call to write the data V
2234       bits into shadow memory. */
2235    switch (ty) {
2236       case Ity_V128: /* we'll use the helper twice */
2237       case Ity_I64: helper = &MC_(helperc_STOREV8);
2238                     hname = "MC_(helperc_STOREV8)";
2239                     break;
2240       case Ity_I32: helper = &MC_(helperc_STOREV4);
2241                     hname = "MC_(helperc_STOREV4)";
2242                     break;
2243       case Ity_I16: helper = &MC_(helperc_STOREV2);
2244                     hname = "MC_(helperc_STOREV2)";
2245                     break;
2246       case Ity_I8:  helper = &MC_(helperc_STOREV1);
2247                     hname = "MC_(helperc_STOREV1)";
2248                     break;
2249       default:      VG_(tool_panic)("memcheck:do_shadow_STle");
2250    }
2251
2252    if (ty == Ity_V128) {
2253
2254       /* V128-bit case */
2255       /* See comment in next clause re 64-bit regparms */
2256       eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
2257       addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
2258       vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
2259       diLo64    = unsafeIRDirty_0_N(
2260                      1/*regparms*/, hname, helper,
2261                      mkIRExprVec_2( addrLo64, vdataLo64 ));
2262
2263       eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
2264       addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
2265       vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
2266       diHi64    = unsafeIRDirty_0_N(
2267                      1/*regparms*/, hname, helper,
2268                      mkIRExprVec_2( addrHi64, vdataHi64 ));
2269
2270       setHelperAnns( mce, diLo64 );
2271       setHelperAnns( mce, diHi64 );
2272       stmt( mce->bb, IRStmt_Dirty(diLo64) );
2273       stmt( mce->bb, IRStmt_Dirty(diHi64) );
2274
2275    } else {
2276
2277       /* 8/16/32/64-bit cases */
2278       /* Generate the actual address into addrAct. */
2279       if (bias == 0) {
2280          addrAct = addr;
2281       } else {
2282          eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2283          addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2284       }
2285
2286       if (ty == Ity_I64) {
2287          /* We can't do this with regparm 2 on 32-bit platforms, since
2288             the back ends aren't clever enough to handle 64-bit
2289             regparm args.  Therefore be different. */
2290          di = unsafeIRDirty_0_N(
2291                  1/*regparms*/, hname, helper,
2292                  mkIRExprVec_2( addrAct, vdata ));
2293       } else {
2294          di = unsafeIRDirty_0_N(
2295                  2/*regparms*/, hname, helper,
2296                  mkIRExprVec_2( addrAct,
2297                                 zwidenToHostWord( mce, vdata )));
2298       }
2299       setHelperAnns( mce, di );
2300       stmt( mce->bb, IRStmt_Dirty(di) );
2301    }
2302
2303 }
2304
2305
2306 /* Do lazy pessimistic propagation through a dirty helper call, by
2307    looking at the annotations on it.  This is the most complex part of
2308    Memcheck. */
2309
2310 static IRType szToITy ( Int n )
2311 {
2312    switch (n) {
2313       case 1: return Ity_I8;
2314       case 2: return Ity_I16;
2315       case 4: return Ity_I32;
2316       case 8: return Ity_I64;
2317       default: VG_(tool_panic)("szToITy(memcheck)");
2318    }
2319 }
2320
2321 static
2322 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
2323 {
2324    Int     i, n, offset, toDo, gSz, gOff;
2325    IRAtom  *src, *here, *curr;
2326    IRType  tyAddr, tySrc, tyDst;
2327    IRTemp  dst;
2328
2329    /* First check the guard. */
2330    complainIfUndefined(mce, d->guard);
2331
2332    /* Now round up all inputs and PCast over them. */
2333    curr = definedOfType(Ity_I32);
2334
2335    /* Inputs: unmasked args */
2336    for (i = 0; d->args[i]; i++) {
2337       if (d->cee->mcx_mask & (1<<i)) {
2338          /* ignore this arg */
2339       } else {
2340          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
2341          curr = mkUifU32(mce, here, curr);
2342       }
2343    }
2344
2345    /* Inputs: guest state that we read. */
2346    for (i = 0; i < d->nFxState; i++) {
2347       tl_assert(d->fxState[i].fx != Ifx_None);
2348       if (d->fxState[i].fx == Ifx_Write)
2349          continue;
2350
2351       /* Ignore any sections marked as 'always defined'. */
2352       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
2353          if (0)
2354          VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
2355                      d->fxState[i].offset, d->fxState[i].size );
2356          continue;
2357       }
2358
2359       /* This state element is read or modified.  So we need to
2360          consider it.  If larger than 8 bytes, deal with it in 8-byte
2361          chunks. */
2362       gSz  = d->fxState[i].size;
2363       gOff = d->fxState[i].offset;
2364       tl_assert(gSz > 0);
2365       while (True) {
2366          if (gSz == 0) break;
2367          n = gSz <= 8 ? gSz : 8;
2368          /* update 'curr' with UifU of the state slice
2369             gOff .. gOff+n-1 */
2370          tySrc = szToITy( n );
2371          src   = assignNew( mce, tySrc,
2372                             shadow_GET(mce, gOff, tySrc ) );
2373          here = mkPCastTo( mce, Ity_I32, src );
2374          curr = mkUifU32(mce, here, curr);
2375          gSz -= n;
2376          gOff += n;
2377       }
2378
2379    }
2380
2381    /* Inputs: memory.  First set up some info needed regardless of
2382       whether we're doing reads or writes. */
2383    tyAddr = Ity_INVALID;
2384
2385    if (d->mFx != Ifx_None) {
2386       /* Because we may do multiple shadow loads/stores from the same
2387          base address, it's best to do a single test of its
2388          definedness right now.  Post-instrumentation optimisation
2389          should remove all but this test. */
2390       tl_assert(d->mAddr);
2391       complainIfUndefined(mce, d->mAddr);
2392
2393       tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
2394       tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2395       tl_assert(tyAddr == mce->hWordTy); /* not really right */
2396    }
2397
2398    /* Deal with memory inputs (reads or modifies) */
2399    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2400       offset = 0;
2401       toDo   = d->mSize;
2402       /* chew off 32-bit chunks */
2403       while (toDo >= 4) {
2404          here = mkPCastTo(
2405                    mce, Ity_I32,
2406                    expr2vbits_LDle ( mce, Ity_I32,
2407                                      d->mAddr, d->mSize - toDo )
2408                 );
2409          curr = mkUifU32(mce, here, curr);
2410          toDo -= 4;
2411       }
2412       /* chew off 16-bit chunks */
2413       while (toDo >= 2) {
2414          here = mkPCastTo(
2415                    mce, Ity_I32,
2416                    expr2vbits_LDle ( mce, Ity_I16,
2417                                      d->mAddr, d->mSize - toDo )
2418                 );
2419          curr = mkUifU32(mce, here, curr);
2420          toDo -= 2;
2421       }
2422       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2423    }
2424
2425    /* Whew!  So curr is a 32-bit V-value summarising pessimistically
2426       all the inputs to the helper.  Now we need to re-distribute the
2427       results to all destinations. */
2428
2429    /* Outputs: the destination temporary, if there is one. */
2430    if (d->tmp != IRTemp_INVALID) {
2431       dst   = findShadowTmp(mce, d->tmp);
2432       tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
2433       assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
2434    }
2435
2436    /* Outputs: guest state that we write or modify. */
2437    for (i = 0; i < d->nFxState; i++) {
2438       tl_assert(d->fxState[i].fx != Ifx_None);
2439       if (d->fxState[i].fx == Ifx_Read)
2440          continue;
2441       /* Ignore any sections marked as 'always defined'. */
2442       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
2443          continue;
2444       /* This state element is written or modified.  So we need to
2445          consider it.  If larger than 8 bytes, deal with it in 8-byte
2446          chunks. */
2447       gSz  = d->fxState[i].size;
2448       gOff = d->fxState[i].offset;
2449       tl_assert(gSz > 0);
2450       while (True) {
2451          if (gSz == 0) break;
2452          n = gSz <= 8 ? gSz : 8;
2453          /* Write suitably-casted 'curr' to the state slice
2454             gOff .. gOff+n-1 */
2455          tyDst = szToITy( n );
2456          do_shadow_PUT( mce, gOff,
2457                              NULL, /* original atom */
2458                              mkPCastTo( mce, tyDst, curr ) );
2459          gSz -= n;
2460          gOff += n;
2461       }
2462    }
2463
2464    /* Outputs: memory that we write or modify. */
2465    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2466       offset = 0;
2467       toDo   = d->mSize;
2468       /* chew off 32-bit chunks */
2469       while (toDo >= 4) {
2470          do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2471                          NULL, /* original data */
2472                          mkPCastTo( mce, Ity_I32, curr ) );
2473          toDo -= 4;
2474       }
2475       /* chew off 16-bit chunks */
2476       while (toDo >= 2) {
2477          do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2478                          NULL, /* original data */
2479                          mkPCastTo( mce, Ity_I16, curr ) );
2480          toDo -= 2;
2481       }
2482       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2483    }
2484
2485 }
2486
2487
2488 /*------------------------------------------------------------*/
2489 /*--- Memcheck main                                        ---*/
2490 /*------------------------------------------------------------*/
2491
2492 static Bool isBogusAtom ( IRAtom* at )
2493 {
2494    ULong n = 0;
2495    IRConst* con;
2496    tl_assert(isIRAtom(at));
2497    if (at->tag == Iex_RdTmp)
2498       return False;
2499    tl_assert(at->tag == Iex_Const);
2500    con = at->Iex.Const.con;
2501    switch (con->tag) {
2502       case Ico_U8:  n = (ULong)con->Ico.U8; break;
2503       case Ico_U16: n = (ULong)con->Ico.U16; break;
2504       case Ico_U32: n = (ULong)con->Ico.U32; break;
2505       case Ico_U64: n = (ULong)con->Ico.U64; break;
2506       default: ppIRExpr(at); tl_assert(0);
2507    }
2508    /* VG_(printf)("%llx\n", n); */
2509    return (n == 0xFEFEFEFF
2510            || n == 0x80808080
2511            || n == 0x1010101
2512            || n == 1010100);
2513 }
2514
2515 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
2516 {
2517    Int     i;
2518    IRExpr* e;
2519    switch (st->tag) {
2520       case Ist_WrTmp:
2521          e = st->Ist.WrTmp.data;
2522          switch (e->tag) {
2523             case Iex_Get:
2524             case Iex_RdTmp:
2525                return False;
2526             case Iex_Unop:
2527                return isBogusAtom(e->Iex.Unop.arg);
2528             case Iex_Binop:
2529                return isBogusAtom(e->Iex.Binop.arg1)
2530                       || isBogusAtom(e->Iex.Binop.arg2);
2531             case Iex_Mux0X:
2532                return isBogusAtom(e->Iex.Mux0X.cond)
2533                       || isBogusAtom(e->Iex.Mux0X.expr0)
2534                       || isBogusAtom(e->Iex.Mux0X.exprX);
2535             case Iex_Load:
2536                return isBogusAtom(e->Iex.Load.addr);
2537             case Iex_CCall:
2538                for (i = 0; e->Iex.CCall.args[i]; i++)
2539                   if (isBogusAtom(e->Iex.CCall.args[i]))
2540                      return True;
2541                return False;
2542             default:
2543                goto unhandled;
2544          }
2545       case Ist_Put:
2546          return isBogusAtom(st->Ist.Put.data);
2547       case Ist_Store:
2548          return isBogusAtom(st->Ist.Store.addr)
2549                 || isBogusAtom(st->Ist.Store.data);
2550       case Ist_Exit:
2551          return isBogusAtom(st->Ist.Exit.guard);
2552       default:
2553       unhandled:
2554          ppIRStmt(st);
2555          VG_(tool_panic)("hasBogusLiterals");
2556    }
2557 }
2558
2559 IRSB* mc_instrument ( void* closureV,
2560                       IRSB* bb_in, VexGuestLayout* layout,
2561                       VexGuestExtents* vge,
2562                       IRType gWordTy, IRType hWordTy )
2563 {
2564    Bool verboze = False; //True;
2565
2566    /* Bool hasBogusLiterals = False; */
2567
2568    Int i, j, first_stmt;
2569    IRStmt* st;
2570    MCEnv mce;
2571
2572    /* Set up BB */
2573    IRSB* bb     = emptyIRSB();
2574    bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
2575    bb->next     = deepCopyIRExpr(bb_in->next);
2576    bb->jumpkind = bb_in->jumpkind;
2577
2578    /* Set up the running environment.  Only .bb is modified as we go
2579       along. */
2580    mce.bb             = bb;
2581    mce.layout         = layout;
2582    mce.n_originalTmps = bb->tyenv->types_used;
2583    mce.hWordTy        = hWordTy;
2584    mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
2585    for (i = 0; i < mce.n_originalTmps; i++)
2586       mce.tmpMap[i] = IRTemp_INVALID;
2587
2588    /* Iterate over the stmts. */
2589
2590    for (i = 0; i <  bb_in->stmts_used; i++) {
2591       st = bb_in->stmts[i];
2592       if (!st) continue;
2593
2594       tl_assert(isFlatIRStmt(st));
2595
2596       /*
2597       if (!hasBogusLiterals) {
2598          hasBogusLiterals = checkForBogusLiterals(st);
2599          if (hasBogusLiterals) {
2600             VG_(printf)("bogus: ");
2601             ppIRStmt(st);
2602             VG_(printf)("\n");
2603          }
2604       }
2605       */
2606       first_stmt = bb->stmts_used;
2607
2608       if (verboze) {
2609          ppIRStmt(st);
2610          VG_(printf)("\n\n");
2611       }
2612
2613       switch (st->tag) {
2614
2615          case Ist_WrTmp:
2616             assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
2617                         expr2vbits( &mce, st->Ist.WrTmp.data) );
2618             break;
2619
2620          case Ist_Put:
2621             do_shadow_PUT( &mce,
2622                            st->Ist.Put.offset,
2623                            st->Ist.Put.data,
2624                            NULL /* shadow atom */ );
2625             break;
2626
2627          case Ist_PutI:
2628             do_shadow_PUTI( &mce,
2629                             st->Ist.PutI.descr,
2630                             st->Ist.PutI.ix,
2631                             st->Ist.PutI.bias,
2632                             st->Ist.PutI.data );
2633             break;
2634
2635          case Ist_Store:
2636             do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
2637                                   st->Ist.Store.data,
2638                                   NULL /* shadow data */ );
2639             break;
2640
2641          case Ist_Exit:
2642             /* if (!hasBogusLiterals) */
2643                complainIfUndefined( &mce, st->Ist.Exit.guard );
2644             break;
2645
2646          case Ist_Dirty:
2647             do_shadow_Dirty( &mce, st->Ist.Dirty.details );
2648             break;
2649
2650          case Ist_IMark:
2651          case Ist_NoOp:
2652             break;
2653
2654          default:
2655             VG_(printf)("\n");
2656             ppIRStmt(st);
2657             VG_(printf)("\n");
2658             VG_(tool_panic)("memcheck: unhandled IRStmt");
2659
2660       } /* switch (st->tag) */
2661
2662       if (verboze) {
2663          for (j = first_stmt; j < bb->stmts_used; j++) {
2664             VG_(printf)("   ");
2665             ppIRStmt(bb->stmts[j]);
2666             VG_(printf)("\n");
2667          }
2668          VG_(printf)("\n");
2669       }
2670
2671       addStmtToIRSB(bb, st);
2672
2673    }
2674
2675    /* Now we need to complain if the jump target is undefined. */
2676    first_stmt = bb->stmts_used;
2677
2678    if (verboze) {
2679       VG_(printf)("bb->next = ");
2680       ppIRExpr(bb->next);
2681       VG_(printf)("\n\n");
2682    }
2683
2684    complainIfUndefined( &mce, bb->next );
2685
2686    if (verboze) {
2687       for (j = first_stmt; j < bb->stmts_used; j++) {
2688          VG_(printf)("   ");
2689          ppIRStmt(bb->stmts[j]);
2690          VG_(printf)("\n");
2691       }
2692       VG_(printf)("\n");
2693    }
2694
2695    return bb;
2696 }
2697 #endif /* UNUSED */
2698
2699 /*--------------------------------------------------------------------*/
2700 /*--- end                                              test_main.c ---*/
2701 /*--------------------------------------------------------------------*/