2 /*---------------------------------------------------------------*/
3 /*--- begin ir_opt.c ---*/
4 /*---------------------------------------------------------------*/
7   This file is part of Valgrind, a dynamic binary instrumentation framework.
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
40 #include "main_util.h"
41 #include "main_globals.h"
45 /* Set to 1 for lots of debugging output. */
49 /* What iropt does, 29 Dec 04.
51   It takes an IRSB and produces a new one with the same meaning, defined thus:
54 After execution of the new BB, all guest state and guest memory is
55 the same as after execution of the original. This is true
56 regardless of how the block was exited (at the end vs side exit).
58 In addition, parts of the guest state will be identical to that
59   created by execution of the original at the following observation points:
62 * In a dirty helper call, any parts of the guest state that the
63 helper states that it reads or modifies will be up to date.
64 Also, guest memory will be up to date. Parts of the guest state
65 not marked as being read or modified by the helper cannot be
66 assumed to be up-to-date at the point where the helper is called.
68 * Immediately prior to any load or store, those parts of the guest
69 state marked as requiring precise exceptions will be up to date.
70 Also, guest memory will be up to date. Parts of the guest state
71 not marked as requiring precise exceptions cannot be assumed to
72 be up-to-date at the point of the load/store.
74 The relative order of loads and stores (including loads/stores of
75 guest memory done by dirty helpers annotated as such) is not
76 changed. However, the relative order of loads with no intervening
77 stores/modifies may be changed.
82 There are three levels of optimisation, controlled by
83 vex_control.iropt_level. Define first:
85 "Cheap transformations" are the following sequence:
86 * Redundant-Get removal
87 * Redundant-Put removal
88 * Constant propagation/folding
90 * Specialisation of clean helper functions
93 "Expensive transformations" are the following sequence:
95 * Folding of add/sub chains
96 * Redundant-GetI removal
97 * Redundant-PutI removal
100 Then the transformations are as follows, as defined by
101 vex_control.iropt_level:
103   Level 0:
104      * Flatten into atomic form.
106 Level 1: the following sequence:
107 * Flatten into atomic form.
108 * Cheap transformations.
110 Level 2: the following sequence
111 * Flatten into atomic form.
112 * Cheap transformations.
113 * If block contains any floating or vector types, CSE.
114 * If block contains GetI or PutI, Expensive transformations.
115 * Try unrolling loops. Three possible outcomes:
116 - No effect: do nothing more.
117        - Unrolled a loop, and block does not contain GetI or PutI:
118          Do: * CSE
119              * Dead code removal
120 - Unrolled a loop, and block contains GetI or PutI:
121 Do: * Expensive transformations
122 * Cheap transformations
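
   As a concrete illustration (a sketch only, not the actual driver
   code), the Level 2 sequence just described might read:

      bb = flatten_BB(bb);
      apply the cheap transformations listed above, i.e.
         redundant_get_removal_BB, redundant_put_removal_BB,
         cprop_BB, do_deadcode_BB, spec_helpers_BB;
      if bb uses any floating or vector types
         apply CSE (see the CSE section below);
      if bb contains GetI or PutI
         apply the expensive transformations;
      try unrolling loops, and act on the three outcomes as listed.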
125 /* Implementation notes, 29 Dec 04.
127   TODO (important): I think rPutI removal ignores precise exceptions
128   and is therefore, in a sense, wrong: PutIs are
129   assumed not to write parts of the guest state that we need to have
130   up-to-date at loads/stores.  So far on the x86 guest that has not
131   mattered, since indeed only the x87 FP registers and tags are
132   accessed using GetI/PutI, and there is no need so far for them to
133   be up to date at mem exception points.  The rPutI pass should be fixed.
136 TODO: improve pessimistic handling of precise exceptions
139 TODO: check interaction of rGetI and dirty helpers.
141 F64i constants are treated differently from other constants.
142 They are not regarded as atoms, and instead lifted off and
143 bound to temps. This allows them to participate in CSE, which
144 is important for getting good performance for x86 guest code.
146 CSE up F64 literals (already doing F64is)
148 CSE: consider carefully the requirement for precise exns
149 prior to making CSE any more aggressive. */
152 /*---------------------------------------------------------------*/
153 /*--- Finite mappery, of a sort ---*/
154 /*---------------------------------------------------------------*/
156 /* General map from HWord-sized thing to HWord-sized thing.  Could be done by
157    hashing, but it's not clear whether or not this would really be any faster. */
170 static HashHW* newHHW ( void )
172 HashHW* h = LibVEX_Alloc(sizeof(HashHW));
175 h->inuse = LibVEX_Alloc(h->size * sizeof(Bool));
176 h->key = LibVEX_Alloc(h->size * sizeof(HWord));
177 h->val = LibVEX_Alloc(h->size * sizeof(HWord));
182 /* Look up key in the map. */
184 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
187 /* vex_printf("lookupHHW(%llx)\n", key ); */
188 for (i = 0; i < h->used; i++) {
189 if (h->inuse[i] && h->key[i] == key) {
199 /* Add key->val to the map. Replaces any existing binding for key. */
201 static void addToHHW ( HashHW* h, HWord key, HWord val )
204 /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
206 /* Find and replace existing binding, if any. */
207 for (i = 0; i < h->used; i++) {
208 if (h->inuse[i] && h->key[i] == key) {
214 /* Ensure a space is available. */
215 if (h->used == h->size) {
216 /* Copy into arrays twice the size. */
217 Bool* inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
218 HWord* key2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
219 HWord* val2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
220 for (i = j = 0; i < h->size; i++) {
221 if (!h->inuse[i]) continue;
234 /* Finally, add it. */
235 vassert(h->used < h->size);
236 h->inuse[h->used] = True;
237 h->key[h->used] = key;
238 h->val[h->used] = val;
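
/* Illustrative usage of the map (a sketch only; this fragment is not
   part of any pass):

      HWord   v = 0;
      HashHW* h = newHHW();
      addToHHW(h, (HWord)0x10, (HWord)0x99);
      lookupHHW(h, &v, (HWord)0x10);    .. returns True, sets v to 0x99
      lookupHHW(h, &v, (HWord)0x20);    .. returns False: never bound
*/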
243 /*---------------------------------------------------------------*/
244 /*--- Flattening out a BB into atomic SSA form ---*/
245 /*---------------------------------------------------------------*/
247 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
248 copies made by flattening. If in doubt return False. */
250 static Bool isFlat ( IRExpr* e )
252 if (e->tag == Iex_Get)
254 if (e->tag == Iex_Binop)
255 return toBool( isIRAtom(e->Iex.Binop.arg1)
256 && isIRAtom(e->Iex.Binop.arg2) );
257 if (e->tag == Iex_Load)
258 return isIRAtom(e->Iex.Load.addr);
262 /* Flatten out 'ex' so it is atomic, returning a new expression with
263    the same value, after having appended extra IRTemp assignments to
       the end of 'bb'. */
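
/* For example (illustrative), flattening the statement

      PUT(0) = Add32(GET:I32(0),0x1:I32)

   appends

      t1 = GET:I32(0)
      t2 = Add32(t1,0x1:I32)
      PUT(0) = t2

   to 'bb', so that every operator argument is an atom (an IRTemp or
   an IRConst). */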
266 static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
270 IRType ty = typeOfIRExpr(bb->tyenv, ex);
276 t1 = newIRTemp(bb->tyenv, ty);
277 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
278 IRExpr_GetI(ex->Iex.GetI.descr,
279 flatten_Expr(bb, ex->Iex.GetI.ix),
280 ex->Iex.GetI.bias)));
281 return IRExpr_RdTmp(t1);
284 t1 = newIRTemp(bb->tyenv, ty);
286 IRStmt_WrTmp(t1, ex));
287 return IRExpr_RdTmp(t1);
290 t1 = newIRTemp(bb->tyenv, ty);
291 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
292 IRExpr_Qop(ex->Iex.Qop.op,
293 flatten_Expr(bb, ex->Iex.Qop.arg1),
294 flatten_Expr(bb, ex->Iex.Qop.arg2),
295 flatten_Expr(bb, ex->Iex.Qop.arg3),
296 flatten_Expr(bb, ex->Iex.Qop.arg4))));
297 return IRExpr_RdTmp(t1);
300 t1 = newIRTemp(bb->tyenv, ty);
301 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
302 IRExpr_Triop(ex->Iex.Triop.op,
303 flatten_Expr(bb, ex->Iex.Triop.arg1),
304 flatten_Expr(bb, ex->Iex.Triop.arg2),
305 flatten_Expr(bb, ex->Iex.Triop.arg3))));
306 return IRExpr_RdTmp(t1);
309 t1 = newIRTemp(bb->tyenv, ty);
310 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
311 IRExpr_Binop(ex->Iex.Binop.op,
312 flatten_Expr(bb, ex->Iex.Binop.arg1),
313 flatten_Expr(bb, ex->Iex.Binop.arg2))));
314 return IRExpr_RdTmp(t1);
317 t1 = newIRTemp(bb->tyenv, ty);
318 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
319 IRExpr_Unop(ex->Iex.Unop.op,
320 flatten_Expr(bb, ex->Iex.Unop.arg))));
321 return IRExpr_RdTmp(t1);
324 t1 = newIRTemp(bb->tyenv, ty);
325 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
326 IRExpr_Load(ex->Iex.Load.end,
328 flatten_Expr(bb, ex->Iex.Load.addr))));
329 return IRExpr_RdTmp(t1);
332 newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
333 for (i = 0; newargs[i]; i++)
334 newargs[i] = flatten_Expr(bb, newargs[i]);
335 t1 = newIRTemp(bb->tyenv, ty);
336 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
337 IRExpr_CCall(ex->Iex.CCall.cee,
340 return IRExpr_RdTmp(t1);
343 t1 = newIRTemp(bb->tyenv, ty);
344 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
345 IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
346 flatten_Expr(bb, ex->Iex.Mux0X.expr0),
347 flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
348 return IRExpr_RdTmp(t1);
351          /* Lift F64i constants out onto temps so they can be CSEd. */
353 if (ex->Iex.Const.con->tag == Ico_F64i) {
354 t1 = newIRTemp(bb->tyenv, ty);
355 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
356 IRExpr_Const(ex->Iex.Const.con)));
357 return IRExpr_RdTmp(t1);
359 /* Leave all other constants alone. */
370 vpanic("flatten_Expr");
375 /* Append a completely flattened form of 'st' to the end of 'bb'. */
377 static void flatten_Stmt ( IRSB* bb, IRStmt* st )
380 IRExpr *e1, *e2, *e3, *e4, *e5;
385 if (isIRAtom(st->Ist.Put.data)) {
386          /* optimisation to reduce the amount of heap wasted by
                flattening data that is already atomic */
388 addStmtToIRSB(bb, st);
390 /* general case, always correct */
391 e1 = flatten_Expr(bb, st->Ist.Put.data);
392 addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
396 e1 = flatten_Expr(bb, st->Ist.PutI.ix);
397 e2 = flatten_Expr(bb, st->Ist.PutI.data);
398 addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
404 if (isFlat(st->Ist.WrTmp.data)) {
405          /* optimisation, to reduce the number of tmp-tmp copies
                made by flattening data that is already flat */
407 addStmtToIRSB(bb, st);
409 /* general case, always correct */
410 e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
411 addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
415 e1 = flatten_Expr(bb, st->Ist.Store.addr);
416 e2 = flatten_Expr(bb, st->Ist.Store.data);
417 addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
420 cas = st->Ist.CAS.details;
421 e1 = flatten_Expr(bb, cas->addr);
422 e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
423 e3 = flatten_Expr(bb, cas->expdLo);
424 e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
425 e5 = flatten_Expr(bb, cas->dataLo);
426 cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
427 e1, e2, e3, e4, e5 );
428 addStmtToIRSB(bb, IRStmt_CAS(cas2));
431 e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
432 e2 = st->Ist.LLSC.storedata
433 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
435 addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
436 st->Ist.LLSC.result, e1, e2));
439 d = st->Ist.Dirty.details;
442 d2->args = shallowCopyIRExprVec(d2->args);
443 if (d2->mFx != Ifx_None) {
444 d2->mAddr = flatten_Expr(bb, d2->mAddr);
446 vassert(d2->mAddr == NULL);
448 d2->guard = flatten_Expr(bb, d2->guard);
449 for (i = 0; d2->args[i]; i++)
450 d2->args[i] = flatten_Expr(bb, d2->args[i]);
451 addStmtToIRSB(bb, IRStmt_Dirty(d2));
456 addStmtToIRSB(bb, st);
459 e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
460 e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
461 addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
464 e1 = flatten_Expr(bb, st->Ist.Exit.guard);
465 addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
472 vpanic("flatten_Stmt");
477 static IRSB* flatten_BB ( IRSB* in )
482 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
483 for (i = 0; i < in->stmts_used; i++)
485 flatten_Stmt( out, in->stmts[i] );
486 out->next = flatten_Expr( out, in->next );
487 out->jumpkind = in->jumpkind;
492 /*---------------------------------------------------------------*/
493 /*--- In-place removal of redundant GETs ---*/
494 /*---------------------------------------------------------------*/
496 /* Scan forwards, building up an environment binding (min offset, max
497 offset) pairs to values, which will either be temps or constants.
499 On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
500 env and if it matches, replace the Get with the stored value. If
501 there is no match, add a (minoff,maxoff) :-> t binding.
503 On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
504 any binding which fully or partially overlaps with (minoff,maxoff).
505 Then add a new (minoff,maxoff) :-> t or c binding. */
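
/* Worked example (illustrative; 32-bit Gets/Puts at offsets 0 and 4):

      t1 = GET:I32(0)                t1 = GET:I32(0)
      t2 = GET:I32(0)                t2 = t1
      PUT(4) = t3          ==>       PUT(4) = t3
      t4 = GET:I32(0)                t4 = t1
      PUT(0) = t5                    PUT(0) = t5
      t6 = GET:I32(0)                t6 = t5

   The Put at offset 4 does not overlap bytes 0..3, so the
   (0,3) :-> t1 binding survives; the Put at offset 0 replaces it
   with t5. */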
507 /* Extract the min/max offsets from a guest state array descriptor. */
510 static void getArrayBounds ( IRRegArray* descr,
511 UInt* minoff, UInt* maxoff )
513 *minoff = descr->base;
514 *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
515 vassert((*minoff & ~0xFFFF) == 0);
516 vassert((*maxoff & ~0xFFFF) == 0);
517 vassert(*minoff <= *maxoff);
520 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
522 static UInt mk_key_GetPut ( Int offset, IRType ty )
524 /* offset should fit in 16 bits. */
525 UInt minoff = offset;
526 UInt maxoff = minoff + sizeofIRType(ty) - 1;
527 vassert((minoff & ~0xFFFF) == 0);
528 vassert((maxoff & ~0xFFFF) == 0);
529 return (minoff << 16) | maxoff;
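
/* For example (illustrative): a 32-bit access at offset 16 covers
   bytes 16 .. 19, so mk_key_GetPut(16, Ity_I32) produces
   (16 << 16) | 19 = 0x100013. */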
532 static UInt mk_key_GetIPutI ( IRRegArray* descr )
535 getArrayBounds( descr, &minoff, &maxoff );
536 vassert((minoff & ~0xFFFF) == 0);
537 vassert((maxoff & ~0xFFFF) == 0);
538 return (minoff << 16) | maxoff;
541 /* Supposing h has keys of the form generated by mk_key_GetPut and
542    mk_key_GetIPutI, invalidate any key which overlaps (k_lo .. k_hi). */
545 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
549 vassert(k_lo <= k_hi);
550    /* invalidate any env entries which in any way overlap (k_lo .. k_hi). */
552 /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
554 for (j = 0; j < h->used; j++) {
557 e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
558 e_hi = ((UInt)h->key[j]) & 0xFFFF;
559 vassert(e_lo <= e_hi);
560 if (e_hi < k_lo || k_hi < e_lo)
561 continue; /* no overlap possible */
563 /* overlap; invalidate */
569 static void redundant_get_removal_BB ( IRSB* bb )
571 HashHW* env = newHHW();
572 UInt key = 0; /* keep gcc -O happy */
576 for (i = 0; i < bb->stmts_used; i++) {
577 IRStmt* st = bb->stmts[i];
579 if (st->tag == Ist_NoOp)
583 if (st->tag == Ist_WrTmp
584 && st->Ist.WrTmp.data->tag == Iex_Get) {
585 /* st is 't = Get(...)'. Look up in the environment and see
586 if the Get can be replaced. */
587 IRExpr* get = st->Ist.WrTmp.data;
588 key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
590 if (lookupHHW(env, &val, (HWord)key)) {
592 /* Note, we could do better here. If the types are
593 different we don't do the substitution, since doing so
594 could lead to invalidly-typed IR. An improvement would
595 be to stick in a reinterpret-style cast, although that
596 would make maintaining flatness more difficult. */
597 IRExpr* valE = (IRExpr*)val;
598 Bool typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
599 == st->Ist.WrTmp.data->Iex.Get.ty );
600 if (typesOK && DEBUG_IROPT) {
601 vex_printf("rGET: "); ppIRExpr(get);
602 vex_printf(" -> "); ppIRExpr(valE);
606 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
608 /* Not found, but at least we know that t and the Get(...)
609             are now associated.  So add a binding to reflect that fact. */
611 addToHHW( env, (HWord)key,
612 (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
616       /* Deal with Puts: invalidate any env entries overlapped by this
             Put or PutI. */
618 if (st->tag == Ist_Put || st->tag == Ist_PutI) {
620 if (st->tag == Ist_Put) {
621 key = mk_key_GetPut( st->Ist.Put.offset,
622 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
624 vassert(st->tag == Ist_PutI);
625 key = mk_key_GetIPutI( st->Ist.PutI.descr );
628          k_lo = (key >> 16) & 0xFFFF;
629          k_hi = key & 0xFFFF;
630          invalidateOverlaps(env, k_lo, k_hi);
633 if (st->tag == Ist_Dirty) {
634 /* Deal with dirty helpers which write or modify guest state.
635          Invalidate the entire env.  We could do a lot better here. */
637 IRDirty* d = st->Ist.Dirty.details;
639 for (j = 0; j < d->nFxState; j++) {
640 if (d->fxState[j].fx == Ifx_Modify
641 || d->fxState[j].fx == Ifx_Write)
645 /* dump the entire env (not clever, but correct ...) */
646 for (j = 0; j < env->used; j++)
647 env->inuse[j] = False;
648 if (0) vex_printf("rGET: trash env due to dirty helper\n");
652 /* add this one to the env, if appropriate */
653 if (st->tag == Ist_Put) {
654 vassert(isIRAtom(st->Ist.Put.data));
655 addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
658 } /* for (i = 0; i < bb->stmts_used; i++) */
663 /*---------------------------------------------------------------*/
664 /*--- In-place removal of redundant PUTs ---*/
665 /*---------------------------------------------------------------*/
667 /* Find any Get uses in st and invalidate any partially or fully
668 overlapping ranges listed in env. Due to the flattening phase, the
669 only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
671 static void handle_gets_Stmt (
674 Bool (*preciseMemExnsFn)(Int,Int)
678 UInt key = 0; /* keep gcc -O happy */
685       /* This is the only interesting case.  Deal with Gets in the RHS
             expression. */
688 e = st->Ist.WrTmp.data;
692 key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
696 key = mk_key_GetIPutI ( e->Iex.GetI.descr );
707    k_lo = (key >> 16) & 0xFFFF;
708    k_hi = key & 0xFFFF;
709    invalidateOverlaps(env, k_lo, k_hi);
713 /* Be very conservative for dirty helper calls; dump the entire
714 environment. The helper might read guest state, in which
715 case it needs to be flushed first. Also, the helper might
716 access guest memory, in which case all parts of the guest
717          state requiring precise exceptions need to be flushed.  The
718 crude solution is just to flush everything; we could easily
719 enough do a lot better if needed. */
720 /* Probably also overly-conservative, but also dump everything
721 if we hit a memory bus event (fence, lock, unlock). Ditto
722 AbiHints, CASs, LLs and SCs. */
724 vassert(isIRAtom(st->Ist.AbiHint.base));
725 vassert(isIRAtom(st->Ist.AbiHint.nia));
731 for (j = 0; j < env->used; j++)
732 env->inuse[j] = False;
735 /* all other cases are boring. */
737 vassert(isIRAtom(st->Ist.Store.addr));
738 vassert(isIRAtom(st->Ist.Store.data));
743 vassert(isIRAtom(st->Ist.Exit.guard));
747 vassert(isIRAtom(st->Ist.PutI.ix));
748 vassert(isIRAtom(st->Ist.PutI.data));
759 vpanic("handle_gets_Stmt");
763 /* This statement accesses memory. So we need to dump all parts
764 of the environment corresponding to guest state that may not
765 be reordered with respect to memory references. That means
766 at least the stack pointer. */
767 for (j = 0; j < env->used; j++) {
770 if (vex_control.iropt_precise_memory_exns) {
771 /* Precise exceptions required. Flush all guest state. */
772 env->inuse[j] = False;
774             /* Just flush the minimal amount required, as computed by
                   preciseMemExnsFn. */
776 HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
777 HWord k_hi = env->key[j] & 0xFFFF;
778 if (preciseMemExnsFn( k_lo, k_hi ))
779 env->inuse[j] = False;
787 /* Scan backwards, building up a set of (min offset, max
788 offset) pairs, indicating those parts of the guest state
789 for which the next event is a write.
791 On seeing a conditional exit, empty the set.
793 On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
794 completely within the set, remove the Put. Otherwise, add
795 (minoff,maxoff) to the set.
797 On seeing 'Get (minoff,maxoff)', remove any part of the set
798 overlapping (minoff,maxoff). The same has to happen for any events
799    which implicitly read parts of the guest state: dirty helper calls
       and loads/stores. */
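
/* Worked example (illustrative; the scan runs backwards from the end):

      PUT(0) = t1      -- removed: bytes 0..3 are written again below,
      PUT(4) = t2         with no intervening read or exit
      t3 = GET:I32(4)
      PUT(0) = t4

   The first Put at offset 0 becomes a no-op.  PUT(4) is kept because
   the Get reads bytes 4..7 first, and the final PUT(0) is kept because
   it is the last write of that range before the block ends. */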
803 static void redundant_put_removal_BB (
805 Bool (*preciseMemExnsFn)(Int,Int)
811 UInt key = 0; /* keep gcc -O happy */
813 HashHW* env = newHHW();
814 for (i = bb->stmts_used-1; i >= 0; i--) {
817 if (st->tag == Ist_NoOp)
820 /* Deal with conditional exits. */
821 if (st->tag == Ist_Exit) {
822 /* Since control may not get beyond this point, we must empty
823 out the set, since we can no longer claim that the next
824             event for any part of the guest state is definitely a write. */
826 vassert(isIRAtom(st->Ist.Exit.guard));
827 for (j = 0; j < env->used; j++)
828 env->inuse[j] = False;
836 key = mk_key_GetPut( st->Ist.Put.offset,
837 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
838 vassert(isIRAtom(st->Ist.Put.data));
842 key = mk_key_GetIPutI( st->Ist.PutI.descr );
843 vassert(isIRAtom(st->Ist.PutI.ix));
844 vassert(isIRAtom(st->Ist.PutI.data));
849 if (isPut && st->tag != Ist_PutI) {
850 /* See if any single entry in env overlaps this Put. This is
851 simplistic in that the transformation is valid if, say, two
852 or more entries in the env overlap this Put, but the use of
853 lookupHHW will only find a single entry which exactly
854 overlaps this Put. This is suboptimal but safe. */
855 if (lookupHHW(env, NULL, (HWord)key)) {
856 /* This Put is redundant because a later one will overwrite
857 it. So NULL (nop) it out. */
859 vex_printf("rPUT: "); ppIRStmt(st);
862 bb->stmts[i] = IRStmt_NoOp();
864 /* We can't demonstrate that this Put is redundant, so add it
865 to the running collection. */
866 addToHHW(env, (HWord)key, 0);
871 /* Deal with Gets. These remove bits of the environment since
872 appearance of a Get means that the next event for that slice
873 of the guest state is no longer a write, but a read. Also
874 deals with implicit reads of guest state needed to maintain
875 precise exceptions. */
876 handle_gets_Stmt( env, st, preciseMemExnsFn );
881 /*---------------------------------------------------------------*/
882 /*--- Constant propagation and folding ---*/
883 /*---------------------------------------------------------------*/
885 /* The env in this section is a map from IRTemp to IRExpr*,
886 that is, an array indexed by IRTemp. */
888 /* Are both expressions simply the same IRTemp ? */
889 static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
891 return toBool( e1->tag == Iex_RdTmp
892 && e2->tag == Iex_RdTmp
893 && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
896 static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
898 return toBool( e1->tag == Iex_Const
899 && e2->tag == Iex_Const
900 && e1->Iex.Const.con->tag == Ico_U32
901 && e2->Iex.Const.con->tag == Ico_U32
902 && e1->Iex.Const.con->Ico.U32
903 == e2->Iex.Const.con->Ico.U32 );
906 /* Are both expressions either the same IRTemp or IRConst-U32s ?  If
       in doubt, say No. */
908 static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
912 return sameIRTemps(e1, e2);
914 return sameIcoU32s(e1, e2);
920 static Bool notBool ( Bool b )
922 if (b == True) return False;
923 if (b == False) return True;
927 /* Make a zero which has the same type as the result of the given
       primop. */
929 static IRExpr* mkZeroForXor ( IROp op )
932 case Iop_Xor8: return IRExpr_Const(IRConst_U8(0));
933 case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
934 case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
935 case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
936 case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
937 default: vpanic("mkZeroForXor: bad primop");
942 static IRExpr* fold_Expr ( IRExpr* e )
945 IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
948 if (e->tag == Iex_Unop
949 && e->Iex.Unop.arg->tag == Iex_Const) {
950 switch (e->Iex.Unop.op) {
952 e2 = IRExpr_Const(IRConst_U8(toUChar(
953 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
957 e2 = IRExpr_Const(IRConst_U32(
958 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
962 e2 = IRExpr_Const(IRConst_U64(
963 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
968 e2 = IRExpr_Const(IRConst_U8(toUChar(
969 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
973 e2 = IRExpr_Const(IRConst_U16(toUShort(
974 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
978 e2 = IRExpr_Const(IRConst_U32(
979 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
983 e2 = IRExpr_Const(IRConst_U64(
984 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
985 ? 0xFFFFFFFFFFFFFFFFULL : 0));
989 /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
992 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
996 e2 = IRExpr_Const(IRConst_U64(
997 0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1000 e2 = IRExpr_Const(IRConst_U64(
1001 0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1004 e2 = IRExpr_Const(IRConst_U32(
1005 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1008 e2 = IRExpr_Const(IRConst_U32(
1009 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1012 e2 = IRExpr_Const(IRConst_U16(toUShort(
1013 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1016 e2 = IRExpr_Const(IRConst_U8(toUChar(
1017 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1020 e2 = IRExpr_Const(IRConst_U1(toBool(
1021 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1025 e2 = IRExpr_Const(IRConst_U1(toBool(
1026 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1031 e2 = IRExpr_Const(IRConst_U64(
1032 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1035 e2 = IRExpr_Const(IRConst_U32(
1036 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1039 e2 = IRExpr_Const(IRConst_U16(toUShort(
1040 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1043 e2 = IRExpr_Const(IRConst_U8(toUChar(
1044 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1048 e2 = IRExpr_Const(IRConst_U1(
1049 notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1053 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1055 e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1059 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1061 e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1065 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1066 w64 &= 0x00000000FFFFFFFFULL;
1067 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1070 case Iop_64HIto32: {
1071             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1072             w64 >>= 32;
1073             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1077 e2 = IRExpr_Const(IRConst_U64(
1079 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1083 e2 = IRExpr_Const(IRConst_U1(toBool(
1085 (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1089 e2 = IRExpr_Const(IRConst_U1(toBool(
1091 (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1095 e2 = IRExpr_Const(IRConst_U1(toBool(
1096 0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1100 case Iop_CmpwNEZ32: {
1101 UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1103 e2 = IRExpr_Const(IRConst_U32( 0 ));
1105 e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1108 case Iop_CmpwNEZ64: {
1109 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1111 e2 = IRExpr_Const(IRConst_U64( 0 ));
1113 e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1118 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1119 Int s32 = (Int)(u32 & 0xFFFFFFFF);
1120 s32 = (s32 | (-s32));
1121 e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1126 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1127 Long s64 = (Long)u64;
1128 s64 = (s64 | (-s64));
1129 e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1139 if (e->tag == Iex_Binop) {
1140 if (e->Iex.Binop.arg1->tag == Iex_Const
1141 && e->Iex.Binop.arg2->tag == Iex_Const) {
1142 /* cases where both args are consts */
1143 switch (e->Iex.Binop.op) {
1147 e2 = IRExpr_Const(IRConst_U8(toUChar(
1148 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1149 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1152 e2 = IRExpr_Const(IRConst_U16(toUShort(
1153 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1154 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1157 e2 = IRExpr_Const(IRConst_U32(
1158 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1159 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1162 e2 = IRExpr_Const(IRConst_U64(
1163 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1164 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1169 e2 = IRExpr_Const(IRConst_U8(toUChar(
1170 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1171 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1174 e2 = IRExpr_Const(IRConst_U16(toUShort(
1175 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1176 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1179 e2 = IRExpr_Const(IRConst_U32(
1180 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1181 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1184 e2 = IRExpr_Const(IRConst_U64(
1185 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1186 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1191 e2 = IRExpr_Const(IRConst_U8(toUChar(
1192 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1193 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1196 e2 = IRExpr_Const(IRConst_U32(
1197 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1198 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1201 e2 = IRExpr_Const(IRConst_U64(
1202 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1203 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1208 e2 = IRExpr_Const(IRConst_U8(toUChar(
1209 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1210 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1213 e2 = IRExpr_Const(IRConst_U32(
1214 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1215 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1218 e2 = IRExpr_Const(IRConst_U64(
1219 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1220 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1225 e2 = IRExpr_Const(IRConst_U8(toUChar(
1226 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1227 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1230 e2 = IRExpr_Const(IRConst_U32(
1231 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1232 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1235 e2 = IRExpr_Const(IRConst_U64(
1236 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1237 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1242 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1243 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1244 UInt res = u32a > u32b ? u32a : u32b;
1245 e2 = IRExpr_Const(IRConst_U32(res));
1251 e2 = IRExpr_Const(IRConst_U32(
1252 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1253 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1256 e2 = IRExpr_Const(IRConst_U64(
1257 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1258 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1263 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1264 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1265 Int s32a = (Int)u32a;
1266 Int s32b = (Int)u32b;
1267 Long s64a = (Long)s32a;
1268 Long s64b = (Long)s32b;
1269 Long sres = s64a * s64b;
1270 ULong ures = (ULong)sres;
1271 e2 = IRExpr_Const(IRConst_U64(ures));
1277 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1278 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1279 if (shift >= 0 && shift <= 31)
1280 e2 = IRExpr_Const(IRConst_U32(
1281 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1285 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1286 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1287 if (shift >= 0 && shift <= 63)
1288 e2 = IRExpr_Const(IRConst_U64(
1289 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1297 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1298 s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1299 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1300 if (shift >= 0 && shift <= 31) {
1301 s32 >>=/*signed*/ shift;
1302 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1308 /*signed*/ Long s64;
1309 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1310 s64 = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1311 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1312 if (shift >= 0 && shift <= 63) {
1313 s64 >>=/*signed*/ shift;
1314 e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1322 /*unsigned*/ UInt u32;
1323 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1324 u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1325 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1326 if (shift >= 0 && shift <= 31) {
1327 u32 >>=/*unsigned*/ shift;
1328 e2 = IRExpr_Const(IRConst_U32(u32));
1334 /*unsigned*/ ULong u64;
1335 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1336 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1337 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1338 if (shift >= 0 && shift <= 63) {
1339 u64 >>=/*unsigned*/ shift;
1340 e2 = IRExpr_Const(IRConst_U64(u64));
1347 e2 = IRExpr_Const(IRConst_U1(toBool(
1348 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1349 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1352 e2 = IRExpr_Const(IRConst_U1(toBool(
1353 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1354 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1359 e2 = IRExpr_Const(IRConst_U1(toBool(
1360 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1361 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1364 e2 = IRExpr_Const(IRConst_U1(toBool(
1365 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1366 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1369 e2 = IRExpr_Const(IRConst_U1(toBool(
1370 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1371 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1376 e2 = IRExpr_Const(IRConst_U1(toBool(
1377 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1378 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1383 e2 = IRExpr_Const(IRConst_U1(toBool(
1384 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1385 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1390 e2 = IRExpr_Const(IRConst_U1(toBool(
1391 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1392 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1397 e2 = IRExpr_Const(IRConst_U1(toBool(
1398 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1399 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1403 case Iop_CmpORD32S: {
1405 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1406 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1407 Int s32a = (Int)u32a;
1408 Int s32b = (Int)u32b;
1409 Int r = 0x2; /* EQ */
1413 else if (s32a > s32b) {
1416 e2 = IRExpr_Const(IRConst_U32(r));
1422 e2 = IRExpr_Const(IRConst_U64(
1423 (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
1424 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
1428 /* We can't fold this, because there is no way to
1429                express the result in IR, but at least pretend to
1430 handle it, so as to stop getting blasted with
1431 no-rule-for-this-primop messages. */
1440 /* other cases (identities, etc) */
1442 /* Shl64/Shr64(x,0) ==> x */
1443 if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
1444 && e->Iex.Binop.arg2->tag == Iex_Const
1445 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
1446 e2 = e->Iex.Binop.arg1;
1449 /* Shl32/Shr32(x,0) ==> x */
1450 if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
1451 && e->Iex.Binop.arg2->tag == Iex_Const
1452 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
1453 e2 = e->Iex.Binop.arg1;
1456 /* Or8(x,0) ==> x */
1457 if ((e->Iex.Binop.op == Iop_Or8)
1458 && e->Iex.Binop.arg2->tag == Iex_Const
1459 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
1460 e2 = e->Iex.Binop.arg1;
1463 /* Or16(x,0) ==> x */
1464 if ((e->Iex.Binop.op == Iop_Or16)
1465 && e->Iex.Binop.arg2->tag == Iex_Const
1466 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
1467 e2 = e->Iex.Binop.arg1;
1470 /* Or32/Add32/Max32U(x,0) ==> x */
1471 if ((e->Iex.Binop.op == Iop_Add32
1472 || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
1473 && e->Iex.Binop.arg2->tag == Iex_Const
1474 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
1475 e2 = e->Iex.Binop.arg1;
1478 /* Add32(t,t) ==> t << 1. Memcheck doesn't understand that
1479 x+x produces a defined least significant bit, and it seems
1480 simplest just to get rid of the problem by rewriting it
1481 out, since the opportunity to do so exists. */
1482 if (e->Iex.Binop.op == Iop_Add32
1483 && e->Iex.Binop.arg1->tag == Iex_RdTmp
1484 && e->Iex.Binop.arg2->tag == Iex_RdTmp
1485 && e->Iex.Binop.arg1->Iex.RdTmp.tmp
1486 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
1487 e2 = IRExpr_Binop(Iop_Shl32,
1489 IRExpr_Const(IRConst_U8(1)));
1492 /* Add64(t,t) ==> t << 1; rationale as for Add32(t,t) above. */
1493 if (e->Iex.Binop.op == Iop_Add64
1494 && e->Iex.Binop.arg1->tag == Iex_RdTmp
1495 && e->Iex.Binop.arg2->tag == Iex_RdTmp
1496 && e->Iex.Binop.arg1->Iex.RdTmp.tmp
1497 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
1498 e2 = IRExpr_Binop(Iop_Shl64,
1500 IRExpr_Const(IRConst_U8(1)));
1503 /* Add8(t,t) ==> t << 1; rationale as for Add32(t,t) above. */
1504 if (e->Iex.Binop.op == Iop_Add8
1505 && e->Iex.Binop.arg1->tag == Iex_RdTmp
1506 && e->Iex.Binop.arg2->tag == Iex_RdTmp
1507 && e->Iex.Binop.arg1->Iex.RdTmp.tmp
1508 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
1509 e2 = IRExpr_Binop(Iop_Shl8,
1511 IRExpr_Const(IRConst_U8(1)));
1513 /* NB no Add16(t,t) case yet as no known test case exists */
1515 /* Or64/Add64(x,0) ==> x */
1516 if ((e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64)
1517 && e->Iex.Binop.arg2->tag == Iex_Const
1518 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 == 0) {
1519 e2 = e->Iex.Binop.arg1;
1522 /* And32(x,0xFFFFFFFF) ==> x */
1523 if (e->Iex.Binop.op == Iop_And32
1524 && e->Iex.Binop.arg2->tag == Iex_Const
1525 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
1526 e2 = e->Iex.Binop.arg1;
1529 /* And32(x,0) ==> 0 */
1530 if (e->Iex.Binop.op == Iop_And32
1531 && e->Iex.Binop.arg2->tag == Iex_Const
1532 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
1533 e2 = IRExpr_Const(IRConst_U32(0));
1536 /* And32/Shl32(0,x) ==> 0 */
1537 if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
1538 && e->Iex.Binop.arg1->tag == Iex_Const
1539 && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
1540 e2 = IRExpr_Const(IRConst_U32(0));
1543 /* Or8(0,x) ==> x */
1544 if (e->Iex.Binop.op == Iop_Or8
1545 && e->Iex.Binop.arg1->tag == Iex_Const
1546 && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
1547 e2 = e->Iex.Binop.arg2;
1550 /* Or32/Max32U(0,x) ==> x */
1551 if ((e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
1552 && e->Iex.Binop.arg1->tag == Iex_Const
1553 && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
1554 e2 = e->Iex.Binop.arg2;
1557 /* Or64(0,x) ==> x */
1558 if (e->Iex.Binop.op == Iop_Or64
1559 && e->Iex.Binop.arg1->tag == Iex_Const
1560 && e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 0) {
1561 e2 = e->Iex.Binop.arg2;
1564 /* Or8/16/32/64(t,t) ==> t, for some IRTemp t */
1565 /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
1566 /* Max32U(t,t) ==> t, for some IRTemp t */
1567 if ( (e->Iex.Binop.op == Iop_And64
1568 || e->Iex.Binop.op == Iop_And32
1569 || e->Iex.Binop.op == Iop_And16
1570 || e->Iex.Binop.op == Iop_And8
1571 || e->Iex.Binop.op == Iop_Or64
1572 || e->Iex.Binop.op == Iop_Or32
1573 || e->Iex.Binop.op == Iop_Or16
1574 || e->Iex.Binop.op == Iop_Or8
1575 || e->Iex.Binop.op == Iop_Max32U)
1576 && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
1577 e2 = e->Iex.Binop.arg1;
1580 /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
1581 if ( (e->Iex.Binop.op == Iop_Xor64
1582 || e->Iex.Binop.op == Iop_Xor32
1583 || e->Iex.Binop.op == Iop_Xor16
1584 || e->Iex.Binop.op == Iop_Xor8
1585 || e->Iex.Binop.op == Iop_XorV128)
1586 && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
1587 e2 = mkZeroForXor(e->Iex.Binop.op);
1594 if (e->tag == Iex_Mux0X) {
1595       /* is the discriminant a constant? */
1596 if (e->Iex.Mux0X.cond->tag == Iex_Const) {
1598 /* assured us by the IR type rules */
1599 vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
1600 zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
1601 ->Iex.Const.con->Ico.U8));
1602 e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
1605 /* are the arms identical? (pretty weedy test) */
1606 if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
1607 e->Iex.Mux0X.exprX)) {
1608 e2 = e->Iex.Mux0X.expr0;
1612 if (DEBUG_IROPT && e2 != e) {
1613 vex_printf("FOLD: ");
1614 ppIRExpr(e); vex_printf(" -> ");
1615 ppIRExpr(e2); vex_printf("\n");
1624 vpanic("fold_Expr: no rule for the above");
1626 if (vex_control.iropt_verbosity > 0) {
1627 vex_printf("vex iropt: fold_Expr: no rule for: ");
1636 /* Apply the subst to a simple 1-level expression -- guaranteed to be
1637 1-level due to previous flattening pass. */
1639 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
1643 if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
1644 return env[(Int)ex->Iex.RdTmp.tmp];
1646 /* not bound in env */
1655 vassert(isIRAtom(ex->Iex.GetI.ix));
1658 subst_Expr(env, ex->Iex.GetI.ix),
1663 vassert(isIRAtom(ex->Iex.Qop.arg1));
1664 vassert(isIRAtom(ex->Iex.Qop.arg2));
1665 vassert(isIRAtom(ex->Iex.Qop.arg3));
1666 vassert(isIRAtom(ex->Iex.Qop.arg4));
1669 subst_Expr(env, ex->Iex.Qop.arg1),
1670 subst_Expr(env, ex->Iex.Qop.arg2),
1671 subst_Expr(env, ex->Iex.Qop.arg3),
1672 subst_Expr(env, ex->Iex.Qop.arg4)
1676 vassert(isIRAtom(ex->Iex.Triop.arg1));
1677 vassert(isIRAtom(ex->Iex.Triop.arg2));
1678 vassert(isIRAtom(ex->Iex.Triop.arg3));
1679 return IRExpr_Triop(
1681 subst_Expr(env, ex->Iex.Triop.arg1),
1682 subst_Expr(env, ex->Iex.Triop.arg2),
1683 subst_Expr(env, ex->Iex.Triop.arg3)
1687 vassert(isIRAtom(ex->Iex.Binop.arg1));
1688 vassert(isIRAtom(ex->Iex.Binop.arg2));
1689 return IRExpr_Binop(
1691 subst_Expr(env, ex->Iex.Binop.arg1),
1692 subst_Expr(env, ex->Iex.Binop.arg2)
1696 vassert(isIRAtom(ex->Iex.Unop.arg));
1699 subst_Expr(env, ex->Iex.Unop.arg)
1703 vassert(isIRAtom(ex->Iex.Load.addr));
1707 subst_Expr(env, ex->Iex.Load.addr)
1712 IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
1713 for (i = 0; args2[i]; i++) {
1714 vassert(isIRAtom(args2[i]));
1715 args2[i] = subst_Expr(env, args2[i]);
1717 return IRExpr_CCall(
1719 ex->Iex.CCall.retty,
1725 vassert(isIRAtom(ex->Iex.Mux0X.cond));
1726 vassert(isIRAtom(ex->Iex.Mux0X.expr0));
1727 vassert(isIRAtom(ex->Iex.Mux0X.exprX));
1728 return IRExpr_Mux0X(
1729 subst_Expr(env, ex->Iex.Mux0X.cond),
1730 subst_Expr(env, ex->Iex.Mux0X.expr0),
1731 subst_Expr(env, ex->Iex.Mux0X.exprX)
1735 vex_printf("\n\n"); ppIRExpr(ex);
1736 vpanic("subst_Expr");
1742 /* Apply the subst to stmt, then fold the result as much as possible.
1743 Much simplified due to stmt being previously flattened. As a
1744 result of this, the stmt may wind up being turned into a no-op.
1746 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
1749 vex_printf("\nsubst and fold stmt\n");
1756 vassert(isIRAtom(st->Ist.AbiHint.base));
1757 vassert(isIRAtom(st->Ist.AbiHint.nia));
1758 return IRStmt_AbiHint(
1759 fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
1760 st->Ist.AbiHint.len,
1761 fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
1764 vassert(isIRAtom(st->Ist.Put.data));
1767 fold_Expr(subst_Expr(env, st->Ist.Put.data))
1771 vassert(isIRAtom(st->Ist.PutI.ix));
1772 vassert(isIRAtom(st->Ist.PutI.data));
1775 fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
1777 fold_Expr(subst_Expr(env, st->Ist.PutI.data))
1781 /* This is the one place where an expr (st->Ist.WrTmp.data) is
1782 allowed to be more than just a constant or a tmp. */
1783 return IRStmt_WrTmp(
1785 fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
1789 vassert(isIRAtom(st->Ist.Store.addr));
1790 vassert(isIRAtom(st->Ist.Store.data));
1791 return IRStmt_Store(
1793 fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
1794 fold_Expr(subst_Expr(env, st->Ist.Store.data))
1799 cas = st->Ist.CAS.details;
1800 vassert(isIRAtom(cas->addr));
1801 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
1802 vassert(isIRAtom(cas->expdLo));
1803 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
1804 vassert(isIRAtom(cas->dataLo));
1806 cas->oldHi, cas->oldLo, cas->end,
1807 fold_Expr(subst_Expr(env, cas->addr)),
1808 cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
1809 fold_Expr(subst_Expr(env, cas->expdLo)),
1810 cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
1811 fold_Expr(subst_Expr(env, cas->dataLo))
1813 return IRStmt_CAS(cas2);
1817 vassert(isIRAtom(st->Ist.LLSC.addr));
1818 if (st->Ist.LLSC.storedata)
1819 vassert(isIRAtom(st->Ist.LLSC.storedata));
1822 st->Ist.LLSC.result,
1823 fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
1824 st->Ist.LLSC.storedata
1825 ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
1832 d = st->Ist.Dirty.details;
1833 d2 = emptyIRDirty();
1835 d2->args = shallowCopyIRExprVec(d2->args);
1836 if (d2->mFx != Ifx_None) {
1837 vassert(isIRAtom(d2->mAddr));
1838 d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
1840 vassert(isIRAtom(d2->guard));
1841 d2->guard = fold_Expr(subst_Expr(env, d2->guard));
1842 for (i = 0; d2->args[i]; i++) {
1843 vassert(isIRAtom(d2->args[i]));
1844 d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
1846 return IRStmt_Dirty(d2);
1850 return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
1853 return IRStmt_NoOp();
1856 return IRStmt_MBE(st->Ist.MBE.event);
1860 vassert(isIRAtom(st->Ist.Exit.guard));
1861 fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
1862 if (fcond->tag == Iex_Const) {
1863          /* Interesting.  The condition on this exit has folded down to
                 a constant. */
1865 vassert(fcond->Iex.Const.con->tag == Ico_U1);
1866 vassert(fcond->Iex.Const.con->Ico.U1 == False
1867 || fcond->Iex.Const.con->Ico.U1 == True);
1868 if (fcond->Iex.Const.con->Ico.U1 == False) {
1869 /* exit is never going to happen, so dump the statement. */
1870 return IRStmt_NoOp();
1872 vassert(fcond->Iex.Const.con->Ico.U1 == True);
1873 /* Hmmm. The exit has become unconditional. Leave it
1874 as it is for now, since we'd have to truncate the BB
1875 at this point, which is tricky. Such truncation is
1876 done later by the dead-code elimination pass. */
1877 /* fall out into the reconstruct-the-exit code. */
1878 if (vex_control.iropt_verbosity > 0)
1879 /* really a misuse of vex_control.iropt_verbosity */
1880 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
1883 return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
1887 vex_printf("\n"); ppIRStmt(st);
1888 vpanic("subst_and_fold_Stmt");
1893 IRSB* cprop_BB ( IRSB* in )
1898 Int n_tmps = in->tyenv->types_used;
1899 IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
1902 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
1904    /* Set up the env which travels forward.  This holds a
1905 substitution, mapping IRTemps to atoms, that is, IRExprs which
1906 are either IRTemps or IRConsts. Thus, copy and constant
1907 propagation is done. The environment is to be applied as we
1908 move along. Keys are IRTemps. Values are IRExpr*s.
1910    for (i = 0; i < n_tmps; i++)
1911       env[i] = NULL;
1913 /* For each original SSA-form stmt ... */
1914 for (i = 0; i < in->stmts_used; i++) {
1916 /* First apply the substitution to the current stmt. This
1917 propagates in any constants and tmp-tmp assignments
1918          accumulated prior to this point.  As part of the subst_and_fold_Stmt
1919 call, also then fold any constant expressions resulting. */
1923 /* perhaps st2 is already a no-op? */
1924 if (st2->tag == Ist_NoOp) continue;
1926 st2 = subst_and_fold_Stmt( env, st2 );
1928 /* If the statement has been folded into a no-op, forget it. */
1929 if (st2->tag == Ist_NoOp) continue;
1931 /* Now consider what the stmt looks like. If it's of the form
1932 't = const' or 't1 = t2', add it to the running environment
1933 and not to the output BB. Otherwise, add it to the output
1934 BB. Note, we choose not to propagate const when const is an
1935 F64i, so that F64i literals can be CSE'd later. This helps
1936 x86 floating point code generation. */
1938 if (st2->tag == Ist_WrTmp
1939 && st2->Ist.WrTmp.data->tag == Iex_Const
1940 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
1941 /* 't = const' -- add to env.
1942 The pair (IRTemp, IRExpr*) is added. */
1943 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
1946 if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
1947 /* 't1 = t2' -- add to env.
1948 The pair (IRTemp, IRExpr*) is added. */
1949 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
1952 /* Not interesting, copy st2 into the output block. */
1953 addStmtToIRSB( out, st2 );
1957 out->next = subst_Expr( env, in->next );
1958 out->jumpkind = in->jumpkind;
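
/* Worked example (illustrative).  Given the flattened input

      t1 = 0x20:I32
      t2 = Add32(t1,0x10:I32)
      t3 = t2
      PUT(0) = t3

   substitution rewrites the Add32 as Add32(0x20:I32,0x10:I32), folding
   reduces it to 0x30:I32, the three 't = const' / 't1 = t2' bindings
   are absorbed into 'env' rather than emitted, and the output block
   contains only

      PUT(0) = 0x30:I32
*/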
1963 /*---------------------------------------------------------------*/
1964 /*--- Dead code (t = E) removal ---*/
1965 /*---------------------------------------------------------------*/
1967 /* As a side effect, also removes all code following an unconditional
        side exit. */
1970 /* The type of the HashHW map is: a map from IRTemp to nothing
1971    -- really just representing a set of IRTemps.
1975 static void addUses_Temp ( Bool* set, IRTemp tmp )
1977 set[(Int)tmp] = True;
1980 static void addUses_Expr ( Bool* set, IRExpr* e )
1985 addUses_Expr(set, e->Iex.GetI.ix);
1988 addUses_Expr(set, e->Iex.Mux0X.cond);
1989 addUses_Expr(set, e->Iex.Mux0X.expr0);
1990 addUses_Expr(set, e->Iex.Mux0X.exprX);
1993 for (i = 0; e->Iex.CCall.args[i]; i++)
1994 addUses_Expr(set, e->Iex.CCall.args[i]);
1997 addUses_Expr(set, e->Iex.Load.addr);
2000 addUses_Expr(set, e->Iex.Qop.arg1);
2001 addUses_Expr(set, e->Iex.Qop.arg2);
2002 addUses_Expr(set, e->Iex.Qop.arg3);
2003 addUses_Expr(set, e->Iex.Qop.arg4);
2006 addUses_Expr(set, e->Iex.Triop.arg1);
2007 addUses_Expr(set, e->Iex.Triop.arg2);
2008 addUses_Expr(set, e->Iex.Triop.arg3);
2011 addUses_Expr(set, e->Iex.Binop.arg1);
2012 addUses_Expr(set, e->Iex.Binop.arg2);
2015 addUses_Expr(set, e->Iex.Unop.arg);
2018 addUses_Temp(set, e->Iex.RdTmp.tmp);
2026 vpanic("addUses_Expr");
2030 static void addUses_Stmt ( Bool* set, IRStmt* st )
2037 addUses_Expr(set, st->Ist.AbiHint.base);
2038 addUses_Expr(set, st->Ist.AbiHint.nia);
2041 addUses_Expr(set, st->Ist.PutI.ix);
2042 addUses_Expr(set, st->Ist.PutI.data);
2045 addUses_Expr(set, st->Ist.WrTmp.data);
2048 addUses_Expr(set, st->Ist.Put.data);
2051 addUses_Expr(set, st->Ist.Store.addr);
2052 addUses_Expr(set, st->Ist.Store.data);
2055 cas = st->Ist.CAS.details;
2056 addUses_Expr(set, cas->addr);
2058 addUses_Expr(set, cas->expdHi);
2059 addUses_Expr(set, cas->expdLo);
2061 addUses_Expr(set, cas->dataHi);
2062 addUses_Expr(set, cas->dataLo);
2065 addUses_Expr(set, st->Ist.LLSC.addr);
2066 if (st->Ist.LLSC.storedata)
2067 addUses_Expr(set, st->Ist.LLSC.storedata);
2070 d = st->Ist.Dirty.details;
2071 if (d->mFx != Ifx_None)
2072 addUses_Expr(set, d->mAddr);
2073 addUses_Expr(set, d->guard);
2074 for (i = 0; d->args[i] != NULL; i++)
2075 addUses_Expr(set, d->args[i]);
2082 addUses_Expr(set, st->Ist.Exit.guard);
2087 vpanic("addUses_Stmt");
2092 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
2093 static Bool isZeroU1 ( IRExpr* e )
2095 return toBool( e->tag == Iex_Const
2096 && e->Iex.Const.con->tag == Ico_U1
2097 && e->Iex.Const.con->Ico.U1 == False );
2100 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
2101 static Bool isOneU1 ( IRExpr* e )
2103 return toBool( e->tag == Iex_Const
2104 && e->Iex.Const.con->tag == Ico_U1
2105 && e->Iex.Const.con->Ico.U1 == True );
2109 /* Note, this destructively modifies the given IRSB. */
2111 /* Scan backwards through statements, carrying a set of IRTemps which
2112 are known to be used after the current point. On encountering 't =
2113 E', delete the binding if it is not used. Otherwise, add any temp
2114 uses to the set and keep on moving backwards.
2116 As an enhancement, the first (backwards) pass searches for IR exits
2117 with always-taken conditions and notes the location of the earliest
2118 one in the block. If any such are found, a second pass copies the
2119 exit destination and jump kind to the bb-end. Then, the exit and
2120 all statements following it are turned into no-ops.
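
   For example (illustrative):

      t1 = GET:I32(0)
      t2 = Add32(t1,t1)
      PUT(0) = t1

   t2 is never used after its definition, so the middle statement is
   replaced by a no-op; t1 is kept because the PUT still uses it.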
2123 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
2125 Int i, i_unconditional_exit;
2126 Int n_tmps = bb->tyenv->types_used;
2127 Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool));
2130    for (i = 0; i < n_tmps; i++)
2131       set[i] = False;
2133 /* start off by recording IRTemp uses in the next field. */
2134 addUses_Expr(set, bb->next);
2138 /* Work backwards through the stmts */
2139 i_unconditional_exit = -1;
2140 for (i = bb->stmts_used-1; i >= 0; i--) {
2142 if (st->tag == Ist_NoOp)
2144 /* take note of any unconditional exits */
2145 if (st->tag == Ist_Exit
2146 && isOneU1(st->Ist.Exit.guard))
2147 i_unconditional_exit = i;
2148 if (st->tag == Ist_WrTmp
2149 && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
2150 /* it's an IRTemp which never got used. Delete it. */
2152 vex_printf("DEAD: ");
2156 bb->stmts[i] = IRStmt_NoOp();
2159 if (st->tag == Ist_Dirty
2160 && st->Ist.Dirty.details->guard
2161 && isZeroU1(st->Ist.Dirty.details->guard)) {
2162 /* This is a dirty helper which will never get called.
2164 bb->stmts[i] = IRStmt_NoOp();
2167 /* Note any IRTemp uses made by the current statement. */
2168 addUses_Stmt(set, st);
2172 /* Optional second pass: if any unconditional exits were found,
2173 delete them and all following statements. */
2175 if (i_unconditional_exit != -1) {
2176 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
2177 i_unconditional_exit);
2178 vassert(i_unconditional_exit >= 0
2179 && i_unconditional_exit < bb->stmts_used);
2180       bb->next
2181          = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
2182       bb->jumpkind
2183          = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
2184 for (i = i_unconditional_exit; i < bb->stmts_used; i++)
2185 bb->stmts[i] = IRStmt_NoOp();
2190 /*---------------------------------------------------------------*/
2191 /*--- Specialisation of helper function calls, in ---*/
2192 /*--- collaboration with the front end ---*/
2193 /*---------------------------------------------------------------*/
2196 IRSB* spec_helpers_BB ( IRSB* bb,
2197 IRExpr* (*specHelper) ( HChar*, IRExpr**) )
2204 for (i = bb->stmts_used-1; i >= 0; i--) {
2207 if (st->tag != Ist_WrTmp
2208 || st->Ist.WrTmp.data->tag != Iex_CCall)
2211 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
2212 st->Ist.WrTmp.data->Iex.CCall.args );
2214 /* the front end can't think of a suitable replacement */
2217 /* We got something better. Install it in the bb. */
2220 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
2223 vex_printf("SPEC: ");
2224 ppIRExpr(st->Ist.WrTmp.data);
2225 vex_printf(" --> ");
2232 bb = flatten_BB(bb);
2237 /*---------------------------------------------------------------*/
2238 /*--- Determination of guest state aliasing relationships ---*/
2239 /*---------------------------------------------------------------*/
2241 /* These are helper functions for CSE and GetI/PutI transformations.
2243 Determine, to the extent possible, the relationship between two
2244 guest state accesses. The possible outcomes are:
2246 * Exact alias. These two accesses denote precisely the same
2247 piece of the guest state.
2249 * Definitely no alias. These two accesses are guaranteed not to
2250 overlap any part of the guest state.
2252 * Unknown -- if neither of the above can be established.
2254 If in doubt, return Unknown. */
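
/* For example (illustrative): an 8-element I64 array based at offset
   64 occupies bytes 64 .. 127 and therefore cannot overlap a Get:I32
   at offset 16 (bytes 16 .. 19): NoAlias.  Two indexed accesses with
   identical descriptors, the same index atom, and biases 1 and 9 on
   that 8-element array normalise to the same element (9 mod 8 == 1):
   ExactAlias. */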
2256 typedef
2257    enum { ExactAlias, NoAlias, UnknownAlias }
2258    GSAliasing;
2261 /* Produces the alias relation between an indexed guest
2262 state access and a non-indexed access. */
2265 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
2266 Int offset2, IRType ty2 )
2268 UInt minoff1, maxoff1, minoff2, maxoff2;
2270    getArrayBounds( descr1, &minoff1, &maxoff1 );
2271    minoff2 = offset2;
2272    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2274    if (maxoff1 < minoff2 || maxoff2 < minoff1)
2275       return NoAlias;
2277 /* Could probably do better here if required. For the moment
2278 however just claim not to know anything more. */
2279 return UnknownAlias;
2283 /* Produces the alias relation between two indexed guest state
2287 GSAliasing getAliasingRelation_II (
2288 IRRegArray* descr1, IRExpr* ix1, Int bias1,
2289 IRRegArray* descr2, IRExpr* ix2, Int bias2
2292 UInt minoff1, maxoff1, minoff2, maxoff2;
2295 /* First try hard to show they don't alias. */
2296 getArrayBounds( descr1, &minoff1, &maxoff1 );
2297 getArrayBounds( descr2, &minoff2, &maxoff2 );
2298    if (maxoff1 < minoff2 || maxoff2 < minoff1)
2299       return NoAlias;
2301    /* So the two arrays at least partially overlap.  To get any
2302       further we'll have to be sure that the descriptors are
2303       identical. */
2304 if (!eqIRRegArray(descr1, descr2))
2305 return UnknownAlias;
2307 /* The descriptors are identical. Now the only difference can be
2308 in the index expressions. If they cannot be shown to be
2309 identical, we have to say we don't know what the aliasing
2310 relation will be. Now, since the IR is flattened, the index
2311 expressions should be atoms -- either consts or tmps. So that
2312 makes the comparison simple. */
2313 vassert(isIRAtom(ix1));
2314 vassert(isIRAtom(ix2));
2315 if (!eqIRAtom(ix1,ix2))
2316 return UnknownAlias;
2318 /* Ok, the index expressions are identical. So now the only way
2319 they can be different is in the bias. Normalise this
2320 paranoidly, to reliably establish equality/non-equality. */
2322 /* So now we know that the GetI and PutI index the same array
2323 with the same base. Are the offsets the same, modulo the
2324 array size? Do this paranoidly. */
2325 vassert(descr1->nElems == descr2->nElems);
2326 vassert(descr1->elemTy == descr2->elemTy);
2327 vassert(descr1->base == descr2->base);
2329 while (bias1 < 0 || bias2 < 0) {
2330 bias1 += descr1->nElems;
2331 bias2 += descr1->nElems;
2334 vpanic("getAliasingRelation: iters");
2336 vassert(bias1 >= 0 && bias2 >= 0);
2337 bias1 %= descr1->nElems;
2338 bias2 %= descr1->nElems;
2339 vassert(bias1 >= 0 && bias1 < descr1->nElems);
2340 vassert(bias2 >= 0 && bias2 < descr1->nElems);
2342 /* Finally, bias1 and bias2 are normalised into the range
2343 0 .. descr1->nElems - 1. And so we can establish
2344 equality/non-equality. */
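/* Worked example (numbers made up): with nElems == 8, biases of
   1 and 9 both normalise to 1, so the two accesses hit the same
   element (ExactAlias); biases of 1 and 10 normalise to 1 and 2,
   so they hit different elements (NoAlias). */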
2346 return bias1==bias2 ? ExactAlias : NoAlias;
2350 /*---------------------------------------------------------------*/
2351 /*--- Common Subexpression Elimination ---*/
2352 /*---------------------------------------------------------------*/
2354 /* Expensive in time and space. */
2356 /* Uses two environments:
2357 an IRTemp -> IRTemp mapping
2358 a mapping from AvailExpr* to IRTemp
2363 enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
2370 /* binop(tmp,tmp) */
2376 /* binop(tmp,const) */
2382 /* binop(const,tmp) */
2388 /* F64i-style const */
2392 /* Mux0X(tmp,tmp,tmp) */
2398 /* GetI(descr,tmp,bias)*/
2408 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
2410 if (a1->tag != a2->tag)
2415 a1->u.Ut.op == a2->u.Ut.op
2416 && a1->u.Ut.arg == a2->u.Ut.arg);
2419 a1->u.Btt.op == a2->u.Btt.op
2420 && a1->u.Btt.arg1 == a2->u.Btt.arg1
2421 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
2424 a1->u.Btc.op == a2->u.Btc.op
2425 && a1->u.Btc.arg1 == a2->u.Btc.arg1
2426 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
2429 a1->u.Bct.op == a2->u.Bct.op
2430 && a1->u.Bct.arg2 == a2->u.Bct.arg2
2431 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
2433 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
2435 return toBool(a1->u.Mttt.co == a2->u.Mttt.co
2436 && a1->u.Mttt.e0 == a2->u.Mttt.e0
2437 && a1->u.Mttt.eX == a2->u.Mttt.eX);
2439 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
2440 && a1->u.GetIt.ix == a2->u.GetIt.ix
2441 && a1->u.GetIt.bias == a2->u.GetIt.bias);
2442 default: vpanic("eq_AvailExpr");
2446 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
2451 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
2453 return IRExpr_Binop( ae->u.Btt.op,
2454 IRExpr_RdTmp(ae->u.Btt.arg1),
2455 IRExpr_RdTmp(ae->u.Btt.arg2) );
2457 con = LibVEX_Alloc(sizeof(IRConst));
2458 *con = ae->u.Btc.con2;
2459 return IRExpr_Binop( ae->u.Btc.op,
2460 IRExpr_RdTmp(ae->u.Btc.arg1),
2461 IRExpr_Const(con) );
2463 con = LibVEX_Alloc(sizeof(IRConst));
2464 *con = ae->u.Bct.con1;
2465 return IRExpr_Binop( ae->u.Bct.op,
2467 IRExpr_RdTmp(ae->u.Bct.arg2) );
2469 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
2471 return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
2472 IRExpr_RdTmp(ae->u.Mttt.e0),
2473 IRExpr_RdTmp(ae->u.Mttt.eX));
2475 return IRExpr_GetI(ae->u.GetIt.descr,
2476 IRExpr_RdTmp(ae->u.GetIt.ix),
2479 vpanic("availExpr_to_IRExpr");
2484 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
2487 /* env :: IRTemp -> IRTemp */
2488 if (lookupHHW( env, &res, (HWord)tmp ))
2494 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
2496 /* env :: IRTemp -> IRTemp */
2499 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
2502 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
2503 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
2506 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
2509 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
2514 ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
2515 ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
2516 ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
2519 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
2522 vpanic("subst_AvailExpr");
2526 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
2530 if (e->tag == Iex_Unop
2531 && e->Iex.Unop.arg->tag == Iex_RdTmp) {
2532 ae = LibVEX_Alloc(sizeof(AvailExpr));
2534 ae->u.Ut.op = e->Iex.Unop.op;
2535 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
2539 if (e->tag == Iex_Binop
2540 && e->Iex.Binop.arg1->tag == Iex_RdTmp
2541 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2542 ae = LibVEX_Alloc(sizeof(AvailExpr));
2544 ae->u.Btt.op = e->Iex.Binop.op;
2545 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2546 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2550 if (e->tag == Iex_Binop
2551 && e->Iex.Binop.arg1->tag == Iex_RdTmp
2552 && e->Iex.Binop.arg2->tag == Iex_Const) {
2553 ae = LibVEX_Alloc(sizeof(AvailExpr));
2555 ae->u.Btc.op = e->Iex.Binop.op;
2556 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2557 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
2561 if (e->tag == Iex_Binop
2562 && e->Iex.Binop.arg1->tag == Iex_Const
2563 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2564 ae = LibVEX_Alloc(sizeof(AvailExpr));
2566 ae->u.Bct.op = e->Iex.Binop.op;
2567 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2568 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
2572 if (e->tag == Iex_Const
2573 && e->Iex.Const.con->tag == Ico_F64i) {
2574 ae = LibVEX_Alloc(sizeof(AvailExpr));
2576 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
2580 if (e->tag == Iex_Mux0X
2581 && e->Iex.Mux0X.cond->tag == Iex_RdTmp
2582 && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
2583 && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
2584 ae = LibVEX_Alloc(sizeof(AvailExpr));
2586 ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
2587 ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
2588 ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
2592 if (e->tag == Iex_GetI
2593 && e->Iex.GetI.ix->tag == Iex_RdTmp) {
2594 ae = LibVEX_Alloc(sizeof(AvailExpr));
2596 ae->u.GetIt.descr = e->Iex.GetI.descr;
2597 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp;
2598 ae->u.GetIt.bias = e->Iex.GetI.bias;
2606 /* The BB is modified in-place. Returns True if any changes were
2609 static Bool do_cse_BB ( IRSB* bb )
2617 Bool anyDone = False;
2619 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
2620 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
2622 vassert(sizeof(IRTemp) <= sizeof(HWord));
2624 if (0) { ppIRSB(bb); vex_printf("\n\n"); }
2626 /* Iterate forwards over the stmts.
2627 On seeing "t = E", where E is one of the recognised AvailExpr forms:
2628 let E' = apply tenv substitution to E
2630 if a mapping E' -> q is found,
2631 replace this stmt by "t = q"
2632 and add binding t -> q to tenv
2634 add binding E' -> t to aenv
2635 replace this stmt by "t = E'"
2637 Other statements are only interesting to the extent that they
2638 might invalidate some of the expressions in aenv. So there is
2639 an invalidate-bindings check for each statement seen.
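/* A minimal illustrative example (temp names made up):
      t3 = Add32(t1,t2)
      ...
      t9 = Add32(t1,t2)
   The second definition matches the available expression recorded
   for the first, so it is rewritten as
      t9 = t3
   and the binding t9 -> t3 is added to tenv, which keeps later
   available expressions phrased in terms of t3.  The leftover copy
   is expected to be cleaned up by the following constant
   propagation / dead-code passes. */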
2641 for (i = 0; i < bb->stmts_used; i++) {
2644 /* ------ BEGIN invalidate aenv bindings ------ */
2645 /* This is critical: remove from aenv any E' -> .. bindings
2646 which might be invalidated by this statement. The only
2647 vulnerable kind of binding is the GetI kind.
2648 Dirty call - dump (paranoia level -> 2)
2649 Store - dump (ditto)
2650 Put, PutI - dump unless no-overlap is proven (.. -> 1)
2651 Uses getAliasingRelation_IC and getAliasingRelation_II
2652 to do the no-overlap assessments needed for Put/PutI.
2655 case Ist_Dirty: case Ist_Store: case Ist_MBE:
2656 case Ist_CAS: case Ist_LLSC:
2657 paranoia = 2; break;
2658 case Ist_Put: case Ist_PutI:
2659 paranoia = 1; break;
2660 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
2661 case Ist_WrTmp: case Ist_Exit:
2662 paranoia = 0; break;
2664 vpanic("do_cse_BB(1)");
2668 for (j = 0; j < aenv->used; j++) {
2669 if (!aenv->inuse[j])
2671 ae = (AvailExpr*)aenv->key[j];
2672 if (ae->tag != GetIt)
2675 if (paranoia >= 2) {
2678 vassert(paranoia == 1);
2679 if (st->tag == Ist_Put) {
2680 if (getAliasingRelation_IC(
2682 IRExpr_RdTmp(ae->u.GetIt.ix),
2684 typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
2689 if (st->tag == Ist_PutI) {
2690 if (getAliasingRelation_II(
2692 IRExpr_RdTmp(ae->u.GetIt.ix),
2701 vpanic("do_cse_BB(2)");
2705 aenv->inuse[j] = False;
2706 aenv->key[j] = (HWord)NULL; /* be sure */
2709 } /* paranoia > 0 */
2711 /* ------ END invalidate aenv bindings ------ */
2713 /* ignore not-interestings */
2714 if (st->tag != Ist_WrTmp)
2717 t = st->Ist.WrTmp.tmp;
2718 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
2719 /* ignore if not of AvailExpr form */
2723 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
2726 subst_AvailExpr( tenv, eprime );
2728 /* search aenv for eprime, unfortunately the hard way */
2729 for (j = 0; j < aenv->used; j++)
2730 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
2733 if (j < aenv->used) {
2734 /* A binding E' -> q was found. Replace stmt by "t = q" and
2735 note the t->q binding in tenv. */
2736 /* (this is the core of the CSE action) */
2737 q = (IRTemp)aenv->val[j];
2738 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
2739 addToHHW( tenv, (HWord)t, (HWord)q );
2742 /* No binding was found, so instead we add E' -> t to our
2743 collection of available expressions, replace this stmt
2744 with "t = E'", and move on. */
2745 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
2746 addToHHW( aenv, (HWord)eprime, (HWord)t );
2752 sanityCheckIRSB(bb, Ity_I32);
2759 /*---------------------------------------------------------------*/
2760 /*--- Add32/Sub32 chain collapsing ---*/
2761 /*---------------------------------------------------------------*/
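/* Illustrative chain (temp names made up):
      t9  = Add32(t8,0x4:I32)
      t10 = Sub32(t9,0x8:I32)
      t11 = Add32(t10,0x10:I32)
   Scanning backwards from t11's definition shows t11 = t8 + 12, so
   the last statement can be rewritten as
      t11 = Add32(t8,0xC:I32)
   The same walk is used to fold accumulated constants into the bias
   fields of GetI and PutI statements. */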
2763 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
2765 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
2766 yes, set *tmp and *i32 appropriately. *i32 is set as if the
2767 root node is Add32, not Sub32. */
2769 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
2771 if (e->tag != Iex_Binop)
2773 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
2775 if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
2777 if (e->Iex.Binop.arg2->tag != Iex_Const)
2779 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2780 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
2781 if (e->Iex.Binop.op == Iop_Sub32)
2787 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
2788 other tmp2. Scan backwards from the specified start point -- an
2791 static Bool collapseChain ( IRSB* bb, Int startHere,
2793 IRTemp* tmp2, Int* i32 )
2800 /* the (var, con) pair contain the current 'representation' for
2801 'tmp'. We start with 'tmp + 0'. */
2805 /* Scan backwards to see if tmp can be replaced by some other tmp
2807 for (j = startHere; j >= 0; j--) {
2809 if (st->tag != Ist_WrTmp)
2811 if (st->Ist.WrTmp.tmp != var)
2813 e = st->Ist.WrTmp.data;
2814 if (!isAdd32OrSub32(e, &vv, &ii))
2820 /* no earlier binding for var .. ill-formed IR */
2821 vpanic("collapseChain");
2823 /* so, did we find anything interesting? */
2825 return False; /* no .. */
2833 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
2835 static void collapse_AddSub_chains_BB ( IRSB* bb )
2841 for (i = bb->stmts_used-1; i >= 0; i--) {
2843 if (st->tag == Ist_NoOp)
2846 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
2848 if (st->tag == Ist_WrTmp
2849 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
2851 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
2852 Find out if var can be expressed as var2 + con2. */
2853 if (collapseChain(bb, i-1, var, &var2, &con2)) {
2855 vex_printf("replacing1 ");
2857 vex_printf(" with ");
2864 ? IRExpr_Binop(Iop_Add32,
2866 IRExpr_Const(IRConst_U32(con2)))
2867 : IRExpr_Binop(Iop_Sub32,
2869 IRExpr_Const(IRConst_U32(-con2)))
2872 ppIRStmt(bb->stmts[i]);
2880 /* Try to collapse 't1 = GetI[t2, con]'. */
2882 if (st->tag == Ist_WrTmp
2883 && st->Ist.WrTmp.data->tag == Iex_GetI
2884 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
2885 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
2886 ->Iex.RdTmp.tmp, &var2, &con2)) {
2888 vex_printf("replacing3 ");
2890 vex_printf(" with ");
2892 con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
2896 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
2900 ppIRStmt(bb->stmts[i]);
2906 /* Perhaps st is PutI[t, con] ? */
2908 if (st->tag == Ist_PutI
2909 && st->Ist.PutI.ix->tag == Iex_RdTmp
2910 && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp,
2913 vex_printf("replacing2 ");
2915 vex_printf(" with ");
2917 con2 += st->Ist.PutI.bias;
2919 = IRStmt_PutI(st->Ist.PutI.descr,
2924 ppIRStmt(bb->stmts[i]);
2934 /*---------------------------------------------------------------*/
2935 /*--- PutI/GetI transformations ---*/
2936 /*---------------------------------------------------------------*/
2938 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
2939 the given starting point to find, if any, a PutI which writes
2940 exactly the same piece of guest state, and so return the expression
2941 that the PutI writes. This is the core of PutI-GetI forwarding. */
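/* Illustrative example (informal notation, temp names made up):
      PutI(descr)[t2,3] = t5
      ...                     -- no overlapping guest-state writes
      t9 = GetI(descr)[t2,3]
   findPutI locates the earlier PutI and returns t5, so the caller
   can rewrite the GetI as
      t9 = t5
   provided the types agree. */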
2944 IRExpr* findPutI ( IRSB* bb, Int startHere,
2945 IRRegArray* descrG, IRExpr* ixG, Int biasG )
2949 GSAliasing relation;
2952 vex_printf("\nfindPutI ");
2953 ppIRRegArray(descrG);
2956 vex_printf(" %d\n", biasG);
2959 /* Scan backwards in bb from startHere to find a suitable PutI
2960 binding for (descrG, ixG, biasG), if any. */
2962 for (j = startHere; j >= 0; j--) {
2964 if (st->tag == Ist_NoOp)
2967 if (st->tag == Ist_Put) {
2968 /* Non-indexed Put. This can't give a binding, but we do
2969 need to check it doesn't invalidate the search by
2970 overlapping any part of the indexed guest state. */
2973 = getAliasingRelation_IC(
2976 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
2978 if (relation == NoAlias) {
2979 /* we're OK; keep going */
2982 /* relation == UnknownAlias || relation == ExactAlias */
2983 /* If this assertion fails, we've found a Put which writes
2984 an area of guest state which is read by a GetI. Which
2985 is unlikely (although not per se wrong). */
2986 vassert(relation != ExactAlias);
2987 /* This Put potentially writes guest state that the GetI
2988 reads; we must fail. */
2993 if (st->tag == Ist_PutI) {
2995 relation = getAliasingRelation_II(
3002 if (relation == NoAlias) {
3003 /* This PutI definitely doesn't overlap. Ignore it and
3005 continue; /* the for j loop */
3008 if (relation == UnknownAlias) {
3009 /* We don't know if this PutI writes to the same guest
3010 state that the GetI reads, or not. So we have to give up. */
3014 /* Otherwise, we've found what we're looking for. */
3015 vassert(relation == ExactAlias);
3016 return st->Ist.PutI.data;
3018 } /* if (st->tag == Ist_PutI) */
3020 if (st->tag == Ist_Dirty) {
3021 /* Be conservative. If the dirty call has any guest effects at
3022 all, give up. We could do better -- only give up if there
3023 are any guest writes/modifies. */
3024 if (st->Ist.Dirty.details->nFxState > 0)
3030 /* No valid replacement was found. */
3036 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
3037 that it writes exactly the same piece of guest state) ? Safe
3040 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
3042 vassert(pi->tag == Ist_PutI);
3043 if (s2->tag != Ist_PutI)
3047 getAliasingRelation_II(
3048 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3049 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3056 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
3057 overlap it? Safe answer: True. Note, we could do a lot better
3058 than this if needed. */
3061 Bool guestAccessWhichMightOverlapPutI (
3062 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
3065 GSAliasing relation;
3066 UInt minoffP, maxoffP;
3068 vassert(pi->tag == Ist_PutI);
3069 getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
3078 /* just be paranoid ... these should be rare. */
3082 /* This is unbelievably lame, but it's probably not
3083 significant from a performance point of view. Really, a
3084 CAS is a load-store op, so it should be safe to say False.
3089 /* If the dirty call has any guest effects at all, give up.
3090 Probably could do better. */
3091 if (s2->Ist.Dirty.details->nFxState > 0)
3096 vassert(isIRAtom(s2->Ist.Put.data));
3098 = getAliasingRelation_IC(
3099 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3101 typeOfIRExpr(tyenv,s2->Ist.Put.data)
3106 vassert(isIRAtom(s2->Ist.PutI.ix));
3107 vassert(isIRAtom(s2->Ist.PutI.data));
3109 = getAliasingRelation_II(
3110 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3111 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3116 if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
3118 = getAliasingRelation_II(
3119 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3121 s2->Ist.WrTmp.data->Iex.GetI.descr,
3122 s2->Ist.WrTmp.data->Iex.GetI.ix,
3123 s2->Ist.WrTmp.data->Iex.GetI.bias
3127 if (s2->Ist.WrTmp.data->tag == Iex_Get) {
3129 = getAliasingRelation_IC(
3130 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3131 s2->Ist.WrTmp.data->Iex.Get.offset,
3132 s2->Ist.WrTmp.data->Iex.Get.ty
3139 vassert(isIRAtom(s2->Ist.Store.addr));
3140 vassert(isIRAtom(s2->Ist.Store.data));
3144 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
3145 vpanic("guestAccessWhichMightOverlapPutI");
3149 if (relation == NoAlias)
3152 return True; /* ExactAlias or UnknownAlias */
3157 /* ---------- PutI/GetI transformations main functions --------- */
3159 /* Remove redundant GetIs, to the extent that they can be detected.
3160 bb is modified in-place. */
3163 void do_redundant_GetI_elimination ( IRSB* bb )
3168 for (i = bb->stmts_used-1; i >= 0; i--) {
3170 if (st->tag == Ist_NoOp)
3173 if (st->tag == Ist_WrTmp
3174 && st->Ist.WrTmp.data->tag == Iex_GetI
3175 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
3176 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
3177 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
3178 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
3179 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
3181 && isIRAtom(replacement)
3182 /* Make sure we're doing a type-safe transformation! */
3183 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
3185 vex_printf("rGI: ");
3186 ppIRExpr(st->Ist.WrTmp.data);
3188 ppIRExpr(replacement);
3191 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
3199 /* Remove redundant PutIs, to the extent that they can be detected.
3200 bb is modified in-place. */
3203 void do_redundant_PutI_elimination ( IRSB* bb )
3209 for (i = 0; i < bb->stmts_used; i++) {
3211 if (st->tag != Ist_PutI)
3213 /* Ok, search forwards from here to see if we can find another
3214 PutI which makes this one redundant, and dodging various
3215 hazards. Search forwards:
3216 * If conditional exit, give up (because anything after that
3217 does not postdominate this put).
3218 * If a Get which might overlap, give up (because this PutI
3219 is not necessarily dead).
3220 * If a Put which is identical, stop with success.
3221 * If a Put which might overlap, but is not identical, give up.
3222 * If a dirty helper call which might write guest state, give up.
3223 * If a Put which definitely doesn't overlap, or any other
3224 kind of stmt, continue.
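/* Illustrative example (informal notation, temp names made up):
      PutI(descr)[t1,0] = t7
      t8 = Add32(t7,0x1:I32)
      PutI(descr)[t1,0] = t8
   Nothing between the two PutIs exits the block, reads the guest
   state in question, or calls a dirty helper, and the second PutI
   writes exactly the same element, so the first is overwritten
   with a NoOp. */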
3227 for (j = i+1; j < bb->stmts_used; j++) {
3229 if (stj->tag == Ist_NoOp)
3231 if (identicalPutIs(st, stj)) {
3236 if (stj->tag == Ist_Exit)
3239 if (stj->tag == Ist_Dirty)
3240 /* give up; could do better here */
3242 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
3249 vex_printf("rPI: ");
3253 bb->stmts[i] = IRStmt_NoOp();
3260 /*---------------------------------------------------------------*/
3261 /*--- Loop unrolling ---*/
3262 /*---------------------------------------------------------------*/
3264 /* Adjust all tmp values (names) in e by delta. e is destructively
3267 static void deltaIRExpr ( IRExpr* e, Int delta )
3272 e->Iex.RdTmp.tmp += delta;
3278 deltaIRExpr(e->Iex.GetI.ix, delta);
3281 deltaIRExpr(e->Iex.Qop.arg1, delta);
3282 deltaIRExpr(e->Iex.Qop.arg2, delta);
3283 deltaIRExpr(e->Iex.Qop.arg3, delta);
3284 deltaIRExpr(e->Iex.Qop.arg4, delta);
3287 deltaIRExpr(e->Iex.Triop.arg1, delta);
3288 deltaIRExpr(e->Iex.Triop.arg2, delta);
3289 deltaIRExpr(e->Iex.Triop.arg3, delta);
3292 deltaIRExpr(e->Iex.Binop.arg1, delta);
3293 deltaIRExpr(e->Iex.Binop.arg2, delta);
3296 deltaIRExpr(e->Iex.Unop.arg, delta);
3299 deltaIRExpr(e->Iex.Load.addr, delta);
3302 for (i = 0; e->Iex.CCall.args[i]; i++)
3303 deltaIRExpr(e->Iex.CCall.args[i], delta);
3306 deltaIRExpr(e->Iex.Mux0X.cond, delta);
3307 deltaIRExpr(e->Iex.Mux0X.expr0, delta);
3308 deltaIRExpr(e->Iex.Mux0X.exprX, delta);
3311 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3312 vpanic("deltaIRExpr");
3316 /* Adjust all tmp values (names) in st by delta. st is destructively
3319 static void deltaIRStmt ( IRStmt* st, Int delta )
3329 deltaIRExpr(st->Ist.AbiHint.base, delta);
3330 deltaIRExpr(st->Ist.AbiHint.nia, delta);
3333 deltaIRExpr(st->Ist.Put.data, delta);
3336 deltaIRExpr(st->Ist.PutI.ix, delta);
3337 deltaIRExpr(st->Ist.PutI.data, delta);
3340 st->Ist.WrTmp.tmp += delta;
3341 deltaIRExpr(st->Ist.WrTmp.data, delta);
3344 deltaIRExpr(st->Ist.Exit.guard, delta);
3347 deltaIRExpr(st->Ist.Store.addr, delta);
3348 deltaIRExpr(st->Ist.Store.data, delta);
3351 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
3352 st->Ist.CAS.details->oldHi += delta;
3353 st->Ist.CAS.details->oldLo += delta;
3354 deltaIRExpr(st->Ist.CAS.details->addr, delta);
3355 if (st->Ist.CAS.details->expdHi)
3356 deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
3357 deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
3358 if (st->Ist.CAS.details->dataHi)
3359 deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
3360 deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
3363 st->Ist.LLSC.result += delta;
3364 deltaIRExpr(st->Ist.LLSC.addr, delta);
3365 if (st->Ist.LLSC.storedata)
3366 deltaIRExpr(st->Ist.LLSC.storedata, delta);
3369 d = st->Ist.Dirty.details;
3370 deltaIRExpr(d->guard, delta);
3371 for (i = 0; d->args[i]; i++)
3372 deltaIRExpr(d->args[i], delta);
3373 if (d->tmp != IRTemp_INVALID)
3376 deltaIRExpr(d->mAddr, delta);
3379 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
3380 vpanic("deltaIRStmt");
3385 /* If possible, return a loop-unrolled version of bb0. The original
3386 is not changed. If not possible, return NULL. */
3388 /* The two schemas considered are:
3392 which unrolls to (eg) X: BODY;BODY; goto X
3396 X: BODY; if (c) goto X; goto Y
3397 which trivially transforms to
3398 X: BODY; if (!c) goto Y; goto X;
3399 so it falls in the scope of the first case.
3401 X and Y must be literal (guest) addresses.
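/* Worked example for the factor selection below (the threshold is a
   made-up value): with vex_control.iropt_unroll_thresh == 120, a
   body of 14 real (non-NoOp) statements unrolls 8x (14 <= 120/8),
   25 statements unroll 4x (25 <= 120/4), 55 statements unroll 2x
   (55 <= 120/2), and anything bigger is left alone. */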
3404 static Int calc_unroll_factor( IRSB* bb )
3409 for (i = 0; i < bb->stmts_used; i++) {
3410 if (bb->stmts[i]->tag != Ist_NoOp)
3414 if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
3415 if (vex_control.iropt_verbosity > 0)
3416 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
3417 n_stmts, 8* n_stmts);
3420 if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
3421 if (vex_control.iropt_verbosity > 0)
3422 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
3423 n_stmts, 4* n_stmts);
3427 if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
3428 if (vex_control.iropt_verbosity > 0)
3429 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
3430 n_stmts, 2* n_stmts);
3434 if (vex_control.iropt_verbosity > 0)
3435 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
3441 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
3443 Int i, j, jmax, n_vars;
3445 Addr64 xxx_value, yyy_value;
3452 if (vex_control.iropt_unroll_thresh <= 0)
3455 /* First off, figure out if we can unroll this loop. Do this
3456 without modifying bb0. */
3458 if (bb0->jumpkind != Ijk_Boring)
3464 /* Extract the next-guest address. If it isn't a literal, we
3468 if (udst->tag == Iex_Const
3469 && (udst->Iex.Const.con->tag == Ico_U32
3470 || udst->Iex.Const.con->tag == Ico_U64)) {
3471 /* The BB ends in a jump to a literal location. */
3473 xxx_value = udst->Iex.Const.con->tag == Ico_U64
3474 ? udst->Iex.Const.con->Ico.U64
3475 : (Addr64)(udst->Iex.Const.con->Ico.U32);
3481 /* Now we know the BB ends in a jump to a literal location. If
3482 it's a jump to itself (viz, idiom #1), move directly to the
3483 unrolling stage, first cloning the bb so the original isn't
3485 if (xxx_value == my_addr) {
3486 unroll_factor = calc_unroll_factor( bb0 );
3487 if (unroll_factor < 2)
3489 bb1 = deepCopyIRSB( bb0 );
3491 udst = NULL; /* is now invalid */
3495 /* Search for the second idiomatic form:
3496 X: BODY; if (c) goto X; goto Y
3497 We know Y, but need to establish that the last stmt
3500 yyy_value = xxx_value;
3501 for (i = bb0->stmts_used-1; i >= 0; i--)
3506 return NULL; /* block with no stmts. Strange. */
3509 if (st->tag != Ist_Exit)
3511 if (st->Ist.Exit.jk != Ijk_Boring)
3514 con = st->Ist.Exit.dst;
3515 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
3517 xxx_value = con->tag == Ico_U64
3518 ? st->Ist.Exit.dst->Ico.U64
3519 : (Addr64)(st->Ist.Exit.dst->Ico.U32);
3521 /* If this assertion fails, we have some kind of type error. */
3522 vassert(con->tag == udst->Iex.Const.con->tag);
3524 if (xxx_value != my_addr)
3525 /* We didn't find either idiom. Give up. */
3528 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
3529 yyy values (which makes it look like idiom #1), and go into
3530 unrolling proper. This means finding (again) the last stmt, in
3533 unroll_factor = calc_unroll_factor( bb0 );
3534 if (unroll_factor < 2)
3537 bb1 = deepCopyIRSB( bb0 );
3539 udst = NULL; /* is now invalid */
3540 for (i = bb1->stmts_used-1; i >= 0; i--)
3544 /* The next bunch of assertions should be true since we already
3545 found and checked the last stmt in the original bb. */
3550 vassert(st->tag == Ist_Exit);
3552 con = st->Ist.Exit.dst;
3553 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
3556 vassert(udst->tag == Iex_Const);
3557 vassert(udst->Iex.Const.con->tag == Ico_U32
3558 || udst->Iex.Const.con->tag == Ico_U64);
3559 vassert(con->tag == udst->Iex.Const.con->tag);
3561 /* switch the xxx and yyy fields around */
3562 if (con->tag == Ico_U64) {
3563 udst->Iex.Const.con->Ico.U64 = xxx_value;
3564 con->Ico.U64 = yyy_value;
3566 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
3567 con->Ico.U32 = (UInt)yyy_value;
3570 /* negate the test condition */
3572 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
3574 /* --- The unroller proper. Both idioms are by now --- */
3575 /* --- now converted to idiom 1. --- */
3579 vassert(unroll_factor == 2
3580 || unroll_factor == 4
3581 || unroll_factor == 8);
3583 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
3584 for (j = 1; j <= jmax; j++) {
3586 n_vars = bb1->tyenv->types_used;
3588 bb2 = deepCopyIRSB(bb1);
3589 for (i = 0; i < n_vars; i++)
3590 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
3592 for (i = 0; i < bb2->stmts_used; i++) {
3593 /* deltaIRStmt destructively modifies the stmt, but
3594 that's OK since bb2 is a complete fresh copy of bb1. */
3595 deltaIRStmt(bb2->stmts[i], n_vars);
3596 addStmtToIRSB(bb1, bb2->stmts[i]);
3601 vex_printf("\nUNROLLED (%llx)\n", my_addr);
3606 /* Flattening; sigh. The unroller succeeds in breaking flatness
3607 by negating the test condition. This should be fixed properly.
3608 For the moment use this shotgun approach. */
3609 return flatten_BB(bb1);
3613 /*---------------------------------------------------------------*/
3614 /*--- The tree builder ---*/
3615 /*---------------------------------------------------------------*/
3617 /* This isn't part of IR optimisation. Really it's a pass done prior
3618 to instruction selection, which improves the code that the
3619 instruction selector can produce. */
3621 /* --- The 'tmp' environment is the central data structure here --- */
3623 /* The number of outstanding bindings we're prepared to track.
3624 The number of times the env becomes full and we have to dump
3625 the oldest binding (hence reducing code quality) falls very
3626 rapidly as the env size increases. 8 gives reasonable performance
3627 under most circumstances. */
3630 /* bindee == NULL === slot is not in use
3631 bindee != NULL === slot is in use
3642 __attribute__((unused))
3643 static void ppAEnv ( ATmpInfo* env )
3646 for (i = 0; i < A_NENV; i++) {
3647 vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
3649 ppIRExpr(env[i].bindee);
3651 vex_printf("(null)");
3656 /* --- Tree-traversal fns --- */
3658 /* Traverse an expr, and detect if any part of it reads memory or does
3659 a Get. Be careful ... this really controls how much the
3660 tree-builder can reorder the code, so getting it right is critical.
3662 static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
3667 for (i = 0; e->Iex.CCall.args[i]; i++)
3668 setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
3671 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
3672 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
3673 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
3676 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1);
3677 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2);
3678 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3);
3679 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4);
3682 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
3683 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
3684 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
3687 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
3688 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
3691 setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
3695 setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
3702 setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
3708 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3709 vpanic("setHints_Expr");
3714 /* Add a binding to the front of the env and slide all the rest
3715 backwards. It should be the case that the last slot is free. */
3716 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
3719 vassert(env[A_NENV-1].bindee == NULL);
3720 for (i = A_NENV-1; i >= 1; i--)
3722 env[0].binder = binder;
3723 env[0].bindee = bindee;
3724 env[0].doesLoad = False; /* filled in later */
3725 env[0].doesGet = False; /* filled in later */
3728 /* Given uses :: array of UShort, indexed by IRTemp
3729 Add the use-occurrences of temps in this expression
3732 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
3738 case Iex_RdTmp: /* the only interesting case */
3739 uses[e->Iex.RdTmp.tmp]++;
3743 aoccCount_Expr(uses, e->Iex.Mux0X.cond);
3744 aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
3745 aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
3749 aoccCount_Expr(uses, e->Iex.Qop.arg1);
3750 aoccCount_Expr(uses, e->Iex.Qop.arg2);
3751 aoccCount_Expr(uses, e->Iex.Qop.arg3);
3752 aoccCount_Expr(uses, e->Iex.Qop.arg4);
3756 aoccCount_Expr(uses, e->Iex.Triop.arg1);
3757 aoccCount_Expr(uses, e->Iex.Triop.arg2);
3758 aoccCount_Expr(uses, e->Iex.Triop.arg3);
3762 aoccCount_Expr(uses, e->Iex.Binop.arg1);
3763 aoccCount_Expr(uses, e->Iex.Binop.arg2);
3767 aoccCount_Expr(uses, e->Iex.Unop.arg);
3771 aoccCount_Expr(uses, e->Iex.Load.addr);
3775 for (i = 0; e->Iex.CCall.args[i]; i++)
3776 aoccCount_Expr(uses, e->Iex.CCall.args[i]);
3780 aoccCount_Expr(uses, e->Iex.GetI.ix);
3788 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3789 vpanic("aoccCount_Expr");
3794 /* Given uses :: array of UShort, indexed by IRTemp
3795 Add the use-occurrences of temps in this statement
3798 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
3805 aoccCount_Expr(uses, st->Ist.AbiHint.base);
3806 aoccCount_Expr(uses, st->Ist.AbiHint.nia);
3809 aoccCount_Expr(uses, st->Ist.WrTmp.data);
3812 aoccCount_Expr(uses, st->Ist.Put.data);
3815 aoccCount_Expr(uses, st->Ist.PutI.ix);
3816 aoccCount_Expr(uses, st->Ist.PutI.data);
3819 aoccCount_Expr(uses, st->Ist.Store.addr);
3820 aoccCount_Expr(uses, st->Ist.Store.data);
3823 cas = st->Ist.CAS.details;
3824 aoccCount_Expr(uses, cas->addr);
3826 aoccCount_Expr(uses, cas->expdHi);
3827 aoccCount_Expr(uses, cas->expdLo);
3829 aoccCount_Expr(uses, cas->dataHi);
3830 aoccCount_Expr(uses, cas->dataLo);
3833 aoccCount_Expr(uses, st->Ist.LLSC.addr);
3834 if (st->Ist.LLSC.storedata)
3835 aoccCount_Expr(uses, st->Ist.LLSC.storedata);
3838 d = st->Ist.Dirty.details;
3839 if (d->mFx != Ifx_None)
3840 aoccCount_Expr(uses, d->mAddr);
3841 aoccCount_Expr(uses, d->guard);
3842 for (i = 0; d->args[i]; i++)
3843 aoccCount_Expr(uses, d->args[i]);
3850 aoccCount_Expr(uses, st->Ist.Exit.guard);
3853 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
3854 vpanic("aoccCount_Stmt");
3858 /* Look up a binding for tmp in the env. If found, return the bound
3859 expression, and set the env's binding to NULL so it is marked as
3860 used. If not found, return NULL. */
3862 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
3865 for (i = 0; i < A_NENV; i++) {
3866 if (env[i].binder == tmp && env[i].bindee != NULL) {
3867 IRExpr* bindee = env[i].bindee;
3868 env[i].bindee = NULL;
3875 /* Traverse e, looking for temps. For each observed temp, see if env
3876 contains a binding for the temp, and if so return the bound value.
3877 The env has the property that any binding it holds is
3878 'single-shot', so once a binding is used, it is marked as no longer
3879 available, by setting its .bindee field to NULL. */
3881 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
3882 return e->tag == Iex_Unop && e->Iex.Unop.op == op;
3884 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
3885 return e->tag == Iex_Binop && e->Iex.Binop.op == op;
3888 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
3892 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
3893 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
3894 return IRExpr_Unop( Iop_CmpwNEZ32,
3895 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
3896 a2->Iex.Unop.arg ) );
3901 /* no reduction rule applies */
3902 return IRExpr_Binop( op, a1, a2 );
3905 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
3909 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
3910 if (is_Binop(aa, Iop_Or64)
3911 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
3912 return fold_IRExpr_Unop(
3914 IRExpr_Binop(Iop_Or64,
3915 aa->Iex.Binop.arg1->Iex.Unop.arg,
3916 aa->Iex.Binop.arg2));
3917 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
3918 if (is_Binop(aa, Iop_Or64)
3919 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
3920 return fold_IRExpr_Unop(
3922 IRExpr_Binop(Iop_Or64,
3924 aa->Iex.Binop.arg2->Iex.Unop.arg));
3927 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
3928 if (is_Unop(aa, Iop_Left64))
3929 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
3932 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
3933 if (is_Unop(aa, Iop_CmpwNEZ32))
3934 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
3937 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
3938 if (is_Unop(aa, Iop_Left32))
3939 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
3942 /* Left32( Left32(x) ) --> Left32(x) */
3943 if (is_Unop(aa, Iop_Left32))
3944 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
3947 /* 32to1( 1Uto32 ( x ) ) --> x */
3948 if (is_Unop(aa, Iop_1Uto32))
3949 return aa->Iex.Unop.arg;
3950 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
3951 if (is_Unop(aa, Iop_CmpwNEZ32))
3952 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
3955 /* 64to1( 1Uto64 ( x ) ) --> x */
3956 if (is_Unop(aa, Iop_1Uto64))
3957 return aa->Iex.Unop.arg;
3958 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
3959 if (is_Unop(aa, Iop_CmpwNEZ64))
3960 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
3963 /* 64to32( 32Uto64 ( x )) --> x */
3964 if (is_Unop(aa, Iop_32Uto64))
3965 return aa->Iex.Unop.arg;
3969 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
3970 if (is_Unop(aa, Iop_CmpNEZ8)
3971 && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
3972 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
3973 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
3975 return IRExpr_Unop( Iop_CmpwNEZ32,
3976 aa->Iex.Unop.arg->Iex.Unop.arg
3977 ->Iex.Unop.arg->Iex.Unop.arg);
3985 /* no reduction rule applies */
3986 return IRExpr_Unop( op, aa );
3989 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
3998 args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
3999 for (i = 0; args2[i]; i++)
4000 args2[i] = atbSubst_Expr(env,args2[i]);
4001 return IRExpr_CCall(
4007 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
4010 return IRExpr_Mux0X(
4011 atbSubst_Expr(env, e->Iex.Mux0X.cond),
4012 atbSubst_Expr(env, e->Iex.Mux0X.expr0),
4013 atbSubst_Expr(env, e->Iex.Mux0X.exprX)
4018 atbSubst_Expr(env, e->Iex.Qop.arg1),
4019 atbSubst_Expr(env, e->Iex.Qop.arg2),
4020 atbSubst_Expr(env, e->Iex.Qop.arg3),
4021 atbSubst_Expr(env, e->Iex.Qop.arg4)
4024 return IRExpr_Triop(
4026 atbSubst_Expr(env, e->Iex.Triop.arg1),
4027 atbSubst_Expr(env, e->Iex.Triop.arg2),
4028 atbSubst_Expr(env, e->Iex.Triop.arg3)
4031 return fold_IRExpr_Binop(
4033 atbSubst_Expr(env, e->Iex.Binop.arg1),
4034 atbSubst_Expr(env, e->Iex.Binop.arg2)
4037 return fold_IRExpr_Unop(
4039 atbSubst_Expr(env, e->Iex.Unop.arg)
4045 atbSubst_Expr(env, e->Iex.Load.addr)
4050 atbSubst_Expr(env, e->Iex.GetI.ix),
4057 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4058 vpanic("atbSubst_Expr");
4062 /* Same deal as atbSubst_Expr, except for stmts. */
4064 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
4071 return IRStmt_AbiHint(
4072 atbSubst_Expr(env, st->Ist.AbiHint.base),
4073 st->Ist.AbiHint.len,
4074 atbSubst_Expr(env, st->Ist.AbiHint.nia)
4077 return IRStmt_Store(
4079 atbSubst_Expr(env, st->Ist.Store.addr),
4080 atbSubst_Expr(env, st->Ist.Store.data)
4083 return IRStmt_WrTmp(
4085 atbSubst_Expr(env, st->Ist.WrTmp.data)
4090 atbSubst_Expr(env, st->Ist.Put.data)
4095 atbSubst_Expr(env, st->Ist.PutI.ix),
4097 atbSubst_Expr(env, st->Ist.PutI.data)
4102 atbSubst_Expr(env, st->Ist.Exit.guard),
4107 return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
4109 return IRStmt_NoOp();
4111 return IRStmt_MBE(st->Ist.MBE.event);
4113 cas = st->Ist.CAS.details;
4115 cas->oldHi, cas->oldLo, cas->end,
4116 atbSubst_Expr(env, cas->addr),
4117 cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
4118 atbSubst_Expr(env, cas->expdLo),
4119 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
4120 atbSubst_Expr(env, cas->dataLo)
4122 return IRStmt_CAS(cas2);
4126 st->Ist.LLSC.result,
4127 atbSubst_Expr(env, st->Ist.LLSC.addr),
4128 st->Ist.LLSC.storedata
4129 ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
4132 d = st->Ist.Dirty.details;
4133 d2 = emptyIRDirty();
4135 if (d2->mFx != Ifx_None)
4136 d2->mAddr = atbSubst_Expr(env, d2->mAddr);
4137 d2->guard = atbSubst_Expr(env, d2->guard);
4138 for (i = 0; d2->args[i]; i++)
4139 d2->args[i] = atbSubst_Expr(env, d2->args[i]);
4140 return IRStmt_Dirty(d2);
4142 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4143 vpanic("atbSubst_Stmt");
4147 /* notstatic */ void ado_treebuild_BB ( IRSB* bb )
4150 Bool stmtPuts, stmtStores, invalidateMe;
4153 ATmpInfo env[A_NENV];
4155 Int n_tmps = bb->tyenv->types_used;
4156 UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort));
4158 /* Phase 1. Scan forwards in bb, counting use occurrences of each
4159 temp. Also count occurrences in the bb->next field. */
4161 for (i = 0; i < n_tmps; i++)
4164 for (i = 0; i < bb->stmts_used; i++) {
4166 if (st->tag == Ist_NoOp)
4168 aoccCount_Stmt( uses, st );
4170 aoccCount_Expr(uses, bb->next );
4173 for (i = 0; i < n_tmps; i++) {
4176 ppIRTemp( (IRTemp)i );
4177 vex_printf(" used %d\n", (Int)uses[i] );
4181 /* Phase 2. Scan forwards in bb. For each statement in turn:
4183 If the env is full, emit the end element. This guarantees
4184 there is at least one free slot in the following.
4186 On seeing 't = E', occ(t)==1,
4189 add t -> E' to the front of the env
4190 Examine E' and set the hints for E' appropriately
4191 (doesLoad? doesGet?)
4193 On seeing any other stmt,
4194 let stmt' = env(stmt)
4195 remove from env any 't=E' binds invalidated by stmt
4196 emit the invalidated stmts
4198 compact any holes in env
4199 by sliding entries towards the front
4201 Finally, apply env to bb->next.
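/* Minimal illustrative example (temp names made up): if t1 and t2
   are each used exactly once,
      t1 = Get(0,I32)
      t2 = Add32(t1,0x4:I32)
      STle(t9) = t2
   is rebuilt as
      STle(t9) = Add32(Get(0,I32),0x4:I32)
   handing the instruction selector a whole tree to match at once.
   The doesLoad/doesGet hints prevent a pending binding from being
   carried over a statement that could change what it would read. */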
4204 for (i = 0; i < A_NENV; i++) {
4205 env[i].bindee = NULL;
4206 env[i].binder = IRTemp_INVALID;
4209 /* The stmts in bb are being reordered, and we are guaranteed to
4210 end up with no more than the number we started with. Use i to
4211 be the cursor of the current stmt examined and j <= i to be that
4212 for the current stmt being written.
4215 for (i = 0; i < bb->stmts_used; i++) {
4218 if (st->tag == Ist_NoOp)
4221 /* Ensure there's at least one space in the env, by emitting
4222 the oldest binding if necessary. */
4223 if (env[A_NENV-1].bindee != NULL) {
4224 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
4225 env[A_NENV-1].bindee );
4228 env[A_NENV-1].bindee = NULL;
4231 /* Consider current stmt. */
4232 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
4235 /* optional extra: dump dead bindings as we find them.
4236 Removes the need for a prior dead-code removal pass. */
4237 if (uses[st->Ist.WrTmp.tmp] == 0) {
4238 if (0) vex_printf("DEAD binding\n");
4239 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4241 vassert(uses[st->Ist.WrTmp.tmp] == 1);
4243 /* ok, we have 't = E', occ(t)==1. Do the abovementioned
4245 e = st->Ist.WrTmp.data;
4246 e2 = atbSubst_Expr(env, e);
4247 addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
4248 setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
4249 /* don't advance j, as we are deleting this stmt and instead
4250 holding it temporarily in the env. */
4251 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4254 /* we get here for any other kind of statement. */
4255 /* 'use up' any bindings required by the current statement. */
4256 st2 = atbSubst_Stmt(env, st);
4258 /* Now, before this stmt, dump any bindings in env that it
4259 invalidates. These need to be dumped in the order in which
4260 they originally entered env -- that means from oldest to
4263 /* stmtPuts/stmtStores characterise what the stmt under
4264 consideration does, or might do (erring safely towards True). */
4266 = toBool( st->tag == Ist_Put
4267 || st->tag == Ist_PutI
4268 || st->tag == Ist_Dirty );
4270 /* be True if this stmt writes memory or might do (==> we don't
4271 want to reorder other loads or stores relative to it). Also,
4272 both LL and SC fall under this classification, since we
4273 really ought to be conservative and not reorder any other
4274 memory transactions relative to them. */
4276 = toBool( st->tag == Ist_Store
4277 || st->tag == Ist_Dirty
4278 || st->tag == Ist_LLSC );
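/* For example (informally): a pending binding t1 -> LDle:I32(t4)
   must be flushed out ahead of any statement that stores to memory,
   since the store could change the loaded value; likewise a binding
   containing a Get must be flushed ahead of a Put, PutI or dirty
   call. */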
4280 for (k = A_NENV-1; k >= 0; k--) {
4281 if (env[k].bindee == NULL)
4283 /* Compare the actions of this stmt with the actions of
4284 binding 'k', to see if they invalidate the binding. */
4287 /* a store invalidates loaded data */
4288 (env[k].doesLoad && stmtStores)
4289 /* a put invalidates get'd data */
4290 || (env[k].doesGet && stmtPuts)
4291 /* a put invalidates loaded data. Note, we could do
4292 much better here in the sense that we only need to
4293 invalidate trees containing loads if the Put in
4294 question is marked as requiring precise
4296 || (env[k].doesLoad && stmtPuts)
4297 /* probably overly conservative: a memory bus event
4298 invalidates absolutely everything, so that all
4299 computation prior to it is forced to complete before
4300 proceeding with the event (fence,lock,unlock). */
4301 || st->tag == Ist_MBE
4302 /* also be (probably overly) paranoid re AbiHints */
4303 || st->tag == Ist_AbiHint
4306 bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
4309 env[k].bindee = NULL;
4313 /* Slide in-use entries in env up to the front */
4315 for (k = 0; k < A_NENV; k++) {
4316 if (env[k].bindee != NULL) {
4321 for (; m < A_NENV; m++) {
4322 env[m].bindee = NULL;
4325 /* finally, emit the substituted statement */
4327 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
4331 } /* for each stmt in the original bb ... */
4333 /* Finally ... substitute the ->next field as much as possible, and
4334 dump any left-over bindings. Hmm. Perhaps there should be no
4335 left-over bindings? Or perhaps any left-over bindings are
4336 by definition dead? */
4337 bb->next = atbSubst_Expr(env, bb->next);
4342 /*---------------------------------------------------------------*/
4343 /*--- iropt main ---*/
4344 /*---------------------------------------------------------------*/
4346 static Bool iropt_verbose = False; /* True; */
4349 /* Do a simple cleanup pass on bb. This is: redundant Get removal,
4350 redundant Put removal, constant propagation, dead code removal,
4351 clean helper specialisation, and dead code removal (again).
4356 IRSB* cheap_transformations (
4358 IRExpr* (*specHelper) (HChar*, IRExpr**),
4359 Bool (*preciseMemExnsFn)(Int,Int)
4362 redundant_get_removal_BB ( bb );
4363 if (iropt_verbose) {
4364 vex_printf("\n========= REDUNDANT GET\n\n" );
4368 redundant_put_removal_BB ( bb, preciseMemExnsFn );
4369 if (iropt_verbose) {
4370 vex_printf("\n========= REDUNDANT PUT\n\n" );
4374 bb = cprop_BB ( bb );
4375 if (iropt_verbose) {
4376 vex_printf("\n========= CPROPD\n\n" );
4380 do_deadcode_BB ( bb );
4381 if (iropt_verbose) {
4382 vex_printf("\n========= DEAD\n\n" );
4386 bb = spec_helpers_BB ( bb, specHelper );
4387 do_deadcode_BB ( bb );
4388 if (iropt_verbose) {
4389 vex_printf("\n========= SPECd \n\n" );
4397 /* Do some more expensive transformations on bb, which are aimed at
4398 optimising as much as possible in the presence of GetI and PutI. */
4401 IRSB* expensive_transformations( IRSB* bb )
4403 (void)do_cse_BB( bb );
4404 collapse_AddSub_chains_BB( bb );
4405 do_redundant_GetI_elimination( bb );
4406 do_redundant_PutI_elimination( bb );
4407 do_deadcode_BB( bb );
4412 /* Scan a flattened BB to look for signs that more expensive
4413 optimisations might be useful:
4414 - find out if there are any GetIs and PutIs
4415 - find out if there are any floating or vector-typed temporaries
4418 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
4419 /*OUT*/Bool* hasVorFtemps,
4427 *hasGetIorPutI = False;
4428 *hasVorFtemps = False;
4430 for (i = 0; i < bb->stmts_used; i++) {
4434 vassert(isIRAtom(st->Ist.AbiHint.base));
4435 vassert(isIRAtom(st->Ist.AbiHint.nia));
4438 *hasGetIorPutI = True;
4441 if (st->Ist.WrTmp.data->tag == Iex_GetI)
4442 *hasGetIorPutI = True;
4443 switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
4444 case Ity_I1: case Ity_I8: case Ity_I16:
4445 case Ity_I32: case Ity_I64: case Ity_I128:
4447 case Ity_F32: case Ity_F64: case Ity_V128:
4448 *hasVorFtemps = True;
4455 vassert(isIRAtom(st->Ist.Put.data));
4458 vassert(isIRAtom(st->Ist.Store.addr));
4459 vassert(isIRAtom(st->Ist.Store.data));
4462 cas = st->Ist.CAS.details;
4463 vassert(isIRAtom(cas->addr));
4464 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
4465 vassert(isIRAtom(cas->expdLo));
4466 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
4467 vassert(isIRAtom(cas->dataLo));
4470 vassert(isIRAtom(st->Ist.LLSC.addr));
4471 if (st->Ist.LLSC.storedata)
4472 vassert(isIRAtom(st->Ist.LLSC.storedata));
4475 d = st->Ist.Dirty.details;
4476 vassert(isIRAtom(d->guard));
4477 for (j = 0; d->args[j]; j++)
4478 vassert(isIRAtom(d->args[j]));
4479 if (d->mFx != Ifx_None)
4480 vassert(isIRAtom(d->mAddr));
4487 vassert(isIRAtom(st->Ist.Exit.guard));
4492 vpanic("considerExpensives");
4498 /* ---------------- The main iropt entry point. ---------------- */
4500 /* exported from this file */
4501 /* Rules of the game:
4503 - IRExpr/IRStmt trees should be treated as immutable, as they
4504 may get shared. So never change a field of such a tree node;
4505 instead construct and return a new one if needed.
4509 IRSB* do_iropt_BB ( IRSB* bb0,
4510 IRExpr* (*specHelper) (HChar*, IRExpr**),
4511 Bool (*preciseMemExnsFn)(Int,Int),
4514 static Int n_total = 0;
4515 static Int n_expensive = 0;
4517 Bool hasGetIorPutI, hasVorFtemps;
4522 /* First flatten the block out, since all other
4523 phases assume flat code. */
4525 bb = flatten_BB ( bb0 );
4527 if (iropt_verbose) {
4528 vex_printf("\n========= FLAT\n\n" );
4532 /* If at level 0, stop now. */
4533 if (vex_control.iropt_level <= 0) return bb;
4535 /* Now do a preliminary cleanup pass, and figure out if we also
4536 need to do 'expensive' optimisations. Expensive optimisations
4537 are deemed necessary if the block contains any GetIs or PutIs.
4538 If needed, do expensive transformations and then another cheap
4541 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4543 if (vex_control.iropt_level > 1) {
4545 /* Peer at what we have, to decide how much more effort to throw
4547 considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
4549 if (hasVorFtemps && !hasGetIorPutI) {
4550 /* If any evidence of FP or Vector activity, CSE, as that
4551 tends to mop up all manner of lardy code to do with
4552 rounding modes. Don't bother if hasGetIorPutI since that
4553 case leads into the expensive transformations, which do
4555 (void)do_cse_BB( bb );
4556 do_deadcode_BB( bb );
4559 if (hasGetIorPutI) {
4563 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
4564 bb = expensive_transformations( bb );
4565 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4566 /* Potentially common up GetIs */
4567 cses = do_cse_BB( bb );
4569 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4572 /* Now have a go at unrolling simple (single-BB) loops. If
4573 successful, clean up the results as much as possible. */
4575 bb2 = maybe_loop_unroll_BB( bb, guest_addr );
4577 bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
4578 if (hasGetIorPutI) {
4579 bb = expensive_transformations( bb );
4580 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4582 /* at least do CSE and dead code removal */
4584 do_deadcode_BB( bb );
4586 if (0) vex_printf("vex iropt: unrolled a loop\n");
4595 /*---------------------------------------------------------------*/
4596 /*--- end ir_opt.c ---*/
4597 /*---------------------------------------------------------------*/