2 /*---------------------------------------------------------------*/
3 /*--- begin ir_opt.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
40 #include "main_util.h"
41 #include "main_globals.h"
45 /* Set to 1 for lots of debugging output. */
49 /* What iropt does, 29 Dec 04.
51 It takes an IRSB and produces a new one with the same meaning,
54 After execution of the new BB, all guest state and guest memory is
55 the same as after execution of the original. This is true
56 regardless of how the block was exited (at the end vs side exit).
58 In addition, parts of the guest state will be identical to that
59 created by execution of the original at the following observation points:
62 * In a dirty helper call, any parts of the guest state that the
63 helper states that it reads or modifies will be up to date.
64 Also, guest memory will be up to date. Parts of the guest state
65 not marked as being read or modified by the helper cannot be
66 assumed to be up-to-date at the point where the helper is called.
68 * Immediately prior to any load or store, those parts of the guest
69 state marked as requiring precise exceptions will be up to date.
70 Also, guest memory will be up to date. Parts of the guest state
71 not marked as requiring precise exceptions cannot be assumed to
72 be up-to-date at the point of the load/store.
74 The relative order of loads and stores (including loads/stores of
75 guest memory done by dirty helpers annotated as such) is not
76 changed. However, the relative order of loads with no intervening
77 stores/modifies may be changed.
82 There are three levels of optimisation, controlled by
83 vex_control.iropt_level. Define first:
85 "Cheap transformations" are the following sequence:
86 * Redundant-Get removal
87 * Redundant-Put removal
88 * Constant propagation/folding
90 * Specialisation of clean helper functions
93 "Expensive transformations" are the following sequence:
95 * Folding of add/sub chains
96 * Redundant-GetI removal
97 * Redundant-PutI removal
100 Then the transformations are as follows, as defined by
101 vex_control.iropt_level:
104 Level 0:
       * Flatten into atomic form.
106 Level 1: the following sequence:
107 * Flatten into atomic form.
108 * Cheap transformations.
110 Level 2: the following sequence
111 * Flatten into atomic form.
112 * Cheap transformations.
113 * If block contains any floating or vector types, CSE.
114 * If block contains GetI or PutI, Expensive transformations.
115 * Try unrolling loops. Three possible outcomes:
116 - No effect: do nothing more.
117 - Unrolled a loop, and block does not contain GetI or PutI:
      Do: * CSE
          * Cheap transformations
120 - Unrolled a loop, and block contains GetI or PutI:
121 Do: * Expensive transformations
122 * Cheap transformations
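   (Illustratively -- this is a sketch, not the literal driver code --
   the dispatch on vex_control.iropt_level described above amounts to:

       bb = flatten_BB(bb);                         done at every level
       if level >= 1: apply the cheap transformations
       if level >= 2: additionally apply CSE, the expensive
                      transformations and loop unrolling, as listed
                      above
   )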
125 /* Implementation notes, 29 Dec 04.
127 TODO (important): I think rPutI removal ignores precise exceptions
128 and is therefore, in a sense, wrong: it assumes that PutIs do not
129 write parts of the guest state that we need to have
130 up-to-date at loads/stores. So far on x86 guest that has not
131 mattered since indeed only the x87 FP registers and tags are
132 accessed using GetI/PutI, and there is no need so far for them to
133 be up to date at mem exception points. The rPutI pass should be fixed.
136 TODO: improve pessimistic handling of precise exceptions
139 TODO: check interaction of rGetI and dirty helpers.
141 F64i constants are treated differently from other constants.
142 They are not regarded as atoms, and instead lifted off and
143 bound to temps. This allows them to participate in CSE, which
144 is important for getting good performance for x86 guest code.
146 CSE up F64 literals (already doing F64is)
148 CSE: consider carefully the requirement for precise exns
149 prior to making CSE any more aggressive. */
152 /*---------------------------------------------------------------*/
153 /*--- Finite mappery, of a sort ---*/
154 /*---------------------------------------------------------------*/
156 /* General map from HWord-sized thing to HWord-sized thing. Could be by
157 hashing, but it's not clear whether or not this would really be any
    faster. */
170 static HashHW* newHHW ( void )
172 HashHW* h = LibVEX_Alloc(sizeof(HashHW));
175 h->inuse = LibVEX_Alloc(h->size * sizeof(Bool));
176 h->key = LibVEX_Alloc(h->size * sizeof(HWord));
177 h->val = LibVEX_Alloc(h->size * sizeof(HWord));
182 /* Look up key in the map. */
184 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
187 /* vex_printf("lookupHHW(%llx)\n", key ); */
188 for (i = 0; i < h->used; i++) {
189 if (h->inuse[i] && h->key[i] == key) {
199 /* Add key->val to the map. Replaces any existing binding for key. */
201 static void addToHHW ( HashHW* h, HWord key, HWord val )
204 /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
206 /* Find and replace existing binding, if any. */
207 for (i = 0; i < h->used; i++) {
208 if (h->inuse[i] && h->key[i] == key) {
214 /* Ensure a space is available. */
215 if (h->used == h->size) {
216 /* Copy into arrays twice the size. */
217 Bool* inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
218 HWord* key2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
219 HWord* val2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
220 for (i = j = 0; i < h->size; i++) {
221 if (!h->inuse[i]) continue;
234 /* Finally, add it. */
235 vassert(h->used < h->size);
236 h->inuse[h->used] = True;
237 h->key[h->used] = key;
238 h->val[h->used] = val;
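/* Illustrative usage (a sketch, not pass code): the optimisation passes
   below drive this map with packed guest-state-range keys, roughly as

      HashHW* env = newHHW();
      addToHHW( env, (HWord)key, (HWord)(void*)someExpr );
      HWord v;
      if (lookupHHW( env, &v, (HWord)key )) {
         IRExpr* bound = (IRExpr*)v;
         ...
      }

   and they invalidate bindings simply by clearing h->inuse[i]. */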
243 /*---------------------------------------------------------------*/
244 /*--- Flattening out a BB into atomic SSA form ---*/
245 /*---------------------------------------------------------------*/
247 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
248 copies made by flattening. If in doubt return False. */
250 static Bool isFlat ( IRExpr* e )
252 if (e->tag == Iex_Get)
       return True;
254 if (e->tag == Iex_Binop)
255 return toBool( isIRAtom(e->Iex.Binop.arg1)
256 && isIRAtom(e->Iex.Binop.arg2) );
257 if (e->tag == Iex_Load)
258 return isIRAtom(e->Iex.Load.addr);
262 /* Flatten out 'ex' so it is atomic, returning a new expression with
263 the same value, after having appended extra IRTemp assignments to
266 static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
270 IRType ty = typeOfIRExpr(bb->tyenv, ex);
276 t1 = newIRTemp(bb->tyenv, ty);
277 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
278 IRExpr_GetI(ex->Iex.GetI.descr,
279 flatten_Expr(bb, ex->Iex.GetI.ix),
280 ex->Iex.GetI.bias)));
281 return IRExpr_RdTmp(t1);
284 t1 = newIRTemp(bb->tyenv, ty);
286 IRStmt_WrTmp(t1, ex));
287 return IRExpr_RdTmp(t1);
290 t1 = newIRTemp(bb->tyenv, ty);
291 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
292 IRExpr_Qop(ex->Iex.Qop.op,
293 flatten_Expr(bb, ex->Iex.Qop.arg1),
294 flatten_Expr(bb, ex->Iex.Qop.arg2),
295 flatten_Expr(bb, ex->Iex.Qop.arg3),
296 flatten_Expr(bb, ex->Iex.Qop.arg4))));
297 return IRExpr_RdTmp(t1);
300 t1 = newIRTemp(bb->tyenv, ty);
301 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
302 IRExpr_Triop(ex->Iex.Triop.op,
303 flatten_Expr(bb, ex->Iex.Triop.arg1),
304 flatten_Expr(bb, ex->Iex.Triop.arg2),
305 flatten_Expr(bb, ex->Iex.Triop.arg3))));
306 return IRExpr_RdTmp(t1);
309 t1 = newIRTemp(bb->tyenv, ty);
310 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
311 IRExpr_Binop(ex->Iex.Binop.op,
312 flatten_Expr(bb, ex->Iex.Binop.arg1),
313 flatten_Expr(bb, ex->Iex.Binop.arg2))));
314 return IRExpr_RdTmp(t1);
317 t1 = newIRTemp(bb->tyenv, ty);
318 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
319 IRExpr_Unop(ex->Iex.Unop.op,
320 flatten_Expr(bb, ex->Iex.Unop.arg))));
321 return IRExpr_RdTmp(t1);
324 t1 = newIRTemp(bb->tyenv, ty);
325 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
326 IRExpr_Load(ex->Iex.Load.end,
328 flatten_Expr(bb, ex->Iex.Load.addr))));
329 return IRExpr_RdTmp(t1);
332 newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
333 for (i = 0; newargs[i]; i++)
334 newargs[i] = flatten_Expr(bb, newargs[i]);
335 t1 = newIRTemp(bb->tyenv, ty);
336 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
337 IRExpr_CCall(ex->Iex.CCall.cee,
340 return IRExpr_RdTmp(t1);
343 t1 = newIRTemp(bb->tyenv, ty);
344 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
345 IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
346 flatten_Expr(bb, ex->Iex.Mux0X.expr0),
347 flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
348 return IRExpr_RdTmp(t1);
351 /* Lift F64i constants out onto temps so they can be CSEd
353 if (ex->Iex.Const.con->tag == Ico_F64i) {
354 t1 = newIRTemp(bb->tyenv, ty);
355 addStmtToIRSB(bb, IRStmt_WrTmp(t1,
356 IRExpr_Const(ex->Iex.Const.con)));
357 return IRExpr_RdTmp(t1);
359 /* Leave all other constants alone. */
370 vpanic("flatten_Expr");
375 /* Append a completely flattened form of 'st' to the end of 'bb'. */
377 static void flatten_Stmt ( IRSB* bb, IRStmt* st )
380 IRExpr *e1, *e2, *e3, *e4, *e5;
385 if (isIRAtom(st->Ist.Put.data)) {
386 /* optimisation to reduce the amount of heap wasted
388 addStmtToIRSB(bb, st);
390 /* general case, always correct */
391 e1 = flatten_Expr(bb, st->Ist.Put.data);
392 addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
396 e1 = flatten_Expr(bb, st->Ist.PutI.ix);
397 e2 = flatten_Expr(bb, st->Ist.PutI.data);
398 addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
404 if (isFlat(st->Ist.WrTmp.data)) {
405 /* optimisation, to reduce the number of tmp-tmp
407 addStmtToIRSB(bb, st);
409 /* general case, always correct */
410 e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
411 addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
415 e1 = flatten_Expr(bb, st->Ist.Store.addr);
416 e2 = flatten_Expr(bb, st->Ist.Store.data);
417 addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
420 cas = st->Ist.CAS.details;
421 e1 = flatten_Expr(bb, cas->addr);
422 e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
423 e3 = flatten_Expr(bb, cas->expdLo);
424 e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
425 e5 = flatten_Expr(bb, cas->dataLo);
426 cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
427 e1, e2, e3, e4, e5 );
428 addStmtToIRSB(bb, IRStmt_CAS(cas2));
431 e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
432 e2 = st->Ist.LLSC.storedata
433 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
435 addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
436 st->Ist.LLSC.result, e1, e2));
439 d = st->Ist.Dirty.details;
442 d2->args = shallowCopyIRExprVec(d2->args);
443 if (d2->mFx != Ifx_None) {
444 d2->mAddr = flatten_Expr(bb, d2->mAddr);
446 vassert(d2->mAddr == NULL);
448 d2->guard = flatten_Expr(bb, d2->guard);
449 for (i = 0; d2->args[i]; i++)
450 d2->args[i] = flatten_Expr(bb, d2->args[i]);
451 addStmtToIRSB(bb, IRStmt_Dirty(d2));
456 addStmtToIRSB(bb, st);
459 e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
460 e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
461 addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
464 e1 = flatten_Expr(bb, st->Ist.Exit.guard);
465 addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
472 vpanic("flatten_Stmt");
477 static IRSB* flatten_BB ( IRSB* in )
482 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
483 for (i = 0; i < in->stmts_used; i++)
485 flatten_Stmt( out, in->stmts[i] );
486 out->next = flatten_Expr( out, in->next );
487 out->jumpkind = in->jumpkind;
492 /*---------------------------------------------------------------*/
493 /*--- In-place removal of redundant GETs ---*/
494 /*---------------------------------------------------------------*/
496 /* Scan forwards, building up an environment binding (min offset, max
497 offset) pairs to values, which will either be temps or constants.
499 On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
500 env and if it matches, replace the Get with the stored value. If
501 there is no match, add a (minoff,maxoff) :-> t binding.
503 On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
504 any binding which fully or partially overlaps with (minoff,maxoff).
505 Then add a new (minoff,maxoff) :-> t or c binding. */
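/* A worked example (illustrative only), in the IR printer's syntax:

      t1 = GET:I32(16)
      t2 = GET:I32(16)      -- rewritten to:  t2 = t1
      PUT(16) = t3          -- invalidates (16,19), then rebinds it to t3
      t4 = GET:I32(16)      -- rewritten to:  t4 = t3
*/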
507 /* Extract the min/max offsets from a guest state array descriptor. */
510 static void getArrayBounds ( IRRegArray* descr,
511 UInt* minoff, UInt* maxoff )
513 *minoff = descr->base;
514 *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
515 vassert((*minoff & ~0xFFFF) == 0);
516 vassert((*maxoff & ~0xFFFF) == 0);
517 vassert(*minoff <= *maxoff);
520 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
522 static UInt mk_key_GetPut ( Int offset, IRType ty )
524 /* offset should fit in 16 bits. */
525 UInt minoff = offset;
526 UInt maxoff = minoff + sizeofIRType(ty) - 1;
527 vassert((minoff & ~0xFFFF) == 0);
528 vassert((maxoff & ~0xFFFF) == 0);
529 return (minoff << 16) | maxoff;
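/* For example, mk_key_GetPut(16, Ity_I32) gives minoff == 16 and
   maxoff == 19, hence the key 0x00100013, i.e. (16 << 16) | 19.
   (Illustrative only.) */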
532 static UInt mk_key_GetIPutI ( IRRegArray* descr )
535 getArrayBounds( descr, &minoff, &maxoff );
536 vassert((minoff & ~0xFFFF) == 0);
537 vassert((maxoff & ~0xFFFF) == 0);
538 return (minoff << 16) | maxoff;
541 /* Supposing h has keys of the form generated by mk_key_GetPut and
542 mk_key_GetIPutI, invalidate any key which overlaps (k_lo
545 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
549 vassert(k_lo <= k_hi);
550 /* invalidate any env entries which in any way overlap (k_lo
552 /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
554 for (j = 0; j < h->used; j++) {
557 e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
558 e_hi = ((UInt)h->key[j]) & 0xFFFF;
559 vassert(e_lo <= e_hi);
560 if (e_hi < k_lo || k_hi < e_lo)
561 continue; /* no overlap possible */
563 /* overlap; invalidate */
569 static void redundant_get_removal_BB ( IRSB* bb )
571 HashHW* env = newHHW();
572 UInt key = 0; /* keep gcc -O happy */
576 for (i = 0; i < bb->stmts_used; i++) {
577 IRStmt* st = bb->stmts[i];
579 if (st->tag == Ist_NoOp)
583 if (st->tag == Ist_WrTmp
584 && st->Ist.WrTmp.data->tag == Iex_Get) {
585 /* st is 't = Get(...)'. Look up in the environment and see
586 if the Get can be replaced. */
587 IRExpr* get = st->Ist.WrTmp.data;
588 key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
590 if (lookupHHW(env, &val, (HWord)key)) {
592 /* Note, we could do better here. If the types are
593 different we don't do the substitution, since doing so
594 could lead to invalidly-typed IR. An improvement would
595 be to stick in a reinterpret-style cast, although that
596 would make maintaining flatness more difficult. */
597 IRExpr* valE = (IRExpr*)val;
598 Bool typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
599 == st->Ist.WrTmp.data->Iex.Get.ty );
600 if (typesOK && DEBUG_IROPT) {
601 vex_printf("rGET: "); ppIRExpr(get);
602 vex_printf(" -> "); ppIRExpr(valE);
606 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
608 /* Not found, but at least we know that t and the Get(...)
609 are now associated. So add a binding to reflect that
611 addToHHW( env, (HWord)key,
612 (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
616 /* Deal with Puts: invalidate any env entries overlapped by this
618 if (st->tag == Ist_Put || st->tag == Ist_PutI) {
620 if (st->tag == Ist_Put) {
621 key = mk_key_GetPut( st->Ist.Put.offset,
622 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
624 vassert(st->tag == Ist_PutI);
625 key = mk_key_GetIPutI( st->Ist.PutI.descr );
628 k_lo = (key >> 16) & 0xFFFF;
630 invalidateOverlaps(env, k_lo, k_hi);
633 if (st->tag == Ist_Dirty) {
634 /* Deal with dirty helpers which write or modify guest state.
635 Invalidate the entire env. We could do a lot better
637 IRDirty* d = st->Ist.Dirty.details;
639 for (j = 0; j < d->nFxState; j++) {
640 if (d->fxState[j].fx == Ifx_Modify
641 || d->fxState[j].fx == Ifx_Write)
645 /* dump the entire env (not clever, but correct ...) */
646 for (j = 0; j < env->used; j++)
647 env->inuse[j] = False;
648 if (0) vex_printf("rGET: trash env due to dirty helper\n");
652 /* add this one to the env, if appropriate */
653 if (st->tag == Ist_Put) {
654 vassert(isIRAtom(st->Ist.Put.data));
655 addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
658 } /* for (i = 0; i < bb->stmts_used; i++) */
663 /*---------------------------------------------------------------*/
664 /*--- In-place removal of redundant PUTs ---*/
665 /*---------------------------------------------------------------*/
667 /* Find any Get uses in st and invalidate any partially or fully
668 overlapping ranges listed in env. Due to the flattening phase, the
669 only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
671 static void handle_gets_Stmt (
674 Bool (*preciseMemExnsFn)(Int,Int)
678 UInt key = 0; /* keep gcc -O happy */
685 /* This is the only interesting case. Deal with Gets in the RHS expression. */
688 e = st->Ist.WrTmp.data;
692 key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
696 key = mk_key_GetIPutI ( e->Iex.GetI.descr );
707 k_lo = (key >> 16) & 0xFFFF;
709 invalidateOverlaps(env, k_lo, k_hi);
713 /* Be very conservative for dirty helper calls; dump the entire
714 environment. The helper might read guest state, in which
715 case it needs to be flushed first. Also, the helper might
716 access guest memory, in which case all parts of the guest
717 state requiring precise exceptions need to be flushed. The
718 crude solution is just to flush everything; we could easily
719 enough do a lot better if needed. */
720 /* Probably also overly-conservative, but also dump everything
721 if we hit a memory bus event (fence, lock, unlock). Ditto
722 AbiHints, CASs, LLs and SCs. */
724 vassert(isIRAtom(st->Ist.AbiHint.base));
725 vassert(isIRAtom(st->Ist.AbiHint.nia));
731 for (j = 0; j < env->used; j++)
732 env->inuse[j] = False;
735 /* all other cases are boring. */
737 vassert(isIRAtom(st->Ist.Store.addr));
738 vassert(isIRAtom(st->Ist.Store.data));
743 vassert(isIRAtom(st->Ist.Exit.guard));
747 vassert(isIRAtom(st->Ist.PutI.ix));
748 vassert(isIRAtom(st->Ist.PutI.data));
759 vpanic("handle_gets_Stmt");
763 /* This statement accesses memory. So we need to dump all parts
764 of the environment corresponding to guest state that may not
765 be reordered with respect to memory references. That means
766 at least the stack pointer. */
767 for (j = 0; j < env->used; j++) {
770 if (vex_control.iropt_precise_memory_exns) {
771 /* Precise exceptions required. Flush all guest state. */
772 env->inuse[j] = False;
774 /* Just flush the minimal amount required, as computed by preciseMemExnsFn. */
776 HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
777 HWord k_hi = env->key[j] & 0xFFFF;
778 if (preciseMemExnsFn( k_lo, k_hi ))
779 env->inuse[j] = False;
787 /* Scan backwards, building up a set of (min offset, max
788 offset) pairs, indicating those parts of the guest state
789 for which the next event is a write.
791 On seeing a conditional exit, empty the set.
793 On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
794 completely within the set, remove the Put. Otherwise, add
795 (minoff,maxoff) to the set.
797 On seeing 'Get (minoff,maxoff)', remove any part of the set
798 overlapping (minoff,maxoff). The same has to happen for any events
799 which implicitly read parts of the guest state: dirty helper calls, and
    (where precise exceptions require it) loads and stores of guest memory. */
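/* A worked example (illustrative only).  Scanning backwards through

      PUT(60) = t1
      t2 = GET:I32(0)
      PUT(60) = t3
      PUT(0)  = t4

   PUT(0) and then PUT(60) enter the set; the GET removes (0,3) from
   the set; and the first (earliest) PUT(60) is then found to be covered
   by the set, so it is overwritten with an IRStmt_NoOp. */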
803 static void redundant_put_removal_BB (
805 Bool (*preciseMemExnsFn)(Int,Int)
811 UInt key = 0; /* keep gcc -O happy */
813 HashHW* env = newHHW();
814 for (i = bb->stmts_used-1; i >= 0; i--) {
817 if (st->tag == Ist_NoOp)
820 /* Deal with conditional exits. */
821 if (st->tag == Ist_Exit) {
822 /* Since control may not get beyond this point, we must empty
823 out the set, since we can no longer claim that the next
824 event for any part of the guest state is definitely a
826 vassert(isIRAtom(st->Ist.Exit.guard));
827 for (j = 0; j < env->used; j++)
828 env->inuse[j] = False;
836 key = mk_key_GetPut( st->Ist.Put.offset,
837 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
838 vassert(isIRAtom(st->Ist.Put.data));
842 key = mk_key_GetIPutI( st->Ist.PutI.descr );
843 vassert(isIRAtom(st->Ist.PutI.ix));
844 vassert(isIRAtom(st->Ist.PutI.data));
849 if (isPut && st->tag != Ist_PutI) {
850 /* See if any single entry in env overlaps this Put. This is
851 simplistic in that the transformation is valid if, say, two
852 or more entries in the env overlap this Put, but the use of
853 lookupHHW will only find a single entry which exactly
854 overlaps this Put. This is suboptimal but safe. */
855 if (lookupHHW(env, NULL, (HWord)key)) {
856 /* This Put is redundant because a later one will overwrite
857 it. So NULL (nop) it out. */
859 vex_printf("rPUT: "); ppIRStmt(st);
862 bb->stmts[i] = IRStmt_NoOp();
864 /* We can't demonstrate that this Put is redundant, so add it
865 to the running collection. */
866 addToHHW(env, (HWord)key, 0);
871 /* Deal with Gets. These remove bits of the environment since
872 appearance of a Get means that the next event for that slice
873 of the guest state is no longer a write, but a read. Also
874 deals with implicit reads of guest state needed to maintain
875 precise exceptions. */
876 handle_gets_Stmt( env, st, preciseMemExnsFn );
881 /*---------------------------------------------------------------*/
882 /*--- Constant propagation and folding ---*/
883 /*---------------------------------------------------------------*/
885 /* The env in this section is a map from IRTemp to IRExpr*,
886 that is, an array indexed by IRTemp. */
888 /* Are both expressions simply the same IRTemp ? */
889 static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
891 return toBool( e1->tag == Iex_RdTmp
892 && e2->tag == Iex_RdTmp
893 && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
896 static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
898 return toBool( e1->tag == Iex_Const
899 && e2->tag == Iex_Const
900 && e1->Iex.Const.con->tag == Ico_U32
901 && e2->Iex.Const.con->tag == Ico_U32
902 && e1->Iex.Const.con->Ico.U32
903 == e2->Iex.Const.con->Ico.U32 );
906 /* Are both expressions either the same IRTemp or IRConst-U32s ? If in doubt, say No. */
908 static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
912 return sameIRTemps(e1, e2);
914 return sameIcoU32s(e1, e2);
920 static Bool notBool ( Bool b )
922 if (b == True) return False;
923 if (b == False) return True;
927 /* Make a zero which has the same type as the result of the given
929 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
932 case Iop_Xor8: return IRExpr_Const(IRConst_U8(0));
933 case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
935 case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
937 case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
938 case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
939 default: vpanic("mkZeroOfPrimopResultType: bad primop");
943 /* Make a value containing all 1-bits, which has the same type as the
944 result of the given primop. */
945 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
949 return IRExpr_Const(IRConst_U1(toBool(1)));
951 return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
953 return IRExpr_Const(IRConst_V128(0xFFFF));
955 vpanic("mkOnesOfPrimopResultType: bad primop");
959 /* Helpers for folding Clz32/64. */
960 static UInt fold_Clz64 ( ULong value )
963 vassert(value != 0ULL); /* no defined semantics for arg==0 */
964 for (i = 0; i < 64; ++i) {
965 if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
972 static UInt fold_Clz32 ( UInt value )
975 vassert(value != 0); /* no defined semantics for arg==0 */
976 for (i = 0; i < 32; ++i) {
977 if (0 != (value & (((UInt)1) << (31 - i)))) return i;
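/* For instance, fold_Clz32(0x00010000) == 15: the highest set bit is
   bit 16, so bits 31..17 -- fifteen of them -- are the leading zeroes.
   (Illustrative only.) */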
985 static IRExpr* fold_Expr ( IRExpr* e )
988 IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
991 if (e->tag == Iex_Unop
992 && e->Iex.Unop.arg->tag == Iex_Const) {
993 switch (e->Iex.Unop.op) {
995 e2 = IRExpr_Const(IRConst_U8(toUChar(
996 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1000 e2 = IRExpr_Const(IRConst_U32(
1001 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1005 e2 = IRExpr_Const(IRConst_U64(
1006 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1011 e2 = IRExpr_Const(IRConst_U8(toUChar(
1012 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1016 e2 = IRExpr_Const(IRConst_U16(toUShort(
1017 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1021 e2 = IRExpr_Const(IRConst_U32(
1022 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1026 e2 = IRExpr_Const(IRConst_U64(
1027 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1028 ? 0xFFFFFFFFFFFFFFFFULL : 0));
1032 /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1035 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1039 /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1042 e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
1046 e2 = IRExpr_Const(IRConst_U64(
1047 0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1050 e2 = IRExpr_Const(IRConst_U64(
1051 0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1054 e2 = IRExpr_Const(IRConst_U32(
1055 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1058 /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1061 e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
1065 e2 = IRExpr_Const(IRConst_U16(
1066 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1069 e2 = IRExpr_Const(IRConst_U32(
1070 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1073 e2 = IRExpr_Const(IRConst_U16(toUShort(
1074 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1077 e2 = IRExpr_Const(IRConst_U8(toUChar(
1078 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1081 e2 = IRExpr_Const(IRConst_U1(toBool(
1082 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1086 e2 = IRExpr_Const(IRConst_U1(toBool(
1087 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1092 e2 = IRExpr_Const(IRConst_U64(
1093 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1096 e2 = IRExpr_Const(IRConst_U32(
1097 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1100 e2 = IRExpr_Const(IRConst_U16(toUShort(
1101 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1104 e2 = IRExpr_Const(IRConst_U8(toUChar(
1105 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1109 e2 = IRExpr_Const(IRConst_U1(
1110 notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1114 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1116 e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1120 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1122 e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1126 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1127 w64 &= 0x00000000FFFFFFFFULL;
1128 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1131 case Iop_64HIto32: {
1132 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1134 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1138 e2 = IRExpr_Const(IRConst_U64(
1140 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1143 /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1146 e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1150 /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1153 e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1158 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1160 e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1164 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1167 e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1172 e2 = IRExpr_Const(IRConst_U1(toBool(
1174 (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1178 e2 = IRExpr_Const(IRConst_U1(toBool(
1180 (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1184 e2 = IRExpr_Const(IRConst_U1(toBool(
1185 0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1189 case Iop_CmpwNEZ32: {
1190 UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1192 e2 = IRExpr_Const(IRConst_U32( 0 ));
1194 e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1197 case Iop_CmpwNEZ64: {
1198 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1200 e2 = IRExpr_Const(IRConst_U64( 0 ));
1202 e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1207 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1208 Int s32 = (Int)(u32 & 0xFFFFFFFF);
1209 s32 = (s32 | (-s32));
1210 e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1215 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1216 Long s64 = (Long)u64;
1217 s64 = (s64 | (-s64));
1218 e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1223 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1225 e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
1229 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1231 e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
1241 if (e->tag == Iex_Binop) {
1242 if (e->Iex.Binop.arg1->tag == Iex_Const
1243 && e->Iex.Binop.arg2->tag == Iex_Const) {
1244 /* cases where both args are consts */
1245 switch (e->Iex.Binop.op) {
1249 e2 = IRExpr_Const(IRConst_U8(toUChar(
1250 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1251 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1254 e2 = IRExpr_Const(IRConst_U16(toUShort(
1255 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1256 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1259 e2 = IRExpr_Const(IRConst_U32(
1260 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1261 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1264 e2 = IRExpr_Const(IRConst_U64(
1265 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1266 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1271 e2 = IRExpr_Const(IRConst_U8(toUChar(
1272 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1273 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1276 e2 = IRExpr_Const(IRConst_U16(toUShort(
1277 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1278 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1281 e2 = IRExpr_Const(IRConst_U32(
1282 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1283 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1286 e2 = IRExpr_Const(IRConst_U64(
1287 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1288 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1293 e2 = IRExpr_Const(IRConst_U8(toUChar(
1294 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1295 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1298 e2 = IRExpr_Const(IRConst_U16(toUShort(
1299 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1300 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1303 e2 = IRExpr_Const(IRConst_U32(
1304 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1305 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1308 e2 = IRExpr_Const(IRConst_U64(
1309 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1310 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1315 e2 = IRExpr_Const(IRConst_U8(toUChar(
1316 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1317 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1320 e2 = IRExpr_Const(IRConst_U32(
1321 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1322 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1325 e2 = IRExpr_Const(IRConst_U64(
1326 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1327 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1332 e2 = IRExpr_Const(IRConst_U8(toUChar(
1333 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1334 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1337 e2 = IRExpr_Const(IRConst_U32(
1338 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1339 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1342 e2 = IRExpr_Const(IRConst_U64(
1343 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1344 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1349 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1350 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1351 UInt res = u32a > u32b ? u32a : u32b;
1352 e2 = IRExpr_Const(IRConst_U32(res));
1358 e2 = IRExpr_Const(IRConst_U32(
1359 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1360 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1363 e2 = IRExpr_Const(IRConst_U64(
1364 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1365 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1370 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1371 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1372 Int s32a = (Int)u32a;
1373 Int s32b = (Int)u32b;
1374 Long s64a = (Long)s32a;
1375 Long s64b = (Long)s32b;
1376 Long sres = s64a * s64b;
1377 ULong ures = (ULong)sres;
1378 e2 = IRExpr_Const(IRConst_U64(ures));
1384 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1385 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1386 if (shift >= 0 && shift <= 31)
1387 e2 = IRExpr_Const(IRConst_U32(
1388 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1392 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1393 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1394 if (shift >= 0 && shift <= 63)
1395 e2 = IRExpr_Const(IRConst_U64(
1396 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1404 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1405 s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1406 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1407 if (shift >= 0 && shift <= 31) {
1408 s32 >>=/*signed*/ shift;
1409 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1415 /*signed*/ Long s64;
1416 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1417 s64 = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1418 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1419 if (shift >= 0 && shift <= 63) {
1420 s64 >>=/*signed*/ shift;
1421 e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1429 /*unsigned*/ UInt u32;
1430 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1431 u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1432 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1433 if (shift >= 0 && shift <= 31) {
1434 u32 >>=/*unsigned*/ shift;
1435 e2 = IRExpr_Const(IRConst_U32(u32));
1441 /*unsigned*/ ULong u64;
1442 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1443 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1444 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1445 if (shift >= 0 && shift <= 63) {
1446 u64 >>=/*unsigned*/ shift;
1447 e2 = IRExpr_Const(IRConst_U64(u64));
1454 e2 = IRExpr_Const(IRConst_U1(toBool(
1455 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1456 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1459 e2 = IRExpr_Const(IRConst_U1(toBool(
1460 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1461 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1466 e2 = IRExpr_Const(IRConst_U1(toBool(
1467 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1468 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1471 e2 = IRExpr_Const(IRConst_U1(toBool(
1472 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1473 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1476 e2 = IRExpr_Const(IRConst_U1(toBool(
1477 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1478 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1483 e2 = IRExpr_Const(IRConst_U1(toBool(
1484 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1485 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1488 e2 = IRExpr_Const(IRConst_U1(toBool(
1489 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1490 <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1495 e2 = IRExpr_Const(IRConst_U1(toBool(
1496 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1497 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1500 e2 = IRExpr_Const(IRConst_U1(toBool(
1501 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1502 <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1507 e2 = IRExpr_Const(IRConst_U1(toBool(
1508 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1509 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1512 e2 = IRExpr_Const(IRConst_U1(toBool(
1513 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1514 < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1519 e2 = IRExpr_Const(IRConst_U1(toBool(
1520 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1521 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1524 e2 = IRExpr_Const(IRConst_U1(toBool(
1525 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1526 < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1530 case Iop_CmpORD32S: {
1532 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1533 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1534 Int s32a = (Int)u32a;
1535 Int s32b = (Int)u32b;
1536 Int r = 0x2; /* EQ */
1540 else if (s32a > s32b) {
1543 e2 = IRExpr_Const(IRConst_U32(r));
1549 e2 = IRExpr_Const(IRConst_U64(
1550 (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
1551 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
1555 /* We can't fold this, because there is no way to
1556 express the result in IR, but at least pretend to
1557 handle it, so as to stop getting blasted with
1558 no-rule-for-this-primop messages. */
1567 /* other cases (identities, etc) */
1569 /* Shl64/Shr64(x,0) ==> x */
1570 if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
1571 && e->Iex.Binop.arg2->tag == Iex_Const
1572 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
1573 e2 = e->Iex.Binop.arg1;
1576 /* Shl32/Shr32(x,0) ==> x */
1577 if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
1578 && e->Iex.Binop.arg2->tag == Iex_Const
1579 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
1580 e2 = e->Iex.Binop.arg1;
1583 /* Or8(x,0) ==> x */
1584 if ((e->Iex.Binop.op == Iop_Or8)
1585 && e->Iex.Binop.arg2->tag == Iex_Const
1586 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
1587 e2 = e->Iex.Binop.arg1;
1590 /* Or16(x,0) ==> x */
1591 if ((e->Iex.Binop.op == Iop_Or16)
1592 && e->Iex.Binop.arg2->tag == Iex_Const
1593 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
1594 e2 = e->Iex.Binop.arg1;
1597 /* Or32/Add32/Max32U(x,0) ==> x */
1598 if ((e->Iex.Binop.op == Iop_Add32
1599 || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
1600 && e->Iex.Binop.arg2->tag == Iex_Const
1601 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
1602 e2 = e->Iex.Binop.arg1;
1605 /* Add32(t,t) ==> t << 1. Memcheck doesn't understand that
1606 x+x produces a defined least significant bit, and it seems
1607 simplest just to get rid of the problem by rewriting it
1608 out, since the opportunity to do so exists. */
1609 if (e->Iex.Binop.op == Iop_Add32
1610 && e->Iex.Binop.arg1->tag == Iex_RdTmp
1611 && e->Iex.Binop.arg2->tag == Iex_RdTmp
1612 && e->Iex.Binop.arg1->Iex.RdTmp.tmp
1613 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
1614 e2 = IRExpr_Binop(Iop_Shl32,
1616 IRExpr_Const(IRConst_U8(1)));
1619 /* Add64(t,t) ==> t << 1; rationale as for Add32(t,t) above. */
1620 if (e->Iex.Binop.op == Iop_Add64
1621 && e->Iex.Binop.arg1->tag == Iex_RdTmp
1622 && e->Iex.Binop.arg2->tag == Iex_RdTmp
1623 && e->Iex.Binop.arg1->Iex.RdTmp.tmp
1624 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
1625 e2 = IRExpr_Binop(Iop_Shl64,
1627 IRExpr_Const(IRConst_U8(1)));
1630 /* Add8(t,t) ==> t << 1; rationale as for Add32(t,t) above. */
1631 if (e->Iex.Binop.op == Iop_Add8
1632 && e->Iex.Binop.arg1->tag == Iex_RdTmp
1633 && e->Iex.Binop.arg2->tag == Iex_RdTmp
1634 && e->Iex.Binop.arg1->Iex.RdTmp.tmp
1635 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
1636 e2 = IRExpr_Binop(Iop_Shl8,
1638 IRExpr_Const(IRConst_U8(1)));
1640 /* NB no Add16(t,t) case yet as no known test case exists */
1642 /* Or64/Add64(x,0) ==> x */
1643 if ((e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64)
1644 && e->Iex.Binop.arg2->tag == Iex_Const
1645 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 == 0) {
1646 e2 = e->Iex.Binop.arg1;
1649 /* And32(x,0xFFFFFFFF) ==> x */
1650 if (e->Iex.Binop.op == Iop_And32
1651 && e->Iex.Binop.arg2->tag == Iex_Const
1652 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
1653 e2 = e->Iex.Binop.arg1;
1656 /* And32(x,0) ==> 0 */
1657 if (e->Iex.Binop.op == Iop_And32
1658 && e->Iex.Binop.arg2->tag == Iex_Const
1659 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
1660 e2 = IRExpr_Const(IRConst_U32(0));
1663 /* And32/Shl32(0,x) ==> 0 */
1664 if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
1665 && e->Iex.Binop.arg1->tag == Iex_Const
1666 && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
1667 e2 = IRExpr_Const(IRConst_U32(0));
1670 /* Or8(0,x) ==> x */
1671 if (e->Iex.Binop.op == Iop_Or8
1672 && e->Iex.Binop.arg1->tag == Iex_Const
1673 && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
1674 e2 = e->Iex.Binop.arg2;
1677 /* Or32/Max32U(0,x) ==> x */
1678 if ((e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
1679 && e->Iex.Binop.arg1->tag == Iex_Const
1680 && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
1681 e2 = e->Iex.Binop.arg2;
1684 /* Or64(0,x) ==> x */
1685 if (e->Iex.Binop.op == Iop_Or64
1686 && e->Iex.Binop.arg1->tag == Iex_Const
1687 && e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 0) {
1688 e2 = e->Iex.Binop.arg2;
1691 /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
1692 /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
1693 /* Max32U(t,t) ==> t, for some IRTemp t */
1694 switch (e->Iex.Binop.op) {
1695 case Iop_And64: case Iop_And32:
1696 case Iop_And16: case Iop_And8:
1697 case Iop_Or64: case Iop_Or32:
1698 case Iop_Or16: case Iop_Or8: case Iop_OrV128:
1700 if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
1701 e2 = e->Iex.Binop.arg1;
1707 /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
1708 /* Sub32/64(t,t) ==> 0, for some IRTemp t */
1709 switch (e->Iex.Binop.op) {
1710 case Iop_Xor64: case Iop_Xor32:
1711 case Iop_Xor16: case Iop_Xor8:
1713 case Iop_Sub64: case Iop_Sub32:
1714 if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
1715 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
1721 switch (e->Iex.Binop.op) {
1725 if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
1726 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
1736 if (e->tag == Iex_Mux0X) {
1737 /* is the discriminant a constant? */
1738 if (e->Iex.Mux0X.cond->tag == Iex_Const) {
1740 /* assured to us by the IR type rules */
1741 vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
1742 zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
1743 ->Iex.Const.con->Ico.U8));
1744 e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
1747 /* are the arms identical? (pretty weedy test) */
1748 if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
1749 e->Iex.Mux0X.exprX)) {
1750 e2 = e->Iex.Mux0X.expr0;
1754 /* Show cases where we've found but not folded 'op(t,t)'. */
1755 if (0 && e == e2 && e->tag == Iex_Binop
1756 && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
1757 vex_printf("IDENT: ");
1758 ppIRExpr(e); vex_printf("\n");
1761 /* Show the overall results of folding. */
1762 if (DEBUG_IROPT && e2 != e) {
1763 vex_printf("FOLD: ");
1764 ppIRExpr(e); vex_printf(" -> ");
1765 ppIRExpr(e2); vex_printf("\n");
1774 vpanic("fold_Expr: no rule for the above");
1776 if (vex_control.iropt_verbosity > 0) {
1777 vex_printf("vex iropt: fold_Expr: no rule for: ");
1786 /* Apply the subst to a simple 1-level expression -- guaranteed to be
1787 1-level due to previous flattening pass. */
1789 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
1793 if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
1794 return env[(Int)ex->Iex.RdTmp.tmp];
1796 /* not bound in env */
1805 vassert(isIRAtom(ex->Iex.GetI.ix));
1808 subst_Expr(env, ex->Iex.GetI.ix),
1813 vassert(isIRAtom(ex->Iex.Qop.arg1));
1814 vassert(isIRAtom(ex->Iex.Qop.arg2));
1815 vassert(isIRAtom(ex->Iex.Qop.arg3));
1816 vassert(isIRAtom(ex->Iex.Qop.arg4));
1819 subst_Expr(env, ex->Iex.Qop.arg1),
1820 subst_Expr(env, ex->Iex.Qop.arg2),
1821 subst_Expr(env, ex->Iex.Qop.arg3),
1822 subst_Expr(env, ex->Iex.Qop.arg4)
1826 vassert(isIRAtom(ex->Iex.Triop.arg1));
1827 vassert(isIRAtom(ex->Iex.Triop.arg2));
1828 vassert(isIRAtom(ex->Iex.Triop.arg3));
1829 return IRExpr_Triop(
1831 subst_Expr(env, ex->Iex.Triop.arg1),
1832 subst_Expr(env, ex->Iex.Triop.arg2),
1833 subst_Expr(env, ex->Iex.Triop.arg3)
1837 vassert(isIRAtom(ex->Iex.Binop.arg1));
1838 vassert(isIRAtom(ex->Iex.Binop.arg2));
1839 return IRExpr_Binop(
1841 subst_Expr(env, ex->Iex.Binop.arg1),
1842 subst_Expr(env, ex->Iex.Binop.arg2)
1846 vassert(isIRAtom(ex->Iex.Unop.arg));
1849 subst_Expr(env, ex->Iex.Unop.arg)
1853 vassert(isIRAtom(ex->Iex.Load.addr));
1857 subst_Expr(env, ex->Iex.Load.addr)
1862 IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
1863 for (i = 0; args2[i]; i++) {
1864 vassert(isIRAtom(args2[i]));
1865 args2[i] = subst_Expr(env, args2[i]);
1867 return IRExpr_CCall(
1869 ex->Iex.CCall.retty,
1875 vassert(isIRAtom(ex->Iex.Mux0X.cond));
1876 vassert(isIRAtom(ex->Iex.Mux0X.expr0));
1877 vassert(isIRAtom(ex->Iex.Mux0X.exprX));
1878 return IRExpr_Mux0X(
1879 subst_Expr(env, ex->Iex.Mux0X.cond),
1880 subst_Expr(env, ex->Iex.Mux0X.expr0),
1881 subst_Expr(env, ex->Iex.Mux0X.exprX)
1885 vex_printf("\n\n"); ppIRExpr(ex);
1886 vpanic("subst_Expr");
1892 /* Apply the subst to stmt, then fold the result as much as possible.
1893 Much simplified due to stmt being previously flattened. As a
1894 result of this, the stmt may wind up being turned into a no-op.
1896 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
1899 vex_printf("\nsubst and fold stmt\n");
1906 vassert(isIRAtom(st->Ist.AbiHint.base));
1907 vassert(isIRAtom(st->Ist.AbiHint.nia));
1908 return IRStmt_AbiHint(
1909 fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
1910 st->Ist.AbiHint.len,
1911 fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
1914 vassert(isIRAtom(st->Ist.Put.data));
1917 fold_Expr(subst_Expr(env, st->Ist.Put.data))
1921 vassert(isIRAtom(st->Ist.PutI.ix));
1922 vassert(isIRAtom(st->Ist.PutI.data));
1925 fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
1927 fold_Expr(subst_Expr(env, st->Ist.PutI.data))
1931 /* This is the one place where an expr (st->Ist.WrTmp.data) is
1932 allowed to be more than just a constant or a tmp. */
1933 return IRStmt_WrTmp(
1935 fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
1939 vassert(isIRAtom(st->Ist.Store.addr));
1940 vassert(isIRAtom(st->Ist.Store.data));
1941 return IRStmt_Store(
1943 fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
1944 fold_Expr(subst_Expr(env, st->Ist.Store.data))
1949 cas = st->Ist.CAS.details;
1950 vassert(isIRAtom(cas->addr));
1951 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
1952 vassert(isIRAtom(cas->expdLo));
1953 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
1954 vassert(isIRAtom(cas->dataLo));
1956 cas->oldHi, cas->oldLo, cas->end,
1957 fold_Expr(subst_Expr(env, cas->addr)),
1958 cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
1959 fold_Expr(subst_Expr(env, cas->expdLo)),
1960 cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
1961 fold_Expr(subst_Expr(env, cas->dataLo))
1963 return IRStmt_CAS(cas2);
1967 vassert(isIRAtom(st->Ist.LLSC.addr));
1968 if (st->Ist.LLSC.storedata)
1969 vassert(isIRAtom(st->Ist.LLSC.storedata));
1972 st->Ist.LLSC.result,
1973 fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
1974 st->Ist.LLSC.storedata
1975 ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
1982 d = st->Ist.Dirty.details;
1983 d2 = emptyIRDirty();
1985 d2->args = shallowCopyIRExprVec(d2->args);
1986 if (d2->mFx != Ifx_None) {
1987 vassert(isIRAtom(d2->mAddr));
1988 d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
1990 vassert(isIRAtom(d2->guard));
1991 d2->guard = fold_Expr(subst_Expr(env, d2->guard));
1992 for (i = 0; d2->args[i]; i++) {
1993 vassert(isIRAtom(d2->args[i]));
1994 d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
1996 return IRStmt_Dirty(d2);
2000 return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
2003 return IRStmt_NoOp();
2006 return IRStmt_MBE(st->Ist.MBE.event);
2010 vassert(isIRAtom(st->Ist.Exit.guard));
2011 fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
2012 if (fcond->tag == Iex_Const) {
2013 /* Interesting. The condition on this exit has folded down to
2015 vassert(fcond->Iex.Const.con->tag == Ico_U1);
2016 vassert(fcond->Iex.Const.con->Ico.U1 == False
2017 || fcond->Iex.Const.con->Ico.U1 == True);
2018 if (fcond->Iex.Const.con->Ico.U1 == False) {
2019 /* exit is never going to happen, so dump the statement. */
2020 return IRStmt_NoOp();
2022 vassert(fcond->Iex.Const.con->Ico.U1 == True);
2023 /* Hmmm. The exit has become unconditional. Leave it
2024 as it is for now, since we'd have to truncate the BB
2025 at this point, which is tricky. Such truncation is
2026 done later by the dead-code elimination pass. */
2027 /* fall out into the reconstruct-the-exit code. */
2028 if (vex_control.iropt_verbosity > 0)
2029 /* really a misuse of vex_control.iropt_verbosity */
2030 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
2033 return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
2037 vex_printf("\n"); ppIRStmt(st);
2038 vpanic("subst_and_fold_Stmt");
2043 IRSB* cprop_BB ( IRSB* in )
2048 Int n_tmps = in->tyenv->types_used;
2049 IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
2052 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
2054 /* Set up the env which travels forward. This holds a
2055 substitution, mapping IRTemps to atoms, that is, IRExprs which
2056 are either IRTemps or IRConsts. Thus, copy and constant
2057 propagation is done. The environment is to be applied as we
2058 move along. Keys are IRTemps. Values are IRExpr*s.
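      For instance (illustrative only): if the env already maps
      t1 :-> 0x10:I32, then 't2 = Add32(t1,0x4:I32)' is rewritten by
      subst_Expr to 't2 = Add32(0x10:I32,0x4:I32)', folded by fold_Expr
      to 't2 = 0x14:I32', and then itself recorded in the env rather
      than copied to the output block.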
2060 for (i = 0; i < n_tmps; i++)
2063 /* For each original SSA-form stmt ... */
2064 for (i = 0; i < in->stmts_used; i++) {
2066 /* First apply the substitution to the current stmt. This
2067 propagates in any constants and tmp-tmp assignments
2068 accumulated prior to this point. As part of the subst_Stmt
2069 call, also then fold any constant expressions resulting. */
2073 /* perhaps st2 is already a no-op? */
2074 if (st2->tag == Ist_NoOp) continue;
2076 st2 = subst_and_fold_Stmt( env, st2 );
2078 /* If the statement has been folded into a no-op, forget it. */
2079 if (st2->tag == Ist_NoOp) continue;
2081 /* Now consider what the stmt looks like. If it's of the form
2082 't = const' or 't1 = t2', add it to the running environment
2083 and not to the output BB. Otherwise, add it to the output
2084 BB. Note, we choose not to propagate const when const is an
2085 F64i, so that F64i literals can be CSE'd later. This helps
2086 x86 floating point code generation. */
2088 if (st2->tag == Ist_WrTmp
2089 && st2->Ist.WrTmp.data->tag == Iex_Const
2090 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
2091 /* 't = const' -- add to env.
2092 The pair (IRTemp, IRExpr*) is added. */
2093 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
2096 if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
2097 /* 't1 = t2' -- add to env.
2098 The pair (IRTemp, IRExpr*) is added. */
2099 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
2102 /* Not interesting, copy st2 into the output block. */
2103 addStmtToIRSB( out, st2 );
2107 out->next = subst_Expr( env, in->next );
2108 out->jumpkind = in->jumpkind;
2113 /*---------------------------------------------------------------*/
2114 /*--- Dead code (t = E) removal ---*/
2115 /*---------------------------------------------------------------*/
2117 /* As a side effect, also removes all code following an unconditional side exit. */
2120 /* The type of the 'set' used below is: a map from IRTemp to nothing
2121 -- really just operating as a set of IRTemps.
2125 static void addUses_Temp ( Bool* set, IRTemp tmp )
2127 set[(Int)tmp] = True;
2130 static void addUses_Expr ( Bool* set, IRExpr* e )
2135 addUses_Expr(set, e->Iex.GetI.ix);
2138 addUses_Expr(set, e->Iex.Mux0X.cond);
2139 addUses_Expr(set, e->Iex.Mux0X.expr0);
2140 addUses_Expr(set, e->Iex.Mux0X.exprX);
2143 for (i = 0; e->Iex.CCall.args[i]; i++)
2144 addUses_Expr(set, e->Iex.CCall.args[i]);
2147 addUses_Expr(set, e->Iex.Load.addr);
2150 addUses_Expr(set, e->Iex.Qop.arg1);
2151 addUses_Expr(set, e->Iex.Qop.arg2);
2152 addUses_Expr(set, e->Iex.Qop.arg3);
2153 addUses_Expr(set, e->Iex.Qop.arg4);
2156 addUses_Expr(set, e->Iex.Triop.arg1);
2157 addUses_Expr(set, e->Iex.Triop.arg2);
2158 addUses_Expr(set, e->Iex.Triop.arg3);
2161 addUses_Expr(set, e->Iex.Binop.arg1);
2162 addUses_Expr(set, e->Iex.Binop.arg2);
2165 addUses_Expr(set, e->Iex.Unop.arg);
2168 addUses_Temp(set, e->Iex.RdTmp.tmp);
2176 vpanic("addUses_Expr");
2180 static void addUses_Stmt ( Bool* set, IRStmt* st )
2187 addUses_Expr(set, st->Ist.AbiHint.base);
2188 addUses_Expr(set, st->Ist.AbiHint.nia);
2191 addUses_Expr(set, st->Ist.PutI.ix);
2192 addUses_Expr(set, st->Ist.PutI.data);
2195 addUses_Expr(set, st->Ist.WrTmp.data);
2198 addUses_Expr(set, st->Ist.Put.data);
2201 addUses_Expr(set, st->Ist.Store.addr);
2202 addUses_Expr(set, st->Ist.Store.data);
2205 cas = st->Ist.CAS.details;
2206 addUses_Expr(set, cas->addr);
2208 addUses_Expr(set, cas->expdHi);
2209 addUses_Expr(set, cas->expdLo);
2211 addUses_Expr(set, cas->dataHi);
2212 addUses_Expr(set, cas->dataLo);
2215 addUses_Expr(set, st->Ist.LLSC.addr);
2216 if (st->Ist.LLSC.storedata)
2217 addUses_Expr(set, st->Ist.LLSC.storedata);
2220 d = st->Ist.Dirty.details;
2221 if (d->mFx != Ifx_None)
2222 addUses_Expr(set, d->mAddr);
2223 addUses_Expr(set, d->guard);
2224 for (i = 0; d->args[i] != NULL; i++)
2225 addUses_Expr(set, d->args[i]);
2232 addUses_Expr(set, st->Ist.Exit.guard);
2237 vpanic("addUses_Stmt");
2242 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
2243 static Bool isZeroU1 ( IRExpr* e )
2245 return toBool( e->tag == Iex_Const
2246 && e->Iex.Const.con->tag == Ico_U1
2247 && e->Iex.Const.con->Ico.U1 == False );
2250 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
2251 static Bool isOneU1 ( IRExpr* e )
2253 return toBool( e->tag == Iex_Const
2254 && e->Iex.Const.con->tag == Ico_U1
2255 && e->Iex.Const.con->Ico.U1 == True );
2259 /* Note, this destructively modifies the given IRSB. */
2261 /* Scan backwards through statements, carrying a set of IRTemps which
2262 are known to be used after the current point. On encountering 't =
2263 E', delete the binding if it is not used. Otherwise, add any temp
2264 uses to the set and keep on moving backwards.
2266 As an enhancement, the first (backwards) pass searches for IR exits
2267 with always-taken conditions and notes the location of the earliest
2268 one in the block. If any such are found, a second pass copies the
2269 exit destination and jump kind to the bb-end. Then, the exit and
2270 all statements following it are turned into no-ops.
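/* A worked example of the first pass (illustrative only): in

      t1 = GET:I32(0)
      t2 = Add32(t1,0x1:I32)
      PUT(0) = t1

   the backwards scan records t1 as used (by the PUT), finds no use of
   t2, and so rewrites 't2 = Add32(t1,0x1:I32)' to an IRStmt_NoOp. */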
2273 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
2275 Int i, i_unconditional_exit;
2276 Int n_tmps = bb->tyenv->types_used;
2277 Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool));
2280 for (i = 0; i < n_tmps; i++)
2283 /* start off by recording IRTemp uses in the next field. */
2284 addUses_Expr(set, bb->next);
2288 /* Work backwards through the stmts */
2289 i_unconditional_exit = -1;
2290 for (i = bb->stmts_used-1; i >= 0; i--) {
2292 if (st->tag == Ist_NoOp)
2294 /* take note of any unconditional exits */
2295 if (st->tag == Ist_Exit
2296 && isOneU1(st->Ist.Exit.guard))
2297 i_unconditional_exit = i;
2298 if (st->tag == Ist_WrTmp
2299 && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
2300 /* it's an IRTemp which never got used. Delete it. */
2302 vex_printf("DEAD: ");
2306 bb->stmts[i] = IRStmt_NoOp();
2309 if (st->tag == Ist_Dirty
2310 && st->Ist.Dirty.details->guard
2311 && isZeroU1(st->Ist.Dirty.details->guard)) {
2312 /* This is a dirty helper which will never get called. Delete it. */
2314 bb->stmts[i] = IRStmt_NoOp();
2317 /* Note any IRTemp uses made by the current statement. */
2318 addUses_Stmt(set, st);
2322 /* Optional second pass: if any unconditional exits were found,
2323 delete them and all following statements. */
2325 if (i_unconditional_exit != -1) {
2326 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
2327 i_unconditional_exit);
2328 vassert(i_unconditional_exit >= 0
2329 && i_unconditional_exit < bb->stmts_used);
2331 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
2333 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
2334 for (i = i_unconditional_exit; i < bb->stmts_used; i++)
2335 bb->stmts[i] = IRStmt_NoOp();
2340 /*---------------------------------------------------------------*/
2341 /*--- Specialisation of helper function calls, in ---*/
2342 /*--- collaboration with the front end ---*/
2343 /*---------------------------------------------------------------*/
2346 IRSB* spec_helpers_BB(
2348 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
2356 for (i = bb->stmts_used-1; i >= 0; i--) {
2359 if (st->tag != Ist_WrTmp
2360 || st->Ist.WrTmp.data->tag != Iex_CCall)
2363 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
2364 st->Ist.WrTmp.data->Iex.CCall.args,
2367 /* the front end can't think of a suitable replacement */
2370 /* We got something better. Install it in the bb. */
2373 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
2376 vex_printf("SPEC: ");
2377 ppIRExpr(st->Ist.WrTmp.data);
2378 vex_printf(" --> ");
2385 bb = flatten_BB(bb);
2390 /*---------------------------------------------------------------*/
2391 /*--- Determination of guest state aliasing relationships ---*/
2392 /*---------------------------------------------------------------*/
2394 /* These are helper functions for CSE and GetI/PutI transformations.
2396 Determine, to the extent possible, the relationship between two
2397 guest state accesses. The possible outcomes are:
2399 * Exact alias. These two accesses denote precisely the same
2400 piece of the guest state.
2402 * Definitely no alias. These two accesses are guaranteed not to
2403 overlap any part of the guest state.
2405 * Unknown -- if neither of the above can be established.
2407 If in doubt, return Unknown. */
2410 enum { ExactAlias, NoAlias, UnknownAlias }
2414 /* Produces the alias relation between an indexed guest
2415 state access and a non-indexed access. */
2418 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
2419 Int offset2, IRType ty2 )
2421 UInt minoff1, maxoff1, minoff2, maxoff2;
2423 getArrayBounds( descr1, &minoff1, &maxoff1 );
2425 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2427 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2430 /* Could probably do better here if required. For the moment
2431 however just claim not to know anything more. */
2432 return UnknownAlias;
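/* A minimal illustrative sketch (never called; the register array,
   offsets and temp number are made up purely for this example): an
   8-element I64 array based at offset 100 spans offsets 100..163, so
   a 4-byte access at offset 0 provably cannot alias it, whereas one
   at offset 104 might. */
__attribute__((unused))
static void example_getAliasingRelation_IC ( void )
{
   IRRegArray* d  = mkIRRegArray(100, Ity_I64, 8);
   IRExpr*     ix = IRExpr_RdTmp(1);
   vassert(getAliasingRelation_IC(d, ix, 0,   Ity_I32) == NoAlias);
   vassert(getAliasingRelation_IC(d, ix, 104, Ity_I32) == UnknownAlias);
}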
2436 /* Produces the alias relation between two indexed guest state accesses. */
2440 GSAliasing getAliasingRelation_II (
2441 IRRegArray* descr1, IRExpr* ix1, Int bias1,
2442 IRRegArray* descr2, IRExpr* ix2, Int bias2
2445 UInt minoff1, maxoff1, minoff2, maxoff2;
2448 /* First try hard to show they don't alias. */
2449 getArrayBounds( descr1, &minoff1, &maxoff1 );
2450 getArrayBounds( descr2, &minoff2, &maxoff2 );
2451 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2454 /* So the two arrays at least partially overlap. To get any
2455 further we'll have to be sure that the descriptors are identical. */
2457 if (!eqIRRegArray(descr1, descr2))
2458 return UnknownAlias;
2460 /* The descriptors are identical. Now the only difference can be
2461 in the index expressions. If they cannot be shown to be
2462 identical, we have to say we don't know what the aliasing
2463 relation will be. Now, since the IR is flattened, the index
2464 expressions should be atoms -- either consts or tmps. So that
2465 makes the comparison simple. */
2466 vassert(isIRAtom(ix1));
2467 vassert(isIRAtom(ix2));
2468 if (!eqIRAtom(ix1,ix2))
2469 return UnknownAlias;
2471 /* Ok, the index expressions are identical. So now the only way
2472 they can be different is in the bias. Normalise this
2473 paranoidly, to reliably establish equality/non-equality. */
2475 /* So now we know that the GetI and PutI index the same array
2476 with the same base. Are the offsets the same, modulo the
2477 array size? Do this paranoidly. */
2478 vassert(descr1->nElems == descr2->nElems);
2479 vassert(descr1->elemTy == descr2->elemTy);
2480 vassert(descr1->base == descr2->base);
2482 while (bias1 < 0 || bias2 < 0) {
2483 bias1 += descr1->nElems;
2484 bias2 += descr1->nElems;
2487 vpanic("getAliasingRelation: iters");
2489 vassert(bias1 >= 0 && bias2 >= 0);
2490 bias1 %= descr1->nElems;
2491 bias2 %= descr1->nElems;
2492 vassert(bias1 >= 0 && bias1 < descr1->nElems);
2493 vassert(bias2 >= 0 && bias2 < descr1->nElems);
2495 /* Finally, bias1 and bias2 are normalised into the range
2496 0 .. descr1->nElems - 1. And so we can establish
2497 equality/non-equality. */
2499 return bias1==bias2 ? ExactAlias : NoAlias;
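/* As above, a small never-called sketch (the array shape, biases and
   temp number are arbitrary): with 7 elements, biases -3 and 4 land
   on the same element once normalised, while -3 and 5 do not. */
__attribute__((unused))
static void example_getAliasingRelation_II ( void )
{
   IRRegArray* d  = mkIRRegArray(0, Ity_I64, 7);
   IRExpr*     ix = IRExpr_RdTmp(3);
   vassert(getAliasingRelation_II(d, ix, -3, d, ix, 4) == ExactAlias);
   vassert(getAliasingRelation_II(d, ix, -3, d, ix, 5) == NoAlias);
}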
2503 /*---------------------------------------------------------------*/
2504 /*--- Common Subexpression Elimination ---*/
2505 /*---------------------------------------------------------------*/
2507 /* Expensive in time and space. */
2509 /* Uses two environments:
2510 an IRTemp -> IRTemp mapping
2511 a mapping from AvailExpr* to IRTemp
2516 enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
2523 /* binop(tmp,tmp) */
2529 /* binop(tmp,const) */
2535 /* binop(const,tmp) */
2541 /* F64i-style const */
2545 /* Mux0X(tmp,tmp,tmp) */
2551 /* GetI(descr,tmp,bias)*/
2561 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
2563 if (a1->tag != a2->tag)
2568 a1->u.Ut.op == a2->u.Ut.op
2569 && a1->u.Ut.arg == a2->u.Ut.arg);
2572 a1->u.Btt.op == a2->u.Btt.op
2573 && a1->u.Btt.arg1 == a2->u.Btt.arg1
2574 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
2577 a1->u.Btc.op == a2->u.Btc.op
2578 && a1->u.Btc.arg1 == a2->u.Btc.arg1
2579 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
2582 a1->u.Bct.op == a2->u.Bct.op
2583 && a1->u.Bct.arg2 == a2->u.Bct.arg2
2584 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
2586 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
2588 return toBool(a1->u.Mttt.co == a2->u.Mttt.co
2589 && a1->u.Mttt.e0 == a2->u.Mttt.e0
2590 && a1->u.Mttt.eX == a2->u.Mttt.eX);
2592 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
2593 && a1->u.GetIt.ix == a2->u.GetIt.ix
2594 && a1->u.GetIt.bias == a2->u.GetIt.bias);
2595 default: vpanic("eq_AvailExpr");
2599 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
2604 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
2606 return IRExpr_Binop( ae->u.Btt.op,
2607 IRExpr_RdTmp(ae->u.Btt.arg1),
2608 IRExpr_RdTmp(ae->u.Btt.arg2) );
2610 con = LibVEX_Alloc(sizeof(IRConst));
2611 *con = ae->u.Btc.con2;
2612 return IRExpr_Binop( ae->u.Btc.op,
2613 IRExpr_RdTmp(ae->u.Btc.arg1),
2614 IRExpr_Const(con) );
2616 con = LibVEX_Alloc(sizeof(IRConst));
2617 *con = ae->u.Bct.con1;
2618 return IRExpr_Binop( ae->u.Bct.op,
2620 IRExpr_RdTmp(ae->u.Bct.arg2) );
2622 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
2624 return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
2625 IRExpr_RdTmp(ae->u.Mttt.e0),
2626 IRExpr_RdTmp(ae->u.Mttt.eX));
2628 return IRExpr_GetI(ae->u.GetIt.descr,
2629 IRExpr_RdTmp(ae->u.GetIt.ix),
2632 vpanic("availExpr_to_IRExpr");
2637 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
2640 /* env :: IRTemp -> IRTemp */
2641 if (lookupHHW( env, &res, (HWord)tmp ))
2647 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
2649 /* env :: IRTemp -> IRTemp */
2652 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
2655 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
2656 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
2659 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
2662 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
2667 ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
2668 ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
2669 ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
2672 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
2675 vpanic("subst_AvailExpr");
2679 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
2683 if (e->tag == Iex_Unop
2684 && e->Iex.Unop.arg->tag == Iex_RdTmp) {
2685 ae = LibVEX_Alloc(sizeof(AvailExpr));
2687 ae->u.Ut.op = e->Iex.Unop.op;
2688 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
2692 if (e->tag == Iex_Binop
2693 && e->Iex.Binop.arg1->tag == Iex_RdTmp
2694 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2695 ae = LibVEX_Alloc(sizeof(AvailExpr));
2697 ae->u.Btt.op = e->Iex.Binop.op;
2698 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2699 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2703 if (e->tag == Iex_Binop
2704 && e->Iex.Binop.arg1->tag == Iex_RdTmp
2705 && e->Iex.Binop.arg2->tag == Iex_Const) {
2706 ae = LibVEX_Alloc(sizeof(AvailExpr));
2708 ae->u.Btc.op = e->Iex.Binop.op;
2709 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2710 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
2714 if (e->tag == Iex_Binop
2715 && e->Iex.Binop.arg1->tag == Iex_Const
2716 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2717 ae = LibVEX_Alloc(sizeof(AvailExpr));
2719 ae->u.Bct.op = e->Iex.Binop.op;
2720 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2721 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
2725 if (e->tag == Iex_Const
2726 && e->Iex.Const.con->tag == Ico_F64i) {
2727 ae = LibVEX_Alloc(sizeof(AvailExpr));
2729 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
2733 if (e->tag == Iex_Mux0X
2734 && e->Iex.Mux0X.cond->tag == Iex_RdTmp
2735 && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
2736 && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
2737 ae = LibVEX_Alloc(sizeof(AvailExpr));
2739 ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
2740 ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
2741 ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
2745 if (e->tag == Iex_GetI
2746 && e->Iex.GetI.ix->tag == Iex_RdTmp) {
2747 ae = LibVEX_Alloc(sizeof(AvailExpr));
2749 ae->u.GetIt.descr = e->Iex.GetI.descr;
2750 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp;
2751 ae->u.GetIt.bias = e->Iex.GetI.bias;
2759 /* The BB is modified in-place. Returns True if any changes were made. */
2762 static Bool do_cse_BB ( IRSB* bb )
2770 Bool anyDone = False;
2772 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
2773 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
2775 vassert(sizeof(IRTemp) <= sizeof(HWord));
2777 if (0) { ppIRSB(bb); vex_printf("\n\n"); }
2779 /* Iterate forwards over the stmts.
2780 On seeing "t = E", where E is one of the AvailExpr forms defined above:
2781 let E' = apply tenv substitution to E
2783 if a mapping E' -> q is found,
2784 replace this stmt by "t = q"
2785 and add binding t -> q to tenv
2787 add binding E' -> t to aenv
2788 replace this stmt by "t = E'"
2790 Other statements are only interesting to the extent that they
2791 might invalidate some of the expressions in aenv. So there is
2792 an invalidate-bindings check for each statement seen. */
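/* A worked example (temp numbers are arbitrary): given

      t10 = Add32(t1,t2)
      t11 = Add32(t10,t3)
      t12 = Add32(t1,t2)
      t13 = Add32(t12,t3)

   the first two stmts install the bindings Btt(Add32,t1,t2) -> t10
   and Btt(Add32,t10,t3) -> t11 in aenv.  The third stmt matches the
   first binding, so it is rewritten as 't12 = t10' and t12 -> t10 is
   added to tenv.  The fourth stmt, after applying tenv, becomes
   Add32(t10,t3), which matches the second binding and is rewritten
   as 't13 = t11'. */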
2794 for (i = 0; i < bb->stmts_used; i++) {
2797 /* ------ BEGIN invalidate aenv bindings ------ */
2798 /* This is critical: remove from aenv any E' -> .. bindings
2799 which might be invalidated by this statement. The only
2800 vulnerable kind of bindings are the GetI kind.
2801 Dirty call - dump (paranoia level -> 2)
2802 Store - dump (ditto)
2803 Put, PutI - dump unless no-overlap is proven (.. -> 1)
2804 Uses getAliasingRelation_IC and getAliasingRelation_II
2805 to do the no-overlap assessments needed for Put/PutI.
2808 case Ist_Dirty: case Ist_Store: case Ist_MBE:
2809 case Ist_CAS: case Ist_LLSC:
2810 paranoia = 2; break;
2811 case Ist_Put: case Ist_PutI:
2812 paranoia = 1; break;
2813 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
2814 case Ist_WrTmp: case Ist_Exit:
2815 paranoia = 0; break;
2817 vpanic("do_cse_BB(1)");
2821 for (j = 0; j < aenv->used; j++) {
2822 if (!aenv->inuse[j])
2824 ae = (AvailExpr*)aenv->key[j];
2825 if (ae->tag != GetIt)
2828 if (paranoia >= 2) {
2831 vassert(paranoia == 1);
2832 if (st->tag == Ist_Put) {
2833 if (getAliasingRelation_IC(
2835 IRExpr_RdTmp(ae->u.GetIt.ix),
2837 typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
2842 if (st->tag == Ist_PutI) {
2843 if (getAliasingRelation_II(
2845 IRExpr_RdTmp(ae->u.GetIt.ix),
2854 vpanic("do_cse_BB(2)");
2858 aenv->inuse[j] = False;
2859 aenv->key[j] = (HWord)NULL; /* be sure */
2862 } /* paranoia > 0 */
2864 /* ------ END invalidate aenv bindings ------ */
2866 /* ignore not-interestings */
2867 if (st->tag != Ist_WrTmp)
2870 t = st->Ist.WrTmp.tmp;
2871 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
2872 /* ignore if not of AvailExpr form */
2876 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
2879 subst_AvailExpr( tenv, eprime );
2881 /* search aenv for eprime, unfortunately the hard way */
2882 for (j = 0; j < aenv->used; j++)
2883 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
2886 if (j < aenv->used) {
2887 /* A binding E' -> q was found. Replace stmt by "t = q" and
2888 note the t->q binding in tenv. */
2889 /* (this is the core of the CSE action) */
2890 q = (IRTemp)aenv->val[j];
2891 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
2892 addToHHW( tenv, (HWord)t, (HWord)q );
2895 /* No binding was found, so instead we add E' -> t to our
2896 collection of available expressions, replace this stmt
2897 with "t = E'", and move on. */
2898 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
2899 addToHHW( aenv, (HWord)eprime, (HWord)t );
2905 sanityCheckIRSB(bb, Ity_I32);
2912 /*---------------------------------------------------------------*/
2913 /*--- Add32/Sub32 chain collapsing ---*/
2914 /*---------------------------------------------------------------*/
2916 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
2918 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
2919 yes, set *tmp and *i32 appropriately. *i32 is set as if the
2920 root node is Add32, not Sub32. */
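/* For example (the temp number and constant are arbitrary), given the
   flat expression Sub32(t5,0x4:I32) this sets *tmp to t5 and *i32 to
   -4, so that callers can treat Add32 and Sub32 links uniformly as
   additions. */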
2922 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
2924 if (e->tag != Iex_Binop)
2926 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
2928 if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
2930 if (e->Iex.Binop.arg2->tag != Iex_Const)
2932 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2933 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
2934 if (e->Iex.Binop.op == Iop_Sub32)
2940 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
2941 other tmp2. Scan backwards from the specified start point -- an inclusive bound. */
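/* Illustrative chain (temp numbers and constants arbitrary): given

      t1 = Add32(t0,0x1:I32)
      t2 = Add32(t1,0x2:I32)

   a query about t2 walks backwards through its definitions, rewriting
   the running representation from 't2 + 0' to 't1 + 2' to 't0 + 3',
   and so reports that t2 can be expressed as t0 +32 3.  The caller
   (collapse_AddSub_chains_BB below) then folds that result into the
   Add32/Sub32, GetI or PutI which referred to t2. */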
2944 static Bool collapseChain ( IRSB* bb, Int startHere,
2946 IRTemp* tmp2, Int* i32 )
2953 /* the (var, con) pair contain the current 'representation' for
2954 'tmp'. We start with 'tmp + 0'. */
2958 /* Scan backwards to see if tmp can be replaced by some other tmp
2960 for (j = startHere; j >= 0; j--) {
2962 if (st->tag != Ist_WrTmp)
2964 if (st->Ist.WrTmp.tmp != var)
2966 e = st->Ist.WrTmp.data;
2967 if (!isAdd32OrSub32(e, &vv, &ii))
2973 /* no earlier binding for var .. ill-formed IR */
2974 vpanic("collapseChain");
2976 /* so, did we find anything interesting? */
2978 return False; /* no .. */
2986 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
2988 static void collapse_AddSub_chains_BB ( IRSB* bb )
2994 for (i = bb->stmts_used-1; i >= 0; i--) {
2996 if (st->tag == Ist_NoOp)
2999 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
3001 if (st->tag == Ist_WrTmp
3002 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
3004 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
3005 Find out if var can be expressed as var2 + con2. */
3006 if (collapseChain(bb, i-1, var, &var2, &con2)) {
3008 vex_printf("replacing1 ");
3010 vex_printf(" with ");
3017 ? IRExpr_Binop(Iop_Add32,
3019 IRExpr_Const(IRConst_U32(con2)))
3020 : IRExpr_Binop(Iop_Sub32,
3022 IRExpr_Const(IRConst_U32(-con2)))
3025 ppIRStmt(bb->stmts[i]);
3033 /* Try to collapse 't1 = GetI[t2, con]'. */
3035 if (st->tag == Ist_WrTmp
3036 && st->Ist.WrTmp.data->tag == Iex_GetI
3037 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
3038 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
3039 ->Iex.RdTmp.tmp, &var2, &con2)) {
3041 vex_printf("replacing3 ");
3043 vex_printf(" with ");
3045 con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
3049 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
3053 ppIRStmt(bb->stmts[i]);
3059 /* Perhaps st is PutI[t, con] ? */
3061 if (st->tag == Ist_PutI
3062 && st->Ist.PutI.ix->tag == Iex_RdTmp
3063 && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp,
3066 vex_printf("replacing2 ");
3068 vex_printf(" with ");
3070 con2 += st->Ist.PutI.bias;
3072 = IRStmt_PutI(st->Ist.PutI.descr,
3077 ppIRStmt(bb->stmts[i]);
3087 /*---------------------------------------------------------------*/
3088 /*--- PutI/GetI transformations ---*/
3089 /*---------------------------------------------------------------*/
3091 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
3092 the given starting point to find, if any, a PutI which writes
3093 exactly the same piece of guest state, and so return the expression
3094 that the PutI writes. This is the core of PutI-GetI forwarding. */
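/* Schematically (the descr, temps and bias are arbitrary): given

      PUTI(descr)[t5,3] = t7
      ... no intervening Put/PutI/dirty call that could touch it ...
      t9 = GETI(descr)[t5,3]

   the backwards search finds the PutI, the two accesses are an
   ExactAlias, and the GetI can be replaced by 't9 = t7' (this is done
   by do_redundant_GetI_elimination below). */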
3097 IRExpr* findPutI ( IRSB* bb, Int startHere,
3098 IRRegArray* descrG, IRExpr* ixG, Int biasG )
3102 GSAliasing relation;
3105 vex_printf("\nfindPutI ");
3106 ppIRRegArray(descrG);
3109 vex_printf(" %d\n", biasG);
3112 /* Scan backwards in bb from startHere to find a suitable PutI
3113 binding for (descrG, ixG, biasG), if any. */
3115 for (j = startHere; j >= 0; j--) {
3117 if (st->tag == Ist_NoOp)
3120 if (st->tag == Ist_Put) {
3121 /* Non-indexed Put. This can't give a binding, but we do
3122 need to check it doesn't invalidate the search by
3123 overlapping any part of the indexed guest state. */
3126 = getAliasingRelation_IC(
3129 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
3131 if (relation == NoAlias) {
3132 /* we're OK; keep going */
3135 /* relation == UnknownAlias || relation == ExactAlias */
3136 /* If this assertion fails, we've found a Put which writes
3137 an area of guest state which is read by a GetI. Which
3138 is unlikely (although not per se wrong). */
3139 vassert(relation != ExactAlias);
3140 /* This Put potentially writes guest state that the GetI
3141 reads; we must fail. */
3146 if (st->tag == Ist_PutI) {
3148 relation = getAliasingRelation_II(
3155 if (relation == NoAlias) {
3156 /* This PutI definitely doesn't overlap. Ignore it and
3158 continue; /* the for j loop */
3161 if (relation == UnknownAlias) {
3162 /* We don't know whether this PutI writes to the same guest
3163 state as the GetI reads. So we have to give up. */
3167 /* Otherwise, we've found what we're looking for. */
3168 vassert(relation == ExactAlias);
3169 return st->Ist.PutI.data;
3171 } /* if (st->tag == Ist_PutI) */
3173 if (st->tag == Ist_Dirty) {
3174 /* Be conservative. If the dirty call has any guest effects at
3175 all, give up. We could do better -- only give up if there
3176 are any guest writes/modifies. */
3177 if (st->Ist.Dirty.details->nFxState > 0)
3183 /* No valid replacement was found. */
3189 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
3190 that it writes exactly the same piece of guest state) ? Safe answer: False. */
3193 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
3195 vassert(pi->tag == Ist_PutI);
3196 if (s2->tag != Ist_PutI)
3200 getAliasingRelation_II(
3201 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3202 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3209 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
3210 overlap it? Safe answer: True. Note, we could do a lot better
3211 than this if needed. */
3214 Bool guestAccessWhichMightOverlapPutI (
3215 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
3218 GSAliasing relation;
3219 UInt minoffP, maxoffP;
3221 vassert(pi->tag == Ist_PutI);
3222 getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
3231 /* just be paranoid ... these should be rare. */
3235 /* This is unbelievably lame, but it's probably not
3236 significant from a performance point of view. Really, a
3237 CAS is a load-store op, so it should be safe to say False. For now, play safe and say True. */
3242 /* If the dirty call has any guest effects at all, give up.
3243 Probably could do better. */
3244 if (s2->Ist.Dirty.details->nFxState > 0)
3249 vassert(isIRAtom(s2->Ist.Put.data));
3251 = getAliasingRelation_IC(
3252 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3254 typeOfIRExpr(tyenv,s2->Ist.Put.data)
3259 vassert(isIRAtom(s2->Ist.PutI.ix));
3260 vassert(isIRAtom(s2->Ist.PutI.data));
3262 = getAliasingRelation_II(
3263 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3264 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3269 if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
3271 = getAliasingRelation_II(
3272 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3274 s2->Ist.WrTmp.data->Iex.GetI.descr,
3275 s2->Ist.WrTmp.data->Iex.GetI.ix,
3276 s2->Ist.WrTmp.data->Iex.GetI.bias
3280 if (s2->Ist.WrTmp.data->tag == Iex_Get) {
3282 = getAliasingRelation_IC(
3283 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3284 s2->Ist.WrTmp.data->Iex.Get.offset,
3285 s2->Ist.WrTmp.data->Iex.Get.ty
3292 vassert(isIRAtom(s2->Ist.Store.addr));
3293 vassert(isIRAtom(s2->Ist.Store.data));
3297 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
3298 vpanic("guestAccessWhichMightOverlapPutI");
3302 if (relation == NoAlias)
3305 return True; /* ExactAlias or UnknownAlias */
3310 /* ---------- PutI/GetI transformations main functions --------- */
3312 /* Remove redundant GetIs, to the extent that they can be detected.
3313 bb is modified in-place. */
3316 void do_redundant_GetI_elimination ( IRSB* bb )
3321 for (i = bb->stmts_used-1; i >= 0; i--) {
3323 if (st->tag == Ist_NoOp)
3326 if (st->tag == Ist_WrTmp
3327 && st->Ist.WrTmp.data->tag == Iex_GetI
3328 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
3329 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
3330 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
3331 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
3332 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
3334 && isIRAtom(replacement)
3335 /* Make sure we're doing a type-safe transformation! */
3336 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
3338 vex_printf("rGI: ");
3339 ppIRExpr(st->Ist.WrTmp.data);
3341 ppIRExpr(replacement);
3344 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
3352 /* Remove redundant PutIs, to the extent that they can be detected.
3353 bb is modified in-place. */
3356 void do_redundant_PutI_elimination ( IRSB* bb )
3362 for (i = 0; i < bb->stmts_used; i++) {
3364 if (st->tag != Ist_PutI)
3366 /* Ok, search forwards from here to see if we can find another
3367 PutI which makes this one redundant, and dodging various
3368 hazards. Search forwards:
3369 * If conditional exit, give up (because anything after that
3370 does not postdominate this put).
3371 * If a Get which might overlap, give up (because this PutI
3372 is not necessarily dead).
3373 * If a Put which is identical, stop with success.
3374 * If a Put which might overlap, but is not identical, give up.
3375 * If a dirty helper call which might write guest state, give up.
3376 * If a Put which definitely doesn't overlap, or any other
3377 kind of stmt, continue. */
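/* For instance (schematic; the descr, temps and data values are
   arbitrary), in

      PUTI(descr)[t2,0] = t5
      t7 = Add32(t3,t4)
      PUTI(descr)[t2,0] = t9

   the intervening Add32 neither reads nor writes guest state, so the
   first PutI is overwritten before it can be observed and is turned
   into a no-op. */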
3380 for (j = i+1; j < bb->stmts_used; j++) {
3382 if (stj->tag == Ist_NoOp)
3384 if (identicalPutIs(st, stj)) {
3389 if (stj->tag == Ist_Exit)
3392 if (stj->tag == Ist_Dirty)
3393 /* give up; could do better here */
3395 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
3402 vex_printf("rPI: ");
3406 bb->stmts[i] = IRStmt_NoOp();
3413 /*---------------------------------------------------------------*/
3414 /*--- Loop unrolling ---*/
3415 /*---------------------------------------------------------------*/
3417 /* Adjust all tmp values (names) in e by delta. e is destructively modified. */
3420 static void deltaIRExpr ( IRExpr* e, Int delta )
3425 e->Iex.RdTmp.tmp += delta;
3431 deltaIRExpr(e->Iex.GetI.ix, delta);
3434 deltaIRExpr(e->Iex.Qop.arg1, delta);
3435 deltaIRExpr(e->Iex.Qop.arg2, delta);
3436 deltaIRExpr(e->Iex.Qop.arg3, delta);
3437 deltaIRExpr(e->Iex.Qop.arg4, delta);
3440 deltaIRExpr(e->Iex.Triop.arg1, delta);
3441 deltaIRExpr(e->Iex.Triop.arg2, delta);
3442 deltaIRExpr(e->Iex.Triop.arg3, delta);
3445 deltaIRExpr(e->Iex.Binop.arg1, delta);
3446 deltaIRExpr(e->Iex.Binop.arg2, delta);
3449 deltaIRExpr(e->Iex.Unop.arg, delta);
3452 deltaIRExpr(e->Iex.Load.addr, delta);
3455 for (i = 0; e->Iex.CCall.args[i]; i++)
3456 deltaIRExpr(e->Iex.CCall.args[i], delta);
3459 deltaIRExpr(e->Iex.Mux0X.cond, delta);
3460 deltaIRExpr(e->Iex.Mux0X.expr0, delta);
3461 deltaIRExpr(e->Iex.Mux0X.exprX, delta);
3464 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3465 vpanic("deltaIRExpr");
3469 /* Adjust all tmp values (names) in st by delta. st is destructively modified. */
3472 static void deltaIRStmt ( IRStmt* st, Int delta )
3482 deltaIRExpr(st->Ist.AbiHint.base, delta);
3483 deltaIRExpr(st->Ist.AbiHint.nia, delta);
3486 deltaIRExpr(st->Ist.Put.data, delta);
3489 deltaIRExpr(st->Ist.PutI.ix, delta);
3490 deltaIRExpr(st->Ist.PutI.data, delta);
3493 st->Ist.WrTmp.tmp += delta;
3494 deltaIRExpr(st->Ist.WrTmp.data, delta);
3497 deltaIRExpr(st->Ist.Exit.guard, delta);
3500 deltaIRExpr(st->Ist.Store.addr, delta);
3501 deltaIRExpr(st->Ist.Store.data, delta);
3504 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
3505 st->Ist.CAS.details->oldHi += delta;
3506 st->Ist.CAS.details->oldLo += delta;
3507 deltaIRExpr(st->Ist.CAS.details->addr, delta);
3508 if (st->Ist.CAS.details->expdHi)
3509 deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
3510 deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
3511 if (st->Ist.CAS.details->dataHi)
3512 deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
3513 deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
3516 st->Ist.LLSC.result += delta;
3517 deltaIRExpr(st->Ist.LLSC.addr, delta);
3518 if (st->Ist.LLSC.storedata)
3519 deltaIRExpr(st->Ist.LLSC.storedata, delta);
3522 d = st->Ist.Dirty.details;
3523 deltaIRExpr(d->guard, delta);
3524 for (i = 0; d->args[i]; i++)
3525 deltaIRExpr(d->args[i], delta);
3526 if (d->tmp != IRTemp_INVALID)
3529 deltaIRExpr(d->mAddr, delta);
3532 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
3533 vpanic("deltaIRStmt");
3538 /* If possible, return a loop-unrolled version of bb0. The original
3539 is unchanged. If not possible, return NULL. */
3541 /* The two schemas considered are:
3545 which unrolls to (eg) X: BODY;BODY; goto X
3549 X: BODY; if (c) goto X; goto Y
3550 which trivially transforms to
3551 X: BODY; if (!c) goto Y; goto X;
3552 so it falls in the scope of the first case.
3554 X and Y must be literal (guest) addresses. */
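/* Concretely (addresses schematic): a block at guest address X ending
   in "... if (t6) goto X; goto Y" is rewritten, on a copy, as
   "... if (!t6) goto Y; goto X", which is idiom #1 and can then be
   unrolled by pasting renamed copies of the body before the back
   edge.  The unroll factor comes from calc_unroll_factor below: for
   example, if vex_control.iropt_unroll_thresh were 120 (the value is
   configurable, so this is only illustrative), a 14-stmt body would
   be unrolled 8x, a 25-stmt body 4x, a 55-stmt body 2x, and anything
   larger not at all. */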
3557 static Int calc_unroll_factor( IRSB* bb )
3562 for (i = 0; i < bb->stmts_used; i++) {
3563 if (bb->stmts[i]->tag != Ist_NoOp)
3567 if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
3568 if (vex_control.iropt_verbosity > 0)
3569 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
3570 n_stmts, 8* n_stmts);
3573 if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
3574 if (vex_control.iropt_verbosity > 0)
3575 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
3576 n_stmts, 4* n_stmts);
3580 if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
3581 if (vex_control.iropt_verbosity > 0)
3582 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
3583 n_stmts, 2* n_stmts);
3587 if (vex_control.iropt_verbosity > 0)
3588 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
3594 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
3596 Int i, j, jmax, n_vars;
3598 Addr64 xxx_value, yyy_value;
3605 if (vex_control.iropt_unroll_thresh <= 0)
3608 /* First off, figure out if we can unroll this loop. Do this
3609 without modifying bb0. */
3611 if (bb0->jumpkind != Ijk_Boring)
3617 /* Extract the next-guest address. If it isn't a literal, we give up. */
3621 if (udst->tag == Iex_Const
3622 && (udst->Iex.Const.con->tag == Ico_U32
3623 || udst->Iex.Const.con->tag == Ico_U64)) {
3624 /* The BB ends in a jump to a literal location. */
3626 xxx_value = udst->Iex.Const.con->tag == Ico_U64
3627 ? udst->Iex.Const.con->Ico.U64
3628 : (Addr64)(udst->Iex.Const.con->Ico.U32);
3634 /* Now we know the BB ends in a jump to a literal location. If
3635 it's a jump to itself (viz, idiom #1), move directly to the
3636 unrolling stage, first cloning the bb so the original isn't modified. */
3638 if (xxx_value == my_addr) {
3639 unroll_factor = calc_unroll_factor( bb0 );
3640 if (unroll_factor < 2)
3642 bb1 = deepCopyIRSB( bb0 );
3644 udst = NULL; /* is now invalid */
3648 /* Search for the second idiomatic form:
3649 X: BODY; if (c) goto X; goto Y
3650 We know Y, but need to establish that the last stmt
3653 yyy_value = xxx_value;
3654 for (i = bb0->stmts_used-1; i >= 0; i--)
3659 return NULL; /* block with no stmts. Strange. */
3662 if (st->tag != Ist_Exit)
3664 if (st->Ist.Exit.jk != Ijk_Boring)
3667 con = st->Ist.Exit.dst;
3668 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
3670 xxx_value = con->tag == Ico_U64
3671 ? st->Ist.Exit.dst->Ico.U64
3672 : (Addr64)(st->Ist.Exit.dst->Ico.U32);
3674 /* If this assertion fails, we have some kind of type error. */
3675 vassert(con->tag == udst->Iex.Const.con->tag);
3677 if (xxx_value != my_addr)
3678 /* We didn't find either idiom. Give up. */
3681 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
3682 yyy values (which makes it look like idiom #1), and go into
3683 unrolling proper. This means finding (again) the last stmt, in the copy. */
3686 unroll_factor = calc_unroll_factor( bb0 );
3687 if (unroll_factor < 2)
3690 bb1 = deepCopyIRSB( bb0 );
3692 udst = NULL; /* is now invalid */
3693 for (i = bb1->stmts_used-1; i >= 0; i--)
3697 /* The next bunch of assertions should be true since we already
3698 found and checked the last stmt in the original bb. */
3703 vassert(st->tag == Ist_Exit);
3705 con = st->Ist.Exit.dst;
3706 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
3709 vassert(udst->tag == Iex_Const);
3710 vassert(udst->Iex.Const.con->tag == Ico_U32
3711 || udst->Iex.Const.con->tag == Ico_U64);
3712 vassert(con->tag == udst->Iex.Const.con->tag);
3714 /* switch the xxx and yyy fields around */
3715 if (con->tag == Ico_U64) {
3716 udst->Iex.Const.con->Ico.U64 = xxx_value;
3717 con->Ico.U64 = yyy_value;
3719 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
3720 con->Ico.U32 = (UInt)yyy_value;
3723 /* negate the test condition */
3725 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
3727 /* --- The unroller proper. Both idioms are by now --- */
3728 /* --- converted to idiom 1. --- */
3732 vassert(unroll_factor == 2
3733 || unroll_factor == 4
3734 || unroll_factor == 8);
3736 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
3737 for (j = 1; j <= jmax; j++) {
3739 n_vars = bb1->tyenv->types_used;
3741 bb2 = deepCopyIRSB(bb1);
3742 for (i = 0; i < n_vars; i++)
3743 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
3745 for (i = 0; i < bb2->stmts_used; i++) {
3746 /* deltaIRStmt destructively modifies the stmt, but
3747 that's OK since bb2 is a complete fresh copy of bb1. */
3748 deltaIRStmt(bb2->stmts[i], n_vars);
3749 addStmtToIRSB(bb1, bb2->stmts[i]);
3754 vex_printf("\nUNROLLED (%llx)\n", my_addr);
3759 /* Flattening; sigh. The unroller succeeds in breaking flatness
3760 by negating the test condition. This should be fixed properly.
3761 For the moment use this shotgun approach. */
3762 return flatten_BB(bb1);
3766 /*---------------------------------------------------------------*/
3767 /*--- The tree builder ---*/
3768 /*---------------------------------------------------------------*/
3770 /* This isn't part of IR optimisation. Really it's a pass done prior
3771 to instruction selection, which improves the code that the
3772 instruction selector can produce. */
3774 /* --- The 'tmp' environment is the central data structure here --- */
3776 /* The number of outstanding bindings we're prepared to track.
3777 The number of times the env becomes full and we have to dump
3778 the oldest binding (hence reducing code quality) falls very
3779 rapidly as the env size increases. 8 gives reasonable performance
3780 under most circumstances. */
3783 /* bindee == NULL === slot is not in use
3784 bindee != NULL === slot is in use
3795 __attribute__((unused))
3796 static void ppAEnv ( ATmpInfo* env )
3799 for (i = 0; i < A_NENV; i++) {
3800 vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
3802 ppIRExpr(env[i].bindee);
3804 vex_printf("(null)");
3809 /* --- Tree-traversal fns --- */
3811 /* Traverse an expr, and detect if any part of it reads memory or does
3812 a Get. Be careful ... this really controls how much the
3813 tree-builder can reorder the code, so getting it right is critical.
3815 static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
3820 for (i = 0; e->Iex.CCall.args[i]; i++)
3821 setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
3824 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
3825 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
3826 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
3829 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1);
3830 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2);
3831 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3);
3832 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4);
3835 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
3836 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
3837 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
3840 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
3841 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
3844 setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
3848 setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
3855 setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
3861 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3862 vpanic("setHints_Expr");
3867 /* Add a binding to the front of the env and slide all the rest
3868 backwards. It should be the case that the last slot is free. */
3869 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
3872 vassert(env[A_NENV-1].bindee == NULL);
3873 for (i = A_NENV-1; i >= 1; i--)
3875 env[0].binder = binder;
3876 env[0].bindee = bindee;
3877 env[0].doesLoad = False; /* filled in later */
3878 env[0].doesGet = False; /* filled in later */
3881 /* Given uses :: array of UShort, indexed by IRTemp
3882 Add the use-occurrences of temps in this expression to uses[]. */
3885 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
3891 case Iex_RdTmp: /* the only interesting case */
3892 uses[e->Iex.RdTmp.tmp]++;
3896 aoccCount_Expr(uses, e->Iex.Mux0X.cond);
3897 aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
3898 aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
3902 aoccCount_Expr(uses, e->Iex.Qop.arg1);
3903 aoccCount_Expr(uses, e->Iex.Qop.arg2);
3904 aoccCount_Expr(uses, e->Iex.Qop.arg3);
3905 aoccCount_Expr(uses, e->Iex.Qop.arg4);
3909 aoccCount_Expr(uses, e->Iex.Triop.arg1);
3910 aoccCount_Expr(uses, e->Iex.Triop.arg2);
3911 aoccCount_Expr(uses, e->Iex.Triop.arg3);
3915 aoccCount_Expr(uses, e->Iex.Binop.arg1);
3916 aoccCount_Expr(uses, e->Iex.Binop.arg2);
3920 aoccCount_Expr(uses, e->Iex.Unop.arg);
3924 aoccCount_Expr(uses, e->Iex.Load.addr);
3928 for (i = 0; e->Iex.CCall.args[i]; i++)
3929 aoccCount_Expr(uses, e->Iex.CCall.args[i]);
3933 aoccCount_Expr(uses, e->Iex.GetI.ix);
3941 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3942 vpanic("aoccCount_Expr");
3947 /* Given uses :: array of UShort, indexed by IRTemp
3948 Add the use-occurrences of temps in this statement to uses[]. */
3951 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
3958 aoccCount_Expr(uses, st->Ist.AbiHint.base);
3959 aoccCount_Expr(uses, st->Ist.AbiHint.nia);
3962 aoccCount_Expr(uses, st->Ist.WrTmp.data);
3965 aoccCount_Expr(uses, st->Ist.Put.data);
3968 aoccCount_Expr(uses, st->Ist.PutI.ix);
3969 aoccCount_Expr(uses, st->Ist.PutI.data);
3972 aoccCount_Expr(uses, st->Ist.Store.addr);
3973 aoccCount_Expr(uses, st->Ist.Store.data);
3976 cas = st->Ist.CAS.details;
3977 aoccCount_Expr(uses, cas->addr);
3979 aoccCount_Expr(uses, cas->expdHi);
3980 aoccCount_Expr(uses, cas->expdLo);
3982 aoccCount_Expr(uses, cas->dataHi);
3983 aoccCount_Expr(uses, cas->dataLo);
3986 aoccCount_Expr(uses, st->Ist.LLSC.addr);
3987 if (st->Ist.LLSC.storedata)
3988 aoccCount_Expr(uses, st->Ist.LLSC.storedata);
3991 d = st->Ist.Dirty.details;
3992 if (d->mFx != Ifx_None)
3993 aoccCount_Expr(uses, d->mAddr);
3994 aoccCount_Expr(uses, d->guard);
3995 for (i = 0; d->args[i]; i++)
3996 aoccCount_Expr(uses, d->args[i]);
4003 aoccCount_Expr(uses, st->Ist.Exit.guard);
4006 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4007 vpanic("aoccCount_Stmt");
4011 /* Look up a binding for tmp in the env. If found, return the bound
4012 expression, and set the env's binding to NULL so it is marked as
4013 used. If not found, return NULL. */
4015 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
4018 for (i = 0; i < A_NENV; i++) {
4019 if (env[i].binder == tmp && env[i].bindee != NULL) {
4020 IRExpr* bindee = env[i].bindee;
4021 env[i].bindee = NULL;
4028 /* Traverse e, looking for temps. For each observed temp, see if env
4029 contains a binding for the temp, and if so return the bound value.
4030 The env has the property that any binding it holds is
4031 'single-shot', so once a binding is used, it is marked as no longer
4032 available, by setting its .bindee field to NULL. */
4034 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
4035 return e->tag == Iex_Unop && e->Iex.Unop.op == op;
4037 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
4038 return e->tag == Iex_Binop && e->Iex.Binop.op == op;
4041 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
4045 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
4046 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
4047 return IRExpr_Unop( Iop_CmpwNEZ32,
4048 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
4049 a2->Iex.Unop.arg ) );
4054 /* no reduction rule applies */
4055 return IRExpr_Binop( op, a1, a2 );
4058 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
4062 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
4063 if (is_Binop(aa, Iop_Or64)
4064 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
4065 return fold_IRExpr_Unop(
4067 IRExpr_Binop(Iop_Or64,
4068 aa->Iex.Binop.arg1->Iex.Unop.arg,
4069 aa->Iex.Binop.arg2));
4070 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
4071 if (is_Binop(aa, Iop_Or64)
4072 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
4073 return fold_IRExpr_Unop(
4075 IRExpr_Binop(Iop_Or64,
4077 aa->Iex.Binop.arg2->Iex.Unop.arg));
4080 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
4081 if (is_Unop(aa, Iop_Left64))
4082 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
4085 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
4086 if (is_Unop(aa, Iop_CmpwNEZ32))
4087 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
4090 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
4091 if (is_Unop(aa, Iop_Left32))
4092 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
4095 /* Left32( Left32(x) ) --> Left32(x) */
4096 if (is_Unop(aa, Iop_Left32))
4097 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
4100 /* 32to1( 1Uto32 ( x ) ) --> x */
4101 if (is_Unop(aa, Iop_1Uto32))
4102 return aa->Iex.Unop.arg;
4103 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
4104 if (is_Unop(aa, Iop_CmpwNEZ32))
4105 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
4108 /* 64to1( 1Uto64 ( x ) ) --> x */
4109 if (is_Unop(aa, Iop_1Uto64))
4110 return aa->Iex.Unop.arg;
4111 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
4112 if (is_Unop(aa, Iop_CmpwNEZ64))
4113 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
4116 /* 64to32( 32Uto64 ( x )) --> x */
4117 if (is_Unop(aa, Iop_32Uto64))
4118 return aa->Iex.Unop.arg;
4119 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
4120 if (is_Unop(aa, Iop_8Uto64))
4121 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
4125 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
4126 if (is_Unop(aa, Iop_8Uto32))
4127 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
4128 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
4129 if (is_Unop(aa, Iop_16Uto32))
4130 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
4134 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
4135 if (is_Unop(aa, Iop_CmpNEZ8)
4136 && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
4137 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
4138 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
4140 return IRExpr_Unop( Iop_CmpwNEZ32,
4141 aa->Iex.Unop.arg->Iex.Unop.arg
4142 ->Iex.Unop.arg->Iex.Unop.arg);
4150 /* no reduction rule applies */
4151 return IRExpr_Unop( op, aa );
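/* A never-called sketch of the kind of collapse this enables (the
   temp number is arbitrary): wrapping t7 in 1Uto32 and then asking
   for 32to1 of the result gives back just t7. */
__attribute__((unused))
static void example_fold_IRExpr_Unop ( void )
{
   IRExpr* e = fold_IRExpr_Unop( Iop_32to1,
                  IRExpr_Unop(Iop_1Uto32, IRExpr_RdTmp(7)) );
   vassert(e->tag == Iex_RdTmp && e->Iex.RdTmp.tmp == 7);
}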
4154 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
4163 args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
4164 for (i = 0; args2[i]; i++)
4165 args2[i] = atbSubst_Expr(env,args2[i]);
4166 return IRExpr_CCall(
4172 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
4175 return IRExpr_Mux0X(
4176 atbSubst_Expr(env, e->Iex.Mux0X.cond),
4177 atbSubst_Expr(env, e->Iex.Mux0X.expr0),
4178 atbSubst_Expr(env, e->Iex.Mux0X.exprX)
4183 atbSubst_Expr(env, e->Iex.Qop.arg1),
4184 atbSubst_Expr(env, e->Iex.Qop.arg2),
4185 atbSubst_Expr(env, e->Iex.Qop.arg3),
4186 atbSubst_Expr(env, e->Iex.Qop.arg4)
4189 return IRExpr_Triop(
4191 atbSubst_Expr(env, e->Iex.Triop.arg1),
4192 atbSubst_Expr(env, e->Iex.Triop.arg2),
4193 atbSubst_Expr(env, e->Iex.Triop.arg3)
4196 return fold_IRExpr_Binop(
4198 atbSubst_Expr(env, e->Iex.Binop.arg1),
4199 atbSubst_Expr(env, e->Iex.Binop.arg2)
4202 return fold_IRExpr_Unop(
4204 atbSubst_Expr(env, e->Iex.Unop.arg)
4210 atbSubst_Expr(env, e->Iex.Load.addr)
4215 atbSubst_Expr(env, e->Iex.GetI.ix),
4222 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4223 vpanic("atbSubst_Expr");
4227 /* Same deal as atbSubst_Expr, except for stmts. */
4229 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
4236 return IRStmt_AbiHint(
4237 atbSubst_Expr(env, st->Ist.AbiHint.base),
4238 st->Ist.AbiHint.len,
4239 atbSubst_Expr(env, st->Ist.AbiHint.nia)
4242 return IRStmt_Store(
4244 atbSubst_Expr(env, st->Ist.Store.addr),
4245 atbSubst_Expr(env, st->Ist.Store.data)
4248 return IRStmt_WrTmp(
4250 atbSubst_Expr(env, st->Ist.WrTmp.data)
4255 atbSubst_Expr(env, st->Ist.Put.data)
4260 atbSubst_Expr(env, st->Ist.PutI.ix),
4262 atbSubst_Expr(env, st->Ist.PutI.data)
4267 atbSubst_Expr(env, st->Ist.Exit.guard),
4272 return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
4274 return IRStmt_NoOp();
4276 return IRStmt_MBE(st->Ist.MBE.event);
4278 cas = st->Ist.CAS.details;
4280 cas->oldHi, cas->oldLo, cas->end,
4281 atbSubst_Expr(env, cas->addr),
4282 cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
4283 atbSubst_Expr(env, cas->expdLo),
4284 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
4285 atbSubst_Expr(env, cas->dataLo)
4287 return IRStmt_CAS(cas2);
4291 st->Ist.LLSC.result,
4292 atbSubst_Expr(env, st->Ist.LLSC.addr),
4293 st->Ist.LLSC.storedata
4294 ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
4297 d = st->Ist.Dirty.details;
4298 d2 = emptyIRDirty();
4300 if (d2->mFx != Ifx_None)
4301 d2->mAddr = atbSubst_Expr(env, d2->mAddr);
4302 d2->guard = atbSubst_Expr(env, d2->guard);
4303 for (i = 0; d2->args[i]; i++)
4304 d2->args[i] = atbSubst_Expr(env, d2->args[i]);
4305 return IRStmt_Dirty(d2);
4307 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4308 vpanic("atbSubst_Stmt");
4312 /* notstatic */ void ado_treebuild_BB ( IRSB* bb )
4315 Bool stmtPuts, stmtStores, invalidateMe;
4318 ATmpInfo env[A_NENV];
4320 Int n_tmps = bb->tyenv->types_used;
4321 UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort));
4323 /* Phase 1. Scan forwards in bb, counting use occurrences of each
4324 temp. Also count occurrences in the bb->next field. */
4326 for (i = 0; i < n_tmps; i++)
4329 for (i = 0; i < bb->stmts_used; i++) {
4331 if (st->tag == Ist_NoOp)
4333 aoccCount_Stmt( uses, st );
4335 aoccCount_Expr(uses, bb->next );
4338 for (i = 0; i < n_tmps; i++) {
4341 ppIRTemp( (IRTemp)i );
4342 vex_printf(" used %d\n", (Int)uses[i] );
4346 /* Phase 2. Scan forwards in bb. For each statement in turn:
4348 If the env is full, emit the oldest (end) element. This guarantees
4349 there is at least one free slot in what follows.
4351 On seeing 't = E', occ(t)==1, let E' = env(E), then
4354 add t -> E' to the front of the env
4355 Examine E' and set the hints for E' appropriately
4356 (doesLoad? doesGet?)
4358 On seeing any other stmt,
4359 let stmt' = env(stmt)
4360 remove from env any 't=E' binds invalidated by stmt
4361 emit the invalidated stmts
4363 compact any holes in env
4364 by sliding entries towards the front
4366 Finally, apply env to bb->next. */
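/* A worked example (temps and offsets arbitrary): given occ(t1)==1 and

      t1 = LDle:I32(t0)
      STle(t2) = t1

   the load is not emitted when first seen; it is parked in env as a
   binding for t1.  The store then consumes that binding via
   substitution and is emitted as 'STle(t2) = LDle:I32(t0)', handing
   the instruction selector a tree rather than two separate stmts.
   If instead a 'PUT(0) = t3' sat between the two stmts, the binding
   (which doesLoad) would be invalidated by the Put and re-emitted as
   't1 = LDle:I32(t0)' just before it, so the load is never moved
   across the Put. */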
4369 for (i = 0; i < A_NENV; i++) {
4370 env[i].bindee = NULL;
4371 env[i].binder = IRTemp_INVALID;
4374 /* The stmts in bb are being reordered, and we are guaranteed to
4375 end up with no more than the number we started with. Use i as
4376 the cursor for the current stmt being examined and j <= i as the
4377 cursor for the current stmt being written.
4380 for (i = 0; i < bb->stmts_used; i++) {
4383 if (st->tag == Ist_NoOp)
4386 /* Ensure there's at least one space in the env, by emitting
4387 the oldest binding if necessary. */
4388 if (env[A_NENV-1].bindee != NULL) {
4389 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
4390 env[A_NENV-1].bindee );
4393 env[A_NENV-1].bindee = NULL;
4396 /* Consider current stmt. */
4397 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
4400 /* optional extra: dump dead bindings as we find them.
4401 Removes the need for a prior dead-code removal pass. */
4402 if (uses[st->Ist.WrTmp.tmp] == 0) {
4403 if (0) vex_printf("DEAD binding\n");
4404 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4406 vassert(uses[st->Ist.WrTmp.tmp] == 1);
4408 /* ok, we have 't = E', occ(t)==1. Do the abovementioned actions. */
4410 e = st->Ist.WrTmp.data;
4411 e2 = atbSubst_Expr(env, e);
4412 addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
4413 setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
4414 /* don't advance j, as we are deleting this stmt and instead
4415 holding it temporarily in the env. */
4416 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4419 /* we get here for any other kind of statement. */
4420 /* 'use up' any bindings required by the current statement. */
4421 st2 = atbSubst_Stmt(env, st);
4423 /* Now, before this stmt, dump any bindings in env that it
4424 invalidates. These need to be dumped in the order in which
4425 they originally entered env -- that means from oldest to youngest. */
4428 /* stmtPuts/stmtStores characterise what the stmt under
4429 consideration does, or might do (erring on the safe side, which is True). */
4431 = toBool( st->tag == Ist_Put
4432 || st->tag == Ist_PutI
4433 || st->tag == Ist_Dirty );
4435 /* stmtStores is True if this stmt writes memory or might do so (==> we don't
4436 want to reorder other loads or stores relative to it). Also,
4437 both LL and SC fall under this classification, since we
4438 really ought to be conservative and not reorder any other
4439 memory transactions relative to them. */
4441 = toBool( st->tag == Ist_Store
4442 || st->tag == Ist_Dirty
4443 || st->tag == Ist_LLSC );
4445 for (k = A_NENV-1; k >= 0; k--) {
4446 if (env[k].bindee == NULL)
4448 /* Compare the actions of this stmt with the actions of
4449 binding 'k', to see if they invalidate the binding. */
4452 /* a store invalidates loaded data */
4453 (env[k].doesLoad && stmtStores)
4454 /* a put invalidates get'd data */
4455 || (env[k].doesGet && stmtPuts)
4456 /* a put invalidates loaded data. Note, we could do
4457 much better here in the sense that we only need to
4458 invalidate trees containing loads if the Put in
4459 question is marked as requiring precise exceptions. */
4461 || (env[k].doesLoad && stmtPuts)
4462 /* probably overly conservative: a memory bus event
4463 invalidates absolutely everything, so that all
4464 computation prior to it is forced to complete before
4465 proceeding with the event (fence,lock,unlock). */
4466 || st->tag == Ist_MBE
4467 /* also be (probably overly) paranoid re AbiHints */
4468 || st->tag == Ist_AbiHint
4471 bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
4474 env[k].bindee = NULL;
4478 /* Slide in-use entries in env up to the front */
4480 for (k = 0; k < A_NENV; k++) {
4481 if (env[k].bindee != NULL) {
4486 for (m = m; m < A_NENV; m++) {
4487 env[m].bindee = NULL;
4490 /* finally, emit the substituted statement */
4492 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
4496 } /* for each stmt in the original bb ... */
4498 /* Finally ... substitute the ->next field as much as possible, and
4499 dump any left-over bindings. Hmm. Perhaps there should be no
4500 left over bindings? Or any left-over bindings are
4501 by definition dead? */
4502 bb->next = atbSubst_Expr(env, bb->next);
4507 /*---------------------------------------------------------------*/
4508 /*--- iropt main ---*/
4509 /*---------------------------------------------------------------*/
4511 static Bool iropt_verbose = False; /* True; */
4514 /* Do a simple cleanup pass on bb. This is: redundant Get removal,
4515 redundant Put removal, constant propagation, dead code removal,
4516 clean helper specialisation, and dead code removal (again).
4521 IRSB* cheap_transformations (
4523 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
4524 Bool (*preciseMemExnsFn)(Int,Int)
4527 redundant_get_removal_BB ( bb );
4528 if (iropt_verbose) {
4529 vex_printf("\n========= REDUNDANT GET\n\n" );
4533 redundant_put_removal_BB ( bb, preciseMemExnsFn );
4534 if (iropt_verbose) {
4535 vex_printf("\n========= REDUNDANT PUT\n\n" );
4539 bb = cprop_BB ( bb );
4540 if (iropt_verbose) {
4541 vex_printf("\n========= CPROPD\n\n" );
4545 do_deadcode_BB ( bb );
4546 if (iropt_verbose) {
4547 vex_printf("\n========= DEAD\n\n" );
4551 bb = spec_helpers_BB ( bb, specHelper );
4552 do_deadcode_BB ( bb );
4553 if (iropt_verbose) {
4554 vex_printf("\n========= SPECd \n\n" );
4562 /* Do some more expensive transformations on bb, which are aimed at
4563 optimising as much as possible in the presence of GetI and PutI. */
4566 IRSB* expensive_transformations( IRSB* bb )
4568 (void)do_cse_BB( bb );
4569 collapse_AddSub_chains_BB( bb );
4570 do_redundant_GetI_elimination( bb );
4571 do_redundant_PutI_elimination( bb );
4572 do_deadcode_BB( bb );
4577 /* Scan a flattened BB to look for signs that more expensive
4578 optimisations might be useful:
4579 - find out if there are any GetIs and PutIs
4580 - find out if there are any floating or vector-typed temporaries
4583 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
4584 /*OUT*/Bool* hasVorFtemps,
4592 *hasGetIorPutI = False;
4593 *hasVorFtemps = False;
4595 for (i = 0; i < bb->stmts_used; i++) {
4599 vassert(isIRAtom(st->Ist.AbiHint.base));
4600 vassert(isIRAtom(st->Ist.AbiHint.nia));
4603 *hasGetIorPutI = True;
4606 if (st->Ist.WrTmp.data->tag == Iex_GetI)
4607 *hasGetIorPutI = True;
4608 switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
4609 case Ity_I1: case Ity_I8: case Ity_I16:
4610 case Ity_I32: case Ity_I64: case Ity_I128:
4612 case Ity_F32: case Ity_F64: case Ity_F128: case Ity_V128:
4613 *hasVorFtemps = True;
4620 vassert(isIRAtom(st->Ist.Put.data));
4623 vassert(isIRAtom(st->Ist.Store.addr));
4624 vassert(isIRAtom(st->Ist.Store.data));
4627 cas = st->Ist.CAS.details;
4628 vassert(isIRAtom(cas->addr));
4629 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
4630 vassert(isIRAtom(cas->expdLo));
4631 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
4632 vassert(isIRAtom(cas->dataLo));
4635 vassert(isIRAtom(st->Ist.LLSC.addr));
4636 if (st->Ist.LLSC.storedata)
4637 vassert(isIRAtom(st->Ist.LLSC.storedata));
4640 d = st->Ist.Dirty.details;
4641 vassert(isIRAtom(d->guard));
4642 for (j = 0; d->args[j]; j++)
4643 vassert(isIRAtom(d->args[j]));
4644 if (d->mFx != Ifx_None)
4645 vassert(isIRAtom(d->mAddr));
4652 vassert(isIRAtom(st->Ist.Exit.guard));
4657 vpanic("considerExpensives");
4663 /* ---------------- The main iropt entry point. ---------------- */
4665 /* exported from this file */
4666 /* Rules of the game:
4668 - IRExpr/IRStmt trees should be treated as immutable, as they
4669 may get shared. So never change a field of such a tree node;
4670 instead construct and return a new one if needed.
4676 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
4677 Bool (*preciseMemExnsFn)(Int,Int),
4682 static Int n_total = 0;
4683 static Int n_expensive = 0;
4685 Bool hasGetIorPutI, hasVorFtemps;
4690 /* First flatten the block out, since all other
4691 phases assume flat code. */
4693 bb = flatten_BB ( bb0 );
4695 if (iropt_verbose) {
4696 vex_printf("\n========= FLAT\n\n" );
4700 /* If at level 0, stop now. */
4701 if (vex_control.iropt_level <= 0) return bb;
4703 /* Now do a preliminary cleanup pass, and figure out if we also
4704 need to do 'expensive' optimisations. Expensive optimisations
4705 are deemed necessary if the block contains any GetIs or PutIs.
4706 If needed, do expensive transformations and then another cheap cleanup pass. */
4709 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4711 if (guest_arch == VexArchARM) {
4712 /* Translating Thumb2 code produces a lot of chaff. We have to
4713 work extra hard to get rid of it. */
4715 bb = spec_helpers_BB ( bb, specHelper );
4716 redundant_put_removal_BB ( bb, preciseMemExnsFn );
4718 do_deadcode_BB( bb );
4721 if (vex_control.iropt_level > 1) {
4723 /* Peer at what we have, to decide how much more effort to throw at it. */
4725 considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
4727 if (hasVorFtemps && !hasGetIorPutI) {
4728 /* If any evidence of FP or Vector activity, CSE, as that
4729 tends to mop up all manner of lardy code to do with
4730 rounding modes. Don't bother if hasGetIorPutI since that
4731 case leads into the expensive transformations, which do CSE anyway. */
4733 (void)do_cse_BB( bb );
4734 do_deadcode_BB( bb );
4737 if (hasGetIorPutI) {
4741 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
4742 bb = expensive_transformations( bb );
4743 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4744 /* Potentially common up GetIs */
4745 cses = do_cse_BB( bb );
4747 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4750 /* Now have a go at unrolling simple (single-BB) loops. If
4751 successful, clean up the results as much as possible. */
4753 bb2 = maybe_loop_unroll_BB( bb, guest_addr );
4755 bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
4756 if (hasGetIorPutI) {
4757 bb = expensive_transformations( bb );
4758 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4760 /* at least do CSE and dead code removal */
4762 do_deadcode_BB( bb );
4764 if (0) vex_printf("vex iropt: unrolled a loop\n");
4773 /*---------------------------------------------------------------*/
4774 /*--- end ir_opt.c ---*/
4775 /*---------------------------------------------------------------*/