2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"
45 #include "host_x86_defs.h"
49 -- (Really an assembler issue) don't emit CMov32 as a cmov
50 insn, since that's expensive on P4 and conditional branch
51 is cheaper if (as we expect) the condition is highly predictable
53 -- preserve xmm registers across function calls (by declaring them
54 as trashed by call insns)
56 -- preserve x87 ST stack discipline across function calls. Sigh.
58 -- Check doHelperCall: if a call is conditional, we cannot safely
59 compute any regparm args directly to registers. Hence, the
60 fast-regparm marshalling should be restricted to unconditional
64 /*---------------------------------------------------------*/
65 /*--- x87 control word stuff ---*/
66 /*---------------------------------------------------------*/
68 /* Vex-generated code expects to run with the FPU set as follows: all
69 exceptions masked, round-to-nearest, precision = 53 bits. This
70 corresponds to a FPU control word value of 0x027F.
72 Similarly the SSE control word (%mxcsr) should be 0x1F80.
74 %fpucw and %mxcsr should have these values on entry to
75    Vex-generated code, and should those values be
79 #define DEFAULT_FPUCW 0x027F
81 /* debugging only, do not use */
82 /* define DEFAULT_FPUCW 0x037F */
85 /*---------------------------------------------------------*/
86 /*--- misc helpers ---*/
87 /*---------------------------------------------------------*/
89 /* These are duplicated in guest-x86/toIR.c */
90 static IRExpr* unop ( IROp op, IRExpr* a )
92 return IRExpr_Unop(op, a);
95 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
97 return IRExpr_Binop(op, a1, a2);
100 static IRExpr* bind ( Int binder )
102 return IRExpr_Binder(binder);
105 static Bool isZeroU8 ( IRExpr* e )
107 return e->tag == Iex_Const
108 && e->Iex.Const.con->tag == Ico_U8
109 && e->Iex.Const.con->Ico.U8 == 0;
112 static Bool isZeroU32 ( IRExpr* e )
114 return e->tag == Iex_Const
115 && e->Iex.Const.con->tag == Ico_U32
116 && e->Iex.Const.con->Ico.U32 == 0;
119 static Bool isZeroU64 ( IRExpr* e )
121 return e->tag == Iex_Const
122 && e->Iex.Const.con->tag == Ico_U64
123 && e->Iex.Const.con->Ico.U64 == 0ULL;
127 /*---------------------------------------------------------*/
129 /*---------------------------------------------------------*/
131 /* This carries around:
133 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
134 might encounter. This is computed before insn selection starts,
137 - A mapping from IRTemp to HReg. This tells the insn selector
138 which virtual register(s) are associated with each IRTemp
139 temporary. This is computed before insn selection starts, and
140 does not change. We expect this mapping to map precisely the
141 same set of IRTemps as the type mapping does.
143 - vregmap holds the primary register for the IRTemp.
144 - vregmapHI is only used for 64-bit integer-typed
145 IRTemps. It holds the identity of a second
146 32-bit virtual HReg, which holds the high half
149 - The code array, that is, the insns selected so far.
151 - A counter, for generating new virtual registers.
153 - The host subarchitecture we are selecting insns for.
154 This is set at the start and does not change.
156 Note, this is all host-independent. */
175 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
178 vassert(tmp < env->n_vregmap);
179 return env->vregmap[tmp];
182 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
185 vassert(tmp < env->n_vregmap);
186 vassert(env->vregmapHI[tmp] != INVALID_HREG);
187 *vrLO = env->vregmap[tmp];
188 *vrHI = env->vregmapHI[tmp];
191 static void addInstr ( ISelEnv* env, X86Instr* instr )
193 addHInstr(env->code, instr);
194 if (vex_traceflags & VEX_TRACE_VCODE) {
195 ppX86Instr(instr, False);
200 static HReg newVRegI ( ISelEnv* env )
202 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
207 static HReg newVRegF ( ISelEnv* env )
209 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
214 static HReg newVRegV ( ISelEnv* env )
216 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
222 /*---------------------------------------------------------*/
223 /*--- ISEL: Forward declarations ---*/
224 /*---------------------------------------------------------*/
226 /* These are organised as iselXXX and iselXXX_wrk pairs. The
227 iselXXX_wrk do the real work, but are not to be called directly.
228 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
229 checks that all returned registers are virtual. You should not
230 call the _wrk version directly.
232 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
233 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
235 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
236 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
238 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
239 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
241 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
242 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
244 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
245 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
247 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
248 ISelEnv* env, IRExpr* e );
249 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
250 ISelEnv* env, IRExpr* e );
252 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
253 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
255 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
256 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
258 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
259 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
261 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
262 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
265 /*---------------------------------------------------------*/
266 /*--- ISEL: Misc helpers ---*/
267 /*---------------------------------------------------------*/
269 /* Make a int reg-reg move. */
271 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
273 vassert(hregClass(src) == HRcInt32);
274 vassert(hregClass(dst) == HRcInt32);
275 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
279 /* Make a vector reg-reg move. */
281 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
283 vassert(hregClass(src) == HRcVec128);
284 vassert(hregClass(dst) == HRcVec128);
285 return X86Instr_SseReRg(Xsse_MOV, src, dst);
288 /* Advance/retreat %esp by n. */
290 static void add_to_esp ( ISelEnv* env, Int n )
292 vassert(n > 0 && n < 256 && (n%4) == 0);
294 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
297 static void sub_from_esp ( ISelEnv* env, Int n )
299 vassert(n > 0 && n < 256 && (n%4) == 0);
301 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
305 /* Given an amode, return one which references 4 bytes further
308 static X86AMode* advance4 ( X86AMode* am )
310 X86AMode* am4 = dopyX86AMode(am);
313 am4->Xam.IRRS.imm += 4; break;
315 am4->Xam.IR.imm += 4; break;
317 vpanic("advance4(x86,host)");
323 /* Push an arg onto the host stack, in preparation for a call to a
324 helper function of some kind. Returns the number of 32-bit words
327 static Int pushArg ( ISelEnv* env, IRExpr* arg )
329 IRType arg_ty = typeOfIRExpr(env->type_env, arg);
330 if (arg_ty == Ity_I32) {
331 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
334 if (arg_ty == Ity_I64) {
336 iselInt64Expr(&rHi, &rLo, env, arg);
337 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
338 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
342 vpanic("pushArg(x86): can't handle arg of this type");
346 /* Complete the call to a helper function, by calling the
347 helper and clearing the args off the stack. */
350 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
351 IRCallee* cee, Int n_arg_ws )
353 /* Complication. Need to decide which reg to use as the fn address
354 pointer, in a way that doesn't trash regparm-passed
356 vassert(sizeof(void*) == 4);
358 addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)),
361 add_to_esp(env, 4*n_arg_ws);
365 /* Used only in doHelperCall. See big comment in doHelperCall re
366 handling of regparm args. This function figures out whether
367 evaluation of an expression might require use of a fixed register.
368 If in doubt return True (safe but suboptimal).
371 Bool mightRequireFixedRegs ( IRExpr* e )
374 case Iex_RdTmp: case Iex_Const: case Iex_Get:
382 /* Do a complete function call. guard is a Ity_Bit expression
383 indicating whether or not the call happens. If guard==NULL, the
384 call is unconditional. */
387 void doHelperCall ( ISelEnv* env,
389 IRExpr* guard, IRCallee* cee, IRExpr** args )
395 Int not_done_yet, n_args, n_arg_ws, stack_limit,
398 /* Marshal args for a call, do the call, and clear the stack.
399 Complexities to consider:
401 * if passBBP is True, %ebp (the baseblock pointer) is to be
402 passed as the first arg.
404 * If the callee claims regparmness of 1, 2 or 3, we must pass the
405 first 1, 2 or 3 args in registers (EAX, EDX, and ECX
406 respectively). To keep things relatively simple, only args of
407 type I32 may be passed as regparms -- just bomb out if anything
408 else turns up. Clearly this depends on the front ends not
409 trying to pass any other types as regparms.
412 /* 16 Nov 2004: the regparm handling is complicated by the
415 Consider a call two a function with two regparm parameters:
416 f(e1,e2). We need to compute e1 into %eax and e2 into %edx.
417 Suppose code is first generated to compute e1 into %eax. Then,
418 code is generated to compute e2 into %edx. Unfortunately, if
419 the latter code sequence uses %eax, it will trash the value of
420 e1 computed by the former sequence. This could happen if (for
421 example) e2 itself involved a function call. In the code below,
422 args are evaluated right-to-left, not left-to-right, but the
423 principle and the problem are the same.
425 One solution is to compute all regparm-bound args into vregs
426 first, and once they are all done, move them to the relevant
427 real regs. This always gives correct code, but it also gives
428 a bunch of vreg-to-rreg moves which are usually redundant but
429 are hard for the register allocator to get rid of.
431 A compromise is to first examine all regparm'd argument
432 expressions. If they are all so simple that it is clear
433 they will be evaluated without use of any fixed registers,
434 use the old compute-directly-to-fixed-target scheme. If not,
435 be safe and use the via-vregs scheme.
437 Note this requires being able to examine an expression and
438 determine whether or not evaluation of it might use a fixed
439 register. That requires knowledge of how the rest of this
440 insn selector works. Currently just the following 3 are
441 regarded as safe -- hopefully they cover the majority of
442 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
444 vassert(cee->regparms >= 0 && cee->regparms <= 3);
446 n_args = n_arg_ws = 0;
447 while (args[n_args]) n_args++;
449 not_done_yet = n_args;
453 stack_limit = cee->regparms;
454 if (cee->regparms > 0 && passBBP) stack_limit--;
456 /* ------ BEGIN marshall all arguments ------ */
458 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
459 for (i = n_args-1; i >= stack_limit; i--) {
460 n_arg_ws += pushArg(env, args[i]);
464 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
467 if (cee->regparms > 0) {
469 /* ------ BEGIN deal with regparms ------ */
471 /* deal with regparms, not forgetting %ebp if needed. */
472 argregs[0] = hregX86_EAX();
473 argregs[1] = hregX86_EDX();
474 argregs[2] = hregX86_ECX();
475 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
477 argreg = cee->regparms;
479 /* In keeping with big comment above, detect potential danger
480 and use the via-vregs scheme if needed. */
482 for (i = stack_limit-1; i >= 0; i--) {
483 if (mightRequireFixedRegs(args[i])) {
491 /* Move via temporaries */
493 for (i = stack_limit-1; i >= 0; i--) {
496 vex_printf("x86 host: register param is complex: ");
502 vassert(argreg >= 0);
503 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
504 tmpregs[argreg] = iselIntExpr_R(env, args[i]);
507 for (i = stack_limit-1; i >= 0; i--) {
509 vassert(argregX >= 0);
510 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
514 /* It's safe to compute all regparm args directly into their
516 for (i = stack_limit-1; i >= 0; i--) {
518 vassert(argreg >= 0);
519 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
520 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
521 iselIntExpr_RMI(env, args[i]),
528 /* Not forgetting %ebp if needed. */
530 vassert(argreg == 1);
531 addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0]));
535 /* ------ END deal with regparms ------ */
539 /* No regparms. Heave %ebp on the stack if needed. */
541 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
548 vassert(not_done_yet == 0);
550 /* ------ END marshall all arguments ------ */
552 /* Now we can compute the condition. We can't do it earlier
553 because the argument computations could trash the condition
554 codes. Be a bit clever to handle the common case where the
558 if (guard->tag == Iex_Const
559 && guard->Iex.Const.con->tag == Ico_U1
560 && guard->Iex.Const.con->Ico.U1 == True) {
561 /* unconditional -- do nothing */
563 cc = iselCondCode( env, guard );
567 /* call the helper, and get the args off the stack afterwards. */
568 callHelperAndClearArgs( env, cc, cee, n_arg_ws );
572 /* Given a guest-state array descriptor, an index expression and a
573 bias, generate an X86AMode holding the relevant guest state
577 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
578 IRExpr* off, Int bias )
581 Int elemSz = sizeofIRType(descr->elemTy);
582 Int nElems = descr->nElems;
585 /* throw out any cases not generated by an x86 front end. In
586 theory there might be a day where we need to handle them -- if
587 we ever run non-x86-guest on x86 host. */
590 vpanic("genGuestArrayOffset(x86 host)(1)");
593 case 1: shift = 0; break;
594 case 4: shift = 2; break;
595 case 8: shift = 3; break;
596 default: vpanic("genGuestArrayOffset(x86 host)(2)");
599 /* Compute off into a reg, %off. Then return:
602 addl $bias, %tmp (if bias != 0)
604 ... base(%ebp, %tmp, shift) ...
607 roff = iselIntExpr_R(env, off);
608 addInstr(env, mk_iMOVsd_RR(roff, tmp));
611 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
614 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
616 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
620 /* Mess with the FPU's rounding mode: set to the default rounding mode
623 void set_FPU_rounding_default ( ISelEnv* env )
625 /* pushl $DEFAULT_FPUCW
629 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
630 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
631 addInstr(env, X86Instr_FpLdCW(zero_esp));
636 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
637 expression denoting a value in the range 0 .. 3, indicating a round
638 mode encoded as per type IRRoundingMode. Set the x87 FPU to have
642 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
644 HReg rrm = iselIntExpr_R(env, mode);
645 HReg rrm2 = newVRegI(env);
646 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
649 andl $3, %rrm2 -- shouldn't be needed; paranoia
651 orl $DEFAULT_FPUCW, %rrm2
656 addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
657 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
658 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
659 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
660 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
661 addInstr(env, X86Instr_FpLdCW(zero_esp));
666 /* Generate !src into a new vector register, and be sure that the code
667 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
670 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
672 HReg dst = newVRegV(env);
673 /* Set dst to zero. If dst contains a NaN then all hell might
674 break loose after the comparison. So, first zero it. */
675 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
676 /* And now make it all 1s ... */
677 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
678 /* Finally, xor 'src' into it. */
679 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
680 /* Doesn't that just totally suck? */
685 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
686 after most non-simple FPU operations (simple = +, -, *, / and
689 This could be done a lot more efficiently if needed, by loading
690 zero and adding it to the value to be rounded (fldz ; faddp?).
692 static void roundToF64 ( ISelEnv* env, HReg reg )
694 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
695 sub_from_esp(env, 8);
696 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
697 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
702 /*---------------------------------------------------------*/
703 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
704 /*---------------------------------------------------------*/
706 /* Select insns for an integer-typed expression, and add them to the
707 code list. Return a reg holding the result. This reg will be a
708 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
709 want to modify it, ask for a new vreg, copy it in there, and modify
710 the copy. The register allocator will do its best to map both
711 vregs to the same real register, so the copies will often disappear
714 This should handle expressions of 32, 16 and 8-bit type. All
715 results are returned in a 32-bit register. For 16- and 8-bit
716 expressions, the upper 16/24 bits are arbitrary, so you should mask
717 or sign extend partial values if necessary.
720 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
722 HReg r = iselIntExpr_R_wrk(env, e);
723 /* sanity checks ... */
725 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
727 vassert(hregClass(r) == HRcInt32);
728 vassert(hregIsVirtual(r));
732 /* DO NOT CALL THIS DIRECTLY ! */
733 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
737 IRType ty = typeOfIRExpr(env->type_env,e);
738 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
742 /* --------- TEMP --------- */
744 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
747 /* --------- LOAD --------- */
749 HReg dst = newVRegI(env);
750 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
752 /* We can't handle big-endian loads, nor load-linked. */
753 if (e->Iex.Load.end != Iend_LE)
757 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
758 X86RMI_Mem(amode), dst) );
762 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
766 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
772 /* --------- TERNARY OP --------- */
774 /* C3210 flags following FPU partial remainder (fprem), both
775 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
776 if (e->Iex.Triop.op == Iop_PRemC3210F64
777 || e->Iex.Triop.op == Iop_PRem1C3210F64) {
778 HReg junk = newVRegF(env);
779 HReg dst = newVRegI(env);
780 HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
781 HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
782 /* XXXROUNDINGFIXME */
783 /* set roundingmode here */
784 addInstr(env, X86Instr_FpBinary(
785 e->Iex.Binop.op==Iop_PRemC3210F64
786 ? Xfp_PREM : Xfp_PREM1,
789 /* The previous pseudo-insn will have left the FPU's C3210
790 flags set correctly. So bag them. */
791 addInstr(env, X86Instr_FpStSW_AX());
792 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
793 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
800 /* --------- BINARY OP --------- */
805 /* Pattern: Sub32(0,x) */
806 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
807 HReg dst = newVRegI(env);
808 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
809 addInstr(env, mk_iMOVsd_RR(reg,dst));
810 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
814 /* Is it an addition or logical style op? */
815 switch (e->Iex.Binop.op) {
816 case Iop_Add8: case Iop_Add16: case Iop_Add32:
817 aluOp = Xalu_ADD; break;
818 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
819 aluOp = Xalu_SUB; break;
820 case Iop_And8: case Iop_And16: case Iop_And32:
821 aluOp = Xalu_AND; break;
822 case Iop_Or8: case Iop_Or16: case Iop_Or32:
823 aluOp = Xalu_OR; break;
824 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
825 aluOp = Xalu_XOR; break;
826 case Iop_Mul16: case Iop_Mul32:
827 aluOp = Xalu_MUL; break;
829 aluOp = Xalu_INVALID; break;
831 /* For commutative ops we assume any literal
832 values are on the second operand. */
833 if (aluOp != Xalu_INVALID) {
834 HReg dst = newVRegI(env);
835 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
836 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
837 addInstr(env, mk_iMOVsd_RR(reg,dst));
838 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
841 /* Could do better here; forcing the first arg into a reg
843 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
844 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
845 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
846 movl 0xFFFFFFA0(%vr41),%vr107
847 movl 0xFFFFFFA4(%vr41),%vr108
850 movl 0xFFFFFFA8(%vr41),%vr109
853 movl 0xFFFFFFA0(%vr41),%vr110
859 /* Perhaps a shift op? */
860 switch (e->Iex.Binop.op) {
861 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
862 shOp = Xsh_SHL; break;
863 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
864 shOp = Xsh_SHR; break;
865 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
866 shOp = Xsh_SAR; break;
868 shOp = Xsh_INVALID; break;
870 if (shOp != Xsh_INVALID) {
871 HReg dst = newVRegI(env);
873 /* regL = the value to be shifted */
874 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
875 addInstr(env, mk_iMOVsd_RR(regL,dst));
877 /* Do any necessary widening for 16/8 bit operands */
878 switch (e->Iex.Binop.op) {
880 addInstr(env, X86Instr_Alu32R(
881 Xalu_AND, X86RMI_Imm(0xFF), dst));
884 addInstr(env, X86Instr_Alu32R(
885 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
888 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
889 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
892 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
893 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
898 /* Now consider the shift amount. If it's a literal, we
899 can do a much better job than the general case. */
900 if (e->Iex.Binop.arg2->tag == Iex_Const) {
901 /* assert that the IR is well-typed */
903 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
904 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
905 vassert(nshift >= 0);
907 /* Can't allow nshift==0 since that means %cl */
908 addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
910 /* General case; we have to force the amount into %cl. */
911 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
912 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
913 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
918 /* Handle misc other ops. */
920 if (e->Iex.Binop.op == Iop_Max32U) {
921 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
922 HReg dst = newVRegI(env);
923 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
924 addInstr(env, mk_iMOVsd_RR(src1,dst));
925 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
926 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
930 if (e->Iex.Binop.op == Iop_8HLto16) {
931 HReg hi8 = newVRegI(env);
932 HReg lo8 = newVRegI(env);
933 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
934 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
935 addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
936 addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
937 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
938 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
939 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
943 if (e->Iex.Binop.op == Iop_16HLto32) {
944 HReg hi16 = newVRegI(env);
945 HReg lo16 = newVRegI(env);
946 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
947 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
948 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
949 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
950 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
951 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
952 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
956 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
957 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
958 HReg a16 = newVRegI(env);
959 HReg b16 = newVRegI(env);
960 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
961 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
962 Int shift = (e->Iex.Binop.op == Iop_MullS8
963 || e->Iex.Binop.op == Iop_MullU8)
965 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
966 || e->Iex.Binop.op == Iop_MullS16)
969 addInstr(env, mk_iMOVsd_RR(a16s, a16));
970 addInstr(env, mk_iMOVsd_RR(b16s, b16));
971 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
972 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
973 addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
974 addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
975 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
979 if (e->Iex.Binop.op == Iop_CmpF64) {
980 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
981 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
982 HReg dst = newVRegI(env);
983 addInstr(env, X86Instr_FpCmp(fL,fR,dst));
984 /* shift this right 8 bits so as to conform to CmpF64
986 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
990 if (e->Iex.Binop.op == Iop_F64toI32S
991 || e->Iex.Binop.op == Iop_F64toI16S) {
992 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
993 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
994 HReg dst = newVRegI(env);
996 /* Used several times ... */
997 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
999 /* rf now holds the value to be converted, and rrm holds the
1000 rounding mode value, encoded as per the IRRoundingMode
1001 enum. The first thing to do is set the FPU's rounding
1002 mode accordingly. */
1004 /* Create a space for the format conversion. */
1006 sub_from_esp(env, 4);
1008 /* Set host rounding mode */
1009 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1011 /* gistw/l %rf, 0(%esp) */
1012 addInstr(env, X86Instr_FpLdStI(False/*store*/,
1013 toUChar(sz), rf, zero_esp));
1016 /* movzwl 0(%esp), %dst */
1017 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1019 /* movl 0(%esp), %dst */
1021 addInstr(env, X86Instr_Alu32R(
1022 Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1025 /* Restore default FPU rounding. */
1026 set_FPU_rounding_default( env );
1036 /* --------- UNARY OP --------- */
1039 /* 1Uto8(32to1(expr32)) */
1040 if (e->Iex.Unop.op == Iop_1Uto8) {
1041 DECLARE_PATTERN(p_32to1_then_1Uto8);
1042 DEFINE_PATTERN(p_32to1_then_1Uto8,
1043 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1044 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1045 IRExpr* expr32 = mi.bindee[0];
1046 HReg dst = newVRegI(env);
1047 HReg src = iselIntExpr_R(env, expr32);
1048 addInstr(env, mk_iMOVsd_RR(src,dst) );
1049 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1050 X86RMI_Imm(1), dst));
1055 /* 8Uto32(LDle(expr32)) */
1056 if (e->Iex.Unop.op == Iop_8Uto32) {
1057 DECLARE_PATTERN(p_LDle8_then_8Uto32);
1058 DEFINE_PATTERN(p_LDle8_then_8Uto32,
1060 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1061 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1062 HReg dst = newVRegI(env);
1063 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1064 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1069 /* 8Sto32(LDle(expr32)) */
1070 if (e->Iex.Unop.op == Iop_8Sto32) {
1071 DECLARE_PATTERN(p_LDle8_then_8Sto32);
1072 DEFINE_PATTERN(p_LDle8_then_8Sto32,
1074 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1075 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1076 HReg dst = newVRegI(env);
1077 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1078 addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1083 /* 16Uto32(LDle(expr32)) */
1084 if (e->Iex.Unop.op == Iop_16Uto32) {
1085 DECLARE_PATTERN(p_LDle16_then_16Uto32);
1086 DEFINE_PATTERN(p_LDle16_then_16Uto32,
1088 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1089 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1090 HReg dst = newVRegI(env);
1091 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1092 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1097 /* 8Uto32(GET:I8) */
1098 if (e->Iex.Unop.op == Iop_8Uto32) {
1099 if (e->Iex.Unop.arg->tag == Iex_Get) {
1102 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1103 dst = newVRegI(env);
1104 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1106 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1111 /* 16to32(GET:I16) */
1112 if (e->Iex.Unop.op == Iop_16Uto32) {
1113 if (e->Iex.Unop.arg->tag == Iex_Get) {
1116 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1117 dst = newVRegI(env);
1118 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1120 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1125 switch (e->Iex.Unop.op) {
1129 HReg dst = newVRegI(env);
1130 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1131 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1132 addInstr(env, mk_iMOVsd_RR(src,dst) );
1133 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1134 X86RMI_Imm(mask), dst));
1140 HReg dst = newVRegI(env);
1141 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1142 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1143 addInstr(env, mk_iMOVsd_RR(src,dst) );
1144 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1145 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1151 HReg dst = newVRegI(env);
1152 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1153 addInstr(env, mk_iMOVsd_RR(src,dst) );
1154 addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1157 case Iop_64HIto32: {
1159 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1160 return rHi; /* and abandon rLo .. poor wee thing :-) */
1164 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1165 return rLo; /* similar stupid comment to the above ... */
1168 case Iop_32HIto16: {
1169 HReg dst = newVRegI(env);
1170 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1171 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1172 addInstr(env, mk_iMOVsd_RR(src,dst) );
1173 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1178 HReg dst = newVRegI(env);
1179 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1180 addInstr(env, X86Instr_Set32(cond,dst));
1186 /* could do better than this, but for now ... */
1187 HReg dst = newVRegI(env);
1188 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1189 addInstr(env, X86Instr_Set32(cond,dst));
1190 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1191 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1195 /* Count trailing zeroes, implemented by x86 'bsfl' */
1196 HReg dst = newVRegI(env);
1197 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1198 addInstr(env, X86Instr_Bsfr32(True,src,dst));
1202 /* Count leading zeroes. Do 'bsrl' to establish the index
1203 of the highest set bit, and subtract that value from
1205 HReg tmp = newVRegI(env);
1206 HReg dst = newVRegI(env);
1207 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1208 addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1209 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1210 X86RMI_Imm(31), dst));
1211 addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1212 X86RMI_Reg(tmp), dst));
1216 case Iop_CmpwNEZ32: {
1217 HReg dst = newVRegI(env);
1218 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1219 addInstr(env, mk_iMOVsd_RR(src,dst));
1220 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1221 addInstr(env, X86Instr_Alu32R(Xalu_OR,
1222 X86RMI_Reg(src), dst));
1223 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1229 HReg dst = newVRegI(env);
1230 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1231 addInstr(env, mk_iMOVsd_RR(src, dst));
1232 addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1233 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1237 case Iop_V128to32: {
1238 HReg dst = newVRegI(env);
1239 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1240 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1241 sub_from_esp(env, 16);
1242 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1243 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1244 add_to_esp(env, 16);
1248 /* ReinterpF32asI32(e) */
1249 /* Given an IEEE754 single, produce an I32 with the same bit
1250 pattern. Keep stack 8-aligned even though only using 4
1252 case Iop_ReinterpF32asI32: {
1253 HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
1254 HReg dst = newVRegI(env);
1255 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1257 set_FPU_rounding_default(env);
1259 sub_from_esp(env, 8);
1260 /* gstF %rf, 0(%esp) */
1262 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1263 /* movl 0(%esp), %dst */
1265 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1274 /* These are no-ops. */
1275 return iselIntExpr_R(env, e->Iex.Unop.arg);
1283 /* --------- GET --------- */
1285 if (ty == Ity_I32) {
1286 HReg dst = newVRegI(env);
1287 addInstr(env, X86Instr_Alu32R(
1289 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1294 if (ty == Ity_I8 || ty == Ity_I16) {
1295 HReg dst = newVRegI(env);
1296 addInstr(env, X86Instr_LoadEX(
1297 toUChar(ty==Ity_I8 ? 1 : 2),
1299 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1308 = genGuestArrayOffset(
1309 env, e->Iex.GetI.descr,
1310 e->Iex.GetI.ix, e->Iex.GetI.bias );
1311 HReg dst = newVRegI(env);
1313 addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1316 if (ty == Ity_I32) {
1317 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1323 /* --------- CCALL --------- */
1325 HReg dst = newVRegI(env);
1326 vassert(ty == e->Iex.CCall.retty);
1328 /* be very restrictive for now. Only 32/64-bit ints allowed
1329 for args, and 32 bits for return type. */
1330 if (e->Iex.CCall.retty != Ity_I32)
1333 /* Marshal args, do the call, clear stack. */
1334 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1336 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1340 /* --------- LITERAL --------- */
1341 /* 32/16/8-bit literals */
1343 X86RMI* rmi = iselIntExpr_RMI ( env, e );
1344 HReg r = newVRegI(env);
1345 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1349 /* --------- MULTIPLEX --------- */
1351 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1352 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
1354 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1355 X86RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
1356 HReg dst = newVRegI(env);
1357 addInstr(env, mk_iMOVsd_RR(rX,dst));
1358 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
1359 addInstr(env, X86Instr_Test32(0xFF, r8));
1360 addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst));
1368 } /* switch (e->tag) */
1370 /* We get here if no pattern matched. */
1373 vpanic("iselIntExpr_R: cannot reduce tree");
1377 /*---------------------------------------------------------*/
1378 /*--- ISEL: Integer expression auxiliaries ---*/
1379 /*---------------------------------------------------------*/
1381 /* --------------------- AMODEs --------------------- */
1383 /* Return an AMode which computes the value of the specified
1384 expression, possibly also adding insns to the code list as a
1385 result. The expression may only be a 32-bit one.
1388 static Bool sane_AMode ( X86AMode* am )
     /* Sanity-check an amode produced by iselIntExpr_AMode_wrk: every
        register it mentions must be of 32-bit integer class, and must be
        either a virtual register or (for the IR base register only) the
        real guest-state pointer %ebp.  NOTE(review): some interior lines
        of this function are elided in this copy of the source. */
1393          toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1394                  && (hregIsVirtual(am->Xam.IR.reg)
1395                      || am->Xam.IR.reg == hregX86_EBP()) );
1398          toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1399                  && hregIsVirtual(am->Xam.IRRS.base)
1400                  && hregClass(am->Xam.IRRS.index) == HRcInt32
1401                  && hregIsVirtual(am->Xam.IRRS.index) );
1403          vpanic("sane_AMode: unknown x86 amode tag");
1407 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
     /* Public entry point: compute an addressing mode for 'e', then
        sanity-check whatever the worker produced before returning it. */
1409    X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1410    vassert(sane_AMode(am));
1414 /* DO NOT CALL THIS DIRECTLY ! */
1415 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
     /* Worker: match 'e' (which must be :: Ity_I32) against x86
        addressing-mode shapes, most specific first.  Order matters:
        trying the simpler Add32(expr,const) form first would miss the
        base+index*scale encodings. */
1417    IRType ty = typeOfIRExpr(env->type_env,e);
1418    vassert(ty == Ity_I32);
1420    /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1421    if (e->tag == Iex_Binop
1422        && e->Iex.Binop.op == Iop_Add32
1423        && e->Iex.Binop.arg2->tag == Iex_Const
1424        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1425        && e->Iex.Binop.arg1->tag == Iex_Binop
1426        && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1427        && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1428        && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1429        && e->Iex.Binop.arg1
1430             ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1431        && e->Iex.Binop.arg1
1432             ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1433       UInt shift = e->Iex.Binop.arg1
1434                    ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1435       UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
        /* Only shifts 1..3 map onto the x86 scale field (x2/x4/x8). */
1436       if (shift == 1 || shift == 2 || shift == 3) {
1437          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1438          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1439                                       ->Iex.Binop.arg2->Iex.Binop.arg1 );
1440          return X86AMode_IRRS(imm32, r1, r2, shift);
1444    /* Add32(expr1, Shl32(expr2, imm)) */
1445    if (e->tag == Iex_Binop
1446        && e->Iex.Binop.op == Iop_Add32
1447        && e->Iex.Binop.arg2->tag == Iex_Binop
1448        && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1449        && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1450        && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1451       UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1452       if (shift == 1 || shift == 2 || shift == 3) {
1453          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1454          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1455          return X86AMode_IRRS(0, r1, r2, shift);
     /* Add32(expr, imm32): base register plus displacement. */
1460    if (e->tag == Iex_Binop
1461        && e->Iex.Binop.op == Iop_Add32
1462        && e->Iex.Binop.arg2->tag == Iex_Const
1463        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1464       HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1465       return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1468    /* Doesn't match anything in particular.  Generate it into
1469       a register and use that. */
1471       HReg r1 = iselIntExpr_R(env, e);
1472       return X86AMode_IR(0, r1);
1477 /* --------------------- RMIs --------------------- */
1479 /* Similarly, calculate an expression into an X86RMI operand. As with
1480 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1482 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
     /* Public entry point: compute 'e' into a reg/mem/imm operand,
        sanity-checking the worker's result. */
1484    X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1485    /* sanity checks ... */
1490          vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1491          vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1494          vassert(sane_AMode(rmi->Xrmi.Mem.am));
1497          vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1501 /* DO NOT CALL THIS DIRECTLY ! */
1502 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
     /* Worker: pick the cheapest RMI form, in order of preference:
        immediate, then 32-bit GET (guest state at an %ebp offset), then
        32-bit little-endian load, and finally fall back to computing the
        value into a register. */
1504    IRType ty = typeOfIRExpr(env->type_env,e);
1505    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1507    /* special case: immediate */
1508    if (e->tag == Iex_Const) {
1510       switch (e->Iex.Const.con->tag) {
1511          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1512          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1513          case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1514          default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1516       return X86RMI_Imm(u);
1519    /* special case: 32-bit GET */
1520    if (e->tag == Iex_Get && ty == Ity_I32) {
1521       return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1525    /* special case: 32-bit load from memory */
1526    if (e->tag == Iex_Load && ty == Ity_I32
1527        && e->Iex.Load.end == Iend_LE) {
1528       X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1529       return X86RMI_Mem(am);
1532    /* default case: calculate into a register and return that */
1534       HReg r = iselIntExpr_R ( env, e );
1535       return X86RMI_Reg(r);
1540 /* --------------------- RIs --------------------- */
1542 /* Calculate an expression into an X86RI operand. As with
1543 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1545 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
     /* Public entry point: compute 'e' into a reg-or-imm operand,
        sanity-checking the worker's result. */
1547    X86RI* ri = iselIntExpr_RI_wrk(env, e);
1548    /* sanity checks ... */
1553          vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1554          vassert(hregIsVirtual(ri->Xri.Reg.reg));
1557          vpanic("iselIntExpr_RI: unknown x86 RI tag");
1561 /* DO NOT CALL THIS DIRECTLY ! */
1562 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
     /* Worker: pick an RI form -- an immediate when 'e' is a constant,
        otherwise compute the value into a (virtual) register. */
1564    IRType ty = typeOfIRExpr(env->type_env,e);
1565    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1567    /* special case: immediate */
1568    if (e->tag == Iex_Const) {
1570       switch (e->Iex.Const.con->tag) {
1571          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1572          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1573          case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
             /* Fixed: the panic message previously said
                "iselIntExpr_RMI.Iex_Const(x86h)", a copy-paste from the
                RMI worker; report the correct function name instead. */
1574          default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
1576       return X86RI_Imm(u);
1579    /* default case: calculate into a register and return that */
1581       HReg r = iselIntExpr_R ( env, e );
1582       return X86RI_Reg(r);
1587 /* --------------------- RMs --------------------- */
1589 /* Similarly, calculate an expression into an X86RM operand. As with
1590 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1592 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
     /* Public entry point: compute 'e' into a reg-or-mem operand,
        sanity-checking the worker's result. */
1594    X86RM* rm = iselIntExpr_RM_wrk(env, e);
1595    /* sanity checks ... */
1598          vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1599          vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1602          vassert(sane_AMode(rm->Xrm.Mem.am));
1605          vpanic("iselIntExpr_RM: unknown x86 RM tag");
1609 /* DO NOT CALL THIS DIRECTLY ! */
1610 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
     /* Worker: 32-bit GETs become a memory operand at an %ebp offset;
        everything else is computed into a register.  (There is no
        immediate form for RM.) */
1612    IRType ty = typeOfIRExpr(env->type_env,e);
1613    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1615    /* special case: 32-bit GET */
1616    if (e->tag == Iex_Get && ty == Ity_I32) {
1617       return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1621    /* special case: load from memory */
1623    /* default case: calculate into a register and return that */
1625       HReg r = iselIntExpr_R ( env, e );
1626       return X86RM_Reg(r);
1631 /* --------------------- CONDCODE --------------------- */
1633 /* Generate code to evaluated a bit-typed expression, returning the
1634 condition code which would correspond when the expression would
1635 notionally have returned 1. */
1637 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
     /* Public entry point: emit code to evaluate the Ity_I1 expression
        'e', returning the condition code that holds when 'e' is 1. */
1639    /* Uh, there's nothing we can sanity check here, unfortunately. */
1640    return iselCondCode_wrk(env,e);
1643 /* DO NOT CALL THIS DIRECTLY ! */
1644 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
     /* Worker for iselCondCode.  Pattern-matches 'e' (:: Ity_I1) against
        a cascade of shapes -- temporaries, constants, Not1, the CmpNEZ
        family, then the binary comparisons -- emitting flag-setting
        instructions and returning the matching condition code.  The
        match order is most-specific-first and is load-bearing. */
1649    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
     /* A 1-bit value held in a temporary: test its bottom bit. */
1652    if (e->tag == Iex_RdTmp) {
1653       HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1654       /* Test32 doesn't modify r32; so this is OK. */
1655       addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1659    /* Constant 1:Bit */
1660    if (e->tag == Iex_Const) {
1662       vassert(e->Iex.Const.con->tag == Ico_U1);
1663       vassert(e->Iex.Const.con->Ico.U1 == True
1664               || e->Iex.Const.con->Ico.U1 == False);
        /* XOR r,r sets ZF; MOV does not affect flags, hence the pair. */
1666       addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1667       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1668       return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
     /* Not1: flip the condition of the argument (x ^ 1 inverts any
        X86CondCode). */
1672    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1673       /* Generate code for the arg, and negate the test condition */
1674       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1677    /* --- patterns rooted at: 32to1 --- */
1679    if (e->tag == Iex_Unop
1680        && e->Iex.Unop.op == Iop_32to1) {
1681       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1682       addInstr(env, X86Instr_Test32(1,rm));
1686    /* --- patterns rooted at: CmpNEZ8 --- */
1689    if (e->tag == Iex_Unop
1690        && e->Iex.Unop.op == Iop_CmpNEZ8) {
1691       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1692       addInstr(env, X86Instr_Test32(0xFF,rm));
1696    /* --- patterns rooted at: CmpNEZ16 --- */
1699    if (e->tag == Iex_Unop
1700        && e->Iex.Unop.op == Iop_CmpNEZ16) {
1701       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1702       addInstr(env, X86Instr_Test32(0xFFFF,rm));
1706    /* --- patterns rooted at: CmpNEZ32 --- */
1708    /* CmpNEZ32(And32(x,y)) */
     /* AND sets ZF directly, so the explicit compare against zero is
        avoided. */
1710       DECLARE_PATTERN(p_CmpNEZ32_And32);
1711       DEFINE_PATTERN(p_CmpNEZ32_And32,
1712                      unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1713       if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1714          HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1715          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1716          HReg tmp = newVRegI(env);
1717          addInstr(env, mk_iMOVsd_RR(r0, tmp));
1718          addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1723    /* CmpNEZ32(Or32(x,y)) */
1725       DECLARE_PATTERN(p_CmpNEZ32_Or32);
1726       DEFINE_PATTERN(p_CmpNEZ32_Or32,
1727                      unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1728       if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1729          HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1730          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1731          HReg tmp = newVRegI(env);
1732          addInstr(env, mk_iMOVsd_RR(r0, tmp));
1733          addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1738    /* CmpNEZ32(GET(..):I32) */
     /* Compare guest state directly in memory; no register needed. */
1739    if (e->tag == Iex_Unop
1740        && e->Iex.Unop.op == Iop_CmpNEZ32
1741        && e->Iex.Unop.arg->tag == Iex_Get) {
1742       X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1744       addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
     /* CmpNEZ32(x): generic case, compare against immediate zero. */
1749    if (e->tag == Iex_Unop
1750        && e->Iex.Unop.op == Iop_CmpNEZ32) {
1751       HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1752       X86RMI* rmi2 = X86RMI_Imm(0);
1753       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1757    /* --- patterns rooted at: CmpNEZ64 --- */
1759    /* CmpNEZ64(Or64(x,y)) */
     /* OR all four 32-bit halves together; ZF then reflects whether the
        whole 64-bit value is zero. */
1761       DECLARE_PATTERN(p_CmpNEZ64_Or64);
1762       DEFINE_PATTERN(p_CmpNEZ64_Or64,
1763                      unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1764       if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1765          HReg hi1, lo1, hi2, lo2;
1766          HReg tmp  = newVRegI(env);
1767          iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1768          addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1769          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1770          iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1771          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1772          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
     /* CmpNEZ64(x): OR the two halves and test for zero. */
1778    if (e->tag == Iex_Unop
1779        && e->Iex.Unop.op == Iop_CmpNEZ64) {
1781       HReg tmp = newVRegI(env);
1782       iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1783       addInstr(env, mk_iMOVsd_RR(hi, tmp));
1784       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1788    /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1790    /* CmpEQ8 / CmpNE8 */
1791    if (e->tag == Iex_Binop
1792        && (e->Iex.Binop.op == Iop_CmpEQ8
1793            || e->Iex.Binop.op == Iop_CmpNE8
1794            || e->Iex.Binop.op == Iop_CasCmpEQ8
1795            || e->Iex.Binop.op == Iop_CasCmpNE8)) {
        /* Comparison against literal zero needs only a TEST. */
1796       if (isZeroU8(e->Iex.Binop.arg2)) {
1797          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1798          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1799          switch (e->Iex.Binop.op) {
1800             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1801             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1802             default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
        /* General case: XOR the operands and test the low 8 bits. */
1805          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1806          X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1807          HReg r = newVRegI(env);
1808          addInstr(env, mk_iMOVsd_RR(r1,r));
1809          addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1810          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1811          switch (e->Iex.Binop.op) {
1812             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1813             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1814             default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1819    /* CmpEQ16 / CmpNE16 */
1820    if (e->tag == Iex_Binop
1821        && (e->Iex.Binop.op == Iop_CmpEQ16
1822            || e->Iex.Binop.op == Iop_CmpNE16
1823            || e->Iex.Binop.op == Iop_CasCmpEQ16
1824            || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1825       HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1826       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1827       HReg r = newVRegI(env);
1828       addInstr(env, mk_iMOVsd_RR(r1,r));
1829       addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1830       addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1831       switch (e->Iex.Binop.op) {
1832          case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z;
1833          case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ;
1834          default: vpanic("iselCondCode(x86): CmpXX16");
     /* Cmp*32*(x,y): a plain CMP and the matching condition code. */
1839    if (e->tag == Iex_Binop
1840        && (e->Iex.Binop.op == Iop_CmpEQ32
1841            || e->Iex.Binop.op == Iop_CmpNE32
1842            || e->Iex.Binop.op == Iop_CmpLT32S
1843            || e->Iex.Binop.op == Iop_CmpLT32U
1844            || e->Iex.Binop.op == Iop_CmpLE32S
1845            || e->Iex.Binop.op == Iop_CmpLE32U
1846            || e->Iex.Binop.op == Iop_CasCmpEQ32
1847            || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1848       HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1849       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1850       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1851       switch (e->Iex.Binop.op) {
1852          case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
1853          case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ;
1854          case Iop_CmpLT32S: return Xcc_L;
1855          case Iop_CmpLT32U: return Xcc_B;
1856          case Iop_CmpLE32S: return Xcc_LE;
1857          case Iop_CmpLE32U: return Xcc_BE;
1858          default: vpanic("iselCondCode(x86): CmpXX32");
     /* CmpNE64/CmpEQ64: XOR halves pairwise, OR the results; ZF then
        reflects 64-bit equality. */
1863    if (e->tag == Iex_Binop
1864        && (e->Iex.Binop.op == Iop_CmpNE64
1865            || e->Iex.Binop.op == Iop_CmpEQ64)) {
1866       HReg hi1, hi2, lo1, lo2;
1867       HReg tHi = newVRegI(env);
1868       HReg tLo = newVRegI(env);
1869       iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
1870       iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
1871       addInstr(env, mk_iMOVsd_RR(hi1, tHi));
1872       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
1873       addInstr(env, mk_iMOVsd_RR(lo1, tLo));
1874       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
1875       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
1876       switch (e->Iex.Binop.op) {
1877          case Iop_CmpNE64: return Xcc_NZ;
1878          case Iop_CmpEQ64: return Xcc_Z;
1879          default: vpanic("iselCondCode(x86): CmpXX64");
     /* No pattern matched: give up. */
1884    vpanic("iselCondCode");
1888 /*---------------------------------------------------------*/
1889 /*--- ISEL: Integer expressions (64 bit) ---*/
1890 /*---------------------------------------------------------*/
1892 /* Compute a 64-bit value into a register pair, which is returned as
1893 the first two parameters. As with iselIntExpr_R, these may be
1894 either real or virtual regs; in any case they must not be changed
1895 by subsequent code emitted by the caller. */
1897 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
     /* Public entry point: compute the 64-bit value 'e' into the
        register pair *rHi:*rLo, then sanity-check that both halves are
        virtual 32-bit integer registers. */
1899    iselInt64Expr_wrk(rHi, rLo, env, e);
1901       vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1903    vassert(hregClass(*rHi) == HRcInt32);
1904    vassert(hregIsVirtual(*rHi));
1905    vassert(hregClass(*rLo) == HRcInt32);
1906    vassert(hregIsVirtual(*rLo));
1909 /* DO NOT CALL THIS DIRECTLY ! */
1910 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1913 HWord fn = 0; /* helper fn for most SIMD64 stuff */
1915 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1917 /* 64-bit literal */
1918 if (e->tag == Iex_Const) {
1919 ULong w64 = e->Iex.Const.con->Ico.U64;
1920 UInt wHi = toUInt(w64 >> 32);
1921 UInt wLo = toUInt(w64);
1922 HReg tLo = newVRegI(env);
1923 HReg tHi = newVRegI(env);
1924 vassert(e->Iex.Const.con->tag == Ico_U64);
1926 /* Save a precious Int register in this special case. */
1927 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1931 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
1932 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1939 /* read 64-bit IRTemp */
1940 if (e->tag == Iex_RdTmp) {
1941 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1946 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1948 X86AMode *am0, *am4;
1949 vassert(e->Iex.Load.ty == Ity_I64);
1950 tLo = newVRegI(env);
1951 tHi = newVRegI(env);
1952 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
1953 am4 = advance4(am0);
1954 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
1955 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1962 if (e->tag == Iex_Get) {
1963 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
1964 X86AMode* am4 = advance4(am);
1965 HReg tLo = newVRegI(env);
1966 HReg tHi = newVRegI(env);
1967 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
1968 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1975 if (e->tag == Iex_GetI) {
1977 = genGuestArrayOffset( env, e->Iex.GetI.descr,
1978 e->Iex.GetI.ix, e->Iex.GetI.bias );
1979 X86AMode* am4 = advance4(am);
1980 HReg tLo = newVRegI(env);
1981 HReg tHi = newVRegI(env);
1982 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
1983 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1989 /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */
1990 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) {
1993 HReg tLo = newVRegI(env);
1994 HReg tHi = newVRegI(env);
1995 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1996 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
1997 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
1998 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
1999 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
2000 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
2001 addInstr(env, X86Instr_Test32(0xFF, r8));
2002 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi));
2003 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo));
2009 /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */
2010 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) {
2013 HReg tLo = newVRegI(env);
2014 HReg tHi = newVRegI(env);
2015 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2016 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX);
2017 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2018 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
2019 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
2020 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
2021 addInstr(env, X86Instr_Test32(0xFF, r8));
2022 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi));
2023 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo));
2030 /* 64-bit Mux0X: Mux0X(g, expr, expr) */
2031 if (e->tag == Iex_Mux0X) {
2033 HReg e0Lo, e0Hi, eXLo, eXHi;
2034 HReg tLo = newVRegI(env);
2035 HReg tHi = newVRegI(env);
2036 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2037 iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2038 addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2039 addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2040 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2041 addInstr(env, X86Instr_Test32(0xFF, r8));
2042 /* This assumes the first cmov32 doesn't trash the condition
2043 codes, so they are still available for the second cmov32 */
2044 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2045 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2051 /* --------- BINARY ops --------- */
2052 if (e->tag == Iex_Binop) {
2053 switch (e->Iex.Binop.op) {
2054 /* 32 x 32 -> 64 multiply */
2057 /* get one operand into %eax, and the other into a R/M.
2058 Need to make an educated guess about which is better in
2060 HReg tLo = newVRegI(env);
2061 HReg tHi = newVRegI(env);
2062 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
2063 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2064 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2065 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2066 addInstr(env, X86Instr_MulL(syned, rmLeft));
2067 /* Result is now in EDX:EAX. Tell the caller. */
2068 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2069 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2075 /* 64 x 32 -> (32(rem),32(div)) division */
2076 case Iop_DivModU64to32:
2077 case Iop_DivModS64to32: {
2078 /* Get the 64-bit operand into edx:eax, and the other into
2081 HReg tLo = newVRegI(env);
2082 HReg tHi = newVRegI(env);
2083 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2084 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2085 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2086 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2087 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2088 addInstr(env, X86Instr_Div(syned, rmRight));
2089 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2090 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2096 /* Or64/And64/Xor64 */
2100 HReg xLo, xHi, yLo, yHi;
2101 HReg tLo = newVRegI(env);
2102 HReg tHi = newVRegI(env);
2103 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2104 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2106 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2107 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2108 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2109 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2110 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2111 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2119 if (e->Iex.Binop.arg2->tag == Iex_Const) {
2120 /* special case Add64(e, const) */
2121 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2122 UInt wHi = toUInt(w64 >> 32);
2123 UInt wLo = toUInt(w64);
2124 HReg tLo = newVRegI(env);
2125 HReg tHi = newVRegI(env);
2127 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2128 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2129 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2130 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2131 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2132 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2137 /* else fall through to the generic case */
2139 HReg xLo, xHi, yLo, yHi;
2140 HReg tLo = newVRegI(env);
2141 HReg tHi = newVRegI(env);
2142 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2143 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2144 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2145 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2146 if (e->Iex.Binop.op==Iop_Add64) {
2147 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2148 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2150 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2151 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2158 /* 32HLto64(e1,e2) */
2160 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2161 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2166 /* We use the same ingenious scheme as gcc. Put the value
2167 to be shifted into %hi:%lo, and the shift amount into
2168 %cl. Then (dsts on right, a la ATT syntax):
2170 shldl %cl, %lo, %hi -- make %hi be right for the
2171 -- shift amt %cl % 32
2172 shll %cl, %lo -- make %lo be right for the
2173 -- shift amt %cl % 32
2175 Now, if (shift amount % 64) is in the range 32 .. 63,
2176 we have to do a fixup, which puts the result low half
2177 into the result high half, and zeroes the low half:
2182 movl $0, %tmp -- sigh; need yet another reg
2185 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2186 tLo = newVRegI(env);
2187 tHi = newVRegI(env);
2188 tTemp = newVRegI(env);
2189 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2190 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2191 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2192 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2193 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2194 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2195 and those regs are legitimately modifiable. */
2196 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2197 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2198 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2199 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2200 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2201 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2208 /* We use the same ingenious scheme as gcc. Put the value
2209 to be shifted into %hi:%lo, and the shift amount into
2212 shrdl %cl, %hi, %lo -- make %lo be right for the
2213 -- shift amt %cl % 32
2214 shrl %cl, %hi -- make %hi be right for the
2215 -- shift amt %cl % 32
2217 Now, if (shift amount % 64) is in the range 32 .. 63,
2218 we have to do a fixup, which puts the result high half
2219 into the result low half, and zeroes the high half:
2224 movl $0, %tmp -- sigh; need yet another reg
2227 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2228 tLo = newVRegI(env);
2229 tHi = newVRegI(env);
2230 tTemp = newVRegI(env);
2231 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2232 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2233 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2234 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2235 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2236 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2237 and those regs are legitimately modifiable. */
2238 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2239 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2240 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2241 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2242 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2243 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2250 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2251 case. Unfortunately I see no easy way to avoid the
2253 case Iop_F64toI64S: {
2254 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2255 HReg tLo = newVRegI(env);
2256 HReg tHi = newVRegI(env);
2258 /* Used several times ... */
2259 /* Careful ... this sharing is only safe because
2260 zero_esp/four_esp do not hold any registers which the
2261 register allocator could attempt to swizzle later. */
2262 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2263 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2265 /* rf now holds the value to be converted, and rrm holds
2266 the rounding mode value, encoded as per the
2267 IRRoundingMode enum. The first thing to do is set the
2268 FPU's rounding mode accordingly. */
2270 /* Create a space for the format conversion. */
2272 sub_from_esp(env, 8);
2274 /* Set host rounding mode */
2275 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2277 /* gistll %rf, 0(%esp) */
2278 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2280 /* movl 0(%esp), %dstLo */
2281 /* movl 4(%esp), %dstHi */
2282 addInstr(env, X86Instr_Alu32R(
2283 Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2284 addInstr(env, X86Instr_Alu32R(
2285 Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2287 /* Restore default FPU rounding. */
2288 set_FPU_rounding_default( env );
2299 fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2301 fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2303 fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2306 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2308 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2311 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2313 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2315 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2318 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2319 case Iop_CmpGT16Sx4:
2320 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2321 case Iop_CmpGT32Sx2:
2322 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2324 case Iop_InterleaveHI8x8:
2325 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2326 case Iop_InterleaveLO8x8:
2327 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2328 case Iop_InterleaveHI16x4:
2329 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2330 case Iop_InterleaveLO16x4:
2331 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2332 case Iop_InterleaveHI32x2:
2333 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2334 case Iop_InterleaveLO32x2:
2335 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2336 case Iop_CatOddLanes16x4:
2337 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2338 case Iop_CatEvenLanes16x4:
2339 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2341 fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2344 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2346 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2348 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2350 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2353 fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2355 fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2356 case Iop_MulHi16Sx4:
2357 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2358 case Iop_MulHi16Ux4:
2359 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2362 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2364 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2366 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2368 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2370 case Iop_QNarrow32Sx2:
2371 fn = (HWord)h_generic_calc_QNarrow32Sx2; goto binnish;
2372 case Iop_QNarrow16Sx4:
2373 fn = (HWord)h_generic_calc_QNarrow16Sx4; goto binnish;
2374 case Iop_QNarrow16Ux4:
2375 fn = (HWord)h_generic_calc_QNarrow16Ux4; goto binnish;
2378 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2380 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2382 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2384 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2387 fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2389 fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2391 fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2394 /* Note: the following assumes all helpers are of
2396 ULong fn ( ULong, ULong ), and they are
2397 not marked as regparm functions.
2399 HReg xLo, xHi, yLo, yHi;
2400 HReg tLo = newVRegI(env);
2401 HReg tHi = newVRegI(env);
2402 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2403 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2404 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2405 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2406 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2407 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2408 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2409 add_to_esp(env, 4*4);
2410 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2411 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2418 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2420 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2422 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
2424 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2426 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2428 fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2430 fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2432 fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
2434 /* Note: the following assumes all helpers are of
2436 ULong fn ( ULong, UInt ), and they are
2437 not marked as regparm functions.
2440 HReg tLo = newVRegI(env);
2441 HReg tHi = newVRegI(env);
2442 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2443 addInstr(env, X86Instr_Push(y));
2444 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2445 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2446 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2447 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2448 add_to_esp(env, 3*4);
2449 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2450 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2459 } /* if (e->tag == Iex_Binop) */
2462 /* --------- UNARY ops --------- */
2463 if (e->tag == Iex_Unop) {
2464 switch (e->Iex.Unop.op) {
2468 HReg tLo = newVRegI(env);
2469 HReg tHi = newVRegI(env);
2470 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2471 addInstr(env, mk_iMOVsd_RR(src,tHi));
2472 addInstr(env, mk_iMOVsd_RR(src,tLo));
2473 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2481 HReg tLo = newVRegI(env);
2482 HReg tHi = newVRegI(env);
2483 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2484 addInstr(env, mk_iMOVsd_RR(src,tLo));
2485 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2492 case Iop_V128HIto64:
2493 case Iop_V128to64: {
2494 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2495 HReg tLo = newVRegI(env);
2496 HReg tHi = newVRegI(env);
2497 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2498 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
2499 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
2500 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2501 sub_from_esp(env, 16);
2502 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2503 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2504 X86RMI_Mem(espLO), tLo ));
2505 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2506 X86RMI_Mem(espHI), tHi ));
2507 add_to_esp(env, 16);
2513 /* could do better than this, but for now ... */
2515 HReg tLo = newVRegI(env);
2516 HReg tHi = newVRegI(env);
2517 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2518 addInstr(env, X86Instr_Set32(cond,tLo));
2519 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2520 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2521 addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2529 HReg tLo = newVRegI(env);
2530 HReg tHi = newVRegI(env);
2532 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2533 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2534 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2535 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2536 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2545 HReg tLo = newVRegI(env);
2546 HReg tHi = newVRegI(env);
2548 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2549 /* tLo = 0 - yLo, and set carry */
2550 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2551 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2552 /* tHi = 0 - yHi - carry */
2553 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2554 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2555 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2556 back in, so as to give the final result
2557 tHi:tLo = arg | -arg. */
2558 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2559 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2565 /* --- patterns rooted at: CmpwNEZ64 --- */
2568 case Iop_CmpwNEZ64: {
2570 DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2571 DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2572 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2573 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2574 /* CmpwNEZ64(Or64(x,y)) */
2575 HReg xHi,xLo,yHi,yLo;
2576 HReg xBoth = newVRegI(env);
2577 HReg merged = newVRegI(env);
2578 HReg tmp2 = newVRegI(env);
2580 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2581 addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2582 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2583 X86RMI_Reg(xLo),xBoth));
2585 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2586 addInstr(env, mk_iMOVsd_RR(yHi,merged));
2587 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2588 X86RMI_Reg(yLo),merged));
2589 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2590 X86RMI_Reg(xBoth),merged));
2592 /* tmp2 = (merged | -merged) >>s 31 */
2593 addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2594 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2595 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2596 X86RMI_Reg(merged), tmp2));
2597 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2604 HReg tmp1 = newVRegI(env);
2605 HReg tmp2 = newVRegI(env);
2606 /* srcHi:srcLo = arg */
2607 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2608 /* tmp1 = srcHi | srcLo */
2609 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2610 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2611 X86RMI_Reg(srcLo), tmp1));
2612 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2613 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2614 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2615 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2616 X86RMI_Reg(tmp1), tmp2));
2617 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2624 /* ReinterpF64asI64(e) */
2625 /* Given an IEEE754 double, produce an I64 with the same bit
2627 case Iop_ReinterpF64asI64: {
2628 HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
2629 HReg tLo = newVRegI(env);
2630 HReg tHi = newVRegI(env);
2631 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2632 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2634 set_FPU_rounding_default(env);
2636 sub_from_esp(env, 8);
2637 /* gstD %rf, 0(%esp) */
2639 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2640 /* movl 0(%esp), %tLo */
2642 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2643 /* movl 4(%esp), %tHi */
2645 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2653 case Iop_CmpNEZ32x2:
2654 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2655 case Iop_CmpNEZ16x4:
2656 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2658 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2660 /* Note: the following assumes all helpers are of
2662 ULong fn ( ULong ), and they are
2663 not marked as regparm functions.
2666 HReg tLo = newVRegI(env);
2667 HReg tHi = newVRegI(env);
2668 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2669 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2670 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2671 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2672 add_to_esp(env, 2*4);
2673 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2674 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2683 } /* if (e->tag == Iex_Unop) */
2686 /* --------- CCALL --------- */
2687 if (e->tag == Iex_CCall) {
2688 HReg tLo = newVRegI(env);
2689 HReg tHi = newVRegI(env);
2691 /* Marshal args, do the call, clear stack. */
2692 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2694 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2695 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2702 vpanic("iselInt64Expr");
2706 /*---------------------------------------------------------*/
2707 /*--- ISEL: Floating point expressions (32 bit) ---*/
2708 /*---------------------------------------------------------*/
2710 /* Nothing interesting here; really just wrappers for
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  This is a checked wrapper around
   iselFltExpr_wrk, which does the real work; the checks below verify
   the worker's postconditions. */
static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
   HReg r = iselFltExpr_wrk( env, e );
   /* Debug dump of the expression just selected (normally compiled out
      in the upstream source — NOTE(review): guard not visible here). */
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   /* x86 keeps all FP values in x87 registers, so even an F32 result
      is held in the 64-bit FP register class. */
   vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
   vassert(hregIsVirtual(r));
2724 /* DO NOT CALL THIS DIRECTLY */
/* DO NOT CALL THIS DIRECTLY */
/* Worker for iselFltExpr: selects x86 instructions computing the F32
   expression 'e' into a fresh virtual FP register, which is returned.
   Falls through to vpanic for unhandled expression forms. */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   /* Temporary: just return the register already bound to it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);

   /* Little-endian 4-byte load: FLD 4-byte form via FpLdSt. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));

   /* F64 -> F32 narrowing conversion.  arg1 is the IR rounding mode,
      arg2 the F64 value to narrow. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_F64toF32) {
      /* Although the result is still held in a standard FPU register,
         we need to round it to reflect the loss of accuracy/range
         entailed in casting it to a 32-bit float. */
      HReg dst = newVRegF(env);
      HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
      /* Rounding mode is set from arg1 for the conversion only, then
         restored to the default afterwards. */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, X86Instr_Fp64to32(src,dst));
      set_FPU_rounding_default( env );

   /* Read an F32 from the guest state area (4-byte FP load from an
      offset — base register line truncated in this extract). */
   if (e->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));

   /* Bit-level reinterpretation I32 -> F32: push the integer onto the
      stack and FP-load it back from 0(%esp).  NOTE(review): the
      matching add-back of %esp after the push is not visible in this
      extract — confirm against the full source. */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
      /* Given an I32, produce an IEEE754 float with the same bit
      HReg dst = newVRegF(env);
      X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
      addInstr(env, X86Instr_Push(rmi));
      addInstr(env, X86Instr_FpLdSt(
                       True/*load*/, 4, dst,
                       X86AMode_IR(0, hregX86_ESP())));

   /* No rule matched: instruction selection cannot proceed. */
   vpanic("iselFltExpr_wrk");
2784 /*---------------------------------------------------------*/
2785 /*--- ISEL: Floating point expressions (64 bit) ---*/
2786 /*---------------------------------------------------------*/
2788 /* Compute a 64-bit floating point value into a register, the identity
2789 of which is returned. As with iselIntExpr_R, the reg may be either
2790 real or virtual; in any case it must not be changed by subsequent
2791 code emitted by the caller. */
2793 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2795 Type S (1 bit) E (11 bits) F (52 bits)
2796 ---- --------- ----------- -----------
2797 signalling NaN u 2047 (max) .0uuuuu---u
2800 quiet NaN u 2047 (max) .1uuuuu---u
2802 negative infinity 1 2047 (max) .000000---0
2804 positive infinity 0 2047 (max) .000000---0
2806 negative zero 1 0 .000000---0
2808 positive zero 0 0 .000000---0
/* Checked wrapper around iselDblExpr_wrk: selects code for the F64
   expression 'e' and sanity-checks that the worker handed back a
   virtual register of the FP class. */
static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
   HReg r = iselDblExpr_wrk( env, e );
   /* Debug dump of the expression just selected (normally compiled out
      in the upstream source — NOTE(review): guard not visible here). */
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
2822 /* DO NOT CALL THIS DIRECTLY */
/* Worker for iselDblExpr: selects x86/x87 instructions computing the
   F64 expression 'e' into a fresh virtual FP register, which is
   returned.  Dispatches on e->tag; falls through to vpanic if no rule
   matches. */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F64);

   /* Temporary: return the register already bound to it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);

   /* F64 constant: type-pun the constant into two 32-bit halves via a
      union, push both halves, and FP-load the 8 bytes from 0(%esp). */
   if (e->tag == Iex_Const) {
      union { UInt u32x2[2]; ULong u64; Double f64; } u;
      HReg freg = newVRegF(env);
      /* Paranoia: the pun is only valid if all union views are 8 bytes. */
      vassert(sizeof(u) == 8);
      vassert(sizeof(u.u64) == 8);
      vassert(sizeof(u.f64) == 8);
      vassert(sizeof(u.u32x2) == 8);
      if (e->Iex.Const.con->tag == Ico_F64) {
         u.f64 = e->Iex.Const.con->Ico.F64;
      else if (e->Iex.Const.con->tag == Ico_F64i) {
         u.u64 = e->Iex.Const.con->Ico.F64i;
         vpanic("iselDblExpr(x86): const");
      /* Push high half first so the low half ends up at 0(%esp)
         (little-endian layout). */
      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
                                    X86AMode_IR(0, hregX86_ESP())));

   /* Little-endian 8-byte load straight into an FP register. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));

   /* Read an F64 from the guest state area at a fixed offset. */
   if (e->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));

   /* Read an F64 from a guest-state array (e.g. the x87 register
      stack); genGuestArrayOffset computes the effective address. */
   if (e->tag == Iex_GetI) {
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));

   /* Ternary FP ops: arg1 is the IR rounding mode (currently ignored —
      see XXXROUNDINGFIXME below), arg2/arg3 the operands. */
   if (e->tag == Iex_Triop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Triop.op) {
         case Iop_AddF64:    fpop = Xfp_ADD; break;
         case Iop_SubF64:    fpop = Xfp_SUB; break;
         case Iop_MulF64:    fpop = Xfp_MUL; break;
         case Iop_DivF64:    fpop = Xfp_DIV; break;
         case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
         case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
         case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
         case Iop_AtanF64:   fpop = Xfp_ATAN; break;
         case Iop_PRemF64:   fpop = Xfp_PREM; break;
         case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
      if (fpop != Xfp_INVALID) {
         HReg res  = newVRegF(env);
         HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
         HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
         /* For the transcendental/remainder ops the x87 result may be
            in 80-bit precision; force it back to F64.  ADD/SUB/MUL/DIV
            are exempted here. */
         if (fpop != Xfp_ADD && fpop != Xfp_SUB
            && fpop != Xfp_MUL && fpop != Xfp_DIV)
            roundToF64(env, res);

   /* Round to integral value, honouring the requested rounding mode. */
   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
      HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
      HReg dst = newVRegF(env);
      /* rf now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */
      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
      /* grndint %rf, %dst */
      addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

   /* Signed I64 -> F64: push the 64-bit value and use FILD (8-byte
      integer FP load) under the requested rounding mode. */
   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
      HReg dst = newVRegF(env);
      iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, X86Instr_FpLdStI(
                       True/*load*/, 8, dst,
                       X86AMode_IR(0, hregX86_ESP())));
      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

   /* Binary-with-rounding-mode unary FP ops (arg1 = rounding mode,
      arg2 = operand); rounding mode currently ignored, see below. */
   if (e->tag == Iex_Binop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Binop.op) {
         case Iop_SinF64:  fpop = Xfp_SIN; break;
         case Iop_CosF64:  fpop = Xfp_COS; break;
         case Iop_TanF64:  fpop = Xfp_TAN; break;
         case Iop_2xm1F64: fpop = Xfp_2XM1; break;
         case Iop_SqrtF64: fpop = Xfp_SQRT; break;
      if (fpop != Xfp_INVALID) {
         HReg res = newVRegF(env);
         HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_FpUnary(fpop,src,res));
         /* Force the 80-bit x87 result back to F64 precision, except
            for ops whose result is already exact in F64. */
         if (fpop != Xfp_SQRT
             && fpop != Xfp_NEG && fpop != Xfp_ABS)
            roundToF64(env, res);

   /* True unary FP ops: negate and absolute value. */
   if (e->tag == Iex_Unop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Unop.op) {
         case Iop_NegF64:  fpop = Xfp_NEG; break;
         case Iop_AbsF64:  fpop = Xfp_ABS; break;
      if (fpop != Xfp_INVALID) {
         HReg res = newVRegF(env);
         HReg src = iselDblExpr(env, e->Iex.Unop.arg);
         addInstr(env, X86Instr_FpUnary(fpop,src,res));
         /* NEG/ABS are sign-bit-only operations and never need
            re-rounding, so this is currently unreachable for them. */
         if (fpop != Xfp_NEG && fpop != Xfp_ABS)
            roundToF64(env, res);

   /* Remaining unary conversions. */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         /* Signed I32 -> F64 via push + 4-byte FILD; always exact, so
            the default rounding mode suffices. */
         case Iop_I32StoF64: {
            HReg dst = newVRegF(env);
            HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
            set_FPU_rounding_default(env);
            addInstr(env, X86Instr_FpLdStI(
                             True/*load*/, 4, dst,
                             X86AMode_IR(0, hregX86_ESP())));
         case Iop_ReinterpI64asF64: {
            /* Given an I64, produce an IEEE754 double with the same
            HReg dst = newVRegF(env);
            iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
            set_FPU_rounding_default(env);
            /* Push high half first so the low half is at 0(%esp). */
            addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
            addInstr(env, X86Instr_FpLdSt(
                             True/*load*/, 8, dst,
                             X86AMode_IR(0, hregX86_ESP())));
         case Iop_F32toF64: {
            /* this is a no-op */
            /* F32 is already held at full x87 precision, so widening
               requires no code: just select the F32 value. */
            HReg res = iselFltExpr(env, e->Iex.Unop.arg);

   /* --------- MULTIPLEX --------- */
   /* Mux0X with an I8 condition: copy exprX into dst, test the low
      byte of the condition, and conditionally overwrite with expr0
      when the byte is zero (FpCMov on Z). */
   if (e->tag == Iex_Mux0X) {
       && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
         X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
         HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
         HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
         HReg dst = newVRegF(env);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst));
         addInstr(env, X86Instr_Test32(0xFF, r8));
         addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst));

   /* No rule matched: instruction selection cannot proceed. */
   vpanic("iselDblExpr_wrk");
3053 /*---------------------------------------------------------*/
3054 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3055 /*---------------------------------------------------------*/
/* Checked wrapper around iselVecExpr_wrk: selects code for the V128
   expression 'e' and sanity-checks that the worker handed back a
   virtual register of the 128-bit vector class. */
static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
   HReg r = iselVecExpr_wrk( env, e );
   /* Debug dump of the expression just selected (normally compiled out
      in the upstream source — NOTE(review): guard not visible here). */
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
3069 /* DO NOT CALL THIS DIRECTLY */
3070 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3073 # define REQUIRE_SSE1 \
3074 do { if (env->hwcaps == 0/*baseline, no sse*/) \
3078 # define REQUIRE_SSE2 \
3079 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
3083 # define SSE2_OR_ABOVE \
3084 (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3087 Bool arg1isEReg = False;
3088 X86SseOp op = Xsse_INVALID;
3089 IRType ty = typeOfIRExpr(env->type_env,e);
3091 vassert(ty == Ity_V128);
3095 if (e->tag == Iex_RdTmp) {
3096 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3099 if (e->tag == Iex_Get) {
3100 HReg dst = newVRegV(env);
3101 addInstr(env, X86Instr_SseLdSt(
3104 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3110 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3111 HReg dst = newVRegV(env);
3112 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3113 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3117 if (e->tag == Iex_Const) {
3118 HReg dst = newVRegV(env);
3119 vassert(e->Iex.Const.con->tag == Ico_V128);
3120 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3124 if (e->tag == Iex_Unop) {
3126 if (SSE2_OR_ABOVE) {
3127 /* 64UtoV128(LDle:I64(addr)) */
3128 DECLARE_PATTERN(p_zwiden_load64);
3129 DEFINE_PATTERN(p_zwiden_load64,
3131 IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3132 if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3133 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3134 HReg dst = newVRegV(env);
3135 addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3140 switch (e->Iex.Unop.op) {
3143 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3144 return do_sse_Not128(env, arg);
3147 case Iop_CmpNEZ64x2: {
3148 /* We can use SSE2 instructions for this. */
3149 /* Ideally, we want to do a 64Ix2 comparison against zero of
3150 the operand. Problem is no such insn exists. Solution
3151 therefore is to do a 32Ix4 comparison instead, and bitwise-
3152 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3153 let the not'd result of this initial comparison be a:b:c:d.
3154 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3155 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3156 giving the required result.
3158 The required selection sequence is 2,3,0,1, which
3159 according to Intel's documentation means the pshufd
3160 literal value is 0xB1, that is,
3161 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3163 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3164 HReg tmp = newVRegV(env);
3165 HReg dst = newVRegV(env);
3167 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3168 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3169 tmp = do_sse_Not128(env, tmp);
3170 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3171 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3175 case Iop_CmpNEZ32x4: {
3176 /* Sigh, we have to generate lousy code since this has to
3177 work on SSE1 hosts */
3178 /* basically, the idea is: for each lane:
3179 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
3180 sbbl %r, %r (now %r = 1Sto32(CF))
3185 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3186 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3187 HReg dst = newVRegV(env);
3188 HReg r32 = newVRegI(env);
3189 sub_from_esp(env, 16);
3190 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3191 for (i = 0; i < 4; i++) {
3192 am = X86AMode_IR(i*4, hregX86_ESP());
3193 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3194 addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3195 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3196 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3198 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3199 add_to_esp(env, 16);
3203 case Iop_CmpNEZ8x16:
3204 case Iop_CmpNEZ16x8: {
3205 /* We can use SSE2 instructions for this. */
3207 HReg vec0 = newVRegV(env);
3208 HReg vec1 = newVRegV(env);
3209 HReg dst = newVRegV(env);
3211 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3214 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3215 addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3216 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3217 /* defer arg computation to here so as to give CMPEQF as long
3218 as possible to complete */
3219 arg = iselVecExpr(env, e->Iex.Unop.arg);
3220 /* vec0 is all 0s; vec1 is all 1s */
3221 addInstr(env, mk_vMOVsd_RR(arg, dst));
3222 /* 16x8 or 8x16 comparison == */
3223 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3225 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3229 case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
3230 case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3231 case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary;
3234 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3235 HReg dst = newVRegV(env);
3236 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3240 case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
3241 case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
3242 case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary;
3245 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3246 HReg dst = newVRegV(env);
3248 addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
3252 case Iop_Recip32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary;
3253 case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3254 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary;
3257 /* A bit subtle. We have to copy the arg to the result
3258 register first, because actually doing the SSE scalar insn
3259 leaves the upper 3/4 of the destination register
3260 unchanged. Whereas the required semantics of these
3261 primops is that the upper 3/4 is simply copied in from the
3263 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3264 HReg dst = newVRegV(env);
3265 addInstr(env, mk_vMOVsd_RR(arg, dst));
3266 addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3270 case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary;
3271 case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
3272 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary;
3275 /* A bit subtle. We have to copy the arg to the result
3276 register first, because actually doing the SSE scalar insn
3277 leaves the upper half of the destination register
3278 unchanged. Whereas the required semantics of these
3279 primops is that the upper half is simply copied in from the
3281 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3282 HReg dst = newVRegV(env);
3284 addInstr(env, mk_vMOVsd_RR(arg, dst));
3285 addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3289 case Iop_32UtoV128: {
3290 HReg dst = newVRegV(env);
3291 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3292 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3293 addInstr(env, X86Instr_Push(rmi));
3294 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3299 case Iop_64UtoV128: {
3301 HReg dst = newVRegV(env);
3302 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3303 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3304 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3305 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3306 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3313 } /* switch (e->Iex.Unop.op) */
3314 } /* if (e->tag == Iex_Unop) */
3316 if (e->tag == Iex_Binop) {
3317 switch (e->Iex.Binop.op) {
3319 case Iop_SetV128lo32: {
3320 HReg dst = newVRegV(env);
3321 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3322 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3323 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3324 sub_from_esp(env, 16);
3325 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3326 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3327 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3328 add_to_esp(env, 16);
3332 case Iop_SetV128lo64: {
3333 HReg dst = newVRegV(env);
3334 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3335 HReg srcIhi, srcIlo;
3336 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3337 X86AMode* esp4 = advance4(esp0);
3338 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3339 sub_from_esp(env, 16);
3340 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3341 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3342 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3343 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3344 add_to_esp(env, 16);
3348 case Iop_64HLtoV128: {
3349 HReg r3, r2, r1, r0;
3350 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3351 X86AMode* esp4 = advance4(esp0);
3352 X86AMode* esp8 = advance4(esp4);
3353 X86AMode* esp12 = advance4(esp8);
3354 HReg dst = newVRegV(env);
3355 /* do this via the stack (easy, convenient, etc) */
3356 sub_from_esp(env, 16);
3357 /* Do the less significant 64 bits */
3358 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3359 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3360 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3361 /* Do the more significant 64 bits */
3362 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3363 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3364 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3365 /* Fetch result back from stack. */
3366 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3367 add_to_esp(env, 16);
3371 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3372 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3373 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3374 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3375 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4;
3376 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4;
3377 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
3378 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
3379 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4;
3380 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4;
3383 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3384 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3385 HReg dst = newVRegV(env);
3386 addInstr(env, mk_vMOVsd_RR(argL, dst));
3387 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3391 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3392 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3393 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3394 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3395 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2;
3396 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2;
3397 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
3398 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
3399 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2;
3400 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2;
3403 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3404 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3405 HReg dst = newVRegV(env);
3407 addInstr(env, mk_vMOVsd_RR(argL, dst));
3408 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3412 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3413 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3414 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3415 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3416 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4;
3417 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4;
3418 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4;
3419 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4;
3420 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4;
3421 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4;
3423 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3424 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3425 HReg dst = newVRegV(env);
3426 addInstr(env, mk_vMOVsd_RR(argL, dst));
3427 addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3431 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3432 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3433 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3434 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3435 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2;
3436 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2;
3437 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2;
3438 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
3439 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2;
3440 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2;
3442 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3443 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3444 HReg dst = newVRegV(env);
3446 addInstr(env, mk_vMOVsd_RR(argL, dst));
3447 addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3451 case Iop_QNarrow32Sx4:
3452 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3453 case Iop_QNarrow16Sx8:
3454 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3455 case Iop_QNarrow16Ux8:
3456 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3458 case Iop_InterleaveHI8x16:
3459 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3460 case Iop_InterleaveHI16x8:
3461 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3462 case Iop_InterleaveHI32x4:
3463 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3464 case Iop_InterleaveHI64x2:
3465 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3467 case Iop_InterleaveLO8x16:
3468 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3469 case Iop_InterleaveLO16x8:
3470 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3471 case Iop_InterleaveLO32x4:
3472 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3473 case Iop_InterleaveLO64x2:
3474 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3476 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg;
3477 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg;
3478 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg;
3479 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
3480 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
3481 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
3482 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg;
3483 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
3484 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
3485 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg;
3486 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg;
3487 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg;
3488 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg;
3489 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg;
3490 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg;
3491 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg;
3492 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg;
3493 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3494 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3495 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg;
3496 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg;
3497 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg;
3498 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg;
3499 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3500 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3501 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
3502 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
3503 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
3504 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
3505 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg;
3506 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
3507 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
3508 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
3509 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
3511 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3512 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3513 HReg dst = newVRegV(env);
3514 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3517 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3518 addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3520 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3521 addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3526 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3527 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3528 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3529 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3530 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3531 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3532 case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3533 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3535 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3536 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3537 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3538 HReg ereg = newVRegV(env);
3539 HReg dst = newVRegV(env);
3541 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3542 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3543 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3544 addInstr(env, X86Instr_Push(rmi));
3545 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3546 addInstr(env, mk_vMOVsd_RR(greg, dst));
3547 addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3548 add_to_esp(env, 16);
3554 } /* switch (e->Iex.Binop.op) */
3555 } /* if (e->tag == Iex_Binop) */
3557 if (e->tag == Iex_Mux0X) {
3558 X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
3559 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX);
3560 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0);
3561 HReg dst = newVRegV(env);
3562 addInstr(env, mk_vMOVsd_RR(rX,dst));
3563 addInstr(env, X86Instr_Test32(0xFF, r8));
3564 addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
3569 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3570 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3572 vpanic("iselVecExpr_wrk");
3574 # undef REQUIRE_SSE1
3575 # undef REQUIRE_SSE2
3576 # undef SSE2_OR_ABOVE
3580 /*---------------------------------------------------------*/
3581 /*--- ISEL: Statements ---*/
3582 /*---------------------------------------------------------*/
3584 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3586 if (vex_traceflags & VEX_TRACE_VCODE) {
3587 vex_printf("\n-- ");
3592 switch (stmt->tag) {
3594 /* --------- STORE --------- */
3596 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3597 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3598 IREndness end = stmt->Ist.Store.end;
3600 if (tya != Ity_I32 || end != Iend_LE)
3603 if (tyd == Ity_I32) {
3604 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3605 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3606 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3609 if (tyd == Ity_I8 || tyd == Ity_I16) {
3610 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3611 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3612 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3616 if (tyd == Ity_F64) {
3617 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3618 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3619 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3622 if (tyd == Ity_F32) {
3623 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3624 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3625 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3628 if (tyd == Ity_I64) {
3630 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3631 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3632 addInstr(env, X86Instr_Alu32M(
3633 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3634 addInstr(env, X86Instr_Alu32M(
3635 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3638 if (tyd == Ity_V128) {
3639 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3640 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3641 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3647 /* --------- PUT --------- */
3649 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3650 if (ty == Ity_I32) {
3651 /* We're going to write to memory, so compute the RHS into an
3653 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3658 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
3662 if (ty == Ity_I8 || ty == Ity_I16) {
3663 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3664 addInstr(env, X86Instr_Store(
3665 toUChar(ty==Ity_I8 ? 1 : 2),
3667 X86AMode_IR(stmt->Ist.Put.offset,
3671 if (ty == Ity_I64) {
3673 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3674 X86AMode* am4 = advance4(am);
3675 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
3676 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
3677 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
3680 if (ty == Ity_V128) {
3681 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
3682 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3683 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
3686 if (ty == Ity_F32) {
3687 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3688 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3689 set_FPU_rounding_default(env); /* paranoia */
3690 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
3693 if (ty == Ity_F64) {
3694 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3695 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3696 set_FPU_rounding_default(env); /* paranoia */
3697 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
3703 /* --------- Indexed PUT --------- */
3706 = genGuestArrayOffset(
3707 env, stmt->Ist.PutI.descr,
3708 stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
3710 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
3711 if (ty == Ity_F64) {
3712 HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
3713 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
3717 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3718 addInstr(env, X86Instr_Store( 1, r, am ));
3721 if (ty == Ity_I32) {
3722 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3723 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
3726 if (ty == Ity_I64) {
3728 X86AMode* am4 = advance4(am);
3729 iselInt64Expr(&rHi, &rLo, env, stmt->Ist.PutI.data);
3730 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
3731 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
3737 /* --------- TMP --------- */
3739 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3740 IRType ty = typeOfIRTemp(env->type_env, tmp);
3742 /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
3743 compute it into an AMode and then use LEA. This usually
3744 produces fewer instructions, often because (for memcheck
3745 created IR) we get t = address-expression, (t is later used
3746 twice) and so doing this naturally turns address-expression
3747 back into an X86 amode. */
3749 && stmt->Ist.WrTmp.data->tag == Iex_Binop
3750 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
3751 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
3752 HReg dst = lookupIRTemp(env, tmp);
3753 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
3754 /* Hmm, iselIntExpr_AMode wimped out and just computed the
3755 value into a register. Just emit a normal reg-reg move
3756 so reg-alloc can coalesce it away in the usual way. */
3757 HReg src = am->Xam.IR.reg;
3758 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
3760 addInstr(env, X86Instr_Lea32(am,dst));
3765 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3766 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
3767 HReg dst = lookupIRTemp(env, tmp);
3768 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
3771 if (ty == Ity_I64) {
3772 HReg rHi, rLo, dstHi, dstLo;
3773 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
3774 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
3775 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
3776 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
3780 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
3781 HReg dst = lookupIRTemp(env, tmp);
3782 addInstr(env, X86Instr_Set32(cond, dst));
3785 if (ty == Ity_F64) {
3786 HReg dst = lookupIRTemp(env, tmp);
3787 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3788 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
3791 if (ty == Ity_F32) {
3792 HReg dst = lookupIRTemp(env, tmp);
3793 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3794 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
3797 if (ty == Ity_V128) {
3798 HReg dst = lookupIRTemp(env, tmp);
3799 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
3800 addInstr(env, mk_vMOVsd_RR(src,dst));
3806 /* --------- Call to DIRTY helper --------- */
3809 IRDirty* d = stmt->Ist.Dirty.details;
3810 Bool passBBP = False;
3812 if (d->nFxState == 0)
3813 vassert(!d->needsBBP);
3815 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
3817 /* Marshal args, do the call, clear stack. */
3818 doHelperCall( env, passBBP, d->guard, d->cee, d->args );
3820 /* Now figure out what to do with the returned value, if any. */
3821 if (d->tmp == IRTemp_INVALID)
3822 /* No return value. Nothing to do. */
3825 retty = typeOfIRTemp(env->type_env, d->tmp);
3826 if (retty == Ity_I64) {
3828 /* The returned value is in %edx:%eax. Park it in the
3829 register-pair associated with tmp. */
3830 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
3831 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
3832 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
3835 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
3836 /* The returned value is in %eax. Park it in the register
3837 associated with tmp. */
3838 HReg dst = lookupIRTemp(env, d->tmp);
3839 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
3845 /* --------- MEM FENCE --------- */
3847 switch (stmt->Ist.MBE.event) {
3849 addInstr(env, X86Instr_MFence(env->hwcaps));
3856 /* --------- ACAS --------- */
3858 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3859 /* "normal" singleton CAS */
3861 IRCAS* cas = stmt->Ist.CAS.details;
3862 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3863 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
3864 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
3865 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
3866 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
3867 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
3868 vassert(cas->expdHi == NULL);
3869 vassert(cas->dataHi == NULL);
3870 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
3871 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
3872 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
3874 case Ity_I32: sz = 4; break;
3875 case Ity_I16: sz = 2; break;
3876 case Ity_I8: sz = 1; break;
3877 default: goto unhandled_cas;
3879 addInstr(env, X86Instr_ACAS(am, sz));
3881 X86Instr_CMov32(Xcc_NZ,
3882 X86RM_Reg(hregX86_EAX()), rOldLo));
3886 IRCAS* cas = stmt->Ist.CAS.details;
3887 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3888 /* only 32-bit allowed in this case */
3889 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
3890 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
3891 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
3892 HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
3893 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
3894 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
3895 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
3896 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
3897 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
3900 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
3901 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
3902 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
3903 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
3904 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
3905 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
3906 addInstr(env, X86Instr_DACAS(am));
3908 X86Instr_CMov32(Xcc_NZ,
3909 X86RM_Reg(hregX86_EDX()), rOldHi));
3911 X86Instr_CMov32(Xcc_NZ,
3912 X86RM_Reg(hregX86_EAX()), rOldLo));
3918 /* --------- INSTR MARK --------- */
3919 /* Doesn't generate any executable code ... */
3923 /* --------- NO-OP --------- */
3924 /* Fairly self-explanatory, wouldn't you say? */
3928 /* --------- EXIT --------- */
3932 if (stmt->Ist.Exit.dst->tag != Ico_U32)
3933 vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value");
3934 dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
3935 cc = iselCondCode(env,stmt->Ist.Exit.guard);
3936 addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
3948 /*---------------------------------------------------------*/
3949 /*--- ISEL: Basic block terminators (Nexts) ---*/
3950 /*---------------------------------------------------------*/
3952 static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
3955 if (vex_traceflags & VEX_TRACE_VCODE) {
3956 vex_printf("\n-- goto {");
3962 ri = iselIntExpr_RI(env, next);
3963 addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri));
3967 /*---------------------------------------------------------*/
3968 /*--- Insn selector top-level ---*/
3969 /*---------------------------------------------------------*/
3971 /* Translate an entire SB to x86 code. */
3973 HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host,
3974 VexArchInfo* archinfo_host,
3975 VexAbiInfo* vbi/*UNUSED*/ )
3980 UInt hwcaps_host = archinfo_host->hwcaps;
3983 vassert(arch_host == VexArchX86);
3984 vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_X86_SSE1
3985 |VEX_HWCAPS_X86_SSE2
3986 |VEX_HWCAPS_X86_SSE3)));
3988 /* Make up an initial environment to use. */
3989 env = LibVEX_Alloc(sizeof(ISelEnv));
3992 /* Set up output code array. */
3993 env->code = newHInstrArray();
3995 /* Copy BB's type env. */
3996 env->type_env = bb->tyenv;
3998 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
3999 change as we go along. */
4000 env->n_vregmap = bb->tyenv->types_used;
4001 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
4002 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
4004 /* and finally ... */
4005 env->hwcaps = hwcaps_host;
4007 /* For each IR temporary, allocate a suitably-kinded virtual
4010 for (i = 0; i < env->n_vregmap; i++) {
4011 hregHI = hreg = INVALID_HREG;
4012 switch (bb->tyenv->types[i]) {
4016 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
4017 case Ity_I64: hreg = mkHReg(j++, HRcInt32, True);
4018 hregHI = mkHReg(j++, HRcInt32, True); break;
4020 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
4021 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
4022 default: ppIRType(bb->tyenv->types[i]);
4023 vpanic("iselBB: IRTemp type");
4025 env->vregmap[i] = hreg;
4026 env->vregmapHI[i] = hregHI;
4030 /* Ok, finally we can iterate over the statements. */
4031 for (i = 0; i < bb->stmts_used; i++)
4032 iselStmt(env,bb->stmts[i]);
4034 iselNext(env,bb->next,bb->jumpkind);
4036 /* record the number of vregs we used. */
4037 env->code->n_vregs = env->vreg_ctr;
4042 /*---------------------------------------------------------------*/
4043 /*--- end host_x86_isel.c ---*/
4044 /*---------------------------------------------------------------*/