/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_isel.c ---*/
/*---------------------------------------------------------------*/
/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_x86_defs.h"

/* TODO:

   -- (Really an assembler issue) don't emit CMov32 as a cmov
      insn, since that's expensive on P4 and conditional branch
      is cheaper if (as we expect) the condition is highly predictable

   -- preserve xmm registers across function calls (by declaring them
      as trashed by call insns)

   -- preserve x87 ST stack discipline across function calls.  Sigh.

   -- Check doHelperCall: if a call is conditional, we cannot safely
      compute any regparm args directly to registers.  Hence, the
      fast-regparm marshalling should be restricted to unconditional
      calls only.
*/

/*---------------------------------------------------------*/
/*--- x87 control word stuff                            ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at
   exit.
*/

#define DEFAULT_FPUCW 0x027F

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
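
/* For reference, a sketch of how these two magic values decompose,
   following the Intel SDM bit layouts.  The macro names below are
   illustrative only and are not used elsewhere in this file. */
#if 0
#define X87CW_EXN_MASKS    0x003F  /* IM|DM|ZM|OM|UM|PM: mask all x87 exns */
#define X87CW_RESERVED_B6  0x0040  /* bit 6 is reserved; reads back as 1 */
#define X87CW_PC_53BIT     0x0200  /* PC (bits 9:8) = 10b: 53-bit precision */
#define X87CW_RC_NEAREST   0x0000  /* RC (bits 11:10) = 00b: to-nearest */
/* X87CW_EXN_MASKS | X87CW_RESERVED_B6 | X87CW_PC_53BIT | X87CW_RC_NEAREST
   == 0x027F == DEFAULT_FPUCW */

#define MXCSR_EXN_MASKS    0x1F80  /* bits 12:7: mask all six SSE exns */
/* with RC (bits 14:13) = 00b (to-nearest) and FTZ/DAZ clear, the whole
   %mxcsr register is 0x1F80 */
#endif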

/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-x86/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

/*---------------------------------------------------------*/
/*--- ISEL: Internal environment                        ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register(s) are associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

     - vregmap   holds the primary register for the IRTemp.
     - vregmapHI is only used for 64-bit integer-typed
       IRTemps.  It holds the identity of a second
       32-bit virtual HReg, which holds the high half
       of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   Note, this is all host-independent.  */
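
typedef
   struct {
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      HInstrArray* code;

      Int          vreg_ctr;

      UInt         hwcaps;
   }
   ISelEnv;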

static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(env->vregmapHI[tmp] != INVALID_HREG);
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, X86Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppX86Instr(instr, False);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegF ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static X86RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static X86RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static X86RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static X86RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, IRExpr* e );
static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, IRExpr* e );

static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static X86CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr     ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr     ( ISelEnv* env, IRExpr* e );

static HReg        iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselVecExpr     ( ISelEnv* env, IRExpr* e );

/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

/* Make an int reg-reg move. */

static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
}

/* Make a vector reg-reg move. */

static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return X86Instr_SseReRg(Xsse_MOV, src, dst);
}

/* Advance/retreat %esp by n. */

static void add_to_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
}

static void sub_from_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
}

/* Given an amode, return one which references 4 bytes further
   along. */

static X86AMode* advance4 ( X86AMode* am )
{
   X86AMode* am4 = dopyX86AMode(am);
   switch (am4->tag) {
      case Xam_IRRS:
         am4->Xam.IRRS.imm += 4; break;
      case Xam_IR:
         am4->Xam.IR.imm += 4; break;
      default:
         vpanic("advance4(x86,host)");
   }
   return am4;
}

/* Push an arg onto the host stack, in preparation for a call to a
   helper function of some kind.  Returns the number of 32-bit words
   pushed. */

static Int pushArg ( ISelEnv* env, IRExpr* arg )
{
   IRType arg_ty = typeOfIRExpr(env->type_env, arg);
   if (arg_ty == Ity_I32) {
      addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
      return 1;
   } else
   if (arg_ty == Ity_I64) {
      HReg rHi, rLo;
      iselInt64Expr(&rHi, &rLo, env, arg);
      /* Push the high word first, so the value ends up in memory in
         little-endian layout, low word at the lower address. */
      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
      return 2;
   }
   ppIRExpr(arg);
   vpanic("pushArg(x86): can't handle arg of this type");
}

/* Complete the call to a helper function, by calling the
   helper and clearing the args off the stack. */

static
void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
                              IRCallee* cee, Int n_arg_ws )
{
   /* Complication.  Need to decide which reg to use as the fn address
      pointer, in a way that doesn't trash regparm-passed
      parameters. */
   vassert(sizeof(void*) == 4);

   addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)),
                                cee->regparms));
   if (n_arg_ws > 0)
      add_to_esp(env, 4*n_arg_ws);
}

/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of regparm args.  This function figures out whether
   evaluation of an expression might require use of a fixed register.
   If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}

/* Do a complete function call.  guard is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional. */

static
void doHelperCall ( ISelEnv* env,
                    Bool passBBP,
                    IRExpr* guard, IRCallee* cee, IRExpr** args )
{
   X86CondCode cc;
   HReg        argregs[3];
   HReg        tmpregs[3];
   Bool        danger;
   Int         not_done_yet, n_args, n_arg_ws, stack_limit,
               i, argreg, argregX;

   /* Marshal args for a call, do the call, and clear the stack.
      Complexities to consider:

      * if passBBP is True, %ebp (the baseblock pointer) is to be
        passed as the first arg.

      * If the callee claims regparmness of 1, 2 or 3, we must pass the
        first 1, 2 or 3 args in registers (EAX, EDX, and ECX
        respectively).  To keep things relatively simple, only args of
        type I32 may be passed as regparms -- just bomb out if anything
        else turns up.  Clearly this depends on the front ends not
        trying to pass any other types as regparms.
   */

   /* 16 Nov 2004: the regparm handling is complicated by the
      following problem.

      Consider a call to a function with two regparm parameters:
      f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
      Suppose code is first generated to compute e1 into %eax.  Then,
      code is generated to compute e2 into %edx.  Unfortunately, if
      the latter code sequence uses %eax, it will trash the value of
      e1 computed by the former sequence.  This could happen if (for
      example) e2 itself involved a function call.  In the code below,
      args are evaluated right-to-left, not left-to-right, but the
      principle and the problem are the same.

      One solution is to compute all regparm-bound args into vregs
      first, and once they are all done, move them to the relevant
      real regs.  This always gives correct code, but it also gives
      a bunch of vreg-to-rreg moves which are usually redundant but
      are hard for the register allocator to get rid of.

      A compromise is to first examine all regparm'd argument
      expressions.  If they are all so simple that it is clear
      they will be evaluated without use of any fixed registers,
      use the old compute-directly-to-fixed-target scheme.  If not,
      be safe and use the via-vregs scheme.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this
      insn selector works.  Currently just the following 3 are
      regarded as safe -- hopefully they cover the majority of
      arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
   vassert(cee->regparms >= 0 && cee->regparms <= 3);

   n_args = n_arg_ws = 0;
   while (args[n_args]) n_args++;

   not_done_yet = n_args;
   if (passBBP)
      not_done_yet++;

   stack_limit = cee->regparms;
   if (cee->regparms > 0 && passBBP) stack_limit--;

   /* ------ BEGIN marshall all arguments ------ */

   /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
   for (i = n_args-1; i >= stack_limit; i--) {
      n_arg_ws += pushArg(env, args[i]);
      not_done_yet--;
   }

   /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
      registers. */

   if (cee->regparms > 0) {

      /* ------ BEGIN deal with regparms ------ */

      /* deal with regparms, not forgetting %ebp if needed. */
      argregs[0] = hregX86_EAX();
      argregs[1] = hregX86_EDX();
      argregs[2] = hregX86_ECX();
      tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;

      argreg = cee->regparms;

      /* In keeping with big comment above, detect potential danger
         and use the via-vregs scheme if needed. */
      danger = False;
      for (i = stack_limit-1; i >= 0; i--) {
         if (mightRequireFixedRegs(args[i])) {
            danger = True;
            break;
         }
      }

      if (danger) {

         /* Move via temporaries */
         argregX = argreg;
         for (i = stack_limit-1; i >= 0; i--) {

            if (0) {
               vex_printf("x86 host: register param is complex: ");
               ppIRExpr(args[i]);
               vex_printf("\n");
            }

            argreg--;
            vassert(argreg >= 0);
            vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
            tmpregs[argreg] = iselIntExpr_R(env, args[i]);
            not_done_yet--;
         }
         for (i = stack_limit-1; i >= 0; i--) {
            argregX--;
            vassert(argregX >= 0);
            addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
         }

      } else {
         /* It's safe to compute all regparm args directly into their
            target registers. */
         for (i = stack_limit-1; i >= 0; i--) {
            argreg--;
            vassert(argreg >= 0);
            vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                          iselIntExpr_RMI(env, args[i]),
                                          argregs[argreg]));
            not_done_yet--;
         }
      }

      /* Not forgetting %ebp if needed. */
      if (passBBP) {
         vassert(argreg == 1);
         addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0]));
         not_done_yet--;
      }

      /* ------ END deal with regparms ------ */

   } else {

      /* No regparms.  Heave %ebp on the stack if needed. */
      if (passBBP) {
         addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
         n_arg_ws++;
         not_done_yet--;
      }

   }

   vassert(not_done_yet == 0);

   /* ------ END marshall all arguments ------ */

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Xcc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* call the helper, and get the args off the stack afterwards. */
   callHelperAndClearArgs( env, cc, cee, n_arg_ws );
}
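
/* Worked example (a sketch, not generated from any particular input):
   for a call to a helper h with regparms==2 and args (e1,e2,e3),
   stack_limit is 2, so e3 is pushed on the stack while e1/e2 are
   marshalled into %eax/%edx, giving roughly

      pushl <e3>
      movl  <e2>, %edx     -- or via intermediate vregs, if the
      movl  <e1>, %eax     -- danger flag was set
      call  h
      addl  $4, %esp
*/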

/* Given a guest-state array descriptor, an index expression and a
   bias, generate an X86AMode holding the relevant guest state
   offset. */

static
X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;
   Int  shift  = 0;

   /* throw out any cases not generated by an x86 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-x86-guest on x86 host. */

   if (nElems != 8)
      vpanic("genGuestArrayOffset(x86 host)(1)");

   switch (elemSz) {
      case 1:  shift = 0; break;
      case 4:  shift = 2; break;
      case 8:  shift = 3; break;
      default: vpanic("genGuestArrayOffset(x86 host)(2)");
   }

   /* Compute off into a reg, %off.  Then return:

        movl %off, %tmp
        addl $bias, %tmp  (if bias != 0)
        andl $7, %tmp
        ... base(%ebp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      addInstr(env,
               X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
   }
   addInstr(env,
            X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
   return
      X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* pushl $DEFAULT_FPUCW
      fldcw 0(%esp)
      addl  $4, %esp
   */
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

   /* movl  %rrm, %rrm2
      andl  $3, %rrm2   -- shouldn't be needed; paranoia
      shll  $10, %rrm2
      orl   $DEFAULT_FPUCW, %rrm2
      pushl %rrm2
      fldcw 0(%esp)
      addl  $4, %esp
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
   addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}
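
/* Why the bare shift-by-10 above suffices: the IRRoundingMode encoding
   (see libvex_ir.h) happens to coincide with the x87 RC field encoding,
   so the 2-bit IR value can be dropped directly into bits 11:10 of the
   control word:

      Irrm_NEAREST = 0  -->  RC = 00b  (round to nearest even)
      Irrm_NegINF  = 1  -->  RC = 01b  (round down)
      Irrm_PosINF  = 2  -->  RC = 10b  (round up)
      Irrm_ZERO    = 3  -->  RC = 11b  (truncate)
*/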

/* Generate !src into a new vector register, and be sure that the code
   is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
   way to do this.
*/
static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegV(env);
   /* Set dst to zero.  If dst contains a NaN then all hell might
      break loose after the comparison.  So, first zero it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
   /* And now make it all 1s ... */
   addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
   /* Finally, xor 'src' into it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
   /* Doesn't that just totally suck? */
   return dst;
}
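
/* Concretely, the three insns above are just (AT&T syntax sketch):

      xorps   %dst, %dst    -- dst := 0, clearing any stale NaNs
      cmpeqps %dst, %dst    -- 0 == 0 in every lane, so dst := all-ones
      xorps   %src, %dst    -- dst := ~src

   all of which are SSE1-only instructions. */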

/* Round an x87 FPU value to 53-bit-mantissa precision, to be used
   after most non-simple FPU operations (simple = +, -, *, / and
   sqrt).

   This could be done a lot more efficiently if needed, by loading
   zero and adding it to the value to be rounded (fldz ; faddp?).
*/
static void roundToF64 ( ISelEnv* env, HReg reg )
{
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   sub_from_esp(env, 8);
   addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
   addInstr(env, X86Instr_FpLdSt(True/*load*/,   8, reg, zero_esp));
   add_to_esp(env, 8);
}

/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 32, 16 and 8-bit type.  All
   results are returned in a 32-bit register.  For 16- and 8-bit
   expressions, the upper 16/24 bits are arbitrary, so you should mask
   or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt32);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

      /* --------- TEMP --------- */
      case Iex_RdTmp: {
         return lookupIRTemp(env, e->Iex.RdTmp.tmp);
      }

      /* --------- LOAD --------- */
      case Iex_Load: {
         HReg dst = newVRegI(env);
         X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

         /* We can't handle big-endian loads, nor load-linked. */
         if (e->Iex.Load.end != Iend_LE)
            goto irreducible;

         if (ty == Ity_I32) {
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                          X86RMI_Mem(amode), dst) );
            return dst;
         }
         if (ty == Ity_I16) {
            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
         if (ty == Ity_I8) {
            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
         break;
      }

      /* --------- TERNARY OP --------- */
      case Iex_Triop: {
         /* C3210 flags following FPU partial remainder (fprem), both
            IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
         if (e->Iex.Triop.op == Iop_PRemC3210F64
             || e->Iex.Triop.op == Iop_PRem1C3210F64) {
            HReg junk = newVRegF(env);
            HReg dst  = newVRegI(env);
            HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
            HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, X86Instr_FpBinary(
                             e->Iex.Triop.op==Iop_PRemC3210F64
                                ? Xfp_PREM : Xfp_PREM1,
                             srcL, srcR, junk
                  ));
            /* The previous pseudo-insn will have left the FPU's C3210
               flags set correctly.  So bag them. */
            addInstr(env, X86Instr_FpStSW_AX());
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
            return dst;
         }

         break;
      }

      /* --------- BINARY OP --------- */
      case Iex_Binop: {
         X86AluOp   aluOp;
         X86ShiftOp shOp;

         /* Pattern: Sub32(0,x) */
         if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
            HReg dst = newVRegI(env);
            HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
            return dst;
         }

         /* Is it an addition or logical style op? */
         switch (e->Iex.Binop.op) {
            case Iop_Add8: case Iop_Add16: case Iop_Add32:
               aluOp = Xalu_ADD; break;
            case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
               aluOp = Xalu_SUB; break;
            case Iop_And8: case Iop_And16: case Iop_And32:
               aluOp = Xalu_AND; break;
            case Iop_Or8:  case Iop_Or16:  case Iop_Or32:
               aluOp = Xalu_OR; break;
            case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
               aluOp = Xalu_XOR; break;
            case Iop_Mul16: case Iop_Mul32:
               aluOp = Xalu_MUL; break;
            default:
               aluOp = Xalu_INVALID; break;
         }
         /* For commutative ops we assume any literal
            values are on the second operand. */
         if (aluOp != Xalu_INVALID) {
            HReg dst    = newVRegI(env);
            HReg reg    = iselIntExpr_R(env, e->Iex.Binop.arg1);
            X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
            return dst;
         }
         /* Could do better here; forcing the first arg into a reg
            isn't always a good idea.
            -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
                           LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
                           t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
               movl 0xFFFFFFA0(%vr41),%vr107
               movl 0xFFFFFFA4(%vr41),%vr108
               ...
               movl 0xFFFFFFA8(%vr41),%vr109
               ...
               movl 0xFFFFFFA0(%vr41),%vr110
               ...
            where the later loads could have been memory operands of the
            ALU insns rather than each being forced into a register. */

         /* Perhaps a shift op? */
         switch (e->Iex.Binop.op) {
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               shOp = Xsh_SHL; break;
            case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
               shOp = Xsh_SHR; break;
            case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
               shOp = Xsh_SAR; break;
            default:
               shOp = Xsh_INVALID; break;
         }
         if (shOp != Xsh_INVALID) {
            HReg dst = newVRegI(env);

            /* regL = the value to be shifted */
            HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(regL,dst));

            /* Do any necessary widening for 16/8 bit operands */
            switch (e->Iex.Binop.op) {
               case Iop_Shr8:
                  addInstr(env, X86Instr_Alu32R(
                                   Xalu_AND, X86RMI_Imm(0xFF), dst));
                  break;
               case Iop_Shr16:
                  addInstr(env, X86Instr_Alu32R(
                                   Xalu_AND, X86RMI_Imm(0xFFFF), dst));
                  break;
               case Iop_Sar8:
                  addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
                  addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
                  break;
               case Iop_Sar16:
                  addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
                  addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
                  break;
               default:
                  break;
            }

            /* Now consider the shift amount.  If it's a literal, we
               can do a much better job than the general case. */
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               /* assert that the IR is well-typed */
               Int nshift;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
               vassert(nshift >= 0);
               if (nshift > 0)
                  /* Can't allow nshift==0 since that means %cl */
                  addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
            } else {
               /* General case; we have to force the amount into %cl. */
               HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
               addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
               addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
            }
            return dst;
         }

         /* Handle misc other ops. */

         if (e->Iex.Binop.op == Iop_Max32U) {
            HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg dst  = newVRegI(env);
            HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(src1,dst));
            addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
            addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
            return dst;
         }

         if (e->Iex.Binop.op == Iop_8HLto16) {
            HReg hi8  = newVRegI(env);
            HReg lo8  = newVRegI(env);
            HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
            addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
            return hi8;
         }

         if (e->Iex.Binop.op == Iop_16HLto32) {
            HReg hi16  = newVRegI(env);
            HReg lo16  = newVRegI(env);
            HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
            addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
            return hi16;
         }

         if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
             || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
            /* 8/16-bit widening multiplies: shift each operand to the
               top of the register and back down again (SAR for signed,
               SHR for unsigned) so it is correctly sign- or
               zero-extended, then do a normal 32-bit multiply. */
            HReg a16  = newVRegI(env);
            HReg b16  = newVRegI(env);
            HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            Int  shift = (e->Iex.Binop.op == Iop_MullS8
                          || e->Iex.Binop.op == Iop_MullU8)
                            ? 24 : 16;
            X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
                                 || e->Iex.Binop.op == Iop_MullS16)
                                   ? Xsh_SAR : Xsh_SHR;

            addInstr(env, mk_iMOVsd_RR(a16s, a16));
            addInstr(env, mk_iMOVsd_RR(b16s, b16));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
            addInstr(env, X86Instr_Sh32(shr_op,  shift, a16));
            addInstr(env, X86Instr_Sh32(shr_op,  shift, b16));
            addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
            return b16;
         }

         if (e->Iex.Binop.op == Iop_CmpF64) {
            HReg fL  = iselDblExpr(env, e->Iex.Binop.arg1);
            HReg fR  = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);
            addInstr(env, X86Instr_FpCmp(fL,fR,dst));
            /* shift this right 8 bits so as to conform to CmpF64
               definition. */
            addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
            return dst;
         }

         if (e->Iex.Binop.op == Iop_F64toI32S
             || e->Iex.Binop.op == Iop_F64toI16S) {
            Int  sz  = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
            HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);

            /* Used several times ... */
            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

            /* rf now holds the value to be converted, and rrm holds the
               rounding mode value, encoded as per the IRRoundingMode
               enum.  The first thing to do is set the FPU's rounding
               mode accordingly. */

            /* Create a space for the format conversion. */
            sub_from_esp(env, 4);

            /* Set host rounding mode */
            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

            /* gistw/l %rf, 0(%esp) */
            addInstr(env, X86Instr_FpLdStI(False/*store*/,
                                           toUChar(sz), rf, zero_esp));

            if (sz == 2) {
               /* movzwl 0(%esp), %dst */
               addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
            } else {
               /* movl 0(%esp), %dst */
               vassert(sz == 4);
               addInstr(env, X86Instr_Alu32R(
                                Xalu_MOV, X86RMI_Mem(zero_esp), dst));
            }

            /* Restore default FPU rounding. */
            set_FPU_rounding_default( env );

            add_to_esp(env, 4);
            return dst;
         }

         break;
      }

      /* --------- UNARY OP --------- */
      case Iex_Unop: {

         /* 1Uto8(32to1(expr32)) */
         if (e->Iex.Unop.op == Iop_1Uto8) {
            DECLARE_PATTERN(p_32to1_then_1Uto8);
            DEFINE_PATTERN(p_32to1_then_1Uto8,
                           unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
            if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
               IRExpr* expr32 = mi.bindee[0];
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, expr32);
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                             X86RMI_Imm(1), dst));
               return dst;
            }
         }

         /* 8Uto32(LDle(expr32)) */
         if (e->Iex.Unop.op == Iop_8Uto32) {
            DECLARE_PATTERN(p_LDle8_then_8Uto32);
            DEFINE_PATTERN(p_LDle8_then_8Uto32,
                           unop(Iop_8Uto32,
                                IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
            if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
               HReg dst = newVRegI(env);
               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
               addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
               return dst;
            }
         }

         /* 8Sto32(LDle(expr32)) */
         if (e->Iex.Unop.op == Iop_8Sto32) {
            DECLARE_PATTERN(p_LDle8_then_8Sto32);
            DEFINE_PATTERN(p_LDle8_then_8Sto32,
                           unop(Iop_8Sto32,
                                IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
            if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
               HReg dst = newVRegI(env);
               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
               addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
               return dst;
            }
         }

         /* 16Uto32(LDle(expr32)) */
         if (e->Iex.Unop.op == Iop_16Uto32) {
            DECLARE_PATTERN(p_LDle16_then_16Uto32);
            DEFINE_PATTERN(p_LDle16_then_16Uto32,
                           unop(Iop_16Uto32,
                                IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
            if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
               HReg dst = newVRegI(env);
               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
               addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
               return dst;
            }
         }

         /* 8Uto32(GET:I8) */
         if (e->Iex.Unop.op == Iop_8Uto32) {
            if (e->Iex.Unop.arg->tag == Iex_Get) {
               HReg      dst;
               X86AMode* amode;
               vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
               dst = newVRegI(env);
               amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                   hregX86_EBP());
               addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
               return dst;
            }
         }

         /* 16Uto32(GET:I16) */
         if (e->Iex.Unop.op == Iop_16Uto32) {
            if (e->Iex.Unop.arg->tag == Iex_Get) {
               HReg      dst;
               X86AMode* amode;
               vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
               dst = newVRegI(env);
               amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                   hregX86_EBP());
               addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
               return dst;
            }
         }

         switch (e->Iex.Unop.op) {
            case Iop_8Uto16:
            case Iop_8Uto32:
            case Iop_16Uto32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                             X86RMI_Imm(mask), dst));
               return dst;
            }
            case Iop_8Sto16:
            case Iop_8Sto32:
            case Iop_16Sto32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
               return dst;
            }
            case Iop_Not8:
            case Iop_Not16:
            case Iop_Not32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
               return dst;
            }
            case Iop_64HIto32: {
               HReg rHi, rLo;
               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rHi; /* and abandon rLo .. poor wee thing :-) */
            }
            case Iop_64to32: {
               HReg rHi, rLo;
               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rLo; /* similar stupid comment to the above ... */
            }
            case Iop_16HIto8:
            case Iop_32HIto16: {
               HReg dst  = newVRegI(env);
               HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
               Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
               return dst;
            }
            case Iop_1Uto8:
            case Iop_1Uto32: {
               HReg dst = newVRegI(env);
               X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Set32(cond,dst));
               return dst;
            }
            case Iop_1Sto8:
            case Iop_1Sto16:
            case Iop_1Sto32: {
               /* could do better than this, but for now ... */
               HReg dst = newVRegI(env);
               X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Set32(cond,dst));
               addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
               return dst;
            }
            case Iop_Ctz32: {
               /* Count trailing zeroes, implemented by x86 'bsfl' */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Bsfr32(True,src,dst));
               return dst;
            }
            case Iop_Clz32: {
               /* Count leading zeroes.  Do 'bsrl' to establish the index
                  of the highest set bit, and subtract that value from
                  31. */
               HReg tmp = newVRegI(env);
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Bsfr32(False,src,tmp));
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             X86RMI_Imm(31), dst));
               addInstr(env, X86Instr_Alu32R(Xalu_SUB,
                                             X86RMI_Reg(tmp), dst));
               return dst;
            }

            case Iop_CmpwNEZ32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, mk_iMOVsd_RR(src,dst));
               addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
               addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                             X86RMI_Reg(src), dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
               return dst;
            }
            case Iop_Left8:
            case Iop_Left16:
            case Iop_Left32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, mk_iMOVsd_RR(src, dst));
               addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
               addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
               return dst;
            }

            case Iop_V128to32: {
               HReg      dst  = newVRegI(env);
               HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
               X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
               sub_from_esp(env, 16);
               addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
               addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
               add_to_esp(env, 16);
               return dst;
            }

            /* ReinterpF32asI32(e) */
            /* Given an IEEE754 single, produce an I32 with the same bit
               pattern.  Keep stack 8-aligned even though only using 4
               bytes. */
            case Iop_ReinterpF32asI32: {
               HReg rf  = iselFltExpr(env, e->Iex.Unop.arg);
               HReg dst = newVRegI(env);
               X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
               /* paranoia */
               set_FPU_rounding_default(env);
               /* subl $8, %esp */
               sub_from_esp(env, 8);
               /* gstF %rf, 0(%esp) */
               addInstr(env,
                        X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
               /* movl 0(%esp), %dst */
               addInstr(env,
                        X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
               /* addl $8, %esp */
               add_to_esp(env, 8);
               return dst;
            }

            case Iop_16to8:
            case Iop_32to8:
            case Iop_32to16:
               /* These are no-ops. */
               return iselIntExpr_R(env, e->Iex.Unop.arg);

            default:
               break;
         }
         break;
      }

      /* --------- GET --------- */
      case Iex_Get: {
         if (ty == Ity_I32) {
            HReg dst = newVRegI(env);
            addInstr(env, X86Instr_Alu32R(
                             Xalu_MOV,
                             X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                                    hregX86_EBP())),
                             dst));
            return dst;
         }
         if (ty == Ity_I8 || ty == Ity_I16) {
            HReg dst = newVRegI(env);
            addInstr(env, X86Instr_LoadEX(
                             toUChar(ty==Ity_I8 ? 1 : 2),
                             False,
                             X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
                             dst));
            return dst;
         }
         break;
      }

      case Iex_GetI: {
         X86AMode* am
            = genGuestArrayOffset(
                 env, e->Iex.GetI.descr,
                      e->Iex.GetI.ix, e->Iex.GetI.bias );
         HReg dst = newVRegI(env);
         if (ty == Ity_I8) {
            addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
            return dst;
         }
         if (ty == Ity_I32) {
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
            return dst;
         }
         break;
      }

      /* --------- CCALL --------- */
      case Iex_CCall: {
         HReg dst = newVRegI(env);
         vassert(ty == e->Iex.CCall.retty);

         /* be very restrictive for now.  Only 32/64-bit ints allowed
            for args, and 32 bits for return type. */
         if (e->Iex.CCall.retty != Ity_I32)
            goto irreducible;

         /* Marshal args, do the call, clear stack. */
         doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );

         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
         return dst;
      }

      /* --------- LITERAL --------- */
      /* 32/16/8-bit literals */
      case Iex_Const: {
         X86RMI* rmi = iselIntExpr_RMI ( env, e );
         HReg    r   = newVRegI(env);
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
         return r;
      }

      /* --------- MULTIPLEX --------- */
      case Iex_Mux0X: {
         if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
             && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
            X86RM* r8;
            HReg   rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
            X86RM* r0  = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
            HReg   dst = newVRegI(env);
            addInstr(env, mk_iMOVsd_RR(rX,dst));
            r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
            addInstr(env, X86Instr_Test32(0xFF, r8));
            addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst));
            return dst;
         }
         break;
      }

      default:
         break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R: cannot reduce tree");
}

/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 32-bit one.
*/

static Bool sane_AMode ( X86AMode* am )
{
   switch (am->tag) {
      case Xam_IR:
         return
            toBool( hregClass(am->Xam.IR.reg) == HRcInt32
                    && (hregIsVirtual(am->Xam.IR.reg)
                        || am->Xam.IR.reg == hregX86_EBP()) );
      case Xam_IRRS:
         return
            toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.base)
                    && hregClass(am->Xam.IRRS.index) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown x86 amode tag");
   }
}

static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
{
   X86AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32);

   /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
       && e->Iex.Binop.arg1->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg1
                    ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
                                       ->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(imm32, r1, r2, shift);
      }
   }

   /* Add32(expr1, Shl32(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add32(expr,i) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return X86AMode_IR(0, r1);
   }
}

/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an X86RMI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */

static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
{
   X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* sanity checks ... */
   switch (rmi->tag) {
      case Xrmi_Imm:
         return rmi;
      case Xrmi_Reg:
         vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
         return rmi;
      case Xrmi_Mem:
         vassert(sane_AMode(rmi->Xrmi.Mem.am));
         return rmi;
      default:
         vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
      }
      return X86RMI_Imm(u);
   }

   /* special case: 32-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                    hregX86_EBP()));
   }

   /* special case: 32-bit load from memory */
   if (e->tag == Iex_Load && ty == Ity_I32
       && e->Iex.Load.end == Iend_LE) {
      X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return X86RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RMI_Reg(r);
   }
}

/* --------------------- RIs --------------------- */

/* Calculate an expression into an X86RI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */

static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   X86RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Xri_Imm:
         return ri;
      case Xri_Reg:
         vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(ri->Xri.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown x86 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
      }
      return X86RI_Imm(u);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RI_Reg(r);
   }
}

/* --------------------- RMs --------------------- */

/* Similarly, calculate an expression into an X86RM operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */

static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
{
   X86RM* rm = iselIntExpr_RM_wrk(env, e);
   /* sanity checks ... */
   switch (rm->tag) {
      case Xrm_Reg:
         vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rm->Xrm.Reg.reg));
         return rm;
      case Xrm_Mem:
         vassert(sane_AMode(rm->Xrm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown x86 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 32-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
                                   hregX86_EBP()));
   }

   /* special case: load from memory */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RM_Reg(r);
   }
}

/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Test32 doesn't modify r32; so this is OK. */
      addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
      return Xcc_NZ;
   }

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      /* The MOV sets r to 0 without touching the flags; the XOR of r
         with itself then sets the Z flag (and also ensures r is
         defined before the XOR reads it). */
      addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
      return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
   }

   /* Not1(...) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 32to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32to1) {
      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
      addInstr(env, X86Instr_Test32(1,rm));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   /* CmpNEZ8(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
      addInstr(env, X86Instr_Test32(0xFF,rm));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ16 --- */

   /* CmpNEZ16(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ16) {
      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
      addInstr(env, X86Instr_Test32(0xFFFF,rm));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(And32(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ32_And32);
      DEFINE_PATTERN(p_CmpNEZ32_And32,
                     unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
         HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg    tmp  = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
         return Xcc_NZ;
      }
   }

   /* CmpNEZ32(Or32(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ32_Or32);
      DEFINE_PATTERN(p_CmpNEZ32_Or32,
                     unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
         HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg    tmp  = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
         return Xcc_NZ;
      }
   }

   /* CmpNEZ32(GET(..):I32) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32
       && e->Iex.Unop.arg->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                 hregX86_EBP());
      addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
      return Xcc_NZ;
   }

   /* CmpNEZ32(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg    r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      X86RMI* rmi2 = X86RMI_Imm(0);
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   /* CmpNEZ64(Or64(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ64_Or64);
      DEFINE_PATTERN(p_CmpNEZ64_Or64,
                     unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
         HReg hi1, lo1, hi2, lo2;
         HReg tmp = newVRegI(env);
         iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
         addInstr(env, mk_iMOVsd_RR(hi1, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
         iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
         return Xcc_NZ;
      }
   }

   /* CmpNEZ64(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg hi, lo;
      HReg tmp = newVRegI(env);
      iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
      addInstr(env, mk_iMOVsd_RR(hi, tmp));
      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */

   /* CmpEQ8 / CmpNE8 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8
           || e->Iex.Binop.op == Iop_CasCmpEQ8
           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
      if (isZeroU8(e->Iex.Binop.arg2)) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
            default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
         }
      } else {
         HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
         X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         HReg    r    = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,r));
         addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
            default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
         }
      }
   }

   /* CmpEQ16 / CmpNE16 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ16
           || e->Iex.Binop.op == Iop_CmpNE16
           || e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpNE16)) {
      HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg    r    = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
      addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z;
         case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ;
         default: vpanic("iselCondCode(x86): CmpXX16");
      }
   }

   /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
      Saves a "movl %eax, %tmp" compared to the default route. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_CmpNE32
       && e->Iex.Binop.arg1->tag == Iex_CCall
       && e->Iex.Binop.arg2->tag == Iex_Const) {
      IRExpr* cal = e->Iex.Binop.arg1;
      IRExpr* con = e->Iex.Binop.arg2;
      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
      vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
      vassert(con->Iex.Const.con->tag == Ico_U32);
      /* Marshal args, do the call. */
      doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args );
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,
                                    X86RMI_Imm(con->Iex.Const.con->Ico.U32),
                                    hregX86_EAX()));
      return Xcc_NZ;
   }

   /* Cmp*32*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpNE32)) {
      HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32:  case Iop_CasCmpEQ32: return Xcc_Z;
         case Iop_CmpNE32:  case Iop_CasCmpNE32: return Xcc_NZ;
         case Iop_CmpLT32S: return Xcc_L;
         case Iop_CmpLT32U: return Xcc_B;
         case Iop_CmpLE32S: return Xcc_LE;
         case Iop_CmpLE32U: return Xcc_BE;
         default: vpanic("iselCondCode(x86): CmpXX32");
      }
   }

   /* CmpNE64 / CmpEQ64 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpEQ64)) {
      HReg hi1, hi2, lo1, lo2;
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
      iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
      addInstr(env, mk_iMOVsd_RR(hi1, tHi));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
      addInstr(env, mk_iMOVsd_RR(lo1, tLo));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
      switch (e->Iex.Binop.op) {
         case Iop_CmpNE64: return Xcc_NZ;
         case Iop_CmpEQ64: return Xcc_Z;
         default: vpanic("iselCondCode(x86): CmpXX64");
      }
   }

   ppIRExpr(e);
   vpanic("iselCondCode");
}

/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64 bit)                ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller.  */

static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   iselInt64Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt32);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt32);
   vassert(hregIsVirtual(*rLo));
}
1929 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1932 HWord fn = 0; /* helper fn for most SIMD64 stuff */
1934 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1936 /* 64-bit literal */
1937 if (e->tag == Iex_Const) {
1938 ULong w64 = e->Iex.Const.con->Ico.U64;
1939 UInt wHi = toUInt(w64 >> 32);
1940 UInt wLo = toUInt(w64);
1941 HReg tLo = newVRegI(env);
1942 HReg tHi = newVRegI(env);
1943 vassert(e->Iex.Const.con->tag == Ico_U64);
1945 /* Save a precious Int register in this special case. */
1946 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1950 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
1951 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1958 /* read 64-bit IRTemp */
1959 if (e->tag == Iex_RdTmp) {
1960 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1965 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1967 X86AMode *am0, *am4;
1968 vassert(e->Iex.Load.ty == Ity_I64);
1969 tLo = newVRegI(env);
1970 tHi = newVRegI(env);
1971 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
1972 am4 = advance4(am0);
1973 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
1974 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1981 if (e->tag == Iex_Get) {
1982 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
1983 X86AMode* am4 = advance4(am);
1984 HReg tLo = newVRegI(env);
1985 HReg tHi = newVRegI(env);
1986 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
1987 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1994 if (e->tag == Iex_GetI) {
1996 = genGuestArrayOffset( env, e->Iex.GetI.descr,
1997 e->Iex.GetI.ix, e->Iex.GetI.bias );
1998 X86AMode* am4 = advance4(am);
1999 HReg tLo = newVRegI(env);
2000 HReg tHi = newVRegI(env);
2001 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2002 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));

   /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */
   if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) {
      X86RM* r8;
      HReg e0Lo, e0Hi;
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
      r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
      addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
      addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
      addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
      addInstr(env, X86Instr_Test32(0xFF, r8));
      addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi));
      addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo));
      add_to_esp(env, 4);
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */
   if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) {
      X86RM* r8;
      HReg e0Lo, e0Hi;
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX);
      r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
      addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
      addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
      addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
      addInstr(env, X86Instr_Test32(0xFF, r8));
      addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi));
      addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo));
      add_to_esp(env, 4);
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* 64-bit Mux0X: Mux0X(g, expr, expr) */
   if (e->tag == Iex_Mux0X) {
      X86RM* r8;
      HReg e0Lo, e0Hi, eXLo, eXHi;
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
      iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
      addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
      addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
      r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
      addInstr(env, X86Instr_Test32(0xFF, r8));
      /* This assumes the first cmov32 doesn't trash the condition
         codes, so they are still available for the second cmov32 */
      addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
      addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }
   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         /* 32 x 32 -> 64 multiply */
         case Iop_MullU32:
         case Iop_MullS32: {
            /* get one operand into %eax, and the other into a R/M.
               Need to make an educated guess about which is better in
               which position. */
            HReg   tLo    = newVRegI(env);
            HReg   tHi    = newVRegI(env);
            Bool   syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
            X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
            HReg   rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
            addInstr(env, X86Instr_MulL(syned, rmLeft));
            /* Result is now in EDX:EAX.  Tell the caller. */
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
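         /* (Worked example for the case above: for MullU32 with
            arg1 = 3 and arg2 = 0x80000000, MULL leaves the full
            product 0x1_8000_0000 in EDX:EAX, i.e. tHi = 1 and
            tLo = 0x80000000.) */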
         /* 64 x 32 -> (32(rem),32(div)) division */
         case Iop_DivModU64to32:
         case Iop_DivModS64to32: {
            /* Get the 64-bit operand into edx:eax, and the other
               into an R/M. */
            HReg sHi, sLo;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
            X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
            addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
            addInstr(env, X86Instr_Div(syned, rmRight));
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
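         /* (Note on the case above: after DIV the quotient is in %eax
            and the remainder in %edx, so the I64 handed back is
            (rem << 32) | quotient, matching the DivMod*64to32
            semantics.) */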
         /* Or64/And64/Xor64 */
         case Iop_Or64:
         case Iop_And64:
         case Iop_Xor64: {
            HReg xLo, xHi, yLo, yHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
                          : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
                          : Xalu_XOR;
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(xHi, tHi));
            addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
            addInstr(env, mk_iMOVsd_RR(xLo, tLo));
            addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_Add64:
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               /* special case Add64(e, const) */
               ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
               UInt  wHi = toUInt(w64 >> 32);
               UInt  wLo = toUInt(w64);
               HReg  tLo = newVRegI(env);
               HReg  tHi = newVRegI(env);
               HReg  xLo, xHi;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
               iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
               addInstr(env, mk_iMOVsd_RR(xHi, tHi));
               addInstr(env, mk_iMOVsd_RR(xLo, tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
               *rHi = tHi;
               *rLo = tLo;
               return;
            }
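            /* (Example: adding constant 1 to 0x00000000FFFFFFFF makes
               the ADD of wLo produce tLo = 0 with the carry flag set,
               and the ADC of wHi = 0 then propagates that carry into
               tHi, giving 0x0000000100000000.) */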
            /* else fall through to the generic case */
         case Iop_Sub64: {
            HReg xLo, xHi, yLo, yHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(xHi, tHi));
            addInstr(env, mk_iMOVsd_RR(xLo, tLo));
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            if (e->Iex.Binop.op==Iop_Add64) {
               addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
            } else {
               addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
               addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
            }
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         /* 32HLto64(e1,e2) */
         case Iop_32HLto64:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;
         /* 64-bit shifts */
         case Iop_Shl64: {
            /* We use the same ingenious scheme as gcc.  Put the value
               to be shifted into %hi:%lo, and the shift amount into
               %cl.  Then (dsts on right, a la ATT syntax):

               shldl %cl, %lo, %hi   -- make %hi be right for the
                                     -- shift amt %cl % 32
               shll  %cl, %lo        -- make %lo be right for the
                                     -- shift amt %cl % 32

               Now, if (shift amount % 64) is in the range 32 .. 63,
               we have to do a fixup, which puts the result low half
               into the result high half, and zeroes the low half:

               testl $32, %ecx

               cmovnz %lo, %hi
               movl $0, %tmp         -- sigh; need yet another reg
               cmovnz %tmp, %lo
            */
            HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
            tLo = newVRegI(env);
            tHi = newVRegI(env);
            tTemp = newVRegI(env);
            rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
            /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
               and those regs are legitimately modifiable. */
            addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
            addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
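         /* (Worked example for the case above: for a shift amount of
            40, %cl % 32 == 8, so the shldl/shll pair leaves
            tHi = (sHi << 8) | (sLo >> 24) and tLo = sLo << 8.  Since
            bit 5 of the amount is set, the fixup then moves tLo into
            tHi and zeroes tLo, giving the correct 64-bit result
            (sLo << 8) : 0.) */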
         case Iop_Shr64: {
            /* We use the same ingenious scheme as gcc.  Put the value
               to be shifted into %hi:%lo, and the shift amount into
               %cl.  Then:

               shrdl %cl, %hi, %lo   -- make %lo be right for the
                                     -- shift amt %cl % 32
               shrl  %cl, %hi        -- make %hi be right for the
                                     -- shift amt %cl % 32

               Now, if (shift amount % 64) is in the range 32 .. 63,
               we have to do a fixup, which puts the result high half
               into the result low half, and zeroes the high half:

               testl $32, %ecx

               cmovnz %hi, %lo
               movl $0, %tmp         -- sigh; need yet another reg
               cmovnz %tmp, %hi
            */
            HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
            tLo = newVRegI(env);
            tHi = newVRegI(env);
            tTemp = newVRegI(env);
            rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
            /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
               and those regs are legitimately modifiable. */
            addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
            addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
            addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
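         /* (Worked example for the case above: for a shift amount of
            40, the shrdl/shrl pair leaves tLo = (sHi << 24) | (sLo >> 8)
            and tHi = sHi >> 8; the fixup then moves tHi into tLo and
            zeroes tHi, giving 0 : (sHi >> 8) as required.) */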
         /* Sigh, this is an almost exact copy of the F64 -> I32/I16
            case.  Unfortunately I see no easy way to avoid the
            duplication. */
         case Iop_F64toI64S: {
            HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);

            /* Used several times ... */
            /* Careful ... this sharing is only safe because
               zero_esp/four_esp do not hold any registers which the
               register allocator could attempt to swizzle later. */
            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
            X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());

            /* rf now holds the value to be converted; arg1 is the
               rounding mode, encoded as per the IRRoundingMode enum.
               The first thing to do is set the FPU's rounding mode
               accordingly. */

            /* Create a space for the format conversion. */
            /* subl $8, %esp */
            sub_from_esp(env, 8);

            /* Set host rounding mode */
            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

            /* gistll %rf, 0(%esp) */
            addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));

            /* movl 0(%esp), %dstLo */
            /* movl 4(%esp), %dstHi */
            addInstr(env, X86Instr_Alu32R(
                             Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
            addInstr(env, X86Instr_Alu32R(
                             Xalu_MOV, X86RMI_Mem(four_esp), tHi));

            /* Restore default FPU rounding. */
            set_FPU_rounding_default( env );

            /* addl $8, %esp */
            add_to_esp(env, 8);

            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; goto binnish;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; goto binnish;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; goto binnish;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; goto binnish;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;

         case Iop_QNarrow32Sx2:
            fn = (HWord)h_generic_calc_QNarrow32Sx2; goto binnish;
         case Iop_QNarrow16Sx4:
            fn = (HWord)h_generic_calc_QNarrow16Sx4; goto binnish;
         case Iop_QNarrow16Ux4:
            fn = (HWord)h_generic_calc_QNarrow16Ux4; goto binnish;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; goto binnish;

         binnish: {
            /* Note: the following assumes all helpers are of
               signature
                  ULong fn ( ULong, ULong ), and they are
               not marked as regparm functions.
            */
            HReg xLo, xHi, yLo, yHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
            addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
            add_to_esp(env, 4*4);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
         shifty: {
            /* Note: the following assumes all helpers are of
               signature
                  ULong fn ( ULong, UInt ), and they are
               not marked as regparm functions.
            */
            HReg xLo, xHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            addInstr(env, X86Instr_Push(y));
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
            add_to_esp(env, 3*4);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         default:
            break;
      }
   } /* if (e->tag == Iex_Binop) */
   /* --------- UNARY ops --------- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         /* 32Sto64(e) */
         case Iop_32Sto64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,tHi));
            addInstr(env, mk_iMOVsd_RR(src,tLo));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 32Uto64(e) */
         case Iop_32Uto64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 16Uto64(e) */
         case Iop_16Uto64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                          X86RMI_Imm(0xFFFF), tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         /* V128{HI}to64 */
         case Iop_V128HIto64:
         case Iop_V128to64: {
            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            X86AMode* esp0  = X86AMode_IR(0,     hregX86_ESP());
            X86AMode* espLO = X86AMode_IR(off,   hregX86_ESP());
            X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
            sub_from_esp(env, 16);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
            addInstr(env, X86Instr_Alu32R( Xalu_MOV,
                                           X86RMI_Mem(espLO), tLo ));
            addInstr(env, X86Instr_Alu32R( Xalu_MOV,
                                           X86RMI_Mem(espHI), tHi ));
            add_to_esp(env, 16);
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         /* could do better than this, but for now ... */
         case Iop_1Sto64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Set32(cond,tLo));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
            addInstr(env, mk_iMOVsd_RR(tLo, tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
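         /* (Note on the case above: the SHL/SAR pair by 31 smears the
            low bit of the Set32 result across the whole word, turning
            0/1 into 0x00000000/0xFFFFFFFF before both halves of the
            result are copied from it.) */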
         case Iop_Not64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg sHi, sLo;
            iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
            addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
            addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         /* Left64(e) */
         case Iop_Left64: {
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            HReg yLo, yHi;
            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
            /* tLo = 0 - yLo, and set carry */
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
            /* tHi = 0 - yHi - carry */
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
            addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
               back in, so as to give the final result
               tHi:tLo = arg | -arg. */
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
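         /* (Example: for arg = 0x0000000000001000, -arg is
            0xFFFFFFFFFFFFF000, so arg | -arg = 0xFFFFFFFFFFFFF000:
            every bit from the lowest set bit upwards becomes 1, which
            is exactly the Left64 semantics.) */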
         /* --- patterns rooted at: CmpwNEZ64 --- */

         case Iop_CmpwNEZ64: {

            DECLARE_PATTERN(p_CmpwNEZ64_Or64);
            DEFINE_PATTERN(p_CmpwNEZ64_Or64,
                           unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
            if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
               /* CmpwNEZ64(Or64(x,y)) */
               HReg xHi,xLo,yHi,yLo;
               HReg xBoth  = newVRegI(env);
               HReg merged = newVRegI(env);
               HReg tmp2   = newVRegI(env);

               iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
               addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
               addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                             X86RMI_Reg(xLo),xBoth));

               iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
               addInstr(env, mk_iMOVsd_RR(yHi,merged));
               addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                             X86RMI_Reg(yLo),merged));
               addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                             X86RMI_Reg(xBoth),merged));

               /* tmp2 = (merged | -merged) >>s 31 */
               addInstr(env, mk_iMOVsd_RR(merged,tmp2));
               addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
               addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                             X86RMI_Reg(merged), tmp2));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
               *rHi = tmp2;
               *rLo = tmp2;
               return;
            } else {
               /* CmpwNEZ64(e) */
               HReg srcLo, srcHi;
               HReg tmp1 = newVRegI(env);
               HReg tmp2 = newVRegI(env);
               /* srcHi:srcLo = arg */
               iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
               /* tmp1 = srcHi | srcLo */
               addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
               addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                             X86RMI_Reg(srcLo), tmp1));
               /* tmp2 = (tmp1 | -tmp1) >>s 31 */
               addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
               addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
               addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                             X86RMI_Reg(tmp1), tmp2));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
               *rHi = tmp2;
               *rLo = tmp2;
               return;
            }
         }
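         /* (Note on the case above: tmp1 is zero iff the original
            64-bit value is zero, and (tmp1 | -tmp1) has its top bit
            set iff tmp1 != 0; the arithmetic right shift by 31 then
            yields 0x00000000 or 0xFFFFFFFF, which is duplicated into
            both result halves.) */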
         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            HReg rf  = iselDblExpr(env, e->Iex.Unop.arg);
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
            X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
            /* paranoia */
            set_FPU_rounding_default(env);
            /* subl $8, %esp */
            sub_from_esp(env, 8);
            /* gstD %rf, 0(%esp) */
            addInstr(env,
                     X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
            /* movl 0(%esp), %tLo */
            addInstr(env,
                     X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
            /* movl 4(%esp), %tHi */
            addInstr(env,
                     X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
            /* addl $8, %esp */
            add_to_esp(env, 8);
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
         case Iop_CmpNEZ32x2:
            fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
         case Iop_CmpNEZ16x4:
            fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
         case Iop_CmpNEZ8x8:
            fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
         unish: {
            /* Note: the following assumes all helpers are of
               signature
                  ULong fn ( ULong ), and they are
               not marked as regparm functions.
            */
            HReg xLo, xHi;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
            add_to_esp(env, 2*4);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         default:
            break;
      }
   } /* if (e->tag == Iex_Unop) */

   /* --------- CCALL --------- */
   if (e->tag == Iex_CCall) {
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);

      /* Marshal args, do the call, clear stack. */
      doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );

      addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
      addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   ppIRExpr(e);
   vpanic("iselInt64Expr");
}
/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)         ---*/
/*---------------------------------------------------------*/

/* Nothing interesting here; really just wrappers for
   64-bit stuff. */

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      X86AMode* am;
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
      return res;
   }

   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_F64toF32) {
      /* Although the result is still held in a standard FPU register,
         we need to round it to reflect the loss of accuracy/range
         entailed in casting it to a 32-bit float. */
      HReg dst = newVRegF(env);
      HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, X86Instr_Fp64to32(src,dst));
      set_FPU_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
                                  hregX86_EBP() );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
      return res;
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
      /* Given an I32, produce an IEEE754 float with the same bit
         pattern. */
      HReg    dst = newVRegF(env);
      X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
      /* paranoia */
      addInstr(env, X86Instr_Push(rmi));
      addInstr(env, X86Instr_FpLdSt(
                       True/*load*/, 4, dst,
                       X86AMode_IR(0, hregX86_ESP())));
      add_to_esp(env, 4);
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
      HReg rf  = iselFltExpr(env, e->Iex.Binop.arg2);
      HReg dst = newVRegF(env);

      /* rf now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      /* grndint %rf, %dst */
      addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));

      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}
/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */

/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:

    Type                  S (1 bit)   E (11 bits)   F (52 bits)
    ----                  ---------   -----------   -----------
    signalling NaN        u           2047 (max)    .0uuuuu---u
                                                    (with at least
                                                     one 1 bit)
    quiet NaN             u           2047 (max)    .1uuuuu---u

    negative infinity     1           2047 (max)    .000000---0

    positive infinity     0           2047 (max)    .000000---0

    negative zero         1           0             .000000---0

    positive zero         0           0             .000000---0
*/

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      union { UInt u32x2[2]; ULong u64; Double f64; } u;
      HReg freg = newVRegF(env);
      vassert(sizeof(u) == 8);
      vassert(sizeof(u.u64) == 8);
      vassert(sizeof(u.f64) == 8);
      vassert(sizeof(u.u32x2) == 8);

      if (e->Iex.Const.con->tag == Ico_F64) {
         u.f64 = e->Iex.Const.con->Ico.F64;
      }
      else if (e->Iex.Const.con->tag == Ico_F64i) {
         u.u64 = e->Iex.Const.con->Ico.F64i;
      }
      else
         vpanic("iselDblExpr(x86): const");

      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
                                    X86AMode_IR(0, hregX86_ESP())));
      add_to_esp(env, 8);
      return freg;
   }
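   /* (Note on the case above: the two halves of the 64-bit image are
      pushed most-significant first, so after both pushes the value
      sits little-endian at 0(%esp), from where a single 8-byte FP
      load picks it up; the stack is then repaired.) */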
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      X86AMode* am;
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
      return res;
   }

   if (e->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
                                  hregX86_EBP() );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_GetI) {
      X86AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
              e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Triop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Triop.op) {
         case Iop_AddF64:    fpop = Xfp_ADD; break;
         case Iop_SubF64:    fpop = Xfp_SUB; break;
         case Iop_MulF64:    fpop = Xfp_MUL; break;
         case Iop_DivF64:    fpop = Xfp_DIV; break;
         case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
         case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
         case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
         case Iop_AtanF64:   fpop = Xfp_ATAN; break;
         case Iop_PRemF64:   fpop = Xfp_PREM; break;
         case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
         default: break;
      }
      if (fpop != Xfp_INVALID) {
         HReg res  = newVRegF(env);
         HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
         HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
         if (fpop != Xfp_ADD && fpop != Xfp_SUB
             && fpop != Xfp_MUL && fpop != Xfp_DIV)
            roundToF64(env, res);
         return res;
      }
   }
   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
      HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
      HReg dst = newVRegF(env);

      /* rf now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      /* grndint %rf, %dst */
      addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));

      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
      HReg dst = newVRegF(env);
      HReg rHi, rLo;
      iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));

      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      addInstr(env, X86Instr_FpLdStI(
                       True/*load*/, 8, dst,
                       X86AMode_IR(0, hregX86_ESP())));

      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

      add_to_esp(env, 8);
      return dst;
   }
   if (e->tag == Iex_Binop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Binop.op) {
         case Iop_SinF64:  fpop = Xfp_SIN; break;
         case Iop_CosF64:  fpop = Xfp_COS; break;
         case Iop_TanF64:  fpop = Xfp_TAN; break;
         case Iop_2xm1F64: fpop = Xfp_2XM1; break;
         case Iop_SqrtF64: fpop = Xfp_SQRT; break;
         default: break;
      }
      if (fpop != Xfp_INVALID) {
         HReg res = newVRegF(env);
         HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_FpUnary(fpop,src,res));
         if (fpop != Xfp_SQRT
             && fpop != Xfp_NEG && fpop != Xfp_ABS)
            roundToF64(env, res);
         return res;
      }
   }

   if (e->tag == Iex_Unop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Unop.op) {
         case Iop_NegF64: fpop = Xfp_NEG; break;
         case Iop_AbsF64: fpop = Xfp_ABS; break;
         default: break;
      }
      if (fpop != Xfp_INVALID) {
         HReg res = newVRegF(env);
         HReg src = iselDblExpr(env, e->Iex.Unop.arg);
         addInstr(env, X86Instr_FpUnary(fpop,src,res));
         if (fpop != Xfp_NEG && fpop != Xfp_ABS)
            roundToF64(env, res);
         return res;
      }
   }
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_I32StoF64: {
            HReg dst = newVRegF(env);
            HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
            set_FPU_rounding_default(env);
            addInstr(env, X86Instr_FpLdStI(
                             True/*load*/, 4, dst,
                             X86AMode_IR(0, hregX86_ESP())));
            add_to_esp(env, 4);
            return dst;
         }
         case Iop_ReinterpI64asF64: {
            /* Given an I64, produce an IEEE754 double with the same
               bit pattern. */
            HReg dst = newVRegF(env);
            HReg rHi, rLo;
            iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
            /* paranoia */
            set_FPU_rounding_default(env);
            addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
            addInstr(env, X86Instr_FpLdSt(
                             True/*load*/, 8, dst,
                             X86AMode_IR(0, hregX86_ESP())));
            add_to_esp(env, 8);
            return dst;
         }
         case Iop_F32toF64: {
            /* this is a no-op */
            HReg res = iselFltExpr(env, e->Iex.Unop.arg);
            return res;
         }
         default:
            break;
      }
   }
   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_Mux0X) {
      if (ty == Ity_F64
          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
         X86RM* r8  = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
         HReg   rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
         HReg   r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
         HReg   dst = newVRegF(env);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst));
         addInstr(env, X86Instr_Test32(0xFF, r8));
         addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}
/*---------------------------------------------------------*/
/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
/*---------------------------------------------------------*/

static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselVecExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY */
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
{

#  define REQUIRE_SSE1                                    \
      do { if (env->hwcaps == 0/*baseline, no sse*/)      \
              goto vec_fail;                              \
      } while (0)

#  define REQUIRE_SSE2                                    \
      do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2))  \
              goto vec_fail;                              \
      } while (0)

#  define SSE2_OR_ABOVE                                   \
      (env->hwcaps & VEX_HWCAPS_X86_SSE2)

   MatchInfo mi;
   Bool      arg1isEReg = False;
   X86SseOp  op = Xsse_INVALID;
   IRType    ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_V128);

   REQUIRE_SSE1;

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Get) {
      HReg dst = newVRegV(env);
      addInstr(env, X86Instr_SseLdSt(
                       True/*load*/,
                       dst,
                       X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
                    )
             );
      return dst;
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg      dst = newVRegV(env);
      X86AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
      return dst;
   }

   if (e->tag == Iex_Const) {
      HReg dst = newVRegV(env);
      vassert(e->Iex.Const.con->tag == Ico_V128);
      addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
      return dst;
   }

   if (e->tag == Iex_Unop) {

      if (SSE2_OR_ABOVE) {
         /* 64UtoV128(LDle:I64(addr)) */
         DECLARE_PATTERN(p_zwiden_load64);
         DEFINE_PATTERN(p_zwiden_load64,
                        unop(Iop_64UtoV128,
                             IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
         if (matchIRExpr(&mi, p_zwiden_load64, e)) {
            X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
            HReg dst = newVRegV(env);
            addInstr(env, X86Instr_SseLdzLO(8, dst, am));
            return dst;
         }
      }
      switch (e->Iex.Unop.op) {

         case Iop_NotV128: {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            return do_sse_Not128(env, arg);
         }

         case Iop_CmpNEZ64x2: {
            /* We can use SSE2 instructions for this. */
            /* Ideally, we want to do a 64Ix2 comparison against zero of
               the operand.  Problem is no such insn exists.  Solution
               therefore is to do a 32Ix4 comparison instead, and bitwise-
               negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
               let the not'd result of this initial comparison be a:b:c:d.
               What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
               pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
               giving the required result.

               The required selection sequence is 2,3,0,1, which
               according to Intel's documentation means the pshufd
               literal value is 0xB1, that is,
               (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
            */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp = newVRegV(env);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
            addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
            tmp = do_sse_Not128(env, tmp);
            addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
            addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
            return dst;
         }
         case Iop_CmpNEZ32x4: {
            /* Sigh, we have to generate lousy code since this has to
               work on SSE1 hosts */
            /* basically, the idea is: for each lane:
                  movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
                  sbbl %r, %r               (now %r = 1Sto32(CF))
                  movl %r, lane
            */
            Int       i;
            X86AMode* am;
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
            HReg      dst  = newVRegV(env);
            HReg      r32  = newVRegI(env);
            sub_from_esp(env, 16);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
            for (i = 0; i < 4; i++) {
               am = X86AMode_IR(i*4, hregX86_ESP());
               addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
               addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
               addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
               addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
            }
            addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
            add_to_esp(env, 16);
            return dst;
         }
         case Iop_CmpNEZ8x16:
         case Iop_CmpNEZ16x8: {
            /* We can use SSE2 instructions for this. */
            HReg arg;
            HReg vec0 = newVRegV(env);
            HReg vec1 = newVRegV(env);
            HReg dst  = newVRegV(env);
            X86SseOp cmpOp
               = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
                                                : Xsse_CMPEQ8;
            REQUIRE_SSE2;
            addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
            addInstr(env, mk_vMOVsd_RR(vec0, vec1));
            addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
            /* defer arg computation to here so as to give CMPEQF as long
               as possible to complete */
            arg = iselVecExpr(env, e->Iex.Unop.arg);
            /* vec0 is all 0s; vec1 is all 1s */
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            /* 16x8 or 8x16 comparison == */
            addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
            /* invert result */
            addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
            return dst;
         }
         case Iop_Recip32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
         case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
         case Iop_Sqrt32Fx4:  op = Xsse_SQRTF;  goto do_32Fx4_unary;
         do_32Fx4_unary:
         {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
            return dst;
         }

         case Iop_Recip64Fx2: op = Xsse_RCPF;   goto do_64Fx2_unary;
         case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
         case Iop_Sqrt64Fx2:  op = Xsse_SQRTF;  goto do_64Fx2_unary;
         do_64Fx2_unary:
         {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
            return dst;
         }

         case Iop_Recip32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
         case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
         case Iop_Sqrt32F0x4:  op = Xsse_SQRTF;  goto do_32F0x4_unary;
         do_32F0x4_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper 3/4 of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper 3/4 is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
            return dst;
         }

         case Iop_Recip64F0x2: op = Xsse_RCPF;   goto do_64F0x2_unary;
         case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
         case Iop_Sqrt64F0x2:  op = Xsse_SQRTF;  goto do_64F0x2_unary;
         do_64F0x2_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper half of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper half is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
            return dst;
         }
         case Iop_32UtoV128: {
            HReg      dst  = newVRegV(env);
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(rmi));
            addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
            add_to_esp(env, 4);
            return dst;
         }

         case Iop_64UtoV128: {
            HReg      rHi, rLo;
            HReg      dst  = newVRegV(env);
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
            addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
            add_to_esp(env, 8);
            return dst;
         }

         default:
            break;
      } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         case Iop_SetV128lo32: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            sub_from_esp(env, 16);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
            addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
            add_to_esp(env, 16);
            return dst;
         }

         case Iop_SetV128lo64: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcIhi, srcIlo;
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            X86AMode* esp4 = advance4(esp0);
            iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
            sub_from_esp(env, 16);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
            addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
            add_to_esp(env, 16);
            return dst;
         }

         case Iop_64HLtoV128: {
            HReg r3, r2, r1, r0;
            X86AMode* esp0  = X86AMode_IR(0, hregX86_ESP());
            X86AMode* esp4  = advance4(esp0);
            X86AMode* esp8  = advance4(esp4);
            X86AMode* esp12 = advance4(esp8);
            HReg dst = newVRegV(env);
            /* do this via the stack (easy, convenient, etc) */
            sub_from_esp(env, 16);
            /* Do the less significant 64 bits */
            iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
            /* Do the more significant 64 bits */
            iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
            /* Fetch result back from stack. */
            addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
            add_to_esp(env, 16);
            return dst;
         }
         case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
         case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
         case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
         case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
         case Iop_Add32Fx4:   op = Xsse_ADDF;   goto do_32Fx4;
         case Iop_Div32Fx4:   op = Xsse_DIVF;   goto do_32Fx4;
         case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
         case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
         case Iop_Mul32Fx4:   op = Xsse_MULF;   goto do_32Fx4;
         case Iop_Sub32Fx4:   op = Xsse_SUBF;   goto do_32Fx4;
         do_32Fx4:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
         case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
         case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
         case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
         case Iop_Add64Fx2:   op = Xsse_ADDF;   goto do_64Fx2;
         case Iop_Div64Fx2:   op = Xsse_DIVF;   goto do_64Fx2;
         case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
         case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
         case Iop_Mul64Fx2:   op = Xsse_MULF;   goto do_64Fx2;
         case Iop_Sub64Fx2:   op = Xsse_SUBF;   goto do_64Fx2;
         do_64Fx2:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
         case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
         case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
         case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
         case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
         case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
         case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
         case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
         case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
         case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
         do_32F0x4: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
         case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
         case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
         case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
         case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
         case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
         case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
         case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
         case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
         case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
         do_64F0x2: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
            return dst;
         }
         case Iop_QNarrow32Sx4:
            op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrow16Sx8:
            op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrow16Ux8:
            op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveHI8x16:
            op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI16x8:
            op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI32x4:
            op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI64x2:
            op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveLO8x16:
            op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO16x8:
            op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO32x4:
            op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO64x2:
            op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
         case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
         case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
         case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
         case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
         case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
         case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
         case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
         case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
         case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
         case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
         case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
         case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
         case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
         case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
         case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
         case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
         case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
         case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
         case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
         case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
         case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
         case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
         case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
         case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
         case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
         case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
         case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
         case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
         case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
         case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
         case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
         case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
         case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
         do_SseReRg: {
            HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
               REQUIRE_SSE2;
            if (arg1isEReg) {
               addInstr(env, mk_vMOVsd_RR(arg2, dst));
               addInstr(env, X86Instr_SseReRg(op, arg1, dst));
            } else {
               addInstr(env, mk_vMOVsd_RR(arg1, dst));
               addInstr(env, X86Instr_SseReRg(op, arg2, dst));
            }
            return dst;
         }
         case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
         case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
         case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
         case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
         case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
         case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
         case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
         case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
         do_SseShift: {
            HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
            X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            HReg      ereg = newVRegV(env);
            HReg      dst  = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
            addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
            addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
            addInstr(env, X86Instr_Push(rmi));
            addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
            addInstr(env, mk_vMOVsd_RR(greg, dst));
            addInstr(env, X86Instr_SseReRg(op, ereg, dst));
            add_to_esp(env, 16);
            return dst;
         }
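         /* (Note on the case above: the three zero pushes plus the
            push of the shift amount build a 16-byte value on the
            stack whose low 64 bits hold the zero-extended amount;
            loading that into ereg gives the count operand in the form
            the SSE shift instructions expect.) */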
         default:
            break;
      } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Mux0X) {
      X86RM* r8  = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
      HReg   rX  = iselVecExpr(env, e->Iex.Mux0X.exprX);
      HReg   r0  = iselVecExpr(env, e->Iex.Mux0X.expr0);
      HReg   dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(rX,dst));
      addInstr(env, X86Instr_Test32(0xFF, r8));
      addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
      return dst;
   }

   vec_fail:
   vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");

#  undef REQUIRE_SSE1
#  undef REQUIRE_SSE2
#  undef SSE2_OR_ABOVE
}
/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

      /* --------- STORE --------- */
      case Ist_Store: {
         IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
         IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
         IREndness end = stmt->Ist.Store.end;

         if (tya != Ity_I32 || end != Iend_LE)
            goto stmt_fail;

         if (tyd == Ity_I32) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
            return;
         }
         if (tyd == Ity_I8 || tyd == Ity_I16) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
                                          r, am ));
            return;
         }
         if (tyd == Ity_F64) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselDblExpr(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
            return;
         }
         if (tyd == Ity_F32) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselFltExpr(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
            return;
         }
         if (tyd == Ity_I64) {
            HReg vHi, vLo, rA;
            iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
            rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, X86Instr_Alu32M(
                             Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
            addInstr(env, X86Instr_Alu32M(
                             Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
            return;
         }
         if (tyd == Ity_V128) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselVecExpr(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
            return;
         }
         break;
      }
      /* --------- PUT --------- */
      case Ist_Put: {
         IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
         if (ty == Ity_I32) {
            /* We're going to write to memory, so compute the RHS into an
               X86RI. */
            X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
            addInstr(env,
                     X86Instr_Alu32M(
                        Xalu_MOV,
                        ri,
                        X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
                     ));
            return;
         }
         if (ty == Ity_I8 || ty == Ity_I16) {
            HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
            addInstr(env, X86Instr_Store(
                             toUChar(ty==Ity_I8 ? 1 : 2),
                             r,
                             X86AMode_IR(stmt->Ist.Put.offset,
                                         hregX86_EBP())));
            return;
         }
         if (ty == Ity_I64) {
            HReg vHi, vLo;
            X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            X86AMode* am4 = advance4(am);
            iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
            return;
         }
         if (ty == Ity_V128) {
            HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
            X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
            return;
         }
         if (ty == Ity_F32) {
            HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
            X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            set_FPU_rounding_default(env); /* paranoia */
            addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
            return;
         }
         if (ty == Ity_F64) {
            HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
            X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            set_FPU_rounding_default(env); /* paranoia */
            addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
            return;
         }
         break;
      }
      /* --------- Indexed PUT --------- */
      case Ist_PutI: {
         X86AMode* am
            = genGuestArrayOffset(
                 env, stmt->Ist.PutI.descr,
                 stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );

         IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
         if (ty == Ity_F64) {
            HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
            addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
            return;
         }
         if (ty == Ity_I8) {
            HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
            addInstr(env, X86Instr_Store( 1, r, am ));
            return;
         }
         if (ty == Ity_I32) {
            HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
            return;
         }
         if (ty == Ity_I64) {
            HReg rHi, rLo;
            X86AMode* am4 = advance4(am);
            iselInt64Expr(&rHi, &rLo, env, stmt->Ist.PutI.data);
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
            return;
         }
         break;
      }
      /* --------- TMP --------- */
      case Ist_WrTmp: {
         IRTemp tmp = stmt->Ist.WrTmp.tmp;
         IRType ty = typeOfIRTemp(env->type_env, tmp);

         /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
            compute it into an AMode and then use LEA.  This usually
            produces fewer instructions, often because (for memcheck
            created IR) we get t = address-expression, (t is later used
            twice) and so doing this naturally turns address-expression
            back into an X86 amode. */
         if (ty == Ity_I32
             && stmt->Ist.WrTmp.data->tag == Iex_Binop
             && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
               /* Hmm, iselIntExpr_AMode wimped out and just computed the
                  value into a register.  Just emit a normal reg-reg move
                  so reg-alloc can coalesce it away in the usual way. */
               HReg src = am->Xam.IR.reg;
               addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
            } else {
               addInstr(env, X86Instr_Lea32(am,dst));
            }
            return;
         }
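         /* (Example: for t = Add32(t1, 16) this resolves to a single
            "leal 16(%r_t1), %r_t" rather than a move followed by an
            add.) */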
         if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
            X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
            return;
         }
         if (ty == Ity_I64) {
            HReg rHi, rLo, dstHi, dstLo;
            iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
            lookupIRTemp64( &dstHi, &dstLo, env, tmp);
            addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
            addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
            return;
         }
         if (ty == Ity_I1) {
            X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, X86Instr_Set32(cond, dst));
            return;
         }
         if (ty == Ity_F64) {
            HReg dst = lookupIRTemp(env, tmp);
            HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
            addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
            return;
         }
         if (ty == Ity_F32) {
            HReg dst = lookupIRTemp(env, tmp);
            HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
            addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
            return;
         }
         if (ty == Ity_V128) {
            HReg dst = lookupIRTemp(env, tmp);
            HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
            addInstr(env, mk_vMOVsd_RR(src,dst));
            return;
         }
         break;
      }
      /* --------- Call to DIRTY helper --------- */
      case Ist_Dirty: {
         IRType   retty;
         IRDirty* d = stmt->Ist.Dirty.details;
         Bool     passBBP = False;

         if (d->nFxState == 0)
            vassert(!d->needsBBP);

         passBBP = toBool(d->nFxState > 0 && d->needsBBP);

         /* Marshal args, do the call, clear stack. */
         doHelperCall( env, passBBP, d->guard, d->cee, d->args );

         /* Now figure out what to do with the returned value, if any. */
         if (d->tmp == IRTemp_INVALID)
            /* No return value.  Nothing to do. */
            return;

         retty = typeOfIRTemp(env->type_env, d->tmp);
         if (retty == Ity_I64) {
            HReg dstHi, dstLo;
            /* The returned value is in %edx:%eax.  Park it in the
               register-pair associated with tmp. */
            lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
            return;
         }
         if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
            /* The returned value is in %eax.  Park it in the register
               associated with tmp. */
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
            return;
         }
         break;
      }
      /* --------- MEM FENCE --------- */
      case Ist_MBE:
         switch (stmt->Ist.MBE.event) {
            case Imbe_Fence:
               addInstr(env, X86Instr_MFence(env->hwcaps));
               return;
            default:
               break;
         }
         break;

      /* --------- ACAS --------- */
      case Ist_CAS:
         if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
            /* "normal" singleton CAS */
            UChar  sz;
            IRCAS* cas = stmt->Ist.CAS.details;
            IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
            /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
            X86AMode* am = iselIntExpr_AMode(env, cas->addr);
            HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
            HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
            HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
            vassert(cas->expdHi == NULL);
            vassert(cas->dataHi == NULL);
            addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
            addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
            switch (ty) {
               case Ity_I32: sz = 4; break;
               case Ity_I16: sz = 2; break;
               case Ity_I8:  sz = 1; break;
               default: goto unhandled_cas;
            }
            addInstr(env, X86Instr_ACAS(am, sz));
            addInstr(env,
                     X86Instr_CMov32(Xcc_NZ,
                                     X86RM_Reg(hregX86_EAX()), rOldLo));
            return;
         } else {
            /* double CAS */
            IRCAS* cas = stmt->Ist.CAS.details;
            IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
            /* only 32-bit allowed in this case */
            /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
            /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
            X86AMode* am = iselIntExpr_AMode(env, cas->addr);
            HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
            HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
            HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
            HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
            HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
            HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
            if (ty != Ity_I32)
               goto unhandled_cas;
            addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
            addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
            addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
            addInstr(env, X86Instr_DACAS(am));
            addInstr(env,
                     X86Instr_CMov32(Xcc_NZ,
                                     X86RM_Reg(hregX86_EDX()), rOldHi));
            addInstr(env,
                     X86Instr_CMov32(Xcc_NZ,
                                     X86RM_Reg(hregX86_EAX()), rOldLo));
            return;
         }
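         /* (Note on the case above: the register marshalling matches
            the cmpxchg8b convention -- expected value in %edx:%eax,
            new value in %ecx:%ebx -- and on failure the two CMov32s
            copy the actual old value, left in %edx:%eax, into the
            rOldHi:rOldLo pair.) */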
         unhandled_cas:
         vpanic("isel_x86: unhandled CAS");

      /* --------- INSTR MARK --------- */
      /* Doesn't generate any executable code ... */
      case Ist_IMark:
         return;

      /* --------- NO-OP --------- */
      /* Fairly self-explanatory, wouldn't you say? */
      case Ist_NoOp:
         return;

      /* --------- EXIT --------- */
      case Ist_Exit: {
         X86RI*      dst;
         X86CondCode cc;
         if (stmt->Ist.Exit.dst->tag != Ico_U32)
            vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value");
         dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
         cc  = iselCondCode(env,stmt->Ist.Exit.guard);
         addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
         return;
      }

      default: break;
   }
   stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}
/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
{
   X86RI* ri;
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- goto {");
      ppIRJumpKind(jk);
      vex_printf("} ");
      ppIRExpr(next);
      vex_printf("\n");
   }
   ri = iselIntExpr_RI(env, next);
   addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri));
}
/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code. */

HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host,
                          VexArchInfo* archinfo_host,
                          VexAbiInfo* vbi/*UNUSED*/ )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;

   /* sanity ... */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->hwcaps = hwcaps_host;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
         case Ity_I64:  hreg   = mkHReg(j++, HRcInt32, True);
                        hregHI = mkHReg(j++, HRcInt32, True); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env,bb->stmts[i]);

   iselNext(env,bb->next,bb->jumpkind);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}
/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/