/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_amd64_defs.h"
/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                        ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and should those values be changed, they
   should be set back to these values before the next entry to
   generated code.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
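
/* Illustrative only, not part of the build: a sketch of how the two
   defaults decode, assuming the standard x87/SSE control word
   layouts.  The helper name is hypothetical. */
#if 0
static void show_default_cw_layouts ( void )
{
   /* x87 %fpucw: bits 0..5 = exception masks (all set: all masked),
      bits 8..9 = precision control (10b: 53-bit mantissa),
      bits 10..11 = rounding control (00b: round to nearest). */
   vassert((DEFAULT_FPUCW & 0x3F) == 0x3F);
   vassert(((DEFAULT_FPUCW >> 8) & 3) == 2);
   vassert(((DEFAULT_FPUCW >> 10) & 3) == 0);
   /* SSE %mxcsr: bits 7..12 = exception masks (all set),
      bits 13..14 = rounding control (00b: round to nearest). */
   vassert((DEFAULT_MXCSR & 0x1F80) == 0x1F80);
   vassert(((DEFAULT_MXCSR >> 13) & 3) == 0);
}
#endif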
/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}
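
/* Illustrative only, not part of the build: these helpers feed the
   ir_match.h pattern machinery used throughout this file.  This
   duplicates the p_8Uto64 pattern actually defined in
   iselIntExpr_R_wrk below. */
#if 0
static void example_pattern_usage ( IRExpr* e )
{
   MatchInfo mi;
   DECLARE_PATTERN(p_8Uto64);
   DEFINE_PATTERN(p_8Uto64,
                  unop(Iop_32Uto64, unop(Iop_8Uto32, bind(0))) );
   if (matchIRExpr(&mi, p_8Uto64, e)) {
      /* e matched 32Uto64(8Uto32(x)); x is now in mi.bindee[0]. */
   }
}
#endif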
/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

     - vregmap   holds the primary register for the IRTemp.
     - vregmapHI is only used for 128-bit integer-typed
          IRTemps.  It holds the identity of a second
          64-bit virtual HReg, which holds the high half
          of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      HInstrArray* code;

      Int          vreg_ctr;

      /* host subarchitecture capabilities */
      UInt         hwcaps;
   }
   ISelEnv;
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp128 ( HReg* vrHI, HReg* vrLO, 
                              ISelEnv* env, IRTemp tmp )
{
   vassert(tmp < env->n_vregmap);
   vassert(env->vregmapHI[tmp] != INVALID_HREG);
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}
static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

//.. static HReg newVRegF ( ISelEnv* env )
//.. {
//..    HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
//..    env->vreg_ctr++;
//..    return reg;
//.. }

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}
/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual. */

static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( HReg* rHi, HReg* rLo, 
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk   ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode       ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk    ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr        ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk    ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr        ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk    ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr        ( ISelEnv* env, IRExpr* e );
/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return 
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || am->Aam.IR.reg == hregAMD64_RBP()) );
      case Aam_IRRS:
         return 
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}
/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
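
/* Examples: 0x000000007FFFFFFFULL fits (top 33 bits all zero), and
   0xFFFFFFFF80000000ULL fits (top 33 bits all one), but
   0x0000000080000000ULL does not (bit 31 set, bits 32..63 clear). */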
/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

/* Is this a 32-bit zero expression? */

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}
/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}
/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env, 
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n), 
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env, 
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n), 
                              hregAMD64_RSP()));
}
/* Push 64-bit constants on the stack. */

static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((simm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}
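
/* For example, push_uimm64(env, 0xFFFFFFFF80000000ULL) takes the
   single-insn path, since that constant is the sign extension of its
   lower 32 bits, whereas 0x180000000ULL is not and goes via a
   temporary register. */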
//.. /* Given an amode, return one which references 4 bytes further
//..    along. */
//.. 
//.. static X86AMode* advance4 ( X86AMode* am )
//.. {
//..    X86AMode* am4 = dopyX86AMode(am);
//..    switch (am4->tag) {
//..       case Xam_IRRS:
//..          am4->Xam.IRRS.imm += 4; break;
//..       case Xam_IR:
//..          am4->Xam.IR.imm += 4; break;
//..       default:
//..          vpanic("advance4(x86,host)");
//..    }
//..    return am4;
//.. }
//.. 
//.. 
//.. /* Push an arg onto the host stack, in preparation for a call to a
//..    helper function of some kind.  Returns the number of 32-bit words
//..    pushed. */
//.. 
//.. static Int pushArg ( ISelEnv* env, IRExpr* arg )
//.. {
//..    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
//..    if (arg_ty == Ity_I32) {
//..       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
//..       return 1;
//..    } else 
//..    if (arg_ty == Ity_I64) {
//..       HReg rHi, rLo;
//..       iselInt64Expr(&rHi, &rLo, env, arg);
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
//..       return 2;
//..    }
//..    ppIRExpr(arg);
//..    vpanic("pushArg(x86): can't handle arg of this type");
//.. }
/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop 
       && e->Iex.Unop.op == Iop_32Uto64 
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovZLQ(src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}
/* Do a complete function call.  guard is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional. */

static
void doHelperCall ( ISelEnv* env, 
                    Bool passBBP, 
                    IRExpr* guard, IRCallee* cee, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   Int           n_args, i, argreg;

   /* Marshal args for a call and do the call.

      If passBBP is True, %rbp (the baseblock pointer) is to be passed
      as the first arg.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (6 < n_args + (passBBP ? 1 : 0))
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;
   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      appear. */

   if (guard) {
      if (guard->tag == Iex_Const 
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   argreg = 0;
   if (passBBP) {
      fastinstrs[argreg] = mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]);
      argreg++;
   }

   for (i = 0; i < n_args; i++) {
      vassert(argreg < 6);
      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      fastinstrs[argreg] 
         = iselIntExpr_single_instruction( env, argregs[argreg], args[i] );
      if (fastinstrs[argreg] == NULL)
         goto slowscheme;
      argreg++;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   vassert(argreg <= 6);
   for (i = 0; i < argreg; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
#if 0
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#endif
   argreg = 0;

   if (passBBP) {
      /* This is pretty stupid; better to move directly to rdi
         after the rest of the args are done. */
      tmpregs[argreg] = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
      argreg++;
   }

   for (i = 0; i < n_args; i++) {
      vassert(argreg < 6);
      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      tmpregs[argreg] = iselIntExpr_R(env, args[i]);
      argreg++;
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const 
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < argreg; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }

   /* Finally, the call itself. */
  handle_call:
   addInstr(env, AMD64Instr_Call( 
                    cc,
                    Ptr_to_ULong(cee->addr),
                    n_args + (passBBP ? 1 : 0)
                 )
   );
}
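
/* Worked example (illustrative): for a call such as
      foo(t1, GET:I64(16), 0x42:I64)
   with a NULL or constant-True guard, every arg is claimed by
   iselIntExpr_single_instruction, so the fast scheme computes the
   args directly into %rdi, %rsi and %rdx.  An arg like Add64(t1,t2)
   is outside the safe set (RdTmp/Const/Get), forcing the whole call
   onto the slow scheme. */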
/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr, 
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp  (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env, 
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env, 
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}
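
/* Example (illustrative): for a guest array of 8 elements of 8 bytes
   each, this yields an amode of the form base(%rbp,%tmp,8), where
   %tmp holds (ix+bias) & 7; the masking keeps the access inside the
   array even for a wild index expression. */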
/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR 
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static 
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq  $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has 
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq    $3, %reg
      andq    [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq    $13, %reg
      orq     $DEFAULT_MXCSR, %reg
      pushq   %reg
      ldmxcsr 0(%rsp)
      addq    $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR, 
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, 
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
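
/* In both routines above, the IRRoundingMode value (0 .. 3) lands in
   the control register's rounding-control field: bits 13..14 of
   %mxcsr (hence the shift by 13) and bits 10..11 of %fpucw (hence the
   shift by 10).  ORing in the default word is valid only because both
   defaults have zero rounding bits, as noted above. */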
/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}

/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}
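
/* The Asse_CMPEQ32 trick above exploits the fact that every lane
   compares equal to itself: pcmpeqd %xmm,%xmm yields all-ones
   regardless of the register's prior contents, and ones ^ src is
   exactly ~src. */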
//.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
//..    after most non-simple FPU operations (simple = +, -, *, / and
//..    sqrt).
//..
//..    This could be done a lot more efficiently if needed, by loading
//..    zero and adding it to the value to be rounded (fldz ; faddp?).
//.. */
//.. static void roundToF64 ( ISelEnv* env, HReg reg )
//.. {
//..    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//..    sub_from_esp(env, 8);
//..    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
//..    addInstr(env, X86Instr_FpLdSt(True/*load*/,   8, reg, zero_esp));
//..    add_to_esp(env, 8);
//.. }
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_8Uto64);
   DECLARE_PATTERN(p_1Uto8_64to1);

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }
   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64: 
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64: 
            aluOp = Aalu_AND; break;
         case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64: 
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64: 
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8: 
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8: 
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64: 
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovZLQ(dst,dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 32, dst));
               break;
            default: 
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }
      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrow32Sx2:
            fn = (HWord)h_generic_calc_QNarrow32Sx2; break;
         case Iop_QNarrow16Sx4:
            fn = (HWord)h_generic_calc_QNarrow16Sx4; break;
         case Iop_QNarrow16Ux4:
            fn = (HWord)h_generic_calc_QNarrow16Ux4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature 
               ULong fn ( ULong, ULong ), and they are
            not marked as regparm functions. 
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovZLQ(argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }
      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         /* This generates a truly rotten piece of code.  Just as well
            it doesn't happen very often. */
         HReg src1  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg src1L = newVRegI(env);
         HReg src2  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg src2L = newVRegI(env);
         HReg dst   = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(src1,dst));
         addInstr(env, mk_iMOVsd_RR(src1,src1L));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src1L));
         addInstr(env, mk_iMOVsd_RR(src2,src2L));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src2L));
         addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, AMD64RMI_Reg(src2L), src1L));
         addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax = hregAMD64_RAX();
         HReg rdx = hregAMD64_RDX();
         HReg dst = newVRegI(env);
         Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then 
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovZLQ(rdx,rdx));
         addInstr(env, AMD64Instr_MovZLQ(rax,rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }
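
      /* The sequence above leaves the 32-bit remainder in %rdx and
         the 32-bit quotient in %rax; both are zero-extended, and the
         remainder is shifted up so the result register packs them as
         (rem << 32) | div. */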
      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovZLQ(lo32,lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32  = newVRegI(env);
         HReg b32  = newVRegI(env);
         HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, a32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, b32));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }
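
      /* Widening trick used above: each narrow operand is moved to
         the top of the register by a left shift, then brought back
         down arithmetically (signed variants) or logically (unsigned
         variants), after which a single 64-bit imulq gives the full
         double-length product in the low bits. */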
      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
         /* Mask out irrelevant parts of the result so as to conform
            to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI64S) {
         Int  szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
         addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
         set_SSE_rounding_default(env);
         return dst;
      }
//..       if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
//..          Int  sz  = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
//..          HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
//..          HReg dst = newVRegI(env);
//.. 
//..          /* Used several times ... */
//..          X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//.. 
//..          /* rf now holds the value to be converted, and rrm holds the
//..             rounding mode value, encoded as per the IRRoundingMode
//..             enum.  The first thing to do is set the FPU's rounding
//..             mode accordingly. */
//.. 
//..          /* Create a space for the format conversion. */
//..          /* subl $4, %esp */
//..          sub_from_esp(env, 4);
//.. 
//..          /* Set host rounding mode */
//..          set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
//.. 
//..          /* gistw/l %rf, 0(%esp) */
//..          addInstr(env, X86Instr_FpLdStI(False/*store*/, sz, rf, zero_esp));
//.. 
//..          if (sz == 2) {
//..             /* movzwl 0(%esp), %dst */
//..             addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
//..          } else {
//..             /* movl 0(%esp), %dst */
//..             vassert(sz == 4);
//..             addInstr(env, X86Instr_Alu32R(
//..                              Xalu_MOV, X86RMI_Mem(zero_esp), dst));
//..          }
//.. 
//..          /* Restore default FPU rounding. */
//..          set_FPU_rounding_default( env );
//.. 
//..          /* addl $4, %esp */
//..          add_to_esp(env, 4);
//..          return dst;
//..       }
//.. 
//..       /* C3210 flags following FPU partial remainder (fprem), both
//..          IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
//..       if (e->Iex.Binop.op == Iop_PRemC3210F64
//..           || e->Iex.Binop.op == Iop_PRem1C3210F64) {
//..          HReg junk = newVRegF(env);
//..          HReg dst  = newVRegI(env);
//..          HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
//..          HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
//..          addInstr(env, X86Instr_FpBinary(
//..                           e->Iex.Binop.op==Iop_PRemC3210F64 
//..                              ? Xfp_PREM : Xfp_PREM1,
//..                           srcL,srcR,junk
//..                 ));
//..          /* The previous pseudo-insn will have left the FPU's C3210
//..             flags set correctly.  So bag them. */
//..          addInstr(env, X86Instr_FpStSW_AX());
//..          addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
//..          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
//..          return dst;
//..       }

      break;
   }
   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      /* 32Uto64(8Uto32(expr8)) */
      DEFINE_PATTERN(p_8Uto64,
                     unop(Iop_32Uto64, unop(Iop_8Uto32, bind(0)) ) );
      if (matchIRExpr(&mi,p_8Uto64,e)) {
         IRExpr* expr8 = mi.bindee[0];
         HReg dst = newVRegI(env);
         HReg src = iselIntExpr_R(env, expr8);
         addInstr(env, mk_iMOVsd_RR(src,dst) );
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 56, dst));
         return dst;
      }

      /* 1Uto8(64to1(expr64)) */
      DEFINE_PATTERN( p_1Uto8_64to1,
                      unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
      if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
         IRExpr* expr64 = mi.bindee[0];
         HReg    dst    = newVRegI(env);
         HReg    src    = iselIntExpr_R(env, expr64);
         addInstr(env, mk_iMOVsd_RR(src,dst) );
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm(1), dst));
         return dst;
      }

//..       /* 16Uto32(LDle(expr32)) */
//..       {
//..          DECLARE_PATTERN(p_LDle16_then_16Uto32);
//..          DEFINE_PATTERN(p_LDle16_then_16Uto32,
//..             unop(Iop_16Uto32,IRExpr_LDle(Ity_I16,bind(0))) );
//..          if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
//..             HReg dst = newVRegI(env);
//..             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
//..             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
//..             return dst;
//..          }
//..       }
      switch (e->Iex.Unop.op) {
         case Iop_32Uto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_MovZLQ(src,dst) );
            return dst;
         }
         case Iop_32Sto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            UInt amt = 32;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_128to64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* and abandon rHi */
         }
         case Iop_8Uto16:
         case Iop_8Uto32:
         case Iop_8Uto64:
         case Iop_16Uto64:
         case Iop_16Uto32: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
                                   || e->Iex.Unop.op==Iop_16Uto64 );
            UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(mask), dst));
            return dst;
         }
         case Iop_8Sto16:
         case Iop_8Sto32:
         case Iop_8Sto64:
         case Iop_16Sto32:
         case Iop_16Sto64: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
                                   || e->Iex.Unop.op==Iop_16Sto64 );
            UInt amt     = srcIs16 ? 48 : 56;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32:
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
            return dst;
         }
//..          case Iop_64HIto32: {
//..             HReg rHi, rLo;
//..             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
//..             return rHi; /* and abandon rLo .. poor wee thing :-) */
//..          }
//..          case Iop_64to32: {
//..             HReg rHi, rLo;
//..             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
//..             return rLo; /* similar stupid comment to the above ... */
//..          }
         case Iop_16HIto8:
         case Iop_32HIto16:
         case Iop_64HIto32: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int shift = 0;
            switch (e->Iex.Unop.op) {
               case Iop_16HIto8:  shift = 8;  break;
               case Iop_32HIto16: shift = 16; break;
               case Iop_64HIto32: shift = 32; break;
               default: vassert(0);
            }
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
            return dst;
         }
         case Iop_1Uto64:
         case Iop_1Uto32:
         case Iop_1Uto8: {
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            return dst;
         }
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* could do better than this, but for now ... */
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
         case Iop_Ctz64: {
            /* Count trailing zeroes, implemented by amd64 'bsfq' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
            return dst;
         }
         case Iop_Clz64: {
            /* Count leading zeroes.  Do 'bsrq' to establish the index
               of the highest set bit, and subtract that value from
               63. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, 
                                            AMD64RMI_Imm(63), dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
                                            AMD64RMI_Reg(tmp), dst));
            return dst;
         }

         case Iop_CmpwNEZ64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }

         case Iop_CmpwNEZ32: {
            HReg src = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(pre,src));
            addInstr(env, AMD64Instr_MovZLQ(src,src));
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
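
         /* The neg/or/sar idiom in the two cases above computes
            (src != 0) as a 64-bit mask: (-src | src) has its sign bit
            set iff src is nonzero, and the arithmetic shift by 63
            smears that bit across the whole register. */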
         case Iop_Left8:
         case Iop_Left16:
         case Iop_Left32:
         case Iop_Left64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src, dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
            return dst;
         }

         case Iop_V128to32: {
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
            addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
            return dst;
         }

         /* V128{HI}to64 */
         case Iop_V128HIto64:
         case Iop_V128to64: {
            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp0 = AMD64AMode_IR(0,   hregAMD64_RSP());
            AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
            sub_from_rsp(env, 16);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, 
                                             AMD64RMI_Mem(rspN), dst ));
            add_to_rsp(env, 16);
            return dst;
         }
         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselDblExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
            return dst;
         }

         /* ReinterpF32asI32(e) */
         /* Given an IEEE754 single, produce an I64 with the same bit
            pattern in the lower half. */
         case Iop_ReinterpF32asI32: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselFltExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
            addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
            return dst;
         }

         case Iop_16to8:
         case Iop_32to8:
         case Iop_64to8:
         case Iop_32to16:
         case Iop_64to16:
         case Iop_64to32:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         default: 
            break;
      }
      /* Deal with unary 64-bit SIMD ops. */
      switch (e->Iex.Unop.op) {
         case Iop_CmpNEZ32x2:
            fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
         case Iop_CmpNEZ16x4:
            fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
         case Iop_CmpNEZ8x8:
            fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of
            signature 
               ULong fn ( ULong ), and they are
            not marked as regparm functions. 
         */
         HReg dst = newVRegI(env);
         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
         addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      break;
   }
   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I64) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_MOV, 
                          AMD64RMI_Mem(
                             AMD64AMode_IR(e->Iex.Get.offset,
                                           hregAMD64_RBP())),
                          dst));
         return dst;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_LoadEX(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          False,
                          AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
                          dst));
         return dst;
      }
      break;
   }

   case Iex_GetI: {
      AMD64AMode* am 
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr, 
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg dst = newVRegI(env);
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
         return dst;
      }
      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
         return dst;
      }
      break;
   }
   /* --------- CCALL --------- */
   case Iex_CCall: {
      HReg dst = newVRegI(env);
      vassert(ty == e->Iex.CCall.retty);

      /* be very restrictive for now.  Only 64-bit ints allowed
         for args, and 64 or 32 bits for return type. */
      if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
         goto irreducible;

      /* Marshal args, do the call. */
      doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );

      /* Move to dst, and zero out the top 32 bits if the result type is
         Ity_I32.  Probably overkill, but still .. */
      if (e->Iex.CCall.retty == Ity_I64)
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
      else
         addInstr(env, AMD64Instr_MovZLQ(hregAMD64_RAX(), dst));

      return dst;
   }
   /* --------- LITERAL --------- */
   /* 64/32/16/8-bit literals */
   case Iex_Const:
      if (ty == Ity_I64) {
         HReg r = newVRegI(env);
         addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
         return r;
      } else {
         AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
         HReg      r   = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
         return r;
      }
   /* --------- MULTIPLEX --------- */
   case Iex_Mux0X: {
      if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
         HReg     r8;
         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
         AMD64RM* r0  = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
         HReg     dst = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(rX,dst));
         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
         addInstr(env, AMD64Instr_Test64(0xFF, r8));
         addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst));
         return dst;
      }
      break;
   }
   /* --------- TERNARY OP --------- */
   case Iex_Triop: {
      /* C3210 flags following FPU partial remainder (fprem), both
         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
      if (e->Iex.Triop.op == Iop_PRemC3210F64
          || e->Iex.Triop.op == Iop_PRem1C3210F64) {
         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
         HReg        arg1   = iselDblExpr(env, e->Iex.Triop.arg2);
         HReg        arg2   = iselDblExpr(env, e->Iex.Triop.arg3);
         HReg        dst    = newVRegI(env);
         addInstr(env, AMD64Instr_A87Free(2));

         /* one arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));

         /* other arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));

         switch (e->Iex.Triop.op) {
            case Iop_PRemC3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
               break;
            case Iop_PRem1C3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
               break;
            default:
               vassert(0);
         }

         /* Ignore the result, and instead make off with the FPU's
            C3210 flags (in the status word). */
         addInstr(env, AMD64Instr_A87StSW(m8_rsp));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
         return dst;
      }
      break;
   }

   default: 
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R(amd64): cannot reduce tree");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
{
   AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}
/* DO NOT CALL THIS DIRECTLY ! */
static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;
   DECLARE_PATTERN(p_complex);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
   /*              bind0        bind1  bind2   bind3   */
   DEFINE_PATTERN(p_complex,
      binop( Iop_Add64,
             binop( Iop_Add64, 
                    bind(0), 
                    binop(Iop_Shl64, bind(1), bind(2))
                  ),
             bind(3)
           )
   );
   if (matchIRExpr(&mi, p_complex, e)) {
      IRExpr* expr1  = mi.bindee[0];
      IRExpr* expr2  = mi.bindee[1];
      IRExpr* imm8   = mi.bindee[2];
      IRExpr* simm32 = mi.bindee[3];
      if (imm8->tag == Iex_Const 
          && imm8->Iex.Const.con->tag == Ico_U8
          && imm8->Iex.Const.con->Ico.U8 < 4
          /* imm8 is OK, now check simm32 */
          && simm32->tag == Iex_Const
          && simm32->Iex.Const.con->tag == Ico_U64
          && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
         UInt shift  = imm8->Iex.Const.con->Ico.U8;
         UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
         HReg r1 = iselIntExpr_R(env, expr1);
         HReg r2 = iselIntExpr_R(env, expr2);
         vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
         return AMD64AMode_IRRS(offset, r1, r2, shift);
      }
   }

   /* Add64(expr1, Shl64(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return AMD64AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add64(expr, simm32) */
   if (e->tag == Iex_Binop 
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
       && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return AMD64AMode_IR(
                toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
                r1
             );
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return AMD64AMode_IR(0, r1);
   }
}
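
/* Example (illustrative): Add64(Add64(t1, Shl64(t2, 3:I8)), 40:I64)
   matches p_complex above and folds into the single amode
   40(%t1,%t2,8), so the whole address computation is absorbed by the
   memory operand. */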
/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an AMD64RMI operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
{
   AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* sanity checks ... */
   switch (rmi->tag) {
      case Armi_Imm:
         return rmi;
      case Armi_Reg:
         vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rmi->Armi.Reg.reg));
         return rmi;
      case Armi_Mem:
         vassert(sane_AMode(rmi->Armi.Mem.am));
         return rmi;
      default:
         vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate 64/32/16/8 */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
      }
   }

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                        hregAMD64_RBP()));
   }

   /* special case: 64-bit load from memory */
   if (e->tag == Iex_Load && ty == Ity_I64
       && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return AMD64RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RMI_Reg(r);
   }
}
/* --------------------- RIs --------------------- */

/* Calculate an expression into an AMD64RI operand.  As with
   iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Ari_Imm:
         return ri;
      case Ari_Reg:
         vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->Ari.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown amd64 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            vpanic("iselIntExpr_RI.Iex_Const(amd64)");
      }
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RI_Reg(r);
   }
}
2037 /* --------------------- RMs --------------------- */
2039 /* Similarly, calculate an expression into an AMD64RM operand. As
2040 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2043 static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
2045 AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
/* sanity checks ... */
switch (rm->tag) {
   case Arm_Reg: vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
                 vassert(hregIsVirtual(rm->Arm.Reg.reg));
                 return rm;
   case Arm_Mem: vassert(sane_AMode(rm->Arm.Mem.am));
                 return rm;
   default:      vpanic("iselIntExpr_RM: unknown amd64 RM tag");
}
2060 /* DO NOT CALL THIS DIRECTLY ! */
2061 static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
2063 IRType ty = typeOfIRExpr(env->type_env,e);
2064 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
2066 /* special case: 64-bit GET */
2067 if (e->tag == Iex_Get && ty == Ity_I64) {
return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP()));
2072 /* special case: load from memory */
2074 /* default case: calculate into a register and return that */
2076 HReg r = iselIntExpr_R ( env, e );
2077 return AMD64RM_Reg(r);
2082 /* --------------------- CONDCODE --------------------- */
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond to the expression notionally
   returning 1. */
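/* A hedged sketch of the intended calling pattern (using only names
   that appear elsewhere in this file): a caller typically does

      AMD64CondCode cc = iselCondCode(env, guard);
      addInstr(env, AMD64Instr_Set64(cc, dst));   // or a CMov/Goto

   i.e. the returned condition code is only meaningful with respect
   to the flags set by the instructions this function just emitted. */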
2088 static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
2090 /* Uh, there's nothing we can sanity check here, unfortunately. */
2091 return iselCondCode_wrk(env,e);
2094 /* DO NOT CALL THIS DIRECTLY ! */
2095 static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
MatchInfo mi;
vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
2103 if (e->tag == Iex_RdTmp) {
2104 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
2105 HReg dst = newVRegI(env);
2106 addInstr(env, mk_iMOVsd_RR(r64,dst));
addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
return Acc_NZ;
2111 /* Constant 1:Bit */
2112 if (e->tag == Iex_Const) {
2114 vassert(e->Iex.Const.con->tag == Ico_U1);
2115 vassert(e->Iex.Const.con->Ico.U1 == True
2116 || e->Iex.Const.con->Ico.U1 == False);
HReg r = newVRegI(env);
addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
2119 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
2120 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
2124 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
2125 /* Generate code for the arg, and negate the test condition */
2126 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
2129 /* --- patterns rooted at: 64to1 --- */
2132 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
2133 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, AMD64Instr_Test64(1,reg));
return Acc_NZ;
2138 /* --- patterns rooted at: CmpNEZ8 --- */
2141 if (e->tag == Iex_Unop
2142 && e->Iex.Unop.op == Iop_CmpNEZ8) {
2143 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, AMD64Instr_Test64(0xFF,r));
return Acc_NZ;
2148 /* --- patterns rooted at: CmpNEZ16 --- */
2151 if (e->tag == Iex_Unop
2152 && e->Iex.Unop.op == Iop_CmpNEZ16) {
2153 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, AMD64Instr_Test64(0xFFFF,r));
return Acc_NZ;
2158 /* --- patterns rooted at: CmpNEZ32 --- */
2161 if (e->tag == Iex_Unop
2162 && e->Iex.Unop.op == Iop_CmpNEZ32) {
2163 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2164 HReg tmp = newVRegI(env);
2165 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2166 addInstr(env, AMD64Instr_MovZLQ(r1,tmp));
addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,tmp));
return Acc_NZ;
2171 /* --- patterns rooted at: CmpNEZ64 --- */
2173 /* CmpNEZ64(Or64(x,y)) */
2175 DECLARE_PATTERN(p_CmpNEZ64_Or64);
2176 DEFINE_PATTERN(p_CmpNEZ64_Or64,
2177 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
2178 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
2179 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
2180 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
2181 HReg tmp = newVRegI(env);
2182 addInstr(env, mk_iMOVsd_RR(r0, tmp));
addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
return Acc_NZ;
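/* Why this works: OR-ing the two operands into a scratch register
   sets the Z flag exactly when x|y == 0, i.e. when both are zero, so
   CmpNEZ64(Or64(x,y)) costs a single OR rather than two compares and
   a flag merge. */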
2189 if (e->tag == Iex_Unop
2190 && e->Iex.Unop.op == Iop_CmpNEZ64) {
2191 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2192 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
return Acc_NZ;
2197 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
2199 /* CmpEQ8 / CmpNE8 */
2200 if (e->tag == Iex_Binop
2201 && (e->Iex.Binop.op == Iop_CmpEQ8
2202 || e->Iex.Binop.op == Iop_CmpNE8
2203 || e->Iex.Binop.op == Iop_CasCmpEQ8
2204 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
2205 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2206 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2207 HReg r = newVRegI(env);
2208 addInstr(env, mk_iMOVsd_RR(r1,r));
2209 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2210 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
2211 switch (e->Iex.Binop.op) {
2212 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2213 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
2214 default: vpanic("iselCondCode(amd64): CmpXX8");
2218 /* CmpEQ16 / CmpNE16 */
2219 if (e->tag == Iex_Binop
2220 && (e->Iex.Binop.op == Iop_CmpEQ16
2221 || e->Iex.Binop.op == Iop_CmpNE16
2222 || e->Iex.Binop.op == Iop_CasCmpEQ16
2223 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
2224 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2225 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2226 HReg r = newVRegI(env);
2227 addInstr(env, mk_iMOVsd_RR(r1,r));
2228 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2229 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
2230 switch (e->Iex.Binop.op) {
2231 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
2232 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
2233 default: vpanic("iselCondCode(amd64): CmpXX16");
2237 /* CmpEQ32 / CmpNE32 */
2238 if (e->tag == Iex_Binop
2239 && (e->Iex.Binop.op == Iop_CmpEQ32
2240 || e->Iex.Binop.op == Iop_CmpNE32
2241 || e->Iex.Binop.op == Iop_CasCmpEQ32
2242 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
2243 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2244 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2245 HReg r = newVRegI(env);
2246 addInstr(env, mk_iMOVsd_RR(r1,r));
2247 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2248 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, r));
2249 switch (e->Iex.Binop.op) {
2250 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
2251 case Iop_CmpNE32: case Iop_CasCmpNE32: return Acc_NZ;
2252 default: vpanic("iselCondCode(amd64): CmpXX32");
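/* Note on the 32-bit case above: after the XOR, the low 32 bits of r
   are zero iff the operands' low halves were equal, but the upper 32
   bits of r are stale.  Shifting left by 32 discards the stale bits,
   so the Z flag set by SHL reflects equality of the 32-bit values.
   Example: if both low halves are 0x12345678, the XOR leaves the low
   32 bits zero; after SHL(32) the whole register is 0 -> Acc_Z. */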
2257 if (e->tag == Iex_Binop
2258 && (e->Iex.Binop.op == Iop_CmpEQ64
2259 || e->Iex.Binop.op == Iop_CmpNE64
2260 || e->Iex.Binop.op == Iop_CmpLT64S
2261 || e->Iex.Binop.op == Iop_CmpLT64U
2262 || e->Iex.Binop.op == Iop_CmpLE64S
2263 || e->Iex.Binop.op == Iop_CmpLE64U
2264 || e->Iex.Binop.op == Iop_CasCmpEQ64
2265 || e->Iex.Binop.op == Iop_CasCmpNE64)) {
2266 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2267 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2268 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2269 switch (e->Iex.Binop.op) {
2270 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
2271 case Iop_CmpNE64: case Iop_CasCmpNE64: return Acc_NZ;
2272 case Iop_CmpLT64S: return Acc_L;
2273 case Iop_CmpLT64U: return Acc_B;
2274 case Iop_CmpLE64S: return Acc_LE;
2275 case Iop_CmpLE64U: return Acc_BE;
2276 default: vpanic("iselCondCode(amd64): CmpXX64");
ppIRExpr(e);
vpanic("iselCondCode(amd64)");
2285 /*---------------------------------------------------------*/
2286 /*--- ISEL: Integer expressions (128 bit) ---*/
2287 /*---------------------------------------------------------*/
2289 /* Compute a 128-bit value into a register pair, which is returned as
2290 the first two parameters. As with iselIntExpr_R, these may be
2291 either real or virtual regs; in any case they must not be changed
2292 by subsequent code emitted by the caller. */
2294 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2295 ISelEnv* env, IRExpr* e )
2297 iselInt128Expr_wrk(rHi, rLo, env, e);
2299 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2301 vassert(hregClass(*rHi) == HRcInt64);
2302 vassert(hregIsVirtual(*rHi));
2303 vassert(hregClass(*rLo) == HRcInt64);
2304 vassert(hregIsVirtual(*rLo));
2307 /* DO NOT CALL THIS DIRECTLY ! */
2308 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2309 ISelEnv* env, IRExpr* e )
2311 //.. HWord fn = 0; /* helper fn for most SIMD64 stuff */
2313 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2315 //.. /* 64-bit literal */
2316 //.. if (e->tag == Iex_Const) {
2317 //.. ULong w64 = e->Iex.Const.con->Ico.U64;
2318 //.. UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
2319 //.. UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
2320 //.. HReg tLo = newVRegI(env);
2321 //.. HReg tHi = newVRegI(env);
2322 //.. vassert(e->Iex.Const.con->tag == Ico_U64);
2323 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2324 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2330 /* read 128-bit IRTemp */
2331 if (e->tag == Iex_RdTmp) {
2332 lookupIRTemp128( rHi, rLo, env, e->Iex.RdTmp.tmp);
2336 //.. /* 64-bit load */
2337 //.. if (e->tag == Iex_LDle) {
2339 //.. X86AMode *am0, *am4;
2340 //.. vassert(e->Iex.LDle.ty == Ity_I64);
2341 //.. tLo = newVRegI(env);
2342 //.. tHi = newVRegI(env);
2343 //.. am0 = iselIntExpr_AMode(env, e->Iex.LDle.addr);
2344 //.. am4 = advance4(am0);
2345 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2346 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2352 //.. /* 64-bit GET */
2353 //.. if (e->tag == Iex_Get) {
2354 //.. X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2355 //.. X86AMode* am4 = advance4(am);
2356 //.. HReg tLo = newVRegI(env);
2357 //.. HReg tHi = newVRegI(env);
2358 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2359 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2365 //.. /* 64-bit GETI */
2366 //.. if (e->tag == Iex_GetI) {
2368 //.. = genGuestArrayOffset( env, e->Iex.GetI.descr,
2369 //.. e->Iex.GetI.ix, e->Iex.GetI.bias );
2370 //.. X86AMode* am4 = advance4(am);
2371 //.. HReg tLo = newVRegI(env);
2372 //.. HReg tHi = newVRegI(env);
2373 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2374 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2380 //.. /* 64-bit Mux0X */
2381 //.. if (e->tag == Iex_Mux0X) {
2382 //.. HReg e0Lo, e0Hi, eXLo, eXHi, r8;
2383 //.. HReg tLo = newVRegI(env);
2384 //.. HReg tHi = newVRegI(env);
2385 //.. iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2386 //.. iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2387 //.. addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2388 //.. addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2389 //.. r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2390 //.. addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
2391 //.. /* This assumes the first cmov32 doesn't trash the condition
2392 //.. codes, so they are still available for the second cmov32 */
2393 //.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2394 //.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2400 /* --------- BINARY ops --------- */
2401 if (e->tag == Iex_Binop) {
2402 switch (e->Iex.Binop.op) {
/* 64 x 64 -> 128 multiply */
case Iop_MullU64:
case Iop_MullS64: {
   /* get one operand into %rax, and the other into a R/M.
      Need to make an educated guess about which is better in
      which. */
2409 HReg tLo = newVRegI(env);
2410 HReg tHi = newVRegI(env);
2411 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2412 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2413 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2414 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
2415 addInstr(env, AMD64Instr_MulL(syned, rmLeft));
2416 /* Result is now in RDX:RAX. Tell the caller. */
2417 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
*rHi = tHi;
*rLo = tLo;
return;
}
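/* Sketch of the sequence this emits, assuming the right operand ends
   up in a register (AT&T syntax; mulq for MullU64, imulq for
   MullS64):

      movq  <argR>, %rax
      mulq  <argL>          # 128-bit product in %rdx:%rax
      movq  %rdx, <tHi>
      movq  %rax, <tLo>
*/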
2424 /* 128 x 64 -> (64(rem),64(div)) division */
2425 case Iop_DivModU128to64:
2426 case Iop_DivModS128to64: {
/* Get the 128-bit operand into rdx:rax, and the other into
   an R/M. */
HReg sHi, sLo;
2430 HReg tLo = newVRegI(env);
2431 HReg tHi = newVRegI(env);
2432 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
2433 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2434 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2435 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
2436 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
2437 addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
2438 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
*rHi = tHi;
*rLo = tLo;
return;
}
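/* Sketch of the emitted sequence (divq for the unsigned variant,
   idivq for the signed one):

      movq  <sHi>, %rdx
      movq  <sLo>, %rax
      divq  <rmRight>       # quotient -> %rax, remainder -> %rdx
      movq  %rdx, <tHi>     # remainder half of the result
      movq  %rax, <tLo>     # quotient half of the result
*/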
/* 64HLto128(e1,e2) */
case Iop_64HLto128:
   *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
   *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
   return;
2451 //.. /* Or64/And64/Xor64 */
2453 //.. case Iop_And64:
2454 //.. case Iop_Xor64: {
2455 //.. HReg xLo, xHi, yLo, yHi;
2456 //.. HReg tLo = newVRegI(env);
2457 //.. HReg tHi = newVRegI(env);
2458 //.. X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2459 //.. : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2461 //.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2462 //.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2463 //.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2464 //.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2465 //.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2466 //.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2472 //.. /* Add64/Sub64 */
2473 //.. case Iop_Add64:
2474 //.. case Iop_Sub64: {
2475 //.. HReg xLo, xHi, yLo, yHi;
2476 //.. HReg tLo = newVRegI(env);
2477 //.. HReg tHi = newVRegI(env);
2478 //.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2479 //.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2480 //.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2481 //.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2482 //.. if (e->Iex.Binop.op==Iop_Add64) {
2483 //.. addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2484 //.. addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2486 //.. addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2487 //.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2494 //.. /* 32HLto64(e1,e2) */
2495 //.. case Iop_32HLto64:
2496 //.. *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2497 //.. *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2500 //.. /* 64-bit shifts */
2501 //.. case Iop_Shl64: {
2502 //.. /* We use the same ingenious scheme as gcc. Put the value
2503 //.. to be shifted into %hi:%lo, and the shift amount into
2504 //.. %cl. Then (dsts on right, a la ATT syntax):
2506 //.. shldl %cl, %lo, %hi -- make %hi be right for the
2507 //.. -- shift amt %cl % 32
2508 //.. shll %cl, %lo -- make %lo be right for the
2509 //.. -- shift amt %cl % 32
2511 //.. Now, if (shift amount % 64) is in the range 32 .. 63,
2512 //.. we have to do a fixup, which puts the result low half
2513 //.. into the result high half, and zeroes the low half:
2515 //.. testl $32, %ecx
2517 //.. cmovnz %lo, %hi
2518 //.. movl $0, %tmp -- sigh; need yet another reg
2519 //.. cmovnz %tmp, %lo
2521 //.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2522 //.. tLo = newVRegI(env);
2523 //.. tHi = newVRegI(env);
2524 //.. tTemp = newVRegI(env);
2525 //.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2526 //.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2527 //.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2528 //.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2529 //.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2530 //.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2531 //.. and those regs are legitimately modifiable. */
2532 //.. addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2533 //.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, X86RM_Reg(tLo)));
2534 //.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2535 //.. X86RM_Reg(hregX86_ECX())));
2536 //.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2537 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2538 //.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2544 //.. case Iop_Shr64: {
2545 //.. /* We use the same ingenious scheme as gcc. Put the value
2546 //.. to be shifted into %hi:%lo, and the shift amount into
2549 //.. shrdl %cl, %hi, %lo -- make %lo be right for the
2550 //.. -- shift amt %cl % 32
2551 //.. shrl %cl, %hi -- make %hi be right for the
2552 //.. -- shift amt %cl % 32
2554 //.. Now, if (shift amount % 64) is in the range 32 .. 63,
2555 //.. we have to do a fixup, which puts the result high half
2556 //.. into the result low half, and zeroes the high half:
2558 //.. testl $32, %ecx
2560 //.. cmovnz %hi, %lo
2561 //.. movl $0, %tmp -- sigh; need yet another reg
2562 //.. cmovnz %tmp, %hi
2564 //.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2565 //.. tLo = newVRegI(env);
2566 //.. tHi = newVRegI(env);
2567 //.. tTemp = newVRegI(env);
2568 //.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2569 //.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2570 //.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2571 //.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2572 //.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2573 //.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2574 //.. and those regs are legitimately modifiable. */
2575 //.. addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2576 //.. addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, X86RM_Reg(tHi)));
2577 //.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2578 //.. X86RM_Reg(hregX86_ECX())));
2579 //.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2580 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2581 //.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2587 //.. /* F64 -> I64 */
2588 //.. /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2589 //.. case. Unfortunately I see no easy way to avoid the
2590 //.. duplication. */
2591 //.. case Iop_F64toI64: {
2592 //.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2593 //.. HReg tLo = newVRegI(env);
2594 //.. HReg tHi = newVRegI(env);
2596 //.. /* Used several times ... */
2597 //.. /* Careful ... this sharing is only safe because
2598 //.. zero_esp/four_esp do not hold any registers which the
2599 //.. register allocator could attempt to swizzle later. */
2600 //.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2601 //.. X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2603 //.. /* rf now holds the value to be converted, and rrm holds
2604 //.. the rounding mode value, encoded as per the
2605 //.. IRRoundingMode enum. The first thing to do is set the
2606 //.. FPU's rounding mode accordingly. */
2608 //.. /* Create a space for the format conversion. */
2609 //.. /* subl $8, %esp */
2610 //.. sub_from_esp(env, 8);
2612 //.. /* Set host rounding mode */
2613 //.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2615 //.. /* gistll %rf, 0(%esp) */
2616 //.. addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2618 //.. /* movl 0(%esp), %dstLo */
2619 //.. /* movl 4(%esp), %dstHi */
2620 //.. addInstr(env, X86Instr_Alu32R(
2621 //.. Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2622 //.. addInstr(env, X86Instr_Alu32R(
2623 //.. Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2625 //.. /* Restore default FPU rounding. */
2626 //.. set_FPU_rounding_default( env );
2628 //.. /* addl $8, %esp */
2629 //.. add_to_esp(env, 8);
2639 } /* if (e->tag == Iex_Binop) */
2642 //.. /* --------- UNARY ops --------- */
2643 //.. if (e->tag == Iex_Unop) {
2644 //.. switch (e->Iex.Unop.op) {
2646 //.. /* 32Sto64(e) */
2647 //.. case Iop_32Sto64: {
2648 //.. HReg tLo = newVRegI(env);
2649 //.. HReg tHi = newVRegI(env);
2650 //.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2651 //.. addInstr(env, mk_iMOVsd_RR(src,tHi));
2652 //.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2653 //.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi)));
2659 //.. /* 32Uto64(e) */
2660 //.. case Iop_32Uto64: {
2661 //.. HReg tLo = newVRegI(env);
2662 //.. HReg tHi = newVRegI(env);
2663 //.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2664 //.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2665 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2671 //.. /* could do better than this, but for now ... */
2672 //.. case Iop_1Sto64: {
2673 //.. HReg tLo = newVRegI(env);
2674 //.. HReg tHi = newVRegI(env);
2675 //.. X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2676 //.. addInstr(env, X86Instr_Set32(cond,tLo));
2677 //.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(tLo)));
2678 //.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tLo)));
2679 //.. addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2686 //.. case Iop_Not64: {
2687 //.. HReg tLo = newVRegI(env);
2688 //.. HReg tHi = newVRegI(env);
2690 //.. iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2691 //.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2692 //.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2693 //.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tHi)));
2694 //.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tLo)));
2703 //.. } /* if (e->tag == Iex_Unop) */
2706 //.. /* --------- CCALL --------- */
2707 //.. if (e->tag == Iex_CCall) {
2708 //.. HReg tLo = newVRegI(env);
2709 //.. HReg tHi = newVRegI(env);
2711 //.. /* Marshal args, do the call, clear stack. */
2712 //.. doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2714 //.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2715 //.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
ppIRExpr(e);
vpanic("iselInt128Expr");
2726 /*---------------------------------------------------------*/
2727 /*--- ISEL: Floating point expressions (32 bit) ---*/
2728 /*---------------------------------------------------------*/
/* Nothing interesting here; really just wrappers for
   64-bit stuff. */
2733 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2735 HReg r = iselFltExpr_wrk( env, e );
2737 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2739 vassert(hregClass(r) == HRcVec128);
2740 vassert(hregIsVirtual(r));
2744 /* DO NOT CALL THIS DIRECTLY */
2745 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2747 IRType ty = typeOfIRExpr(env->type_env,e);
2748 vassert(ty == Ity_F32);
2750 if (e->tag == Iex_RdTmp) {
2751 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   AMD64AMode* am;
   HReg res = newVRegV(env);
   vassert(e->Iex.Load.ty == Ity_F32);
   am = iselIntExpr_AMode(env, e->Iex.Load.addr);
   addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
   return res;
}
2763 if (e->tag == Iex_Binop
2764 && e->Iex.Binop.op == Iop_F64toF32) {
2765 /* Although the result is still held in a standard SSE register,
2766 we need to round it to reflect the loss of accuracy/range
2767 entailed in casting it to a 32-bit float. */
2768 HReg dst = newVRegV(env);
2769 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2770 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2771 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
set_SSE_rounding_default( env );
return dst;
2776 if (e->tag == Iex_Get) {
AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
                                hregAMD64_RBP() );
HReg res = newVRegV(env);
addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
return res;
2784 if (e->tag == Iex_Unop
2785 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
/* Given an I32, produce an IEEE754 float with the same bit
   pattern. */
HReg dst = newVRegV(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
return dst;
2797 vpanic("iselFltExpr_wrk");
2801 /*---------------------------------------------------------*/
2802 /*--- ISEL: Floating point expressions (64 bit) ---*/
2803 /*---------------------------------------------------------*/
2805 /* Compute a 64-bit floating point value into the lower half of an xmm
2806 register, the identity of which is returned. As with
2807 iselIntExpr_R, the returned reg will be virtual, and it must not be
2808 changed by subsequent code emitted by the caller.
2811 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
   Type                  S (1 bit)   E (11 bits)   F (52 bits)
   ----                  ---------   -----------   -----------
   signalling NaN        u           2047 (max)    .0uuuuu---u
   quiet NaN             u           2047 (max)    .1uuuuu---u
   negative infinity     1           2047 (max)    .000000---0
   positive infinity     0           2047 (max)    .000000---0
   negative zero         1           0             .000000---0
   positive zero         0           0             .000000---0
*/
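/* For instance, 1.0 is encoded as S=0, E=1023, F=0, i.e. the 64-bit
   pattern 0x3FF0000000000000 -- handy to keep in mind when reading
   the constant-loading code below. */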
2829 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2831 HReg r = iselDblExpr_wrk( env, e );
2833 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2835 vassert(hregClass(r) == HRcVec128);
2836 vassert(hregIsVirtual(r));
2840 /* DO NOT CALL THIS DIRECTLY */
2841 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2843 IRType ty = typeOfIRExpr(env->type_env,e);
2845 vassert(ty == Ity_F64);
2847 if (e->tag == Iex_RdTmp) {
2848 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2851 if (e->tag == Iex_Const) {
2852 union { ULong u64; Double f64; } u;
2853 HReg res = newVRegV(env);
2854 HReg tmp = newVRegI(env);
2855 vassert(sizeof(u) == 8);
2856 vassert(sizeof(u.u64) == 8);
2857 vassert(sizeof(u.f64) == 8);
2859 if (e->Iex.Const.con->tag == Ico_F64) {
2860 u.f64 = e->Iex.Const.con->Ico.F64;
2862 else if (e->Iex.Const.con->tag == Ico_F64i) {
2863 u.u64 = e->Iex.Const.con->Ico.F64i;
2866 vpanic("iselDblExpr(amd64): const");
2868 addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
2869 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
2870 addInstr(env, AMD64Instr_SseLdSt(
2871 True/*load*/, 8, res,
AMD64AMode_IR(0, hregAMD64_RSP())
));
add_to_rsp(env, 8);
return res;
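/* Roughly, the sequence materialised here is:

      movabsq $<bits>, %tmp
      pushq   %tmp
      movsd   (%rsp), <res>     # reload the same bits as a double
      addq    $8, %rsp
*/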
2878 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2880 HReg res = newVRegV(env);
2881 vassert(e->Iex.Load.ty == Ity_F64);
2882 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2883 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2887 if (e->tag == Iex_Get) {
2888 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2890 HReg res = newVRegV(env);
2891 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
if (e->tag == Iex_GetI) {
   AMD64AMode* am
      = genGuestArrayOffset(
2898 env, e->Iex.GetI.descr,
2899 e->Iex.GetI.ix, e->Iex.GetI.bias );
2900 HReg res = newVRegV(env);
addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
return res;
2905 if (e->tag == Iex_Triop) {
2906 AMD64SseOp op = Asse_INVALID;
2907 switch (e->Iex.Triop.op) {
2908 case Iop_AddF64: op = Asse_ADDF; break;
2909 case Iop_SubF64: op = Asse_SUBF; break;
2910 case Iop_MulF64: op = Asse_MULF; break;
2911 case Iop_DivF64: op = Asse_DIVF; break;
2914 if (op != Asse_INVALID) {
2915 HReg dst = newVRegV(env);
2916 HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
2917 HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
2918 addInstr(env, mk_vMOVsd_RR(argL, dst));
2919 /* XXXROUNDINGFIXME */
2920 /* set roundingmode here */
addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
return dst;
2926 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
2927 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2928 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
2929 HReg dst = newVRegV(env);
2931 /* rf now holds the value to be rounded. The first thing to do
2932 is set the FPU's rounding mode accordingly. */
2934 /* Set host x87 rounding mode */
2935 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2937 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
2938 addInstr(env, AMD64Instr_A87Free(1));
2939 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
2940 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2941 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
2942 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2944 /* Restore default x87 rounding. */
set_FPU_rounding_default( env );
return dst;
2950 if (e->tag == Iex_Triop
2951 && (e->Iex.Triop.op == Iop_ScaleF64
2952 || e->Iex.Triop.op == Iop_AtanF64
2953 || e->Iex.Triop.op == Iop_Yl2xF64
2954 || e->Iex.Triop.op == Iop_Yl2xp1F64
2955 || e->Iex.Triop.op == Iop_PRemF64
2956 || e->Iex.Triop.op == Iop_PRem1F64)
2958 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2959 HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
2960 HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
2961 HReg dst = newVRegV(env);
2962 Bool arg2first = toBool(e->Iex.Triop.op == Iop_ScaleF64
2963 || e->Iex.Triop.op == Iop_PRemF64
2964 || e->Iex.Triop.op == Iop_PRem1F64);
2965 addInstr(env, AMD64Instr_A87Free(2));
2967 /* one arg -> top of x87 stack */
2968 addInstr(env, AMD64Instr_SseLdSt(
2969 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
2970 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
2972 /* other arg -> top of x87 stack */
2973 addInstr(env, AMD64Instr_SseLdSt(
2974 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
2975 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
2978 /* XXXROUNDINGFIXME */
2979 /* set roundingmode here */
switch (e->Iex.Triop.op) {
   case Iop_ScaleF64:  addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));  break;
   case Iop_AtanF64:   addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));   break;
   case Iop_Yl2xF64:   addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));   break;
   case Iop_Yl2xp1F64: addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1)); break;
   case Iop_PRemF64:   addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));   break;
   case Iop_PRem1F64:  addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));  break;
   default: vassert(0);
}
3004 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
return dst;
3009 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3010 HReg dst = newVRegV(env);
3011 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
3012 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
3013 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
set_SSE_rounding_default( env );
return dst;
3018 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
3019 HReg dst = newVRegV(env);
3020 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3021 set_SSE_rounding_default( env );
addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
return dst;
3026 if (e->tag == Iex_Unop
3027 && (e->Iex.Unop.op == Iop_NegF64
3028 || e->Iex.Unop.op == Iop_AbsF64)) {
3029 /* Sigh ... very rough code. Could do much better. */
3030 /* Get the 128-bit literal 00---0 10---0 into a register
3031 and xor/nand it with the value to be negated. */
3032 HReg r1 = newVRegI(env);
3033 HReg dst = newVRegV(env);
3034 HReg tmp = newVRegV(env);
3035 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3036 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3037 addInstr(env, mk_vMOVsd_RR(src,tmp));
3038 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3039 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
3040 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
3041 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
3043 if (e->Iex.Unop.op == Iop_NegF64)
3044 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
3046 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));
add_to_rsp(env, 16);
return dst;
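/* The 128-bit constant built on the stack above is (high half first)
   0x00000000_00000000_80000000_00000000, i.e. just the sign bit of
   the low double lane.  XOR-ing with it flips the sign (NegF64);
   AND-NOT-ing with it clears the sign (AbsF64).  E.g. -2.5
   (0xC004000000000000) xor 0x8000000000000000 = 2.5
   (0x4004000000000000). */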
3052 if (e->tag == Iex_Binop) {
3053 A87FpOp fpop = Afp_INVALID;
3054 switch (e->Iex.Binop.op) {
3055 case Iop_SqrtF64: fpop = Afp_SQRT; break;
3056 case Iop_SinF64: fpop = Afp_SIN; break;
3057 case Iop_CosF64: fpop = Afp_COS; break;
3058 case Iop_TanF64: fpop = Afp_TAN; break;
3059 case Iop_2xm1F64: fpop = Afp_2XM1; break;
3062 if (fpop != Afp_INVALID) {
3063 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3064 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
3065 HReg dst = newVRegV(env);
3066 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
3067 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
3068 addInstr(env, AMD64Instr_A87Free(nNeeded));
3069 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
3070 /* XXXROUNDINGFIXME */
3071 /* set roundingmode here */
3072 addInstr(env, AMD64Instr_A87FpOp(fpop));
3073 if (e->Iex.Binop.op==Iop_TanF64) {
3074 /* get rid of the extra 1.0 that fptan pushes */
   addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
}
addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
return dst;
3083 if (e->tag == Iex_Unop) {
3084 switch (e->Iex.Unop.op) {
3085 //.. case Iop_I32toF64: {
3086 //.. HReg dst = newVRegF(env);
3087 //.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3088 //.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3089 //.. set_FPU_rounding_default(env);
3090 //.. addInstr(env, X86Instr_FpLdStI(
3091 //.. True/*load*/, 4, dst,
3092 //.. X86AMode_IR(0, hregX86_ESP())));
3093 //.. add_to_esp(env, 4);
3096 case Iop_ReinterpI64asF64: {
/* Given an I64, produce an IEEE754 double with the same
   bit pattern. */
3099 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3100 HReg dst = newVRegV(env);
3101 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg);
3103 set_SSE_rounding_default(env);
3104 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
return dst;
}
3108 case Iop_F32toF64: {
HReg f32;
HReg f64 = newVRegV(env);
3111 /* this shouldn't be necessary, but be paranoid ... */
3112 set_SSE_rounding_default(env);
3113 f32 = iselFltExpr(env, e->Iex.Unop.arg);
addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
return f64;
}
3122 /* --------- MULTIPLEX --------- */
3123 if (e->tag == Iex_Mux0X) {
3124 HReg r8, rX, r0, dst;
3125 vassert(ty == Ity_F64);
3126 vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8);
3127 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
3128 rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
3129 r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
3130 dst = newVRegV(env);
3131 addInstr(env, mk_vMOVsd_RR(rX,dst));
3132 addInstr(env, AMD64Instr_Test64(0xFF, r8));
addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
return dst;
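/* Lowering sketch for Mux0X: dst starts as exprX; if the low byte of
   the condition is zero (Acc_Z after the TEST), expr0 is conditionally
   moved in instead:

      movsd  <rX>, <dst>
      testq  $0xFF, <r8>
      (cmov-if-Z) <r0> -> <dst>
*/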
3138 vpanic("iselDblExpr_wrk");
3142 /*---------------------------------------------------------*/
3143 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3144 /*---------------------------------------------------------*/
3146 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3148 HReg r = iselVecExpr_wrk( env, e );
3150 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3152 vassert(hregClass(r) == HRcVec128);
3153 vassert(hregIsVirtual(r));
3158 /* DO NOT CALL THIS DIRECTLY */
3159 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3161 Bool arg1isEReg = False;
3162 AMD64SseOp op = Asse_INVALID;
3163 IRType ty = typeOfIRExpr(env->type_env,e);
3165 vassert(ty == Ity_V128);
3167 if (e->tag == Iex_RdTmp) {
3168 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3171 if (e->tag == Iex_Get) {
3172 HReg dst = newVRegV(env);
addInstr(env, AMD64Instr_SseLdSt(
                 True/*load*/, 16, dst,
                 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
              ));
return dst;
3183 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3184 HReg dst = newVRegV(env);
3185 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
return dst;
3190 if (e->tag == Iex_Const) {
3191 HReg dst = newVRegV(env);
3192 vassert(e->Iex.Const.con->tag == Ico_V128);
switch (e->Iex.Const.con->Ico.V128) {
   case 0x0000:
      dst = generate_zeroes_V128(env);
      break;
   case 0xFFFF:
      dst = generate_ones_V128(env);
      break;
   default: {
3203 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3204 const ULong const_z64 = 0x0000000000000000ULL;
3205 const ULong const_o64 = 0xFFFFFFFFFFFFFFFFULL;
3206 const ULong const_z32o32 = 0x00000000FFFFFFFFULL;
3207 const ULong const_o32z32 = 0xFFFFFFFF00000000ULL;
3208 switch (e->Iex.Const.con->Ico.V128) {
case 0x0000: case 0xFFFF:
   vassert(0); /* handled just above */
/* do push_uimm64 twice, first time for the high-order half. */
case 0x00F0:
   push_uimm64(env, const_z64);
   push_uimm64(env, const_o32z32);
   break;
case 0x00FF:
   push_uimm64(env, const_z64);
   push_uimm64(env, const_o64);
   break;
case 0x000F:
   push_uimm64(env, const_z64);
   push_uimm64(env, const_z32o32);
   break;
case 0x0F00:
   push_uimm64(env, const_z32o32);
   push_uimm64(env, const_z64);
   break;
case 0x0F0F:
   push_uimm64(env, const_z32o32);
   push_uimm64(env, const_z32o32);
   break;
case 0x0FF0:
   push_uimm64(env, const_z32o32);
   push_uimm64(env, const_o32z32);
   break;
case 0x0FFF:
   push_uimm64(env, const_z32o32);
   push_uimm64(env, const_o64);
   break;
case 0xF000:
   push_uimm64(env, const_o32z32);
   push_uimm64(env, const_z64);
   break;
case 0xF00F:
   push_uimm64(env, const_o32z32);
   push_uimm64(env, const_z32o32);
   break;
case 0xF0F0:
   push_uimm64(env, const_o32z32);
   push_uimm64(env, const_o32z32);
   break;
case 0xF0FF:
   push_uimm64(env, const_o32z32);
   push_uimm64(env, const_o64);
   break;
case 0xFF00:
   push_uimm64(env, const_o64);
   push_uimm64(env, const_z64);
   break;
case 0xFF0F:
   push_uimm64(env, const_o64);
   push_uimm64(env, const_z32o32);
   break;
case 0xFFF0:
   push_uimm64(env, const_o64);
   push_uimm64(env, const_o32z32);
   break;
         }
         addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
         add_to_rsp(env, 16);
         break;
      }
   }
   return dst;
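/* Each bit of the 16-bit Ico_V128 value selects one byte of the
   128-bit constant (bit 0 = least significant byte), so e.g. 0x000F
   denotes a vector whose least-significant 32-bit lane is all ones.
   It is built by pushing the two 64-bit halves (high half first) and
   reloading all 16 bytes into an xmm register. */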
3276 if (e->tag == Iex_Unop) {
3277 switch (e->Iex.Unop.op) {
case Iop_NotV128: {
   HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   return do_sse_NotV128(env, arg);
}
3284 case Iop_CmpNEZ64x2: {
3285 /* We can use SSE2 instructions for this. */
3286 /* Ideally, we want to do a 64Ix2 comparison against zero of
3287 the operand. Problem is no such insn exists. Solution
3288 therefore is to do a 32Ix4 comparison instead, and bitwise-
3289 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3290 let the not'd result of this initial comparison be a:b:c:d.
3291 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3292 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3293 giving the required result.
3295 The required selection sequence is 2,3,0,1, which
3296 according to Intel's documentation means the pshufd
3297 literal value is 0xB1, that is,
(2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
*/
3300 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3301 HReg tmp = generate_zeroes_V128(env);
3302 HReg dst = newVRegV(env);
3303 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
3304 tmp = do_sse_NotV128(env, tmp);
3305 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
return dst;
}
3310 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
3311 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3312 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
do_CmpNEZ_vector:
{
   HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
   HReg tmp  = newVRegV(env);
   HReg zero = generate_zeroes_V128(env);
   HReg dst;
   addInstr(env, mk_vMOVsd_RR(arg, tmp));
   addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
   dst = do_sse_NotV128(env, tmp);
   return dst;
}
3325 case Iop_Recip32Fx4: op = Asse_RCPF; goto do_32Fx4_unary;
3326 case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
3327 case Iop_Sqrt32Fx4: op = Asse_SQRTF; goto do_32Fx4_unary;
do_32Fx4_unary:
{
   HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
   return dst;
}
3336 //.. case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
3337 //.. case Iop_RSqrt64Fx2: op = Asse_RSQRTF; goto do_64Fx2_unary;
3338 case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary;
do_64Fx2_unary:
{
   HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
   return dst;
}
3347 case Iop_Recip32F0x4: op = Asse_RCPF; goto do_32F0x4_unary;
3348 case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
3349 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary;
do_32F0x4_unary:
{
   /* A bit subtle.  We have to copy the arg to the result
      register first, because actually doing the SSE scalar insn
      leaves the upper 3/4 of the destination register
      unchanged.  Whereas the required semantics of these
      primops is that the upper 3/4 is simply copied in from the
      source. */
   HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   HReg dst = newVRegV(env);
   addInstr(env, mk_vMOVsd_RR(arg, dst));
   addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
   return dst;
}
3365 //.. case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary;
3366 //.. case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
3367 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
do_64F0x2_unary:
{
   /* A bit subtle.  We have to copy the arg to the result
      register first, because actually doing the SSE scalar insn
      leaves the upper half of the destination register
      unchanged.  Whereas the required semantics of these
      primops is that the upper half is simply copied in from the
      source. */
   HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   HReg dst = newVRegV(env);
   addInstr(env, mk_vMOVsd_RR(arg, dst));
   addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
   return dst;
}
3383 case Iop_32UtoV128: {
3384 HReg dst = newVRegV(env);
3385 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
3386 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg);
3387 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
return dst;
}
3392 case Iop_64UtoV128: {
3393 HReg dst = newVRegV(env);
3394 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3395 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3396 addInstr(env, AMD64Instr_Push(rmi));
addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
add_to_rsp(env, 8);
return dst;
}
3404 } /* switch (e->Iex.Unop.op) */
3405 } /* if (e->tag == Iex_Unop) */
3407 if (e->tag == Iex_Binop) {
3408 switch (e->Iex.Binop.op) {
3410 case Iop_SetV128lo64: {
3411 HReg dst = newVRegV(env);
3412 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3413 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3414 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3415 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3416 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
return dst;
}
3421 case Iop_SetV128lo32: {
3422 HReg dst = newVRegV(env);
3423 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3424 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3425 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3426 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3427 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
return dst;
}
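/* Both SetV128lo cases use the same idiom: spill the vector to a
   16-byte stack slot, overwrite the low 8 (or 4) bytes with the
   integer value, and reload the whole 16 bytes -- simple, if not
   maximally efficient. */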
3432 case Iop_64HLtoV128: {
3433 AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP());
3434 HReg dst = newVRegV(env);
3435 /* do this via the stack (easy, convenient, etc) */
3436 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1)));
3437 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2)));
3438 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp));
add_to_rsp(env, 16);
return dst;
}
3443 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
3444 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
3445 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
3446 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
3447 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4;
3448 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4;
3449 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4;
3450 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4;
3451 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4;
3452 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4;
do_32Fx4:
{
   HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   HReg dst = newVRegV(env);
   addInstr(env, mk_vMOVsd_RR(argL, dst));
   addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
   return dst;
}
3463 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
3464 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
3465 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
3466 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
3467 case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2;
3468 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2;
3469 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2;
3470 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2;
3471 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2;
3472 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2;
do_64Fx2:
{
   HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   HReg dst = newVRegV(env);
   addInstr(env, mk_vMOVsd_RR(argL, dst));
   addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
   return dst;
}
3483 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
3484 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
3485 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
3486 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
3487 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4;
3488 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4;
3489 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4;
3490 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4;
3491 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4;
3492 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4;
do_32F0x4:
{
   HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   HReg dst = newVRegV(env);
   addInstr(env, mk_vMOVsd_RR(argL, dst));
   addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
   return dst;
}
3502 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
3503 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
3504 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
3505 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
3506 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2;
3507 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2;
3508 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2;
3509 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2;
3510 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2;
3511 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2;
do_64F0x2:
{
   HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   HReg dst = newVRegV(env);
   addInstr(env, mk_vMOVsd_RR(argL, dst));
   addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
   return dst;
}
3521 case Iop_QNarrow32Sx4:
3522 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3523 case Iop_QNarrow16Sx8:
3524 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3525 case Iop_QNarrow16Ux8:
3526 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3528 case Iop_InterleaveHI8x16:
3529 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3530 case Iop_InterleaveHI16x8:
3531 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3532 case Iop_InterleaveHI32x4:
3533 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3534 case Iop_InterleaveHI64x2:
3535 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3537 case Iop_InterleaveLO8x16:
3538 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3539 case Iop_InterleaveLO16x8:
3540 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3541 case Iop_InterleaveLO32x4:
3542 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3543 case Iop_InterleaveLO64x2:
3544 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3546 case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
3547 case Iop_OrV128: op = Asse_OR; goto do_SseReRg;
3548 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
3549 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg;
3550 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg;
3551 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg;
3552 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg;
3553 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg;
3554 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg;
3555 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg;
3556 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg;
3557 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg;
3558 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg;
3559 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg;
3560 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg;
3561 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg;
3562 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg;
3563 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
3564 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
3565 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg;
3566 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg;
3567 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg;
3568 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg;
3569 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
3570 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
3571 case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg;
3572 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg;
3573 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg;
3574 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg;
3575 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg;
3576 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg;
3577 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg;
3578 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg;
3579 case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg;
do_SseReRg:
{
   HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
   HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
   HReg dst  = newVRegV(env);
   if (arg1isEReg) {
      addInstr(env, mk_vMOVsd_RR(arg2, dst));
      addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
   } else {
      addInstr(env, mk_vMOVsd_RR(arg1, dst));
      addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
   }
   return dst;
}
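/* arg1isEReg is set for the non-commutative pack/unpack cases above:
   there the IR's arg1 must end up as the E (second) operand of the
   underlying SSE instruction, so dst is initialised from arg2
   instead. */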
3594 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
3595 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
3596 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
3597 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
3598 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
3599 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
3600 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
3601 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
do_SseShift:
{
   HReg        greg = iselVecExpr(env, e->Iex.Binop.arg1);
   AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
   HReg        ereg = newVRegV(env);
   HReg        dst  = newVRegV(env);
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
   addInstr(env, AMD64Instr_Push(rmi));
   addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
   addInstr(env, mk_vMOVsd_RR(greg, dst));
   addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
   add_to_rsp(env, 16);
   return dst;
}
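/* The SSE shift instructions take their count from the low bits of
   an xmm register, so the scalar shift amount is pushed (zero-padded
   to 128 bits) and reloaded into ereg before being used as the count
   operand. */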
3619 } /* switch (e->Iex.Binop.op) */
3620 } /* if (e->tag == Iex_Binop) */
3622 if (e->tag == Iex_Mux0X) {
3623 HReg r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
3624 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX);
3625 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0);
3626 HReg dst = newVRegV(env);
3627 addInstr(env, mk_vMOVsd_RR(rX,dst));
3628 addInstr(env, AMD64Instr_Test64(0xFF, r8));
addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
return dst;
}
3634 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
ppIRExpr(e);
3637 vpanic("iselVecExpr_wrk");
3641 /*---------------------------------------------------------*/
3642 /*--- ISEL: Statements ---*/
3643 /*---------------------------------------------------------*/
3645 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3647 if (vex_traceflags & VEX_TRACE_VCODE) {
vex_printf("\n-- ");
ppIRStmt(stmt);
vex_printf("\n");
}
3653 switch (stmt->tag) {
/* --------- STORE --------- */
case Ist_Store: {
3657 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3658 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3659 IREndness end = stmt->Ist.Store.end;
if (tya != Ity_I64 || end != Iend_LE)
   goto stmt_fail;
3664 if (tyd == Ity_I64) {
3665 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3666 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
return;
3670 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
3671 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3672 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3673 addInstr(env, AMD64Instr_Store(
toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
r, am));
return;
3678 if (tyd == Ity_F64) {
3679 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3680 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
return;
3684 if (tyd == Ity_F32) {
3685 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3686 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
return;
3690 if (tyd == Ity_V128) {
3691 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3692 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
return;
/* --------- PUT --------- */
case Ist_Put: {
3701 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3702 if (ty == Ity_I64) {
/* We're going to write to memory, so compute the RHS into an
   AMD64RI. */
AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
addInstr(env,
         AMD64Instr_Alu64M(
            Aalu_MOV,
            ri,
            AMD64AMode_IR(stmt->Ist.Put.offset,
3715 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
3716 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3717 addInstr(env, AMD64Instr_Store(
3718 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
r,
AMD64AMode_IR(stmt->Ist.Put.offset,
              hregAMD64_RBP())));
return;
3724 if (ty == Ity_V128) {
3725 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
3726 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
return;
3731 if (ty == Ity_F32) {
3732 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3733 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
3734 set_SSE_rounding_default(env); /* paranoia */
addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
return;
3738 if (ty == Ity_F64) {
3739 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3740 AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
return;
/* --------- Indexed PUT --------- */
case Ist_PutI: {
   AMD64AMode* am
      = genGuestArrayOffset(
3752 env, stmt->Ist.PutI.descr,
3753 stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
3755 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
3756 if (ty == Ity_F64) {
3757 HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
return;
if (ty == Ity_I8) {
   HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
   addInstr(env, AMD64Instr_Store( 1, r, am ));
   return;
}
3766 if (ty == Ity_I64) {
3767 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.PutI.data);
addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
return;
/* --------- TMP --------- */
case Ist_WrTmp: {
3776 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3777 IRType ty = typeOfIRTemp(env->type_env, tmp);
3779 /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
3780 compute it into an AMode and then use LEA. This usually
3781 produces fewer instructions, often because (for memcheck
3782 created IR) we get t = address-expression, (t is later used
3783 twice) and so doing this naturally turns address-expression
3784 back into an AMD64 amode. */
if (ty == Ity_I64
    && stmt->Ist.WrTmp.data->tag == Iex_Binop
3787 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
3788 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
3789 HReg dst = lookupIRTemp(env, tmp);
3790 if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
3791 /* Hmm, iselIntExpr_AMode wimped out and just computed the
3792 value into a register. Just emit a normal reg-reg move
3793 so reg-alloc can coalesce it away in the usual way. */
3794 HReg src = am->Aam.IR.reg;
3795 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
addInstr(env, AMD64Instr_Lea64(am,dst));
return;
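/* For example (illustrative), memcheck-style IR such as
      t = Add64(t1, Shl64(t2, 3))
   can be selected as a single
      leaq 0(<t1>,<t2>,8), <dst>
   instead of a shift followed by an add. */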
3802 if (ty == Ity_I64 || ty == Ity_I32
3803 || ty == Ity_I16 || ty == Ity_I8) {
3804 AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
3805 HReg dst = lookupIRTemp(env, tmp);
addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
return;
3809 if (ty == Ity_I128) {
3810 HReg rHi, rLo, dstHi, dstLo;
3811 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
3812 lookupIRTemp128( &dstHi, &dstLo, env, tmp);
3813 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
return;
if (ty == Ity_I1) {
   AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
   HReg dst = lookupIRTemp(env, tmp);
   addInstr(env, AMD64Instr_Set64(cond, dst));
   return;
}
3823 if (ty == Ity_F64) {
3824 HReg dst = lookupIRTemp(env, tmp);
3825 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
addInstr(env, mk_vMOVsd_RR(src, dst));
return;
3829 if (ty == Ity_F32) {
3830 HReg dst = lookupIRTemp(env, tmp);
3831 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
addInstr(env, mk_vMOVsd_RR(src, dst));
return;
3835 if (ty == Ity_V128) {
3836 HReg dst = lookupIRTemp(env, tmp);
3837 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
addInstr(env, mk_vMOVsd_RR(src, dst));
return;
/* --------- Call to DIRTY helper --------- */
case Ist_Dirty: {
   IRType   retty;
   IRDirty* d = stmt->Ist.Dirty.details;
3848 Bool passBBP = False;
3850 if (d->nFxState == 0)
3851 vassert(!d->needsBBP);
3853 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
3855 /* Marshal args, do the call, clear stack. */
3856 doHelperCall( env, passBBP, d->guard, d->cee, d->args );
3858 /* Now figure out what to do with the returned value, if any. */
3859 if (d->tmp == IRTemp_INVALID)
/* No return value.  Nothing to do. */
return;
3863 retty = typeOfIRTemp(env->type_env, d->tmp);
3864 if (retty == Ity_I64 || retty == Ity_I32
3865 || retty == Ity_I16 || retty == Ity_I8) {
3866 /* The returned value is in %rax. Park it in the register
3867 associated with tmp. */
3868 HReg dst = lookupIRTemp(env, d->tmp);
addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
return;
/* --------- MEM FENCE --------- */
case Ist_MBE:
   switch (stmt->Ist.MBE.event) {
      case Imbe_Fence:
         addInstr(env, AMD64Instr_MFence());
         return;
      default:
         break;
   }
   break;
/* --------- ACAS --------- */
case Ist_CAS:
   if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3889 /* "normal" singleton CAS */
            IRCAS* cas = stmt->Ist.CAS.details;
            IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
            /* get: cas->expd into %rax, and cas->data into %rbx */
            AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
            HReg rData = iselIntExpr_R(env, cas->dataLo);
            HReg rExpd = iselIntExpr_R(env, cas->expdLo);
            HReg rOld  = lookupIRTemp(env, cas->oldLo);
            vassert(cas->expdHi == NULL);
            vassert(cas->dataHi == NULL);
            addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
            addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
            addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
            switch (ty) {
               case Ity_I64: sz = 8; break;
               case Ity_I32: sz = 4; break;
               case Ity_I16: sz = 2; break;
               case Ity_I8:  sz = 1; break;
               default: goto unhandled_cas;
            }
            addInstr(env, AMD64Instr_ACAS(am, sz));
            addInstr(env, AMD64Instr_CMov64(
                             Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
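            /* Illustrative sketch of what was just emitted, for the
               sz == 8 case (vreg names symbolic):
                  movq %vr_expd, %vr_old
                  movq %vr_expd, %rax
                  movq %vr_data, %rbx
                  lock; cmpxchgq %rbx, (addr)   -- ZF=1 iff it succeeded
                  cmovnzq %rax, %vr_old         -- on failure, %rax holds
                                                   the value seen in mem */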
            return;
         } else {
            /* double CAS */
            UChar  sz;
            IRCAS* cas = stmt->Ist.CAS.details;
            IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
            /* only 32-bit and 64-bit allowed in this case */
            /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
            /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
            AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
            HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
            HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
            HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
            HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
            HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
            HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
            switch (ty) {
               case Ity_I64:
                  if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
                     goto unhandled_cas; /* we'd have to generate
                                            cmpxchg16b, but the host
                                            doesn't support that */
                  sz = 8;
                  break;
               case Ity_I32:
                  sz = 4;
                  break;
               default:
                  goto unhandled_cas;
            }
            addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
            addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
            addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
            addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
            addInstr(env, AMD64Instr_DACAS(am, sz));
            addInstr(env,
                     AMD64Instr_CMov64(
                        Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
            addInstr(env,
                     AMD64Instr_CMov64(
                        Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
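            /* Illustrative sketch: for sz == 8 the DACAS above becomes
               "lock; cmpxchg16b (addr)" with %rdx:%rax holding the
               expected pair and %rcx:%rbx the replacement pair --
               hence the CX16 capability check earlier.  For sz == 4
               it is "lock; cmpxchg8b (addr)". */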
            return;
         }
         unhandled_cas:
         break;

      /* --------- INSTR MARK --------- */
      /* Doesn't generate any executable code ... */
      case Ist_IMark:
         return;

      /* --------- ABI HINT --------- */
      /* These have no meaning (denotation in the IR) and so we ignore
         them ... if any actually made it this far. */
      case Ist_AbiHint:
         return;

      /* --------- NO-OP --------- */
      case Ist_NoOp:
         return;

      /* --------- EXIT --------- */
      case Ist_Exit: {
         AMD64RI*      dst;
         AMD64CondCode cc;
         if (stmt->Ist.Exit.dst->tag != Ico_U64)
            vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
         dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
         cc  = iselCondCode(env,stmt->Ist.Exit.guard);
         addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
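         /* The Goto just emitted is a guarded side exit: if cc holds
            at run time, control leaves this translation with jump
            kind stmt->Ist.Exit.jk and the constant destination dst. */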
         return;
      }

      default: break;
   }

   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}

/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
{
   AMD64RI* ri;
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- goto {");
      ppIRJumpKind(jk);
      vex_printf("} ");
      ppIRExpr(next);
      vex_printf("\n");
   }
   ri = iselIntExpr_RI(env, next);
   addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri));
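   /* Unlike the guarded Goto emitted for Ist_Exit, Acc_ALWAYS makes
      this one an unconditional transfer to `next'. */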
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */

HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch      arch_host,
                                      VexArchInfo* archinfo_host,
                                      VexAbiInfo*  vbi/*UNUSED*/ )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;

   /* sanity ... */
   vassert(arch_host == VexArchAMD64);
   vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3
                                 |VEX_HWCAPS_AMD64_CX16)));

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->hwcaps = hwcaps_host;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:
         case Ity_I64:  hreg   = mkHReg(j++, HRcInt64, True); break;
         case Ity_I128: hreg   = mkHReg(j++, HRcInt64, True);
                        hregHI = mkHReg(j++, HRcInt64, True); break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;
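   /* Example: an Ity_I128 temp consumes two Int64 vregs -- low half
      in vregmap[i], high half in vregmapHI[i]; for all other types
      vregmapHI[i] remains INVALID_HREG. */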

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env,bb->stmts[i]);

   iselNext(env,bb->next,bb->jumpkind);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}

/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/