2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_isel.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
14 Copyright (C) 2010-2010 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
33 The GNU General Public License is contained in the file COPYING.
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
45 #include "host_arm_defs.h"
48 /*---------------------------------------------------------*/
49 /*--- ARMvfp control word stuff ---*/
50 /*---------------------------------------------------------*/
52 /* Vex-generated code expects to run with the FPU set as follows: all
53 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
55 this corresponds to a FPSCR value of zero.
57 fpscr should therefore be zero on entry to Vex-generated code, and
58 should be unchanged at exit. (Or at least the bottom 28 bits
62 #define DEFAULT_FPSCR 0
65 /*---------------------------------------------------------*/
67 /*---------------------------------------------------------*/
69 /* This carries around:
71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72 might encounter. This is computed before insn selection starts,
75 - A mapping from IRTemp to HReg. This tells the insn selector
76 which virtual register(s) are associated with each IRTemp
77 temporary. This is computed before insn selection starts, and
78 does not change. We expect this mapping to map precisely the
79 same set of IRTemps as the type mapping does.
81 - vregmap holds the primary register for the IRTemp.
82 - vregmapHI is only used for 64-bit integer-typed
83 IRTemps. It holds the identity of a second
84 32-bit virtual HReg, which holds the high half
87 - The name of the vreg in which we stash a copy of the link reg, so
88 helper functions don't kill it.
90 - The code array, that is, the insns selected so far.
92 - A counter, for generating new virtual registers.
94 - The host hardware capabilities word. This is set at the start
97 Note, this is all host-independent. */
/* Return the 32-bit virtual register previously assigned to IRTemp
   'tmp'.  The IRTemp->HReg mapping is fixed before insn selection
   starts (see the ISelEnv comment above).  Body braces elided in
   this extraction. */
117 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
120 vassert(tmp < env->n_vregmap);
121 return env->vregmap[tmp];
/* Return, via *vrHI/*vrLO, the pair of 32-bit vregs holding a 64-bit
   IRTemp: the primary map (vregmap) carries the low half and
   vregmapHI the high half, as described in the ISelEnv comment. */
124 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
127 vassert(tmp < env->n_vregmap);
128 vassert(env->vregmapHI[tmp] != INVALID_HREG);
129 *vrLO = env->vregmap[tmp];
130 *vrHI = env->vregmapHI[tmp];
/* Append 'instr' to the code list being built up in 'env'.  When
   VEX_TRACE_VCODE tracing is on, the instruction is also printed;
   NEON-tagged insns (the ARMin_N* tags tested below) get separate
   handling in lines elided from this view. */
133 static void addInstr ( ISelEnv* env, ARMInstr* instr )
135 addHInstr(env->code, instr);
136 if (vex_traceflags & VEX_TRACE_VCODE) {
141 if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
142 || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
143 || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
/* Allocate a fresh virtual register of 32-bit integer class
   (HRcInt32), numbered from env->vreg_ctr.  The counter bump and
   return are in lines elided from this view. */
150 static HReg newVRegI ( ISelEnv* env )
152 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
/* Allocate a fresh virtual register of 64-bit float class (HRcFlt64). */
157 static HReg newVRegD ( ISelEnv* env )
159 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
/* Allocate a fresh virtual register of 32-bit float class (HRcFlt32). */
164 static HReg newVRegF ( ISelEnv* env )
166 HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
/* Allocate a fresh virtual register of 128-bit vector class (HRcVec128). */
171 static HReg newVRegV ( ISelEnv* env )
173 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
178 /* These are duplicated in guest_arm_toIR.c */
/* Convenience constructor for a unary IRExpr node. */
179 static IRExpr* unop ( IROp op, IRExpr* a )
181 return IRExpr_Unop(op, a);
/* Convenience constructor for a binary IRExpr node. */
184 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
186 return IRExpr_Binop(op, a1, a2);
/* Convenience constructor for a binder node, used in pattern matching. */
189 static IRExpr* bind ( Int binder )
191 return IRExpr_Binder(binder);
195 /*---------------------------------------------------------*/
196 /*--- ISEL: Forward declarations ---*/
197 /*---------------------------------------------------------*/
199 /* These are organised as iselXXX and iselXXX_wrk pairs. The
200 iselXXX_wrk do the real work, but are not to be called directly.
201 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
202 checks that all returned registers are virtual. You should not
203 call the _wrk version directly.
205 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
206 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
208 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
209 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
211 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
212 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
214 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
215 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
217 static ARMRI84* iselIntExpr_RI84_wrk
218 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
219 static ARMRI84* iselIntExpr_RI84
220 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
222 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
223 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
225 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
226 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
228 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
229 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
231 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
232 ISelEnv* env, IRExpr* e );
233 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
234 ISelEnv* env, IRExpr* e );
236 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
237 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
239 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
240 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
242 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
243 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
245 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e );
246 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e );
248 /*---------------------------------------------------------*/
249 /*--- ISEL: Misc helpers ---*/
250 /*---------------------------------------------------------*/
/* Rotate the 32-bit word 'x' right by 'sh' bit positions.
   NOTE(review): for sh == 0 the expression below would evaluate
   x << 32, which is undefined behaviour in C; the lines elided from
   this view (254-256) presumably special-case sh == 0 -- confirm
   against the full file.  Also, 'sh >= 0' is vacuous on an unsigned
   operand. */
252 static UInt ROR32 ( UInt x, UInt sh ) {
253 vassert(sh >= 0 && sh < 32);
257 return (x << (32-sh)) | (x >> sh);
260 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
261 form, and if so return the components. */
/* I.e. test whether 'u' is representable as an 8-bit value rotated by
   an even amount, the standard ARM modified-immediate encoding; the
   loop below presumably tries each of the 16 rotations.  Loop body and
   the success/failure returns are elided from this view. */
262 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
265 for (i = 0; i < 16; i++) {
266 if (0 == (u & 0xFFFFFF00)) {
277 /* Make a int reg-reg move. */
/* Both src and dst must be 32-bit integer class; emitted as a MOV
   with a register-form RI84 operand. */
278 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
280 vassert(hregClass(src) == HRcInt32);
281 vassert(hregClass(dst) == HRcInt32);
282 return ARMInstr_Mov(dst, ARMRI84_R(src));
285 /* Set the VFP unit's rounding mode to default (round to nearest). */
/* Loads DEFAULT_FPSCR (zero -- see comment at the #define above) into
   a temporary and writes it to FPSCR, restoring the state Vex-generated
   code expects. */
286 static void set_VFP_rounding_default ( ISelEnv* env )
288 /* mov rTmp, #DEFAULT_FPSCR
291 HReg rTmp = newVRegI(env);
292 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
293 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
296 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
297 expression denoting a value in the range 0 .. 3, indicating a round
298 mode encoded as per type IRRoundingMode. Set FPSCR to have the
302 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
304 /* This isn't simple, because 'mode' carries an IR rounding
305 encoding, and we need to translate that to an ARMvfp one:
307 00 to nearest (the default)
316 Easy enough to do; just swap the two bits.
318 HReg irrm = iselIntExpr_R(env, mode);
319 HReg tL = newVRegI(env);
320 HReg tR = newVRegI(env);
321 HReg t3 = newVRegI(env);
/* tL = (irrm << 1) & 2,  tR = (irrm >> 1) & 1 : swaps irrm's bottom
   two bits; the OR merges them and the final SHL #22 places the
   2-bit result at FPSCR bits 23:22 before the FPSCR write. */
323 tR = irrm >> 1; if we're lucky, these will issue together
330 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
331 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
332 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
333 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
334 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
335 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
336 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
340 /*---------------------------------------------------------*/
341 /*--- ISEL: Function call helpers ---*/
342 /*---------------------------------------------------------*/
344 /* Used only in doHelperCall. See big comment in doHelperCall re
345 handling of register-parameter args. This function figures out
346 whether evaluation of an expression might require use of a fixed
347 register. If in doubt return True (safe but suboptimal).
/* Only the switch cases for the three "safe" tags are visible here;
   their return and the conservative default are in elided lines --
   confirm against the full file. */
350 Bool mightRequireFixedRegs ( IRExpr* e )
353 case Iex_RdTmp: case Iex_Const: case Iex_Get:
361 /* Do a complete function call. guard is a Ity_Bit expression
362 indicating whether or not the call happens. If guard==NULL, the
363 call is unconditional. Returns True iff it managed to handle this
364 combination of arg/return types, else returns False. */
/* Overview of the flow visible below: set up the four AAPCS argument
   registers r0-r3, decide fast vs slow scheme, marshal I32/I64 args
   (64-bit args need an even-numbered register pair), compute the
   guard condition last, then emit ARMInstr_Call with the number of
   arg regs in use.  Several statements (closing braces, scheme
   dispatch, padding logic) are elided from this extraction. */
367 Bool doHelperCall ( ISelEnv* env,
369 IRExpr* guard, IRCallee* cee, IRExpr** args )
372 HReg argregs[ARM_N_ARGREGS];
373 HReg tmpregs[ARM_N_ARGREGS];
375 Int n_args, i, nextArgReg;
378 vassert(ARM_N_ARGREGS == 4);
380 /* Marshal args for a call and do the call.
382 If passBBP is True, r8 (the baseblock pointer) is to be passed
385 This function only deals with a tiny set of possibilities, which
386 cover all helpers in practice. The restrictions are that only
387 arguments in registers are supported, hence only ARM_N_REGPARMS
388 x 32 integer bits in total can be passed. In fact the only
389 supported arg types are I32 and I64.
391 Generating code which is both efficient and correct when
392 parameters are to be passed in registers is difficult, for the
393 reasons elaborated in detail in comments attached to
394 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
395 of the method described in those comments.
397 The problem is split into two cases: the fast scheme and the
398 slow scheme. In the fast scheme, arguments are computed
399 directly into the target (real) registers. This is only safe
400 when we can be sure that computation of each argument will not
401 trash any real registers set by computation of any other
404 In the slow scheme, all args are first computed into vregs, and
405 once they are all done, they are moved to the relevant real
406 regs. This always gives correct code, but it also gives a bunch
407 of vreg-to-rreg moves which are usually redundant but are hard
408 for the register allocator to get rid of.
410 To decide which scheme to use, all argument expressions are
411 first examined. If they are all so simple that it is clear they
412 will be evaluated without use of any fixed registers, use the
413 fast scheme, else use the slow scheme. Note also that only
414 unconditional calls may use the fast scheme, since having to
415 compute a condition expression could itself trash real
418 Note this requires being able to examine an expression and
419 determine whether or not evaluation of it might use a fixed
420 register. That requires knowledge of how the rest of this insn
421 selector works. Currently just the following 3 are regarded as
422 safe -- hopefully they cover the majority of arguments in
423 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
426 /* Note that the cee->regparms field is meaningless on ARM hosts
427 (since there is only one calling convention) and so we always
431 for (i = 0; args[i]; i++)
434 argregs[0] = hregARM_R0();
435 argregs[1] = hregARM_R1();
436 argregs[2] = hregARM_R2();
437 argregs[3] = hregARM_R3();
439 tmpregs[0] = tmpregs[1] = tmpregs[2] =
440 tmpregs[3] = INVALID_HREG;
442 /* First decide which scheme (slow or fast) is to be used. First
443 assume the fast scheme, and select slow if any contraindications
449 if (guard->tag == Iex_Const
450 && guard->Iex.Const.con->tag == Ico_U1
451 && guard->Iex.Const.con->Ico.U1 == True) {
454 /* Not manifestly unconditional -- be conservative. */
460 for (i = 0; i < n_args; i++) {
461 if (mightRequireFixedRegs(args[i])) {
467 /* At this point the scheme to use has been established. Generate
468 code to get the arg values into the argument rregs. If we run
469 out of arg regs, give up. */
476 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
481 for (i = 0; i < n_args; i++) {
482 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
483 if (nextArgReg >= ARM_N_ARGREGS)
484 return False; /* out of argregs */
485 if (aTy == Ity_I32) {
486 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
487 iselIntExpr_R(env, args[i]) ));
490 else if (aTy == Ity_I64) {
491 /* 64-bit args must be passed in an a reg-pair of the form
492 n:n+1, where n is even. Hence either r0:r1 or r2:r3.
493 On a little-endian host, the less significant word is
494 passed in the lower-numbered register. */
495 if (nextArgReg & 1) {
496 if (nextArgReg >= ARM_N_ARGREGS)
497 return False; /* out of argregs */
/* 0xAA below fills the skipped odd padding register with a
   recognisable junk value. */
498 addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
501 if (nextArgReg >= ARM_N_ARGREGS)
502 return False; /* out of argregs */
504 iselInt64Expr(&raHi, &raLo, env, args[i]);
505 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
507 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
511 return False; /* unhandled arg type */
514 /* Fast scheme only applies for unconditional calls. Hence: */
519 /* SLOW SCHEME; move via temporaries */
523 /* This is pretty stupid; better to move directly to r0
524 after the rest of the args are done. */
525 tmpregs[nextArgReg] = newVRegI(env);
526 addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
531 for (i = 0; i < n_args; i++) {
532 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
533 if (nextArgReg >= ARM_N_ARGREGS)
534 return False; /* out of argregs */
535 if (aTy == Ity_I32) {
536 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
539 else if (aTy == Ity_I64) {
540 /* Same comment applies as in the Fast-scheme case. */
543 if (nextArgReg + 1 >= ARM_N_ARGREGS)
544 return False; /* out of argregs */
546 iselInt64Expr(&raHi, &raLo, env, args[i]);
547 tmpregs[nextArgReg] = raLo;
549 tmpregs[nextArgReg] = raHi;
554 /* Now we can compute the condition. We can't do it earlier
555 because the argument computations could trash the condition
556 codes. Be a bit clever to handle the common case where the
560 if (guard->tag == Iex_Const
561 && guard->Iex.Const.con->tag == Ico_U1
562 && guard->Iex.Const.con->Ico.U1 == True) {
563 /* unconditional -- do nothing */
565 cc = iselCondCode( env, guard );
569 /* Move the args to their final destinations. */
570 for (i = 0; i < nextArgReg; i++) {
571 if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
572 addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
575 /* None of these insns, including any spill code that might
576 be generated, may alter the condition codes. */
577 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
582 /* Should be assured by checks above */
583 vassert(nextArgReg <= ARM_N_ARGREGS);
585 target = (HWord)Ptr_to_ULong(cee->addr);
587 /* nextArgReg doles out argument registers. Since these are
588 assigned in the order r0, r1, r2, r3, its numeric value at this
589 point, which must be between 0 and 4 inclusive, is going to be
590 equal to the number of arg regs in use for the call. Hence bake
591 that number into the call (we'll need to know it when doing
592 register allocation, to know what regs the call reads.)
594 There is a bit of a twist -- harmless but worth recording.
595 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
596 the first arg in r0 and the second in r3:r2, but r1 isn't used.
597 We nevertheless have nextArgReg==4 and bake that into the call
598 instruction. This will mean the register allocator wil believe
599 this insn reads r1 when in fact it doesn't. But that's
600 harmless; it just artificially extends the live range of r1
601 unnecessarily. The best fix would be to put into the
602 instruction, a bitmask indicating which of r0/1/2/3 carry live
603 values. But that's too much hassle. */
605 /* Finally, the call itself. */
606 addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
608 return True; /* success */
612 /*---------------------------------------------------------*/
613 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
614 /*---------------------------------------------------------*/
616 /* Select insns for an integer-typed expression, and add them to the
617 code list. Return a reg holding the result. This reg will be a
618 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
619 want to modify it, ask for a new vreg, copy it in there, and modify
620 the copy. The register allocator will do its best to map both
621 vregs to the same real register, so the copies will often disappear
624 This should handle expressions of 32, 16 and 8-bit type. All
625 results are returned in a 32-bit register. For 16- and 8-bit
626 expressions, the upper 16/24 bits are arbitrary, so you should mask
627 or sign extend partial values if necessary.
630 /* --------------------- AMode1 --------------------- */
632 /* Return an AMode1 which computes the value of the specified
633 expression, possibly also adding insns to the code list as a
634 result. The expression may only be a 32-bit one.
/* Sanity-check an AMode1: for the reg+imm (RI) form, the base must be
   a 32-bit int reg (virtual, or the real r8 baseblock pointer) and the
   displacement within +/-4095; for the reg+reg-shifted (RRS) form,
   both regs virtual int32 and shift in 0..3.  The switch/case lines
   are elided from this view. */
637 static Bool sane_AMode1 ( ARMAMode1* am )
642 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
643 && (hregIsVirtual(am->ARMam1.RI.reg)
644 || am->ARMam1.RI.reg == hregARM_R8())
645 && am->ARMam1.RI.simm13 >= -4095
646 && am->ARMam1.RI.simm13 <= 4095 );
649 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
650 && hregIsVirtual(am->ARMam1.RRS.base)
651 && hregClass(am->ARMam1.RRS.index) == HRcInt32
652 && hregIsVirtual(am->ARMam1.RRS.index)
653 && am->ARMam1.RRS.shift >= 0
654 && am->ARMam1.RRS.shift <= 3 );
656 vpanic("sane_AMode: unknown ARM AMode1 tag");
/* Public wrapper: delegate to the _wrk version, then sanity-check the
   result (see the iselXXX/iselXXX_wrk convention comment above). */
660 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
662 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
663 vassert(sane_AMode1(am));
/* Worker: build an AMode1 for 32-bit expression 'e'.  Matches
   Add32/Sub32 of a constant in the +/-4095 simm13 range (Sub32
   negates the displacement in an elided line); otherwise evaluates
   'e' into a register with zero displacement. */
667 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
669 IRType ty = typeOfIRExpr(env->type_env,e);
670 vassert(ty == Ity_I32);
672 /* FIXME: add RRS matching */
674 /* {Add32,Sub32}(expr,simm13) */
675 if (e->tag == Iex_Binop
676 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
677 && e->Iex.Binop.arg2->tag == Iex_Const
678 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
679 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
680 if (simm >= -4095 && simm <= 4095) {
682 if (e->Iex.Binop.op == Iop_Sub32)
684 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
685 return ARMAMode1_RI(reg, simm);
689 /* Doesn't match anything in particular. Generate it into
690 a register and use that. */
692 HReg reg = iselIntExpr_R(env, e);
693 return ARMAMode1_RI(reg, 0);
699 /* --------------------- AMode2 --------------------- */
701 /* Return an AMode2 which computes the value of the specified
702 expression, possibly also adding insns to the code list as a
703 result. The expression may only be a 32-bit one.
/* Sanity-check an AMode2 (used for 16-bit loads/stores): RI form needs
   a virtual int32 base and a displacement within +/-255 (simm9); RR
   form needs virtual int32 base and index.  Switch/case lines elided. */
706 static Bool sane_AMode2 ( ARMAMode2* am )
711 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
712 && hregIsVirtual(am->ARMam2.RI.reg)
713 && am->ARMam2.RI.simm9 >= -255
714 && am->ARMam2.RI.simm9 <= 255 );
717 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
718 && hregIsVirtual(am->ARMam2.RR.base)
719 && hregClass(am->ARMam2.RR.index) == HRcInt32
720 && hregIsVirtual(am->ARMam2.RR.index) );
722 vpanic("sane_AMode: unknown ARM AMode2 tag");
/* Public wrapper: delegate to the _wrk version, then sanity-check. */
726 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
728 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
729 vassert(sane_AMode2(am));
/* Worker: build an AMode2 for 32-bit expression 'e'.  Same shape as
   the AMode1 worker but with the narrower +/-255 displacement range;
   Sub32 negation of the displacement is in an elided line. */
733 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
735 IRType ty = typeOfIRExpr(env->type_env,e);
736 vassert(ty == Ity_I32);
738 /* FIXME: add RR matching */
740 /* {Add32,Sub32}(expr,simm8) */
741 if (e->tag == Iex_Binop
742 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
743 && e->Iex.Binop.arg2->tag == Iex_Const
744 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
745 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
746 if (simm >= -255 && simm <= 255) {
748 if (e->Iex.Binop.op == Iop_Sub32)
750 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
751 return ARMAMode2_RI(reg, simm);
755 /* Doesn't match anything in particular. Generate it into
756 a register and use that. */
758 HReg reg = iselIntExpr_R(env, e);
759 return ARMAMode2_RI(reg, 0);
765 /* --------------------- AModeV --------------------- */
767 /* Return an AModeV which computes the value of the specified
768 expression, possibly also adding insns to the code list as a
769 result. The expression may only be a 32-bit one.
/* Sanity-check a VFP addressing mode: virtual int32 base register and
   a word-aligned displacement in +/-1020. */
772 static Bool sane_AModeV ( ARMAModeV* am )
774 return toBool( hregClass(am->reg) == HRcInt32
775 && hregIsVirtual(am->reg)
776 && am->simm11 >= -1020 && am->simm11 <= 1020
777 && 0 == (am->simm11 & 3) );
/* Public wrapper: delegate to the _wrk version, then sanity-check. */
780 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
782 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
783 vassert(sane_AModeV(am));
/* Worker: build a VFP addressing mode for 32-bit expression 'e'.
   Matches Add32/Sub32 of a word-aligned constant in +/-1020; Sub32
   negation of the displacement is in an elided line.  Falls back to
   reg + 0. */
787 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
789 IRType ty = typeOfIRExpr(env->type_env,e);
790 vassert(ty == Ity_I32);
792 /* {Add32,Sub32}(expr, simm8 << 2) */
793 if (e->tag == Iex_Binop
794 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
795 && e->Iex.Binop.arg2->tag == Iex_Const
796 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
797 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
798 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
800 if (e->Iex.Binop.op == Iop_Sub32)
802 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
803 return mkARMAModeV(reg, simm);
807 /* Doesn't match anything in particular. Generate it into
808 a register and use that. */
810 HReg reg = iselIntExpr_R(env, e);
811 return mkARMAModeV(reg, 0);
816 /* -------------------- AModeN -------------------- */
/* Public wrapper for the NEON addressing mode; no sanity check here,
   unlike the other iselXXX wrappers. */
818 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
820 return iselIntExpr_AModeN_wrk(env, e);
/* Worker: NEON amodes are plain-register only; evaluate 'e' into a
   register and wrap it. */
823 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
825 HReg reg = iselIntExpr_R(env, e);
826 return mkARMAModeN_R(reg);
830 /* --------------------- RI84 --------------------- */
832 /* Select instructions to generate 'e' into a RI84. If mayInv is
833 true, then the caller will also accept an I84 form that denotes
834 'not e'. In this case didInv may not be NULL, and *didInv is set
835 to True. This complication is so as to allow generation of an RI84
836 which is suitable for use in either an AND or BIC instruction,
837 without knowing (before this call) which one.
/* Public wrapper: delegate to the _wrk version, then sanity-check that
   a register-form result is a virtual int32 reg.  The switch over the
   RI84 tag (and the immediate-form case) is in elided lines. */
839 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
840 ISelEnv* env, IRExpr* e )
844 vassert(didInv != NULL);
845 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
846 /* sanity checks ... */
851 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
852 vassert(hregIsVirtual(ri->ARMri84.R.reg));
855 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
859 /* DO NOT CALL THIS DIRECTLY ! */
/* Worker: produce an 8x4 immediate form when the constant (or, if
   mayInv, its bitwise complement) fits; otherwise evaluate into a
   register.  The *didInv = True assignment for the inverted case is
   presumably in the elided lines 881-882 -- confirm against the full
   file. */
860 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
861 ISelEnv* env, IRExpr* e )
863 IRType ty = typeOfIRExpr(env->type_env,e);
864 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
866 if (didInv) *didInv = False;
868 /* special case: immediate */
869 if (e->tag == Iex_Const) {
870 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
871 switch (e->Iex.Const.con->tag) {
872 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
873 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
874 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
875 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
877 if (fitsIn8x4(&u8, &u4, u)) {
878 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
880 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
883 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
885 /* else fail, fall through to default case */
888 /* default case: calculate into a register and return that */
890 HReg r = iselIntExpr_R ( env, e );
896 /* --------------------- RI5 --------------------- */
898 /* Select instructions to generate 'e' into a RI5. */
/* Public wrapper: delegate to the _wrk version, then sanity-check a
   register-form result.  The tag switch is in elided lines. */
900 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
902 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
903 /* sanity checks ... */
908 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
909 vassert(hregIsVirtual(ri->ARMri5.R.reg));
912 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
916 /* DO NOT CALL THIS DIRECTLY ! */
/* Worker: produce a 5-bit shift-amount immediate for constants in
   1..31, else evaluate into a register.
   NOTE(review): the Ico_U16 case below is unreachable given the
   vassert excludes Ity_I16 -- harmless dead code, mirrors the RI84
   worker. */
917 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
919 IRType ty = typeOfIRExpr(env->type_env,e);
920 vassert(ty == Ity_I32 || ty == Ity_I8);
922 /* special case: immediate */
923 if (e->tag == Iex_Const) {
924 UInt u; /* both invalid */
925 switch (e->Iex.Const.con->tag) {
926 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
927 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
928 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
929 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
931 if (u >= 1 && u <= 31) {
934 /* else fail, fall through to default case */
937 /* default case: calculate into a register and return that */
939 HReg r = iselIntExpr_R ( env, e );
945 /* ------------------- CondCode ------------------- */
947 /* Generate code to evaluated a bit-typed expression, returning the
948 condition code which would correspond when the expression would
949 notionally have returned 1. */
/* Public wrapper: delegate to the _wrk version and assert the result
   is never the reserved NV (never) condition. */
951 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
953 ARMCondCode cc = iselCondCode_wrk(env,e);
954 vassert(cc != ARMcc_NV);
/* Worker: emit code to evaluate the Ity_I1 expression 'e' into the
   condition flags, and return the ARM condition code that is true
   exactly when 'e' is 1.  Pattern-matches RdTmp, Not1, 32to1,
   CmpNEZ8/32/64 and the six 32-bit comparisons; the per-pattern
   'return' of the condition code for several cases, and the closing
   braces, are in lines elided from this extraction. */
958 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
961 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
964 if (e->tag == Iex_RdTmp) {
965 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
966 /* CmpOrTst doesn't modify rTmp; so this is OK. */
967 ARMRI84* one = ARMRI84_I84(1,0);
968 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
/* Not1: select the argument's condition and flip its low bit --
   ARM condition codes come in true/false pairs differing in bit 0. */
973 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
974 /* Generate code for the arg, and negate the test condition */
975 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
978 /* --- patterns rooted at: 32to1 --- */
980 if (e->tag == Iex_Unop
981 && e->Iex.Unop.op == Iop_32to1) {
982 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
983 ARMRI84* one = ARMRI84_I84(1,0);
984 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
988 /* --- patterns rooted at: CmpNEZ8 --- */
990 if (e->tag == Iex_Unop
991 && e->Iex.Unop.op == Iop_CmpNEZ8) {
992 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
993 ARMRI84* xFF = ARMRI84_I84(0xFF,0);
994 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
998 /* --- patterns rooted at: CmpNEZ32 --- */
1000 if (e->tag == Iex_Unop
1001 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1002 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1003 ARMRI84* zero = ARMRI84_I84(0,0);
1004 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1008 /* --- patterns rooted at: CmpNEZ64 --- */
/* 64-bit nonzero test: OR the two halves together and compare the
   result against zero. */
1010 if (e->tag == Iex_Unop
1011 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1013 HReg tmp = newVRegI(env);
1014 ARMRI84* zero = ARMRI84_I84(0,0);
1015 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1016 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1017 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1021 /* --- Cmp*32*(x,y) --- */
1022 if (e->tag == Iex_Binop
1023 && (e->Iex.Binop.op == Iop_CmpEQ32
1024 || e->Iex.Binop.op == Iop_CmpNE32
1025 || e->Iex.Binop.op == Iop_CmpLT32S
1026 || e->Iex.Binop.op == Iop_CmpLT32U
1027 || e->Iex.Binop.op == Iop_CmpLE32S
1028 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1029 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1030 ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1031 env, e->Iex.Binop.arg2);
1032 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1033 switch (e->Iex.Binop.op) {
1034 case Iop_CmpEQ32: return ARMcc_EQ;
1035 case Iop_CmpNE32: return ARMcc_NE;
1036 case Iop_CmpLT32S: return ARMcc_LT;
1037 case Iop_CmpLT32U: return ARMcc_LO;
1038 case Iop_CmpLE32S: return ARMcc_LE;
1039 case Iop_CmpLE32U: return ARMcc_LS;
1040 default: vpanic("iselCondCode(arm): CmpXX32");
1044 /* --- CasCmpEQ* --- */
1045 /* Ist_Cas has a dummy argument to compare with, so comparison is
1047 if (e->tag == Iex_Binop
1048 && (e->Iex.Binop.op == Iop_CasCmpEQ32
1049 || e->Iex.Binop.op == Iop_CasCmpEQ16
1050 || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1055 vpanic("iselCondCode");
1059 /* --------------------- Reg --------------------- */
/* Public wrapper: delegate to the _wrk version, then sanity-check
   that the returned reg is a virtual int32.  The ppIRExpr dump below
   is presumably debug-only output guarded by elided lines -- confirm
   against the full file. */
1061 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1063 HReg r = iselIntExpr_R_wrk(env, e);
1064 /* sanity checks ... */
1066 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1068 vassert(hregClass(r) == HRcInt32);
1069 vassert(hregIsVirtual(r));
1073 /* DO NOT CALL THIS DIRECTLY ! */
1074 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1076 IRType ty = typeOfIRExpr(env->type_env,e);
1077 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1078 // vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1082 /* --------- TEMP --------- */
1084 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1087 /* --------- LOAD --------- */
1089 HReg dst = newVRegI(env);
1091 if (e->Iex.Load.end != Iend_LE)
1094 if (ty == Ity_I32) {
1095 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1096 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
1099 if (ty == Ity_I16) {
1100 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1101 addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
1106 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1107 addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
1111 //zz if (ty == Ity_I16) {
1112 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1115 //zz if (ty == Ity_I8) {
1116 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1122 //zz /* --------- TERNARY OP --------- */
1123 //zz case Iex_Triop: {
1124 //zz /* C3210 flags following FPU partial remainder (fprem), both
1125 //zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1126 //zz if (e->Iex.Triop.op == Iop_PRemC3210F64
1127 //zz || e->Iex.Triop.op == Iop_PRem1C3210F64) {
1128 //zz HReg junk = newVRegF(env);
1129 //zz HReg dst = newVRegI(env);
1130 //zz HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
1131 //zz HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
1132 //zz /* XXXROUNDINGFIXME */
1133 //zz /* set roundingmode here */
1134 //zz addInstr(env, X86Instr_FpBinary(
1135 //zz e->Iex.Binop.op==Iop_PRemC3210F64
1136 //zz ? Xfp_PREM : Xfp_PREM1,
1139 //zz /* The previous pseudo-insn will have left the FPU's C3210
1140 //zz flags set correctly. So bag them. */
1141 //zz addInstr(env, X86Instr_FpStSW_AX());
1142 //zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1143 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1150 /* --------- BINARY OP --------- */
1153 ARMAluOp aop = 0; /* invalid */
1154 ARMShiftOp sop = 0; /* invalid */
1156 /* ADD/SUB/AND/OR/XOR */
1157 switch (e->Iex.Binop.op) {
1159 Bool didInv = False;
1160 HReg dst = newVRegI(env);
1161 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1162 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1163 env, e->Iex.Binop.arg2);
1164 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1168 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1169 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1170 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1171 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1173 HReg dst = newVRegI(env);
1174 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1175 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1176 env, e->Iex.Binop.arg2);
1177 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1184 switch (e->Iex.Binop.op) {
1185 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1186 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1187 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1189 HReg dst = newVRegI(env);
1190 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1191 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1192 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1193 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1200 if (e->Iex.Binop.op == Iop_Mul32) {
1201 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1202 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1203 HReg dst = newVRegI(env);
1204 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1205 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1206 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1207 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1211 /* Handle misc other ops. */
1213 if (e->Iex.Binop.op == Iop_Max32U) {
1214 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1215 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1216 HReg dst = newVRegI(env);
1217 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1219 addInstr(env, mk_iMOVds_RR(dst, argL));
1220 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1224 if (e->Iex.Binop.op == Iop_CmpF64) {
1225 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1226 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1227 HReg dst = newVRegI(env);
1228 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1229 FMSTAT, so we can examine the results directly. */
1230 addInstr(env, ARMInstr_VCmpD(dL, dR));
1231 /* Create in dst, the IRCmpF64Result encoded result. */
1232 addInstr(env, ARMInstr_Imm32(dst, 0));
1233 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1234 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1235 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1236 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1240 if (e->Iex.Binop.op == Iop_F64toI32S
1241 || e->Iex.Binop.op == Iop_F64toI32U) {
1242 /* Wretched uglyness all round, due to having to deal
1243 with rounding modes. Oh well. */
1244 /* FIXME: if arg1 is a constant indicating round-to-zero,
1245 then we could skip all this arsing around with FPSCR and
1246 simply emit FTO{S,U}IZD. */
1247 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1248 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1249 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1250 /* FTO{S,U}ID valF, valD */
1251 HReg valF = newVRegF(env);
1252 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1254 set_VFP_rounding_default(env);
1255 /* VMOV dst, valF */
1256 HReg dst = newVRegI(env);
1257 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1261 if (e->Iex.Binop.op == Iop_GetElem8x8
1262 || e->Iex.Binop.op == Iop_GetElem16x4
1263 || e->Iex.Binop.op == Iop_GetElem32x2) {
1264 HReg res = newVRegI(env);
1265 HReg arg = iselNeon64Expr(env, e->Iex.Triop.arg1);
1267 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1268 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1269 vpanic("ARM target supports GetElem with constant "
1270 "second argument only\n");
1272 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1273 switch (e->Iex.Binop.op) {
1274 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1275 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1276 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1277 default: vassert(0);
1279 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1280 mkARMNRS(ARMNRS_Reg, res, 0),
1281 mkARMNRS(ARMNRS_Scalar, arg, index),
1286 if (e->Iex.Binop.op == Iop_GetElem8x16
1287 || e->Iex.Binop.op == Iop_GetElem16x8
1288 || e->Iex.Binop.op == Iop_GetElem32x4) {
1289 HReg res = newVRegI(env);
1290 HReg arg = iselNeonExpr(env, e->Iex.Triop.arg1);
1292 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1293 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1294 vpanic("ARM target supports GetElem with constant "
1295 "second argument only\n");
1297 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1298 switch (e->Iex.Binop.op) {
1299 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1300 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1301 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1302 default: vassert(0);
1304 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1305 mkARMNRS(ARMNRS_Reg, res, 0),
1306 mkARMNRS(ARMNRS_Scalar, arg, index),
1311 /* All cases involving host-side helper calls. */
1313 switch (e->Iex.Binop.op) {
1315 fn = &h_generic_calc_Add16x2; break;
1317 fn = &h_generic_calc_Sub16x2; break;
1319 fn = &h_generic_calc_HAdd16Ux2; break;
1321 fn = &h_generic_calc_HAdd16Sx2; break;
1323 fn = &h_generic_calc_HSub16Ux2; break;
1325 fn = &h_generic_calc_HSub16Sx2; break;
1327 fn = &h_generic_calc_QAdd16Sx2; break;
1329 fn = &h_generic_calc_QSub16Sx2; break;
1331 fn = &h_generic_calc_Add8x4; break;
1333 fn = &h_generic_calc_Sub8x4; break;
1335 fn = &h_generic_calc_HAdd8Ux4; break;
1337 fn = &h_generic_calc_HAdd8Sx4; break;
1339 fn = &h_generic_calc_HSub8Ux4; break;
1341 fn = &h_generic_calc_HSub8Sx4; break;
1343 fn = &h_generic_calc_QAdd8Sx4; break;
1345 fn = &h_generic_calc_QAdd8Ux4; break;
1347 fn = &h_generic_calc_QSub8Sx4; break;
1349 fn = &h_generic_calc_QSub8Ux4; break;
1351 fn = &h_generic_calc_Sad8Ux4; break;
1357 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1358 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1359 HReg res = newVRegI(env);
1360 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1361 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1362 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
1363 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1370 /* --------- UNARY OP --------- */
1373 //zz /* 1Uto8(32to1(expr32)) */
1374 //zz if (e->Iex.Unop.op == Iop_1Uto8) {
1375 //zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1376 //zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1377 //zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1378 //zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1379 //zz IRExpr* expr32 = mi.bindee[0];
1380 //zz HReg dst = newVRegI(env);
1381 //zz HReg src = iselIntExpr_R(env, expr32);
1382 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1383 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1384 //zz X86RMI_Imm(1), dst));
1389 //zz /* 8Uto32(LDle(expr32)) */
1390 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1391 //zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1392 //zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1393 //zz unop(Iop_8Uto32,
1394 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1395 //zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1396 //zz HReg dst = newVRegI(env);
1397 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1398 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1403 //zz /* 8Sto32(LDle(expr32)) */
1404 //zz if (e->Iex.Unop.op == Iop_8Sto32) {
1405 //zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1406 //zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1407 //zz unop(Iop_8Sto32,
1408 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1409 //zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1410 //zz HReg dst = newVRegI(env);
1411 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1412 //zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1417 //zz /* 16Uto32(LDle(expr32)) */
1418 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1419 //zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1420 //zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1421 //zz unop(Iop_16Uto32,
1422 //zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1423 //zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1424 //zz HReg dst = newVRegI(env);
1425 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1426 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1431 //zz /* 8Uto32(GET:I8) */
1432 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1433 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1435 //zz X86AMode* amode;
1436 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1437 //zz dst = newVRegI(env);
1438 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1439 //zz hregX86_EBP());
1440 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1445 //zz /* 16to32(GET:I16) */
1446 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1447 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1449 //zz X86AMode* amode;
1450 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1451 //zz dst = newVRegI(env);
1452 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1453 //zz hregX86_EBP());
1454 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1459 switch (e->Iex.Unop.op) {
1461 HReg dst = newVRegI(env);
1462 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1463 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1464 dst, src, ARMRI84_I84(0xFF,0)));
1467 //zz case Iop_8Uto16:
1468 //zz case Iop_8Uto32:
1469 //zz case Iop_16Uto32: {
1470 //zz HReg dst = newVRegI(env);
1471 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1472 //zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1473 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1474 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1475 //zz X86RMI_Imm(mask), dst));
1478 //zz case Iop_8Sto16:
1479 //zz case Iop_8Sto32:
1481 HReg dst = newVRegI(env);
1482 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1483 ARMRI5* amt = ARMRI5_I5(16);
1484 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1485 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1490 HReg dst = newVRegI(env);
1491 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1492 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1493 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1494 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1498 //zz case Iop_Not16:
1500 HReg dst = newVRegI(env);
1501 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1502 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1505 case Iop_64HIto32: {
1507 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1508 return rHi; /* and abandon rLo .. poor wee thing :-) */
1512 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1513 return rLo; /* similar stupid comment to the above ... */
1517 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
1518 HReg tHi = newVRegI(env);
1519 HReg tLo = newVRegI(env);
1520 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1521 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1525 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1529 //zz case Iop_16HIto8:
1530 //zz case Iop_32HIto16: {
1531 //zz HReg dst = newVRegI(env);
1532 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1533 //zz Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1534 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1535 //zz addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1540 HReg dst = newVRegI(env);
1541 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1542 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1543 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1548 HReg dst = newVRegI(env);
1549 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1550 ARMRI5* amt = ARMRI5_I5(31);
1551 /* This is really rough. We could do much better here;
1552 perhaps mvn{cond} dst, #0 as the second insn?
1553 (same applies to 1Sto64) */
1554 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1555 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1556 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1557 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1562 //zz case Iop_1Sto8:
1563 //zz case Iop_1Sto16:
1564 //zz case Iop_1Sto32: {
1565 //zz /* could do better than this, but for now ... */
1566 //zz HReg dst = newVRegI(env);
1567 //zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1568 //zz addInstr(env, X86Instr_Set32(cond,dst));
1569 //zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1570 //zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1573 //zz case Iop_Ctz32: {
1574 //zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1575 //zz HReg dst = newVRegI(env);
1576 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1577 //zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1581 /* Count leading zeroes; easy on ARM. */
1582 HReg dst = newVRegI(env);
1583 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1584 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1588 case Iop_CmpwNEZ32: {
1589 HReg dst = newVRegI(env);
1590 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1591 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1592 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1593 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1598 HReg dst = newVRegI(env);
1599 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1600 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1601 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1605 //zz case Iop_V128to32: {
1606 //zz HReg dst = newVRegI(env);
1607 //zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1608 //zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1609 //zz sub_from_esp(env, 16);
1610 //zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1611 //zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1612 //zz add_to_esp(env, 16);
1616 case Iop_ReinterpF32asI32: {
1617 HReg dst = newVRegI(env);
1618 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1619 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1624 //zz case Iop_16to8:
1627 /* These are no-ops. */
1628 return iselIntExpr_R(env, e->Iex.Unop.arg);
1634 /* All Unop cases involving host-side helper calls. */
1636 switch (e->Iex.Unop.op) {
1637 case Iop_CmpNEZ16x2:
1638 fn = &h_generic_calc_CmpNEZ16x2; break;
1640 fn = &h_generic_calc_CmpNEZ8x4; break;
1646 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1647 HReg res = newVRegI(env);
1648 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1649 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
1650 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1657 /* --------- GET --------- */
1660 && 0 == (e->Iex.Get.offset & 3)
1661 && e->Iex.Get.offset < 4096-4) {
1662 HReg dst = newVRegI(env);
1663 addInstr(env, ARMInstr_LdSt32(
1666 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1669 //zz if (ty == Ity_I8 || ty == Ity_I16) {
1670 //zz HReg dst = newVRegI(env);
1671 //zz addInstr(env, X86Instr_LoadEX(
1672 //zz toUChar(ty==Ity_I8 ? 1 : 2),
1674 //zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1681 //zz case Iex_GetI: {
1683 //zz = genGuestArrayOffset(
1684 //zz env, e->Iex.GetI.descr,
1685 //zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1686 //zz HReg dst = newVRegI(env);
1687 //zz if (ty == Ity_I8) {
1688 //zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1691 //zz if (ty == Ity_I32) {
1692 //zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1698 /* --------- CCALL --------- */
1700 HReg dst = newVRegI(env);
1701 vassert(ty == e->Iex.CCall.retty);
1703 /* be very restrictive for now. Only 32/64-bit ints allowed
1704 for args, and 32 bits for return type. */
1705 if (e->Iex.CCall.retty != Ity_I32)
1708 /* Marshal args, do the call, clear stack. */
1709 Bool ok = doHelperCall( env, False,
1710 NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1712 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1715 /* else fall through; will hit the irreducible: label */
1718 /* --------- LITERAL --------- */
1722 HReg dst = newVRegI(env);
1723 switch (e->Iex.Const.con->tag) {
1724 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1725 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1726 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1727 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
1729 addInstr(env, ARMInstr_Imm32(dst, u));
1733 /* --------- MULTIPLEX --------- */
1735 IRExpr* cond = e->Iex.Mux0X.cond;
1737 /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1739 && cond->tag == Iex_Unop
1740 && cond->Iex.Unop.op == Iop_32to8
1741 && cond->Iex.Unop.arg->tag == Iex_Unop
1742 && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1744 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1745 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1746 HReg dst = newVRegI(env);
1747 addInstr(env, mk_iMOVds_RR(dst, rX));
1748 cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1749 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1753 /* Mux0X(cond, expr0, exprX) (general case) */
1754 if (ty == Ity_I32) {
1756 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1757 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1758 HReg dst = newVRegI(env);
1759 addInstr(env, mk_iMOVds_RR(dst, rX));
1760 r8 = iselIntExpr_R(env, cond);
1761 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1762 ARMRI84_I84(0xFF,0)));
1763 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1771 } /* switch (e->tag) */
1773 /* We get here if no pattern matched. */
1776 vpanic("iselIntExpr_R: cannot reduce tree");
1780 /* -------------------- 64-bit -------------------- */
1782 /* Compute a 64-bit value into a register pair, which is returned as
1783 the first two parameters. As with iselIntExpr_R, these may be
1784 either real or virtual regs; in any case they must not be changed
1785 by subsequent code emitted by the caller. */
1787 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
/* Public entry point: compute a 64-bit value into a register pair
   (*rHi = high 32 bits, *rLo = low 32 bits).  Delegates all the work
   to iselInt64Expr_wrk and then sanity-checks what came back. */
1789 iselInt64Expr_wrk(rHi, rLo, env, e);
/* Debug aid: dump the expression being selected. */
1791 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
/* Both halves must be virtual 32-bit integer registers -- the register
   allocator assigns the real registers later. */
1793 vassert(hregClass(*rHi) == HRcInt32);
1794 vassert(hregIsVirtual(*rHi));
1795 vassert(hregClass(*rLo) == HRcInt32);
1796 vassert(hregIsVirtual(*rLo));
1799 /* DO NOT CALL THIS DIRECTLY ! */
1800 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
/* Worker for iselInt64Expr.  Emits ARM instructions that leave the
   64-bit value of 'e' split across two fresh 32-bit virtual registers,
   returned via *rHi (bits 63:32) and *rLo (bits 31:0).  Falls through
   to a NEON-based catch-all, then vpanic, if no case matches. */
1803 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1805 /* 64-bit literal */
1806 if (e->tag == Iex_Const) {
1807 ULong w64 = e->Iex.Const.con->Ico.U64;
1808 UInt wHi = toUInt(w64 >> 32);
1809 UInt wLo = toUInt(w64);
1810 HReg tHi = newVRegI(env);
1811 HReg tLo = newVRegI(env);
1812 vassert(e->Iex.Const.con->tag == Ico_U64);
/* Materialise each 32-bit half with a separate immediate load. */
1813 addInstr(env, ARMInstr_Imm32(tHi, wHi));
1814 addInstr(env, ARMInstr_Imm32(tLo, wLo));
1820 /* read 64-bit IRTemp */
1821 if (e->tag == Iex_RdTmp) {
/* With NEON available, 64-bit temps live in D registers; move the
   D register's two halves out into a pair of core registers. */
1822 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
1823 HReg tHi = newVRegI(env);
1824 HReg tLo = newVRegI(env);
1825 HReg tmp = iselNeon64Expr(env, e);
1826 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
/* No NEON: the temp is already tracked as a 32-bit register pair. */
1830 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
/* 64-bit little-endian load: two 32-bit loads at offsets 4 and 0. */
1836 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1838 vassert(e->Iex.Load.ty == Ity_I64);
1839 rA = iselIntExpr_R(env, e->Iex.Load.addr);
1840 tHi = newVRegI(env);
1841 tLo = newVRegI(env);
1842 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
1843 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
/* 64-bit GET from the guest state: R8 is the guest state pointer;
   again two 32-bit loads, low word at offset+0, high at offset+4. */
1850 if (e->tag == Iex_Get) {
1851 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1852 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1853 HReg tHi = newVRegI(env);
1854 HReg tLo = newVRegI(env);
1855 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
1856 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
1862 /* --------- BINARY ops --------- */
1863 if (e->tag == Iex_Binop) {
1864 switch (e->Iex.Binop.op) {
1866 /* 32 x 32 -> 64 multiply */
/* The MUL pseudo-insn uses fixed registers: operands are placed in
   R2/R3 and the 64-bit product is collected from R1:R0 (hi:lo). */
1869 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1870 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1871 HReg tHi = newVRegI(env);
1872 HReg tLo = newVRegI(env);
1873 ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
1874 ? ARMmul_SX : ARMmul_ZX;
1875 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1876 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1877 addInstr(env, ARMInstr_Mul(mop));
1878 addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1879 addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
/* 64-bit bitwise OR: operate on the two halves independently. */
1886 HReg xLo, xHi, yLo, yHi;
1887 HReg tHi = newVRegI(env);
1888 HReg tLo = newVRegI(env);
1889 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1890 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1891 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1892 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
/* 64-bit add: ADDS on the low halves sets the carry flag, which
   ADC then folds into the high-half addition. */
1899 HReg xLo, xHi, yLo, yHi;
1900 HReg tHi = newVRegI(env);
1901 HReg tLo = newVRegI(env);
1902 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1903 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1904 addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1905 addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
1911 /* 32HLto64(e1,e2) */
/* No code needed: the two 32-bit args are simply returned as the
   hi/lo halves of the pair. */
1912 case Iop_32HLto64: {
1913 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1914 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1923 /* --------- UNARY ops --------- */
1924 if (e->tag == Iex_Unop) {
1925 switch (e->Iex.Unop.op) {
1927 /* ReinterpF64asI64 */
/* Bit-for-bit move of a double out of a VFP D register into a
   core-register pair. */
1928 case Iop_ReinterpF64asI64: {
1929 HReg dstHi = newVRegI(env);
1930 HReg dstLo = newVRegI(env);
1931 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1932 addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
/* Left32/Left64: compute arg | -arg across the 64-bit pair.
   The 64-bit negation is a SUBS/SBC borrow chain against zero. */
1941 HReg tHi = newVRegI(env);
1942 HReg tLo = newVRegI(env);
1943 HReg zero = newVRegI(env);
1945 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1947 addInstr(env, ARMInstr_Imm32(zero, 0));
1948 /* tLo = 0 - yLo, and set carry */
1949 addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1950 tLo, zero, ARMRI84_R(yLo)));
1951 /* tHi = 0 - yHi - carry */
1952 addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1953 tHi, zero, ARMRI84_R(yHi)));
1954 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
1955 back in, so as to give the final result
1956 tHi:tLo = arg | -arg. */
1957 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1958 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
/* CmpwNEZ64: all-ones if the 64-bit arg is nonzero, else zero. */
1965 case Iop_CmpwNEZ64: {
1967 HReg tmp1 = newVRegI(env);
1968 HReg tmp2 = newVRegI(env);
1969 /* srcHi:srcLo = arg */
1970 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
1971 /* tmp1 = srcHi | srcLo */
1972 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1973 tmp1, srcHi, ARMRI84_R(srcLo)));
1974 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
1975 addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
1976 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1977 tmp2, tmp2, ARMRI84_R(tmp1)));
1978 addInstr(env, ARMInstr_Shift(ARMsh_SAR,
1979 tmp2, tmp2, ARMRI5_I5(31)));
/* 1Sto64: materialise 0 or 1 from the condition, then sign-extend
   bit 0 across the word with a SHL/SAR-by-31 pair. */
1986 HReg dst = newVRegI(env);
1987 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1988 ARMRI5* amt = ARMRI5_I5(31);
1989 /* This is really rough. We could do much better here;
1990 perhaps mvn{cond} dst, #0 as the second insn?
1991 (same applies to 1Sto32) */
1992 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1993 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1994 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1995 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2004 } /* if (e->tag == Iex_Unop) */
2006 /* --------- MULTIPLEX --------- */
2007 if (e->tag == Iex_Mux0X) {
1001 HReg r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2010 ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2011 vassert(ty8 == Ity_I8);
/* Start with dst = exprX, then conditionally overwrite with expr0
   when (cond & 0xFF) == 0, using one CMov per half. */
2012 iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2013 iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2014 dstHi = newVRegI(env);
2015 dstLo = newVRegI(env);
2016 addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2017 addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2018 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2019 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2020 ARMRI84_I84(0xFF,0)));
2021 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2022 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2028 /* It is convenient sometimes to call iselInt64Expr even when we
2029 have NEON support (e.g. in do_helper_call we need 64-bit
2030 arguments as 2 x 32 regs). */
2031 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
2032 HReg tHi = newVRegI(env);
2033 HReg tLo = newVRegI(env);
2034 HReg tmp = iselNeon64Expr(env, e);
2035 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
/* No pattern matched: give up. */
2042 vpanic("iselInt64Expr");
2046 /*---------------------------------------------------------*/
2047 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2048 /*---------------------------------------------------------*/
/* Public entry point: compute a 64-bit NEON value into a (virtual)
   D register.  Delegates to iselNeon64Expr_wrk, then checks the
   worker returned a virtual register of the expected class. */
2050 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2052 HReg r = iselNeon64Expr_wrk( env, e );
/* 64-bit NEON values are carried in the HRcFlt64 (D register) class. */
2053 vassert(hregClass(r) == HRcFlt64);
2054 vassert(hregIsVirtual(r));
2058 /* DO NOT CALL THIS DIRECTLY */
2059 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2061 IRType ty = typeOfIRExpr(env->type_env, e);
2064 vassert(ty == Ity_I64);
2066 if (e->tag == Iex_RdTmp) {
2067 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2070 if (e->tag == Iex_Const) {
2072 HReg res = newVRegD(env);
2073 iselInt64Expr(&rHi, &rLo, env, e);
2074 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2079 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2080 HReg res = newVRegD(env);
2081 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2082 vassert(ty == Ity_I64);
2083 addInstr(env, ARMInstr_NLdStD(True, res, am));
2088 if (e->tag == Iex_Get) {
2089 HReg addr = newVRegI(env);
2090 HReg res = newVRegD(env);
2091 vassert(ty == Ity_I64);
2092 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2093 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2097 /* --------- BINARY ops --------- */
2098 if (e->tag == Iex_Binop) {
2099 switch (e->Iex.Binop.op) {
2101 /* 32 x 32 -> 64 multiply */
2105 HReg res = newVRegD(env);
2106 iselInt64Expr(&rHi, &rLo, env, e);
2107 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2112 HReg res = newVRegD(env);
2113 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2114 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2115 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2116 res, argL, argR, 4, False));
2120 HReg res = newVRegD(env);
2121 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2122 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2123 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2124 res, argL, argR, 4, False));
2128 HReg res = newVRegD(env);
2129 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2130 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2131 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2132 res, argL, argR, 4, False));
2136 /* 32HLto64(e1,e2) */
2137 case Iop_32HLto64: {
2138 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2139 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2140 HReg res = newVRegD(env);
2141 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2149 HReg res = newVRegD(env);
2150 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2151 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2153 switch (e->Iex.Binop.op) {
2154 case Iop_Add8x8: size = 0; break;
2155 case Iop_Add16x4: size = 1; break;
2156 case Iop_Add32x2: size = 2; break;
2157 case Iop_Add64: size = 3; break;
2158 default: vassert(0);
2160 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2161 res, argL, argR, size, False));
2164 case Iop_Add32Fx2: {
2165 HReg res = newVRegD(env);
2166 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2167 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2169 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2170 res, argL, argR, size, False));
2173 case Iop_Recps32Fx2: {
2174 HReg res = newVRegD(env);
2175 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2176 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2178 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2179 res, argL, argR, size, False));
2182 case Iop_Rsqrts32Fx2: {
2183 HReg res = newVRegD(env);
2184 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2185 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2187 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2188 res, argL, argR, size, False));
2191 case Iop_InterleaveOddLanes8x8:
2192 case Iop_InterleaveOddLanes16x4:
2193 case Iop_InterleaveLO32x2:
2194 case Iop_InterleaveEvenLanes8x8:
2195 case Iop_InterleaveEvenLanes16x4:
2196 case Iop_InterleaveHI32x2: {
2197 HReg tmp = newVRegD(env);
2198 HReg res = newVRegD(env);
2199 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2200 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2203 switch (e->Iex.Binop.op) {
2204 case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2205 case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2206 case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2207 case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2208 case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2209 case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2210 default: vassert(0);
2213 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2214 tmp, argL, 4, False));
2215 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2216 res, argR, 4, False));
2217 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2218 res, tmp, size, False));
2220 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2221 tmp, argR, 4, False));
2222 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2223 res, argL, 4, False));
2224 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2225 tmp, res, size, False));
2229 case Iop_InterleaveHI8x8:
2230 case Iop_InterleaveHI16x4:
2231 case Iop_InterleaveLO8x8:
2232 case Iop_InterleaveLO16x4: {
2233 HReg tmp = newVRegD(env);
2234 HReg res = newVRegD(env);
2235 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2236 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2239 switch (e->Iex.Binop.op) {
2240 case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2241 case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2242 case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2243 case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2244 default: vassert(0);
2247 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2248 tmp, argL, 4, False));
2249 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2250 res, argR, 4, False));
2251 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2252 res, tmp, size, False));
2254 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2255 tmp, argR, 4, False));
2256 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2257 res, argL, 4, False));
2258 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2259 tmp, res, size, False));
2263 case Iop_CatOddLanes8x8:
2264 case Iop_CatOddLanes16x4:
2265 case Iop_CatEvenLanes8x8:
2266 case Iop_CatEvenLanes16x4: {
2267 HReg tmp = newVRegD(env);
2268 HReg res = newVRegD(env);
2269 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2270 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2273 switch (e->Iex.Binop.op) {
2274 case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2275 case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2276 case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2277 case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2278 default: vassert(0);
2281 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2282 tmp, argL, 4, False));
2283 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2284 res, argR, 4, False));
2285 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2286 res, tmp, size, False));
2288 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2289 tmp, argR, 4, False));
2290 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2291 res, argL, 4, False));
2292 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2293 tmp, res, size, False));
2300 case Iop_QAdd64Ux1: {
2301 HReg res = newVRegD(env);
2302 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2303 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2305 switch (e->Iex.Binop.op) {
2306 case Iop_QAdd8Ux8: size = 0; break;
2307 case Iop_QAdd16Ux4: size = 1; break;
2308 case Iop_QAdd32Ux2: size = 2; break;
2309 case Iop_QAdd64Ux1: size = 3; break;
2310 default: vassert(0);
2312 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2313 res, argL, argR, size, False));
2319 case Iop_QAdd64Sx1: {
2320 HReg res = newVRegD(env);
2321 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2322 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2324 switch (e->Iex.Binop.op) {
2325 case Iop_QAdd8Sx8: size = 0; break;
2326 case Iop_QAdd16Sx4: size = 1; break;
2327 case Iop_QAdd32Sx2: size = 2; break;
2328 case Iop_QAdd64Sx1: size = 3; break;
2329 default: vassert(0);
2331 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2332 res, argL, argR, size, False));
2339 HReg res = newVRegD(env);
2340 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2341 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2343 switch (e->Iex.Binop.op) {
2344 case Iop_Sub8x8: size = 0; break;
2345 case Iop_Sub16x4: size = 1; break;
2346 case Iop_Sub32x2: size = 2; break;
2347 case Iop_Sub64: size = 3; break;
2348 default: vassert(0);
2350 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2351 res, argL, argR, size, False));
2354 case Iop_Sub32Fx2: {
2355 HReg res = newVRegD(env);
2356 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2357 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2359 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2360 res, argL, argR, size, False));
2366 case Iop_QSub64Ux1: {
2367 HReg res = newVRegD(env);
2368 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2369 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2371 switch (e->Iex.Binop.op) {
2372 case Iop_QSub8Ux8: size = 0; break;
2373 case Iop_QSub16Ux4: size = 1; break;
2374 case Iop_QSub32Ux2: size = 2; break;
2375 case Iop_QSub64Ux1: size = 3; break;
2376 default: vassert(0);
2378 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2379 res, argL, argR, size, False));
2385 case Iop_QSub64Sx1: {
2386 HReg res = newVRegD(env);
2387 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2388 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2390 switch (e->Iex.Binop.op) {
2391 case Iop_QSub8Sx8: size = 0; break;
2392 case Iop_QSub16Sx4: size = 1; break;
2393 case Iop_QSub32Sx2: size = 2; break;
2394 case Iop_QSub64Sx1: size = 3; break;
2395 default: vassert(0);
2397 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2398 res, argL, argR, size, False));
2403 case Iop_Max32Ux2: {
2404 HReg res = newVRegD(env);
2405 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2406 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2408 switch (e->Iex.Binop.op) {
2409 case Iop_Max8Ux8: size = 0; break;
2410 case Iop_Max16Ux4: size = 1; break;
2411 case Iop_Max32Ux2: size = 2; break;
2412 default: vassert(0);
2414 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2415 res, argL, argR, size, False));
2420 case Iop_Max32Sx2: {
2421 HReg res = newVRegD(env);
2422 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2423 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2425 switch (e->Iex.Binop.op) {
2426 case Iop_Max8Sx8: size = 0; break;
2427 case Iop_Max16Sx4: size = 1; break;
2428 case Iop_Max32Sx2: size = 2; break;
2429 default: vassert(0);
2431 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2432 res, argL, argR, size, False));
2437 case Iop_Min32Ux2: {
2438 HReg res = newVRegD(env);
2439 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2440 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2442 switch (e->Iex.Binop.op) {
2443 case Iop_Min8Ux8: size = 0; break;
2444 case Iop_Min16Ux4: size = 1; break;
2445 case Iop_Min32Ux2: size = 2; break;
2446 default: vassert(0);
2448 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2449 res, argL, argR, size, False));
2454 case Iop_Min32Sx2: {
2455 HReg res = newVRegD(env);
2456 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2457 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2459 switch (e->Iex.Binop.op) {
2460 case Iop_Min8Sx8: size = 0; break;
2461 case Iop_Min16Sx4: size = 1; break;
2462 case Iop_Min32Sx2: size = 2; break;
2463 default: vassert(0);
2465 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2466 res, argL, argR, size, False));
2472 HReg res = newVRegD(env);
2473 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2474 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2475 HReg argR2 = newVRegD(env);
2476 HReg zero = newVRegD(env);
2478 switch (e->Iex.Binop.op) {
2479 case Iop_Sar8x8: size = 0; break;
2480 case Iop_Sar16x4: size = 1; break;
2481 case Iop_Sar32x2: size = 2; break;
2482 case Iop_Sar64: size = 3; break;
2483 default: vassert(0);
2485 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2486 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2487 argR2, zero, argR, size, False));
2488 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2489 res, argL, argR2, size, False));
2496 HReg res = newVRegD(env);
2497 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2498 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2500 switch (e->Iex.Binop.op) {
2501 case Iop_Sal8x8: size = 0; break;
2502 case Iop_Sal16x4: size = 1; break;
2503 case Iop_Sal32x2: size = 2; break;
2504 case Iop_Sal64x1: size = 3; break;
2505 default: vassert(0);
2507 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2508 res, argL, argR, size, False));
2514 HReg res = newVRegD(env);
2515 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2516 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2517 HReg argR2 = newVRegD(env);
2518 HReg zero = newVRegD(env);
2520 switch (e->Iex.Binop.op) {
2521 case Iop_Shr8x8: size = 0; break;
2522 case Iop_Shr16x4: size = 1; break;
2523 case Iop_Shr32x2: size = 2; break;
2524 default: vassert(0);
2526 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2527 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2528 argR2, zero, argR, size, False));
2529 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2530 res, argL, argR2, size, False));
2536 HReg res = newVRegD(env);
2537 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2538 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2540 switch (e->Iex.Binop.op) {
2541 case Iop_Shl8x8: size = 0; break;
2542 case Iop_Shl16x4: size = 1; break;
2543 case Iop_Shl32x2: size = 2; break;
2544 default: vassert(0);
2546 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2547 res, argL, argR, size, False));
2553 case Iop_QShl64x1: {
2554 HReg res = newVRegD(env);
2555 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2556 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2558 switch (e->Iex.Binop.op) {
2559 case Iop_QShl8x8: size = 0; break;
2560 case Iop_QShl16x4: size = 1; break;
2561 case Iop_QShl32x2: size = 2; break;
2562 case Iop_QShl64x1: size = 3; break;
2563 default: vassert(0);
2565 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2566 res, argL, argR, size, False));
2572 case Iop_QSal64x1: {
2573 HReg res = newVRegD(env);
2574 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2575 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2577 switch (e->Iex.Binop.op) {
2578 case Iop_QSal8x8: size = 0; break;
2579 case Iop_QSal16x4: size = 1; break;
2580 case Iop_QSal32x2: size = 2; break;
2581 case Iop_QSal64x1: size = 3; break;
2582 default: vassert(0);
2584 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2585 res, argL, argR, size, False));
2591 case Iop_QShlN64x1: {
2592 HReg res = newVRegD(env);
2593 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2595 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2596 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2597 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2598 "second argument only\n");
2600 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2601 switch (e->Iex.Binop.op) {
2602 case Iop_QShlN8x8: size = 8 | imm; break;
2603 case Iop_QShlN16x4: size = 16 | imm; break;
2604 case Iop_QShlN32x2: size = 32 | imm; break;
2605 case Iop_QShlN64x1: size = 64 | imm; break;
2606 default: vassert(0);
2608 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2609 res, argL, size, False));
2613 case Iop_QShlN16Sx4:
2614 case Iop_QShlN32Sx2:
2615 case Iop_QShlN64Sx1: {
2616 HReg res = newVRegD(env);
2617 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2619 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2620 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2621 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2622 "second argument only\n");
2624 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2625 switch (e->Iex.Binop.op) {
2626 case Iop_QShlN8Sx8: size = 8 | imm; break;
2627 case Iop_QShlN16Sx4: size = 16 | imm; break;
2628 case Iop_QShlN32Sx2: size = 32 | imm; break;
2629 case Iop_QShlN64Sx1: size = 64 | imm; break;
2630 default: vassert(0);
2632 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2633 res, argL, size, False));
2639 case Iop_QSalN64x1: {
2640 HReg res = newVRegD(env);
2641 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2643 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2644 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2645 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2646 "second argument only\n");
2648 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2649 switch (e->Iex.Binop.op) {
2650 case Iop_QSalN8x8: size = 8 | imm; break;
2651 case Iop_QSalN16x4: size = 16 | imm; break;
2652 case Iop_QSalN32x2: size = 32 | imm; break;
2653 case Iop_QSalN64x1: size = 64 | imm; break;
2654 default: vassert(0);
2656 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2657 res, argL, size, False));
2664 HReg res = newVRegD(env);
2665 HReg tmp = newVRegD(env);
2666 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2667 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2668 HReg argR2 = newVRegI(env);
2670 switch (e->Iex.Binop.op) {
2671 case Iop_ShrN8x8: size = 0; break;
2672 case Iop_ShrN16x4: size = 1; break;
2673 case Iop_ShrN32x2: size = 2; break;
2674 case Iop_Shr64: size = 3; break;
2675 default: vassert(0);
2677 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2678 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2679 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2680 res, argL, tmp, size, False));
2687 HReg res = newVRegD(env);
2688 HReg tmp = newVRegD(env);
2689 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2690 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2692 switch (e->Iex.Binop.op) {
2693 case Iop_ShlN8x8: size = 0; break;
2694 case Iop_ShlN16x4: size = 1; break;
2695 case Iop_ShlN32x2: size = 2; break;
2696 case Iop_Shl64: size = 3; break;
2697 default: vassert(0);
2699 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2700 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2701 res, argL, tmp, size, False));
2708 HReg res = newVRegD(env);
2709 HReg tmp = newVRegD(env);
2710 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2711 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2712 HReg argR2 = newVRegI(env);
2714 switch (e->Iex.Binop.op) {
2715 case Iop_SarN8x8: size = 0; break;
2716 case Iop_SarN16x4: size = 1; break;
2717 case Iop_SarN32x2: size = 2; break;
2718 case Iop_Sar64: size = 3; break;
2719 default: vassert(0);
2721 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2722 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2723 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2724 res, argL, tmp, size, False));
2728 case Iop_CmpGT16Ux4:
2729 case Iop_CmpGT32Ux2: {
2730 HReg res = newVRegD(env);
2731 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2732 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2734 switch (e->Iex.Binop.op) {
2735 case Iop_CmpGT8Ux8: size = 0; break;
2736 case Iop_CmpGT16Ux4: size = 1; break;
2737 case Iop_CmpGT32Ux2: size = 2; break;
2738 default: vassert(0);
2740 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2741 res, argL, argR, size, False));
2745 case Iop_CmpGT16Sx4:
2746 case Iop_CmpGT32Sx2: {
2747 HReg res = newVRegD(env);
2748 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2749 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2751 switch (e->Iex.Binop.op) {
2752 case Iop_CmpGT8Sx8: size = 0; break;
2753 case Iop_CmpGT16Sx4: size = 1; break;
2754 case Iop_CmpGT32Sx2: size = 2; break;
2755 default: vassert(0);
2757 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2758 res, argL, argR, size, False));
2763 case Iop_CmpEQ32x2: {
2764 HReg res = newVRegD(env);
2765 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2766 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2768 switch (e->Iex.Binop.op) {
2769 case Iop_CmpEQ8x8: size = 0; break;
2770 case Iop_CmpEQ16x4: size = 1; break;
2771 case Iop_CmpEQ32x2: size = 2; break;
2772 default: vassert(0);
2774 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2775 res, argL, argR, size, False));
2781 HReg res = newVRegD(env);
2782 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2785 switch(e->Iex.Binop.op) {
2786 case Iop_Mul8x8: size = 0; break;
2787 case Iop_Mul16x4: size = 1; break;
2788 case Iop_Mul32x2: size = 2; break;
2789 default: vassert(0);
2791 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2792 res, argL, argR, size, False));
2795 case Iop_Mul32Fx2: {
2796 HReg res = newVRegD(env);
2797 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2798 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2800 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2801 res, argL, argR, size, False));
2804 case Iop_QDMulHi16Sx4:
2805 case Iop_QDMulHi32Sx2: {
2806 HReg res = newVRegD(env);
2807 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2808 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2810 switch(e->Iex.Binop.op) {
2811 case Iop_QDMulHi16Sx4: size = 1; break;
2812 case Iop_QDMulHi32Sx2: size = 2; break;
2813 default: vassert(0);
2815 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2816 res, argL, argR, size, False));
2820 case Iop_QRDMulHi16Sx4:
2821 case Iop_QRDMulHi32Sx2: {
2822 HReg res = newVRegD(env);
2823 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2824 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2826 switch(e->Iex.Binop.op) {
2827 case Iop_QRDMulHi16Sx4: size = 1; break;
2828 case Iop_QRDMulHi32Sx2: size = 2; break;
2829 default: vassert(0);
2831 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2832 res, argL, argR, size, False));
2838 case Iop_PwAdd32x2: {
2839 HReg res = newVRegD(env);
2840 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2841 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2843 switch(e->Iex.Binop.op) {
2844 case Iop_PwAdd8x8: size = 0; break;
2845 case Iop_PwAdd16x4: size = 1; break;
2846 case Iop_PwAdd32x2: size = 2; break;
2847 default: vassert(0);
2849 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2850 res, argL, argR, size, False));
2853 case Iop_PwAdd32Fx2: {
2854 HReg res = newVRegD(env);
2855 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2856 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2858 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2859 res, argL, argR, size, False));
2863 case Iop_PwMin16Ux4:
2864 case Iop_PwMin32Ux2: {
2865 HReg res = newVRegD(env);
2866 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2867 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2869 switch(e->Iex.Binop.op) {
2870 case Iop_PwMin8Ux8: size = 0; break;
2871 case Iop_PwMin16Ux4: size = 1; break;
2872 case Iop_PwMin32Ux2: size = 2; break;
2873 default: vassert(0);
2875 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2876 res, argL, argR, size, False));
2880 case Iop_PwMin16Sx4:
2881 case Iop_PwMin32Sx2: {
2882 HReg res = newVRegD(env);
2883 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2884 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2886 switch(e->Iex.Binop.op) {
2887 case Iop_PwMin8Sx8: size = 0; break;
2888 case Iop_PwMin16Sx4: size = 1; break;
2889 case Iop_PwMin32Sx2: size = 2; break;
2890 default: vassert(0);
2892 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2893 res, argL, argR, size, False));
2897 case Iop_PwMax16Ux4:
2898 case Iop_PwMax32Ux2: {
2899 HReg res = newVRegD(env);
2900 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2901 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2903 switch(e->Iex.Binop.op) {
2904 case Iop_PwMax8Ux8: size = 0; break;
2905 case Iop_PwMax16Ux4: size = 1; break;
2906 case Iop_PwMax32Ux2: size = 2; break;
2907 default: vassert(0);
2909 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2910 res, argL, argR, size, False));
2914 case Iop_PwMax16Sx4:
2915 case Iop_PwMax32Sx2: {
2916 HReg res = newVRegD(env);
2917 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2918 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2920 switch(e->Iex.Binop.op) {
2921 case Iop_PwMax8Sx8: size = 0; break;
2922 case Iop_PwMax16Sx4: size = 1; break;
2923 case Iop_PwMax32Sx2: size = 2; break;
2924 default: vassert(0);
2926 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2927 res, argL, argR, size, False));
2931 HReg res = newVRegD(env);
2932 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2933 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2934 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2935 res, argL, argR, 0, False));
2938 case Iop_PolynomialMul8x8: {
2939 HReg res = newVRegD(env);
2940 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2941 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2943 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2944 res, argL, argR, size, False));
2947 case Iop_Max32Fx2: {
2948 HReg res = newVRegD(env);
2949 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2950 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2951 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2952 res, argL, argR, 2, False));
2955 case Iop_Min32Fx2: {
2956 HReg res = newVRegD(env);
2957 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2958 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2959 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2960 res, argL, argR, 2, False));
2963 case Iop_PwMax32Fx2: {
2964 HReg res = newVRegD(env);
2965 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2966 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2967 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
2968 res, argL, argR, 2, False));
2971 case Iop_PwMin32Fx2: {
2972 HReg res = newVRegD(env);
2973 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2974 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2975 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
2976 res, argL, argR, 2, False));
2979 case Iop_CmpGT32Fx2: {
2980 HReg res = newVRegD(env);
2981 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2982 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2983 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
2984 res, argL, argR, 2, False));
2987 case Iop_CmpGE32Fx2: {
2988 HReg res = newVRegD(env);
2989 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2990 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2991 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
2992 res, argL, argR, 2, False));
2995 case Iop_CmpEQ32Fx2: {
2996 HReg res = newVRegD(env);
2997 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2998 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2999 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3000 res, argL, argR, 2, False));
3003 case Iop_F32ToFixed32Ux2_RZ:
3004 case Iop_F32ToFixed32Sx2_RZ:
3005 case Iop_Fixed32UToF32x2_RN:
3006 case Iop_Fixed32SToF32x2_RN: {
3007 HReg res = newVRegD(env);
3008 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3011 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3012 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3013 vpanic("ARM supports FP <-> Fixed conversion with constant "
3014 "second argument less than 33 only\n");
3016 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3017 vassert(imm6 <= 32 && imm6 > 0);
3019 switch(e->Iex.Binop.op) {
3020 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3021 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3022 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3023 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3024 default: vassert(0);
3026 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3030 FIXME: is this here or not?
3033 case Iop_VDup32x2: {
3034 HReg res = newVRegD(env);
3035 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3039 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3040 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3041 vpanic("ARM supports Iop_VDup with constant "
3042 "second argument less than 16 only\n");
3044 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3045 switch(e->Iex.Binop.op) {
3046 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3047 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3048 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3049 default: vassert(0);
3052 vpanic("ARM supports Iop_VDup with constant "
3053 "second argument less than 16 only\n");
3055 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3056 res, argL, imm4, False));
3065 /* --------- UNARY ops --------- */
3066 if (e->tag == Iex_Unop) {
3067 switch (e->Iex.Unop.op) {
3069 /* ReinterpF64asI64 */
3070 case Iop_ReinterpF64asI64:
3074 //case Iop_CmpwNEZ64:
3077 HReg res = newVRegD(env);
3078 iselInt64Expr(&rHi, &rLo, env, e);
3079 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3083 DECLARE_PATTERN(p_veqz_8x8);
3084 DECLARE_PATTERN(p_veqz_16x4);
3085 DECLARE_PATTERN(p_veqz_32x2);
3086 DECLARE_PATTERN(p_vcge_8sx8);
3087 DECLARE_PATTERN(p_vcge_16sx4);
3088 DECLARE_PATTERN(p_vcge_32sx2);
3089 DECLARE_PATTERN(p_vcge_8ux8);
3090 DECLARE_PATTERN(p_vcge_16ux4);
3091 DECLARE_PATTERN(p_vcge_32ux2);
3092 DEFINE_PATTERN(p_veqz_8x8,
3093 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3094 DEFINE_PATTERN(p_veqz_16x4,
3095 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3096 DEFINE_PATTERN(p_veqz_32x2,
3097 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3098 DEFINE_PATTERN(p_vcge_8sx8,
3099 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3100 DEFINE_PATTERN(p_vcge_16sx4,
3101 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3102 DEFINE_PATTERN(p_vcge_32sx2,
3103 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3104 DEFINE_PATTERN(p_vcge_8ux8,
3105 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3106 DEFINE_PATTERN(p_vcge_16ux4,
3107 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3108 DEFINE_PATTERN(p_vcge_32ux2,
3109 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3110 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3111 HReg res = newVRegD(env);
3112 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3113 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3115 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3116 HReg res = newVRegD(env);
3117 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3118 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3120 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3121 HReg res = newVRegD(env);
3122 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3123 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3125 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3126 HReg res = newVRegD(env);
3127 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3128 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3129 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3130 res, argL, argR, 0, False));
3132 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3133 HReg res = newVRegD(env);
3134 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3135 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3136 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3137 res, argL, argR, 1, False));
3139 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3140 HReg res = newVRegD(env);
3141 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3142 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3143 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3144 res, argL, argR, 2, False));
3146 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3147 HReg res = newVRegD(env);
3148 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3149 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3150 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3151 res, argL, argR, 0, False));
3153 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3154 HReg res = newVRegD(env);
3155 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3156 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3157 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3158 res, argL, argR, 1, False));
3160 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3161 HReg res = newVRegD(env);
3162 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3163 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3164 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3165 res, argL, argR, 2, False));
3168 HReg res = newVRegD(env);
3169 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3170 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3179 DECLARE_PATTERN(p_vdup_8x8);
3180 DECLARE_PATTERN(p_vdup_16x4);
3181 DECLARE_PATTERN(p_vdup_32x2);
3182 DEFINE_PATTERN(p_vdup_8x8,
3183 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3184 DEFINE_PATTERN(p_vdup_16x4,
3185 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3186 DEFINE_PATTERN(p_vdup_32x2,
3187 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3188 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3191 if (mi.bindee[1]->tag == Iex_Const &&
3192 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3193 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3194 imm4 = (index << 1) + 1;
3196 res = newVRegD(env);
3197 arg = iselNeon64Expr(env, mi.bindee[0]);
3198 addInstr(env, ARMInstr_NUnaryS(
3200 mkARMNRS(ARMNRS_Reg, res, 0),
3201 mkARMNRS(ARMNRS_Scalar, arg, index),
3207 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3210 if (mi.bindee[1]->tag == Iex_Const &&
3211 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3212 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3213 imm4 = (index << 2) + 2;
3215 res = newVRegD(env);
3216 arg = iselNeon64Expr(env, mi.bindee[0]);
3217 addInstr(env, ARMInstr_NUnaryS(
3219 mkARMNRS(ARMNRS_Reg, res, 0),
3220 mkARMNRS(ARMNRS_Scalar, arg, index),
3226 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3229 if (mi.bindee[1]->tag == Iex_Const &&
3230 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3231 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3232 imm4 = (index << 3) + 4;
3234 res = newVRegD(env);
3235 arg = iselNeon64Expr(env, mi.bindee[0]);
3236 addInstr(env, ARMInstr_NUnaryS(
3238 mkARMNRS(ARMNRS_Reg, res, 0),
3239 mkARMNRS(ARMNRS_Scalar, arg, index),
3246 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3247 res = newVRegD(env);
3248 switch (e->Iex.Unop.op) {
3249 case Iop_Dup8x8: size = 0; break;
3250 case Iop_Dup16x4: size = 1; break;
3251 case Iop_Dup32x2: size = 2; break;
3252 default: vassert(0);
3254 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3260 HReg res = newVRegD(env);
3261 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3263 switch(e->Iex.Binop.op) {
3264 case Iop_Abs8x8: size = 0; break;
3265 case Iop_Abs16x4: size = 1; break;
3266 case Iop_Abs32x2: size = 2; break;
3267 default: vassert(0);
3269 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3272 case Iop_Reverse64_8x8:
3273 case Iop_Reverse64_16x4:
3274 case Iop_Reverse64_32x2: {
3275 HReg res = newVRegD(env);
3276 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3278 switch(e->Iex.Binop.op) {
3279 case Iop_Reverse64_8x8: size = 0; break;
3280 case Iop_Reverse64_16x4: size = 1; break;
3281 case Iop_Reverse64_32x2: size = 2; break;
3282 default: vassert(0);
3284 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3285 res, arg, size, False));
3288 case Iop_Reverse32_8x8:
3289 case Iop_Reverse32_16x4: {
3290 HReg res = newVRegD(env);
3291 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3293 switch(e->Iex.Binop.op) {
3294 case Iop_Reverse32_8x8: size = 0; break;
3295 case Iop_Reverse32_16x4: size = 1; break;
3296 default: vassert(0);
3298 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3299 res, arg, size, False));
3302 case Iop_Reverse16_8x8: {
3303 HReg res = newVRegD(env);
3304 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3306 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3307 res, arg, size, False));
3310 case Iop_CmpwNEZ64: {
3311 HReg x_lsh = newVRegD(env);
3312 HReg x_rsh = newVRegD(env);
3313 HReg lsh_amt = newVRegD(env);
3314 HReg rsh_amt = newVRegD(env);
3315 HReg zero = newVRegD(env);
3316 HReg tmp = newVRegD(env);
3317 HReg tmp2 = newVRegD(env);
3318 HReg res = newVRegD(env);
3319 HReg x = newVRegD(env);
3320 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3321 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3322 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3323 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3324 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3325 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3326 rsh_amt, zero, lsh_amt, 2, False));
3327 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3328 x_lsh, x, lsh_amt, 3, False));
3329 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3330 x_rsh, x, rsh_amt, 3, False));
3331 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3332 tmp, x_lsh, x_rsh, 0, False));
3333 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3334 res, tmp, x, 0, False));
3338 case Iop_CmpNEZ16x4:
3339 case Iop_CmpNEZ32x2: {
3340 HReg res = newVRegD(env);
3341 HReg tmp = newVRegD(env);
3342 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3344 switch (e->Iex.Unop.op) {
3345 case Iop_CmpNEZ8x8: size = 0; break;
3346 case Iop_CmpNEZ16x4: size = 1; break;
3347 case Iop_CmpNEZ32x2: size = 2; break;
3348 default: vassert(0);
3350 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3351 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3354 case Iop_Shorten16x8:
3355 case Iop_Shorten32x4:
3356 case Iop_Shorten64x2: {
3357 HReg res = newVRegD(env);
3358 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3360 switch(e->Iex.Binop.op) {
3361 case Iop_Shorten16x8: size = 0; break;
3362 case Iop_Shorten32x4: size = 1; break;
3363 case Iop_Shorten64x2: size = 2; break;
3364 default: vassert(0);
3366 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3367 res, arg, size, False));
3370 case Iop_QShortenS16Sx8:
3371 case Iop_QShortenS32Sx4:
3372 case Iop_QShortenS64Sx2: {
3373 HReg res = newVRegD(env);
3374 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3376 switch(e->Iex.Binop.op) {
3377 case Iop_QShortenS16Sx8: size = 0; break;
3378 case Iop_QShortenS32Sx4: size = 1; break;
3379 case Iop_QShortenS64Sx2: size = 2; break;
3380 default: vassert(0);
3382 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3383 res, arg, size, False));
3386 case Iop_QShortenU16Sx8:
3387 case Iop_QShortenU32Sx4:
3388 case Iop_QShortenU64Sx2: {
3389 HReg res = newVRegD(env);
3390 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3392 switch(e->Iex.Binop.op) {
3393 case Iop_QShortenU16Sx8: size = 0; break;
3394 case Iop_QShortenU32Sx4: size = 1; break;
3395 case Iop_QShortenU64Sx2: size = 2; break;
3396 default: vassert(0);
3398 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3399 res, arg, size, False));
3402 case Iop_QShortenU16Ux8:
3403 case Iop_QShortenU32Ux4:
3404 case Iop_QShortenU64Ux2: {
3405 HReg res = newVRegD(env);
3406 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3408 switch(e->Iex.Binop.op) {
3409 case Iop_QShortenU16Ux8: size = 0; break;
3410 case Iop_QShortenU32Ux4: size = 1; break;
3411 case Iop_QShortenU64Ux2: size = 2; break;
3412 default: vassert(0);
3414 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3415 res, arg, size, False));
3418 case Iop_PwAddL8Sx8:
3419 case Iop_PwAddL16Sx4:
3420 case Iop_PwAddL32Sx2: {
3421 HReg res = newVRegD(env);
3422 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3424 switch(e->Iex.Binop.op) {
3425 case Iop_PwAddL8Sx8: size = 0; break;
3426 case Iop_PwAddL16Sx4: size = 1; break;
3427 case Iop_PwAddL32Sx2: size = 2; break;
3428 default: vassert(0);
3430 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3431 res, arg, size, False));
3434 case Iop_PwAddL8Ux8:
3435 case Iop_PwAddL16Ux4:
3436 case Iop_PwAddL32Ux2: {
3437 HReg res = newVRegD(env);
3438 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3440 switch(e->Iex.Binop.op) {
3441 case Iop_PwAddL8Ux8: size = 0; break;
3442 case Iop_PwAddL16Ux4: size = 1; break;
3443 case Iop_PwAddL32Ux2: size = 2; break;
3444 default: vassert(0);
3446 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3447 res, arg, size, False));
3451 HReg res = newVRegD(env);
3452 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3454 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3455 res, arg, size, False));
3460 case Iop_Clz32Sx2: {
3461 HReg res = newVRegD(env);
3462 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3464 switch(e->Iex.Binop.op) {
3465 case Iop_Clz8Sx8: size = 0; break;
3466 case Iop_Clz16Sx4: size = 1; break;
3467 case Iop_Clz32Sx2: size = 2; break;
3468 default: vassert(0);
3470 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3471 res, arg, size, False));
3476 case Iop_Cls32Sx2: {
3477 HReg res = newVRegD(env);
3478 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3480 switch(e->Iex.Binop.op) {
3481 case Iop_Cls8Sx8: size = 0; break;
3482 case Iop_Cls16Sx4: size = 1; break;
3483 case Iop_Cls32Sx2: size = 2; break;
3484 default: vassert(0);
3486 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3487 res, arg, size, False));
3490 case Iop_FtoI32Sx2_RZ: {
3491 HReg res = newVRegD(env);
3492 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3493 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3494 res, arg, 2, False));
3497 case Iop_FtoI32Ux2_RZ: {
3498 HReg res = newVRegD(env);
3499 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3500 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3501 res, arg, 2, False));
3504 case Iop_I32StoFx2: {
3505 HReg res = newVRegD(env);
3506 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3507 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3508 res, arg, 2, False));
3511 case Iop_I32UtoFx2: {
3512 HReg res = newVRegD(env);
3513 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3514 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3515 res, arg, 2, False));
3518 case Iop_F32toF16x4: {
3519 HReg res = newVRegD(env);
3520 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3521 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3522 res, arg, 2, False));
3525 case Iop_Recip32Fx2: {
3526 HReg res = newVRegD(env);
3527 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3528 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3529 res, argL, 0, False));
3532 case Iop_Recip32x2: {
3533 HReg res = newVRegD(env);
3534 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3535 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3536 res, argL, 0, False));
3539 case Iop_Abs32Fx2: {
3540 DECLARE_PATTERN(p_vabd_32fx2);
3541 DEFINE_PATTERN(p_vabd_32fx2,
3546 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3547 HReg res = newVRegD(env);
3548 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3549 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3550 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3551 res, argL, argR, 0, False));
3554 HReg res = newVRegD(env);
3555 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3556 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3557 res, arg, 0, False));
3561 case Iop_Rsqrte32Fx2: {
3562 HReg res = newVRegD(env);
3563 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3564 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3565 res, arg, 0, False));
3568 case Iop_Rsqrte32x2: {
3569 HReg res = newVRegD(env);
3570 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3571 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3572 res, arg, 0, False));
3575 case Iop_Neg32Fx2: {
3576 HReg res = newVRegD(env);
3577 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3578 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3579 res, arg, 0, False));
3585 } /* if (e->tag == Iex_Unop) */
3587 if (e->tag == Iex_Triop) {
3588 switch (e->Iex.Triop.op) {
3589 case Iop_Extract64: {
3590 HReg res = newVRegD(env);
3591 HReg argL = iselNeon64Expr(env, e->Iex.Triop.arg1);
3592 HReg argR = iselNeon64Expr(env, e->Iex.Triop.arg2);
3594 if (e->Iex.Triop.arg3->tag != Iex_Const ||
3595 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
3596 vpanic("ARM target supports Iop_Extract64 with constant "
3597 "third argument less than 16 only\n");
3599 imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
3601 vpanic("ARM target supports Iop_Extract64 with constant "
3602 "third argument less than 16 only\n");
3604 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3605 res, argL, argR, imm4, False));
3608 case Iop_SetElem8x8:
3609 case Iop_SetElem16x4:
3610 case Iop_SetElem32x2: {
3611 HReg res = newVRegD(env);
3612 HReg dreg = iselNeon64Expr(env, e->Iex.Triop.arg1);
3613 HReg arg = iselIntExpr_R(env, e->Iex.Triop.arg3);
3615 if (e->Iex.Triop.arg2->tag != Iex_Const ||
3616 typeOfIRExpr(env->type_env, e->Iex.Triop.arg2) != Ity_I8) {
3617 vpanic("ARM target supports SetElem with constant "
3618 "second argument only\n");
3620 index = e->Iex.Triop.arg2->Iex.Const.con->Ico.U8;
3621 switch (e->Iex.Triop.op) {
3622 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3623 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3624 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3625 default: vassert(0);
3627 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3628 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3629 mkARMNRS(ARMNRS_Scalar, res, index),
3630 mkARMNRS(ARMNRS_Reg, arg, 0),
3639 /* --------- MULTIPLEX --------- */
3640 if (e->tag == Iex_Mux0X) {
3642 HReg res = newVRegD(env);
3643 iselInt64Expr(&rHi, &rLo, env, e);
3644 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3649 vpanic("iselNeon64Expr");
3652 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3654 HReg r = iselNeonExpr_wrk( env, e );
3655 vassert(hregClass(r) == HRcVec128);
3656 vassert(hregIsVirtual(r));
3660 /* DO NOT CALL THIS DIRECTLY */
3661 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3663 IRType ty = typeOfIRExpr(env->type_env, e);
3666 vassert(ty == Ity_V128);
3668 if (e->tag == Iex_RdTmp) {
3669 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3672 if (e->tag == Iex_Const) {
3673 /* At the moment there should be no 128-bit constants in IR for ARM
3674 generated during disassemble. They are represented as Iop_64HLtoV128
3675 binary operation and are handled among binary ops. */
3676 /* But zero can be created by valgrind internal optimizer */
3677 if (e->Iex.Const.con->Ico.V128 == 0) {
3678 HReg res = newVRegV(env);
3679 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3683 vpanic("128-bit constant is not implemented");
3686 if (e->tag == Iex_Load) {
3687 HReg res = newVRegV(env);
3688 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3689 vassert(ty == Ity_V128);
3690 addInstr(env, ARMInstr_NLdStQ(True, res, am));
3694 if (e->tag == Iex_Get) {
3695 HReg addr = newVRegI(env);
3696 HReg res = newVRegV(env);
3697 vassert(ty == Ity_V128);
3698 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3699 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3703 if (e->tag == Iex_Unop) {
3704 switch (e->Iex.Unop.op) {
3706 DECLARE_PATTERN(p_veqz_8x16);
3707 DECLARE_PATTERN(p_veqz_16x8);
3708 DECLARE_PATTERN(p_veqz_32x4);
3709 DECLARE_PATTERN(p_vcge_8sx16);
3710 DECLARE_PATTERN(p_vcge_16sx8);
3711 DECLARE_PATTERN(p_vcge_32sx4);
3712 DECLARE_PATTERN(p_vcge_8ux16);
3713 DECLARE_PATTERN(p_vcge_16ux8);
3714 DECLARE_PATTERN(p_vcge_32ux4);
3715 DEFINE_PATTERN(p_veqz_8x16,
3716 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3717 DEFINE_PATTERN(p_veqz_16x8,
3718 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3719 DEFINE_PATTERN(p_veqz_32x4,
3720 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3721 DEFINE_PATTERN(p_vcge_8sx16,
3722 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3723 DEFINE_PATTERN(p_vcge_16sx8,
3724 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3725 DEFINE_PATTERN(p_vcge_32sx4,
3726 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3727 DEFINE_PATTERN(p_vcge_8ux16,
3728 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3729 DEFINE_PATTERN(p_vcge_16ux8,
3730 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3731 DEFINE_PATTERN(p_vcge_32ux4,
3732 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3733 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3734 HReg res = newVRegV(env);
3735 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3736 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3738 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3739 HReg res = newVRegV(env);
3740 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3741 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3743 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3744 HReg res = newVRegV(env);
3745 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3746 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3748 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3749 HReg res = newVRegV(env);
3750 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3751 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3752 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3753 res, argL, argR, 0, True));
3755 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3756 HReg res = newVRegV(env);
3757 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3758 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3759 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3760 res, argL, argR, 1, True));
3762 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3763 HReg res = newVRegV(env);
3764 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3765 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3766 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3767 res, argL, argR, 2, True));
3769 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3770 HReg res = newVRegV(env);
3771 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3772 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3773 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3774 res, argL, argR, 0, True));
3776 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3777 HReg res = newVRegV(env);
3778 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3779 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3780 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3781 res, argL, argR, 1, True));
3783 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3784 HReg res = newVRegV(env);
3785 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3786 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3787 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3788 res, argL, argR, 2, True));
3791 HReg res = newVRegV(env);
3792 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3793 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3802 DECLARE_PATTERN(p_vdup_8x16);
3803 DECLARE_PATTERN(p_vdup_16x8);
3804 DECLARE_PATTERN(p_vdup_32x4);
3805 DEFINE_PATTERN(p_vdup_8x16,
3806 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3807 DEFINE_PATTERN(p_vdup_16x8,
3808 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3809 DEFINE_PATTERN(p_vdup_32x4,
3810 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3811 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3814 if (mi.bindee[1]->tag == Iex_Const &&
3815 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3816 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3817 imm4 = (index << 1) + 1;
3819 res = newVRegV(env);
3820 arg = iselNeon64Expr(env, mi.bindee[0]);
3821 addInstr(env, ARMInstr_NUnaryS(
3823 mkARMNRS(ARMNRS_Reg, res, 0),
3824 mkARMNRS(ARMNRS_Scalar, arg, index),
3830 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3833 if (mi.bindee[1]->tag == Iex_Const &&
3834 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3835 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3836 imm4 = (index << 2) + 2;
3838 res = newVRegV(env);
3839 arg = iselNeon64Expr(env, mi.bindee[0]);
3840 addInstr(env, ARMInstr_NUnaryS(
3842 mkARMNRS(ARMNRS_Reg, res, 0),
3843 mkARMNRS(ARMNRS_Scalar, arg, index),
3849 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3852 if (mi.bindee[1]->tag == Iex_Const &&
3853 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3854 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3855 imm4 = (index << 3) + 4;
3857 res = newVRegV(env);
3858 arg = iselNeon64Expr(env, mi.bindee[0]);
3859 addInstr(env, ARMInstr_NUnaryS(
3861 mkARMNRS(ARMNRS_Reg, res, 0),
3862 mkARMNRS(ARMNRS_Scalar, arg, index),
3869 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3870 res = newVRegV(env);
3871 switch (e->Iex.Unop.op) {
3872 case Iop_Dup8x16: size = 0; break;
3873 case Iop_Dup16x8: size = 1; break;
3874 case Iop_Dup32x4: size = 2; break;
3875 default: vassert(0);
3877 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3883 HReg res = newVRegV(env);
3884 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3886 switch(e->Iex.Binop.op) {
3887 case Iop_Abs8x16: size = 0; break;
3888 case Iop_Abs16x8: size = 1; break;
3889 case Iop_Abs32x4: size = 2; break;
3890 default: vassert(0);
3892 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3895 case Iop_Reverse64_8x16:
3896 case Iop_Reverse64_16x8:
3897 case Iop_Reverse64_32x4: {
3898 HReg res = newVRegV(env);
3899 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3901 switch(e->Iex.Binop.op) {
3902 case Iop_Reverse64_8x16: size = 0; break;
3903 case Iop_Reverse64_16x8: size = 1; break;
3904 case Iop_Reverse64_32x4: size = 2; break;
3905 default: vassert(0);
3907 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3908 res, arg, size, True));
3911 case Iop_Reverse32_8x16:
3912 case Iop_Reverse32_16x8: {
3913 HReg res = newVRegV(env);
3914 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3916 switch(e->Iex.Binop.op) {
3917 case Iop_Reverse32_8x16: size = 0; break;
3918 case Iop_Reverse32_16x8: size = 1; break;
3919 default: vassert(0);
3921 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3922 res, arg, size, True));
3925 case Iop_Reverse16_8x16: {
3926 HReg res = newVRegV(env);
3927 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3929 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3930 res, arg, size, True));
3933 case Iop_CmpNEZ64x2: {
3934 HReg x_lsh = newVRegV(env);
3935 HReg x_rsh = newVRegV(env);
3936 HReg lsh_amt = newVRegV(env);
3937 HReg rsh_amt = newVRegV(env);
3938 HReg zero = newVRegV(env);
3939 HReg tmp = newVRegV(env);
3940 HReg tmp2 = newVRegV(env);
3941 HReg res = newVRegV(env);
3942 HReg x = newVRegV(env);
3943 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3944 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3945 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3946 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3947 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3948 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3949 rsh_amt, zero, lsh_amt, 2, True));
3950 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3951 x_lsh, x, lsh_amt, 3, True));
3952 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3953 x_rsh, x, rsh_amt, 3, True));
3954 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3955 tmp, x_lsh, x_rsh, 0, True));
3956 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3957 res, tmp, x, 0, True));
3960 case Iop_CmpNEZ8x16:
3961 case Iop_CmpNEZ16x8:
3962 case Iop_CmpNEZ32x4: {
3963 HReg res = newVRegV(env);
3964 HReg tmp = newVRegV(env);
3965 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3967 switch (e->Iex.Unop.op) {
3968 case Iop_CmpNEZ8x16: size = 0; break;
3969 case Iop_CmpNEZ16x8: size = 1; break;
3970 case Iop_CmpNEZ32x4: size = 2; break;
3971 default: vassert(0);
3973 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
3974 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
3977 case Iop_Longen8Ux8:
3978 case Iop_Longen16Ux4:
3979 case Iop_Longen32Ux2: {
3980 HReg res = newVRegV(env);
3981 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3983 switch (e->Iex.Unop.op) {
3984 case Iop_Longen8Ux8: size = 0; break;
3985 case Iop_Longen16Ux4: size = 1; break;
3986 case Iop_Longen32Ux2: size = 2; break;
3987 default: vassert(0);
3989 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
3990 res, arg, size, True));
3993 case Iop_Longen8Sx8:
3994 case Iop_Longen16Sx4:
3995 case Iop_Longen32Sx2: {
3996 HReg res = newVRegV(env);
3997 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3999 switch (e->Iex.Unop.op) {
4000 case Iop_Longen8Sx8: size = 0; break;
4001 case Iop_Longen16Sx4: size = 1; break;
4002 case Iop_Longen32Sx2: size = 2; break;
4003 default: vassert(0);
4005 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4006 res, arg, size, True));
4009 case Iop_PwAddL8Sx16:
4010 case Iop_PwAddL16Sx8:
4011 case Iop_PwAddL32Sx4: {
4012 HReg res = newVRegV(env);
4013 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4015 switch(e->Iex.Binop.op) {
4016 case Iop_PwAddL8Sx16: size = 0; break;
4017 case Iop_PwAddL16Sx8: size = 1; break;
4018 case Iop_PwAddL32Sx4: size = 2; break;
4019 default: vassert(0);
4021 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4022 res, arg, size, True));
4025 case Iop_PwAddL8Ux16:
4026 case Iop_PwAddL16Ux8:
4027 case Iop_PwAddL32Ux4: {
4028 HReg res = newVRegV(env);
4029 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4031 switch(e->Iex.Binop.op) {
4032 case Iop_PwAddL8Ux16: size = 0; break;
4033 case Iop_PwAddL16Ux8: size = 1; break;
4034 case Iop_PwAddL32Ux4: size = 2; break;
4035 default: vassert(0);
4037 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4038 res, arg, size, True));
4042 HReg res = newVRegV(env);
4043 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4045 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4050 case Iop_Clz32Sx4: {
4051 HReg res = newVRegV(env);
4052 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4054 switch(e->Iex.Binop.op) {
4055 case Iop_Clz8Sx16: size = 0; break;
4056 case Iop_Clz16Sx8: size = 1; break;
4057 case Iop_Clz32Sx4: size = 2; break;
4058 default: vassert(0);
4060 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4065 case Iop_Cls32Sx4: {
4066 HReg res = newVRegV(env);
4067 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4069 switch(e->Iex.Binop.op) {
4070 case Iop_Cls8Sx16: size = 0; break;
4071 case Iop_Cls16Sx8: size = 1; break;
4072 case Iop_Cls32Sx4: size = 2; break;
4073 default: vassert(0);
4075 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4078 case Iop_FtoI32Sx4_RZ: {
4079 HReg res = newVRegV(env);
4080 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4081 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4082 res, arg, 2, True));
4085 case Iop_FtoI32Ux4_RZ: {
4086 HReg res = newVRegV(env);
4087 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4088 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4089 res, arg, 2, True));
4092 case Iop_I32StoFx4: {
4093 HReg res = newVRegV(env);
4094 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4095 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4096 res, arg, 2, True));
4099 case Iop_I32UtoFx4: {
4100 HReg res = newVRegV(env);
4101 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4102 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4103 res, arg, 2, True));
4106 case Iop_F16toF32x4: {
4107 HReg res = newVRegV(env);
4108 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4109 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4110 res, arg, 2, True));
4113 case Iop_Recip32Fx4: {
4114 HReg res = newVRegV(env);
4115 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4116 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4117 res, argL, 0, True));
4120 case Iop_Recip32x4: {
4121 HReg res = newVRegV(env);
4122 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4123 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4124 res, argL, 0, True));
4127 case Iop_Abs32Fx4: {
4128 DECLARE_PATTERN(p_vabd_32fx4);
4129 DEFINE_PATTERN(p_vabd_32fx4,
4134 if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4135 HReg res = newVRegV(env);
4136 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4137 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4138 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4139 res, argL, argR, 0, True));
4142 HReg res = newVRegV(env);
4143 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4144 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4145 res, argL, 0, True));
4149 case Iop_Rsqrte32Fx4: {
4150 HReg res = newVRegV(env);
4151 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4152 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4153 res, argL, 0, True));
4156 case Iop_Rsqrte32x4: {
4157 HReg res = newVRegV(env);
4158 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4159 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4160 res, argL, 0, True));
4163 case Iop_Neg32Fx4: {
4164 HReg res = newVRegV(env);
4165 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4166 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4167 res, arg, 0, True));
4176 if (e->tag == Iex_Binop) {
4177 switch (e->Iex.Binop.op) {
4178 case Iop_64HLtoV128:
4179 /* Try to match into single "VMOV reg, imm" instruction */
4180 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4181 e->Iex.Binop.arg2->tag == Iex_Const &&
4182 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4183 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4184 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4185 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4186 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4187 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4189 HReg res = newVRegV(env);
4190 addInstr(env, ARMInstr_NeonImm(res, imm));
4193 if ((imm64 >> 32) == 0LL &&
4194 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4195 HReg tmp1 = newVRegV(env);
4196 HReg tmp2 = newVRegV(env);
4197 HReg res = newVRegV(env);
4198 if (imm->type < 10) {
4199 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4200 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4201 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4202 res, tmp1, tmp2, 4, True));
4206 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4207 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4208 HReg tmp1 = newVRegV(env);
4209 HReg tmp2 = newVRegV(env);
4210 HReg res = newVRegV(env);
4211 if (imm->type < 10) {
4212 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4213 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4214 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4215 res, tmp1, tmp2, 4, True));
4220 /* Does not match "VMOV Reg, Imm" form */
4223 HReg res = newVRegV(env);
4224 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4225 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4226 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4227 res, argL, argR, 4, True));
4231 HReg res = newVRegV(env);
4232 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4233 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4234 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4235 res, argL, argR, 4, True));
4239 HReg res = newVRegV(env);
4240 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4241 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4242 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4243 res, argL, argR, 4, True));
4251 FIXME: remove this if not used
4252 DECLARE_PATTERN(p_vrhadd_32sx4);
4253 ULong one = (1LL << 32) | 1LL;
4254 DEFINE_PATTERN(p_vrhadd_32sx4,
4275 HReg res = newVRegV(env);
4276 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4277 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4279 switch (e->Iex.Binop.op) {
4280 case Iop_Add8x16: size = 0; break;
4281 case Iop_Add16x8: size = 1; break;
4282 case Iop_Add32x4: size = 2; break;
4283 case Iop_Add64x2: size = 3; break;
4285 ppIROp(e->Iex.Binop.op);
4286 vpanic("Illegal element size in VADD");
4288 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4289 res, argL, argR, size, True));
4292 case Iop_Add32Fx4: {
4293 HReg res = newVRegV(env);
4294 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4295 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4297 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4298 res, argL, argR, size, True));
4301 case Iop_Recps32Fx4: {
4302 HReg res = newVRegV(env);
4303 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4304 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4306 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4307 res, argL, argR, size, True));
4310 case Iop_Rsqrts32Fx4: {
4311 HReg res = newVRegV(env);
4312 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4313 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4315 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4316 res, argL, argR, size, True));
4319 case Iop_InterleaveEvenLanes8x16:
4320 case Iop_InterleaveEvenLanes16x8:
4321 case Iop_InterleaveEvenLanes32x4:
4322 case Iop_InterleaveOddLanes8x16:
4323 case Iop_InterleaveOddLanes16x8:
4324 case Iop_InterleaveOddLanes32x4: {
4325 HReg tmp = newVRegV(env);
4326 HReg res = newVRegV(env);
4327 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4328 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4331 switch (e->Iex.Binop.op) {
4332 case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4333 case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4334 case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4335 case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4336 case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4337 case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4339 ppIROp(e->Iex.Binop.op);
4340 vpanic("Illegal element size in VTRN");
4343 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4344 tmp, argL, 4, True));
4345 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4346 res, argR, 4, True));
4347 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4348 res, tmp, size, True));
4350 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4351 tmp, argR, 4, True));
4352 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4353 res, argL, 4, True));
4354 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4355 tmp, res, size, True));
4359 case Iop_InterleaveHI8x16:
4360 case Iop_InterleaveHI16x8:
4361 case Iop_InterleaveHI32x4:
4362 case Iop_InterleaveLO8x16:
4363 case Iop_InterleaveLO16x8:
4364 case Iop_InterleaveLO32x4: {
4365 HReg tmp = newVRegV(env);
4366 HReg res = newVRegV(env);
4367 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4368 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4371 switch (e->Iex.Binop.op) {
4372 case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4373 case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4374 case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4375 case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4376 case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4377 case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4379 ppIROp(e->Iex.Binop.op);
4380 vpanic("Illegal element size in VZIP");
4383 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4384 tmp, argL, 4, True));
4385 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4386 res, argR, 4, True));
4387 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4388 res, tmp, size, True));
4390 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4391 tmp, argR, 4, True));
4392 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4393 res, argL, 4, True));
4394 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4395 tmp, res, size, True));
4399 case Iop_CatOddLanes8x16:
4400 case Iop_CatOddLanes16x8:
4401 case Iop_CatOddLanes32x4:
4402 case Iop_CatEvenLanes8x16:
4403 case Iop_CatEvenLanes16x8:
4404 case Iop_CatEvenLanes32x4: {
4405 HReg tmp = newVRegV(env);
4406 HReg res = newVRegV(env);
4407 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4408 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4411 switch (e->Iex.Binop.op) {
4412 case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4413 case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4414 case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4415 case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4416 case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4417 case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4419 ppIROp(e->Iex.Binop.op);
4420 vpanic("Illegal element size in VUZP");
4423 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4424 tmp, argL, 4, True));
4425 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4426 res, argR, 4, True));
4427 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4428 res, tmp, size, True));
4430 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4431 tmp, argR, 4, True));
4432 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4433 res, argL, 4, True));
4434 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4435 tmp, res, size, True));
4442 case Iop_QAdd64Ux2: {
4443 HReg res = newVRegV(env);
4444 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4445 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4447 switch (e->Iex.Binop.op) {
4448 case Iop_QAdd8Ux16: size = 0; break;
4449 case Iop_QAdd16Ux8: size = 1; break;
4450 case Iop_QAdd32Ux4: size = 2; break;
4451 case Iop_QAdd64Ux2: size = 3; break;
4453 ppIROp(e->Iex.Binop.op);
4454 vpanic("Illegal element size in VQADDU");
4456 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4457 res, argL, argR, size, True));
4463 case Iop_QAdd64Sx2: {
4464 HReg res = newVRegV(env);
4465 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4466 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4468 switch (e->Iex.Binop.op) {
4469 case Iop_QAdd8Sx16: size = 0; break;
4470 case Iop_QAdd16Sx8: size = 1; break;
4471 case Iop_QAdd32Sx4: size = 2; break;
4472 case Iop_QAdd64Sx2: size = 3; break;
4474 ppIROp(e->Iex.Binop.op);
4475 vpanic("Illegal element size in VQADDS");
4477 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4478 res, argL, argR, size, True));
4485 HReg res = newVRegV(env);
4486 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4487 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4489 switch (e->Iex.Binop.op) {
4490 case Iop_Sub8x16: size = 0; break;
4491 case Iop_Sub16x8: size = 1; break;
4492 case Iop_Sub32x4: size = 2; break;
4493 case Iop_Sub64x2: size = 3; break;
4495 ppIROp(e->Iex.Binop.op);
4496 vpanic("Illegal element size in VSUB");
4498 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4499 res, argL, argR, size, True));
4502 case Iop_Sub32Fx4: {
4503 HReg res = newVRegV(env);
4504 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4505 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4507 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4508 res, argL, argR, size, True));
4514 case Iop_QSub64Ux2: {
4515 HReg res = newVRegV(env);
4516 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4517 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4519 switch (e->Iex.Binop.op) {
4520 case Iop_QSub8Ux16: size = 0; break;
4521 case Iop_QSub16Ux8: size = 1; break;
4522 case Iop_QSub32Ux4: size = 2; break;
4523 case Iop_QSub64Ux2: size = 3; break;
4525 ppIROp(e->Iex.Binop.op);
4526 vpanic("Illegal element size in VQSUBU");
4528 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4529 res, argL, argR, size, True));
4535 case Iop_QSub64Sx2: {
4536 HReg res = newVRegV(env);
4537 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4538 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4540 switch (e->Iex.Binop.op) {
4541 case Iop_QSub8Sx16: size = 0; break;
4542 case Iop_QSub16Sx8: size = 1; break;
4543 case Iop_QSub32Sx4: size = 2; break;
4544 case Iop_QSub64Sx2: size = 3; break;
4546 ppIROp(e->Iex.Binop.op);
4547 vpanic("Illegal element size in VQSUBS");
4549 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4550 res, argL, argR, size, True));
4555 case Iop_Max32Ux4: {
4556 HReg res = newVRegV(env);
4557 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4558 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4560 switch (e->Iex.Binop.op) {
4561 case Iop_Max8Ux16: size = 0; break;
4562 case Iop_Max16Ux8: size = 1; break;
4563 case Iop_Max32Ux4: size = 2; break;
4564 default: vpanic("Illegal element size in VMAXU");
4566 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4567 res, argL, argR, size, True));
4572 case Iop_Max32Sx4: {
4573 HReg res = newVRegV(env);
4574 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4575 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4577 switch (e->Iex.Binop.op) {
4578 case Iop_Max8Sx16: size = 0; break;
4579 case Iop_Max16Sx8: size = 1; break;
4580 case Iop_Max32Sx4: size = 2; break;
4581 default: vpanic("Illegal element size in VMAXU");
4583 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4584 res, argL, argR, size, True));
4589 case Iop_Min32Ux4: {
4590 HReg res = newVRegV(env);
4591 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4592 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4594 switch (e->Iex.Binop.op) {
4595 case Iop_Min8Ux16: size = 0; break;
4596 case Iop_Min16Ux8: size = 1; break;
4597 case Iop_Min32Ux4: size = 2; break;
4598 default: vpanic("Illegal element size in VMAXU");
4600 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4601 res, argL, argR, size, True));
4606 case Iop_Min32Sx4: {
4607 HReg res = newVRegV(env);
4608 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4609 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4611 switch (e->Iex.Binop.op) {
4612 case Iop_Min8Sx16: size = 0; break;
4613 case Iop_Min16Sx8: size = 1; break;
4614 case Iop_Min32Sx4: size = 2; break;
4615 default: vpanic("Illegal element size in VMAXU");
4617 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4618 res, argL, argR, size, True));
4625 HReg res = newVRegV(env);
4626 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4627 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4628 HReg argR2 = newVRegV(env);
4629 HReg zero = newVRegV(env);
4631 switch (e->Iex.Binop.op) {
4632 case Iop_Sar8x16: size = 0; break;
4633 case Iop_Sar16x8: size = 1; break;
4634 case Iop_Sar32x4: size = 2; break;
4635 case Iop_Sar64x2: size = 3; break;
4636 default: vassert(0);
4638 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4639 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4640 argR2, zero, argR, size, True));
4641 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4642 res, argL, argR2, size, True));
4649 HReg res = newVRegV(env);
4650 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4651 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4653 switch (e->Iex.Binop.op) {
4654 case Iop_Sal8x16: size = 0; break;
4655 case Iop_Sal16x8: size = 1; break;
4656 case Iop_Sal32x4: size = 2; break;
4657 case Iop_Sal64x2: size = 3; break;
4658 default: vassert(0);
4660 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4661 res, argL, argR, size, True));
4668 HReg res = newVRegV(env);
4669 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4670 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4671 HReg argR2 = newVRegV(env);
4672 HReg zero = newVRegV(env);
4674 switch (e->Iex.Binop.op) {
4675 case Iop_Shr8x16: size = 0; break;
4676 case Iop_Shr16x8: size = 1; break;
4677 case Iop_Shr32x4: size = 2; break;
4678 case Iop_Shr64x2: size = 3; break;
4679 default: vassert(0);
4681 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4682 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4683 argR2, zero, argR, size, True));
4684 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4685 res, argL, argR2, size, True));
4692 HReg res = newVRegV(env);
4693 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4694 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4696 switch (e->Iex.Binop.op) {
4697 case Iop_Shl8x16: size = 0; break;
4698 case Iop_Shl16x8: size = 1; break;
4699 case Iop_Shl32x4: size = 2; break;
4700 case Iop_Shl64x2: size = 3; break;
4701 default: vassert(0);
4703 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4704 res, argL, argR, size, True));
4710 case Iop_QShl64x2: {
4711 HReg res = newVRegV(env);
4712 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4713 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4715 switch (e->Iex.Binop.op) {
4716 case Iop_QShl8x16: size = 0; break;
4717 case Iop_QShl16x8: size = 1; break;
4718 case Iop_QShl32x4: size = 2; break;
4719 case Iop_QShl64x2: size = 3; break;
4720 default: vassert(0);
4722 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4723 res, argL, argR, size, True));
4729 case Iop_QSal64x2: {
4730 HReg res = newVRegV(env);
4731 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4732 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4734 switch (e->Iex.Binop.op) {
4735 case Iop_QSal8x16: size = 0; break;
4736 case Iop_QSal16x8: size = 1; break;
4737 case Iop_QSal32x4: size = 2; break;
4738 case Iop_QSal64x2: size = 3; break;
4739 default: vassert(0);
4741 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4742 res, argL, argR, size, True));
4748 case Iop_QShlN64x2: {
4749 HReg res = newVRegV(env);
4750 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4752 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4753 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4754 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4755 "second argument only\n");
4757 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4758 switch (e->Iex.Binop.op) {
4759 case Iop_QShlN8x16: size = 8 | imm; break;
4760 case Iop_QShlN16x8: size = 16 | imm; break;
4761 case Iop_QShlN32x4: size = 32 | imm; break;
4762 case Iop_QShlN64x2: size = 64 | imm; break;
4763 default: vassert(0);
4765 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4766 res, argL, size, True));
4769 case Iop_QShlN8Sx16:
4770 case Iop_QShlN16Sx8:
4771 case Iop_QShlN32Sx4:
4772 case Iop_QShlN64Sx2: {
4773 HReg res = newVRegV(env);
4774 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4776 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4777 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4778 vpanic("ARM taget supports Iop_QShlNASxB with constant "
4779 "second argument only\n");
4781 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4782 switch (e->Iex.Binop.op) {
4783 case Iop_QShlN8Sx16: size = 8 | imm; break;
4784 case Iop_QShlN16Sx8: size = 16 | imm; break;
4785 case Iop_QShlN32Sx4: size = 32 | imm; break;
4786 case Iop_QShlN64Sx2: size = 64 | imm; break;
4787 default: vassert(0);
4789 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4790 res, argL, size, True));
4796 case Iop_QSalN64x2: {
4797 HReg res = newVRegV(env);
4798 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4800 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4801 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4802 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4803 "second argument only\n");
4805 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4806 switch (e->Iex.Binop.op) {
4807 case Iop_QSalN8x16: size = 8 | imm; break;
4808 case Iop_QSalN16x8: size = 16 | imm; break;
4809 case Iop_QSalN32x4: size = 32 | imm; break;
4810 case Iop_QSalN64x2: size = 64 | imm; break;
4811 default: vassert(0);
4813 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4814 res, argL, size, True));
4820 case Iop_ShrN64x2: {
4821 HReg res = newVRegV(env);
4822 HReg tmp = newVRegV(env);
4823 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4824 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4825 HReg argR2 = newVRegI(env);
4827 switch (e->Iex.Binop.op) {
4828 case Iop_ShrN8x16: size = 0; break;
4829 case Iop_ShrN16x8: size = 1; break;
4830 case Iop_ShrN32x4: size = 2; break;
4831 case Iop_ShrN64x2: size = 3; break;
4832 default: vassert(0);
4834 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4835 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4836 tmp, argR2, 0, True));
4837 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4838 res, argL, tmp, size, True));
4844 case Iop_ShlN64x2: {
4845 HReg res = newVRegV(env);
4846 HReg tmp = newVRegV(env);
4847 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4848 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4850 switch (e->Iex.Binop.op) {
4851 case Iop_ShlN8x16: size = 0; break;
4852 case Iop_ShlN16x8: size = 1; break;
4853 case Iop_ShlN32x4: size = 2; break;
4854 case Iop_ShlN64x2: size = 3; break;
4855 default: vassert(0);
4857 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4858 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4859 res, argL, tmp, size, True));
4865 case Iop_SarN64x2: {
4866 HReg res = newVRegV(env);
4867 HReg tmp = newVRegV(env);
4868 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4869 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4870 HReg argR2 = newVRegI(env);
4872 switch (e->Iex.Binop.op) {
4873 case Iop_SarN8x16: size = 0; break;
4874 case Iop_SarN16x8: size = 1; break;
4875 case Iop_SarN32x4: size = 2; break;
4876 case Iop_SarN64x2: size = 3; break;
4877 default: vassert(0);
4879 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4880 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4881 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4882 res, argL, tmp, size, True));
4885 case Iop_CmpGT8Ux16:
4886 case Iop_CmpGT16Ux8:
4887 case Iop_CmpGT32Ux4: {
4888 HReg res = newVRegV(env);
4889 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4890 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4892 switch (e->Iex.Binop.op) {
4893 case Iop_CmpGT8Ux16: size = 0; break;
4894 case Iop_CmpGT16Ux8: size = 1; break;
4895 case Iop_CmpGT32Ux4: size = 2; break;
4896 default: vassert(0);
4898 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4899 res, argL, argR, size, True));
4902 case Iop_CmpGT8Sx16:
4903 case Iop_CmpGT16Sx8:
4904 case Iop_CmpGT32Sx4: {
4905 HReg res = newVRegV(env);
4906 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4907 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4909 switch (e->Iex.Binop.op) {
4910 case Iop_CmpGT8Sx16: size = 0; break;
4911 case Iop_CmpGT16Sx8: size = 1; break;
4912 case Iop_CmpGT32Sx4: size = 2; break;
4913 default: vassert(0);
4915 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4916 res, argL, argR, size, True));
4921 case Iop_CmpEQ32x4: {
4922 HReg res = newVRegV(env);
4923 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4924 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4926 switch (e->Iex.Binop.op) {
4927 case Iop_CmpEQ8x16: size = 0; break;
4928 case Iop_CmpEQ16x8: size = 1; break;
4929 case Iop_CmpEQ32x4: size = 2; break;
4930 default: vassert(0);
4932 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
4933 res, argL, argR, size, True));
4939 HReg res = newVRegV(env);
4940 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4941 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4943 switch(e->Iex.Binop.op) {
4944 case Iop_Mul8x16: size = 0; break;
4945 case Iop_Mul16x8: size = 1; break;
4946 case Iop_Mul32x4: size = 2; break;
4947 default: vassert(0);
4949 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
4950 res, argL, argR, size, True));
4953 case Iop_Mul32Fx4: {
4954 HReg res = newVRegV(env);
4955 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4956 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4958 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
4959 res, argL, argR, size, True));
4964 case Iop_Mull32Ux2: {
4965 HReg res = newVRegV(env);
4966 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4967 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
4969 switch(e->Iex.Binop.op) {
4970 case Iop_Mull8Ux8: size = 0; break;
4971 case Iop_Mull16Ux4: size = 1; break;
4972 case Iop_Mull32Ux2: size = 2; break;
4973 default: vassert(0);
4975 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
4976 res, argL, argR, size, True));
4982 case Iop_Mull32Sx2: {
4983 HReg res = newVRegV(env);
4984 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4985 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
4987 switch(e->Iex.Binop.op) {
4988 case Iop_Mull8Sx8: size = 0; break;
4989 case Iop_Mull16Sx4: size = 1; break;
4990 case Iop_Mull32Sx2: size = 2; break;
4991 default: vassert(0);
4993 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
4994 res, argL, argR, size, True));
4998 case Iop_QDMulHi16Sx8:
4999 case Iop_QDMulHi32Sx4: {
5000 HReg res = newVRegV(env);
5001 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5002 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5004 switch(e->Iex.Binop.op) {
5005 case Iop_QDMulHi16Sx8: size = 1; break;
5006 case Iop_QDMulHi32Sx4: size = 2; break;
5007 default: vassert(0);
5009 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5010 res, argL, argR, size, True));
5014 case Iop_QRDMulHi16Sx8:
5015 case Iop_QRDMulHi32Sx4: {
5016 HReg res = newVRegV(env);
5017 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5018 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5020 switch(e->Iex.Binop.op) {
5021 case Iop_QRDMulHi16Sx8: size = 1; break;
5022 case Iop_QRDMulHi32Sx4: size = 2; break;
5023 default: vassert(0);
5025 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5026 res, argL, argR, size, True));
5030 case Iop_QDMulLong16Sx4:
5031 case Iop_QDMulLong32Sx2: {
5032 HReg res = newVRegV(env);
5033 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5034 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5036 switch(e->Iex.Binop.op) {
5037 case Iop_QDMulLong16Sx4: size = 1; break;
5038 case Iop_QDMulLong32Sx2: size = 2; break;
5039 default: vassert(0);
5041 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5042 res, argL, argR, size, True));
5045 case Iop_PolynomialMul8x16: {
5046 HReg res = newVRegV(env);
5047 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5048 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5050 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5051 res, argL, argR, size, True));
5054 case Iop_Max32Fx4: {
5055 HReg res = newVRegV(env);
5056 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5057 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5058 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5059 res, argL, argR, 2, True));
5062 case Iop_Min32Fx4: {
5063 HReg res = newVRegV(env);
5064 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5065 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5066 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5067 res, argL, argR, 2, True));
5070 case Iop_PwMax32Fx4: {
5071 HReg res = newVRegV(env);
5072 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5073 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5074 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5075 res, argL, argR, 2, True));
5078 case Iop_PwMin32Fx4: {
5079 HReg res = newVRegV(env);
5080 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5081 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5082 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5083 res, argL, argR, 2, True));
5086 case Iop_CmpGT32Fx4: {
5087 HReg res = newVRegV(env);
5088 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5089 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5090 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5091 res, argL, argR, 2, True));
5094 case Iop_CmpGE32Fx4: {
5095 HReg res = newVRegV(env);
5096 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5097 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5098 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5099 res, argL, argR, 2, True));
5102 case Iop_CmpEQ32Fx4: {
5103 HReg res = newVRegV(env);
5104 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5105 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5106 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5107 res, argL, argR, 2, True));
5111 case Iop_PolynomialMull8x8: {
5112 HReg res = newVRegV(env);
5113 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5114 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5116 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5117 res, argL, argR, size, True));
5120 case Iop_F32ToFixed32Ux4_RZ:
5121 case Iop_F32ToFixed32Sx4_RZ:
5122 case Iop_Fixed32UToF32x4_RN:
5123 case Iop_Fixed32SToF32x4_RN: {
5124 HReg res = newVRegV(env);
5125 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5128 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5129 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5130 vpanic("ARM supports FP <-> Fixed conversion with constant "
5131 "second argument less than 33 only\n");
5133 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5134 vassert(imm6 <= 32 && imm6 > 0);
5136 switch(e->Iex.Binop.op) {
5137 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5138 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5139 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5140 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5141 default: vassert(0);
5143 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5147 FIXME remove if not used
5150 case Iop_VDup32x4: {
5151 HReg res = newVRegV(env);
5152 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5155 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5156 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5157 vpanic("ARM supports Iop_VDup with constant "
5158 "second argument less than 16 only\n");
5160 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5161 switch(e->Iex.Binop.op) {
5162 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5163 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5164 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5165 default: vassert(0);
5168 vpanic("ARM supports Iop_VDup with constant "
5169 "second argument less than 16 only\n");
5171 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5172 res, argL, imm4, True));
5178 case Iop_PwAdd32x4: {
5179 HReg res = newVRegV(env);
5180 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5181 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5183 switch(e->Iex.Binop.op) {
5184 case Iop_PwAdd8x16: size = 0; break;
5185 case Iop_PwAdd16x8: size = 1; break;
5186 case Iop_PwAdd32x4: size = 2; break;
5187 default: vassert(0);
5189 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5190 res, argL, argR, size, True));
5199 if (e->tag == Iex_Triop) {
5200 switch (e->Iex.Triop.op) {
5201 case Iop_ExtractV128: {
5202 HReg res = newVRegV(env);
5203 HReg argL = iselNeonExpr(env, e->Iex.Triop.arg1);
5204 HReg argR = iselNeonExpr(env, e->Iex.Triop.arg2);
5206 if (e->Iex.Triop.arg3->tag != Iex_Const ||
5207 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
5208 vpanic("ARM target supports Iop_ExtractV128 with constant "
5209 "third argument less than 16 only\n");
5211 imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
5213 vpanic("ARM target supports Iop_ExtractV128 with constant "
5214 "third argument less than 16 only\n");
5216 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5217 res, argL, argR, imm4, True));
5225 if (e->tag == Iex_Mux0X) {
5227 HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5228 HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5229 HReg dst = newVRegV(env);
5230 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5231 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5232 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5233 ARMRI84_I84(0xFF,0)));
5234 addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5240 vpanic("iselNeonExpr_wrk");
5243 /*---------------------------------------------------------*/
5244 /*--- ISEL: Floating point expressions (64 bit) ---*/
5245 /*---------------------------------------------------------*/
5247 /* Compute a 64-bit floating point value into a register, the identity
5248 of which is returned. As with iselIntExpr_R, the reg may be either
5249 real or virtual; in any case it must not be changed by subsequent
5250 code emitted by the caller. */
/* Public entry point: delegates to iselDblExpr_wrk, then sanity-checks
   that the worker produced a virtual 64-bit FP register. */
5252 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5254 HReg r = iselDblExpr_wrk( env, e );
/* Debug aid: dump the expression just selected (normally disabled). */
5256 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
/* The result must be a virtual F64-class register. */
5258 vassert(hregClass(r) == HRcFlt64);
5259 vassert(hregIsVirtual(r));
5263 /* DO NOT CALL THIS DIRECTLY */
/* Worker for iselDblExpr: emits code computing an Ity_F64 expression
   into a double register, one arm per IRExpr tag; falls through to
   vpanic for any unhandled form. */
5264 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5266 IRType ty = typeOfIRExpr(env->type_env,e);
5268 vassert(ty == Ity_F64);
/* IR temporary: already bound to a register -- just look it up. */
5270 if (e->tag == Iex_RdTmp) {
5271 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
/* F64 constant: only the all-zero bit pattern (+0.0) is handled; it is
   synthesised by transferring a zeroed integer register pair into a
   D register. */
5274 if (e->tag == Iex_Const) {
5275 /* Just handle the zero case. */
5276 IRConst* con = e->Iex.Const.con;
5277 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5278 HReg z32 = newVRegI(env);
5279 HReg dst = newVRegD(env);
5280 addInstr(env, ARMInstr_Imm32(z32, 0));
5281 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
/* Little-endian F64 load through a VFP-capable address mode. */
5286 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5288 HReg res = newVRegD(env);
5289 vassert(e->Iex.Load.ty == Ity_F64);
5290 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5291 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
/* Read F64 from the guest state, at a fixed offset from r8 (used as
   the guest-state base register throughout this file). */
5295 if (e->tag == Iex_Get) {
5296 // XXX This won't work if offset > 1020 or is not 0 % 4.
5297 // In which case we'll have to generate more longwinded code.
5298 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5299 HReg res = newVRegD(env);
5300 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
/* Unary operators. */
5304 if (e->tag == Iex_Unop) {
5305 switch (e->Iex.Unop.op) {
/* I64 -> F64 bit reinterpretation: route through NEON when the host
   has it, otherwise move the two 32-bit halves across with VXferD. */
5306 case Iop_ReinterpI64asF64: {
5307 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5308 return iselNeon64Expr(env, e->Iex.Unop.arg);
5311 HReg dst = newVRegD(env);
5312 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5313 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
/* FP negate. */
5318 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5319 HReg dst = newVRegD(env);
5320 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
/* FP absolute value. */
5324 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5325 HReg dst = newVRegD(env);
5326 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
/* Widen F32 -> F64 (exact; no rounding mode involved). */
5329 case Iop_F32toF64: {
5330 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5331 HReg dst = newVRegD(env);
5332 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
/* Signed I32 -> F64: move the integer into an S register, then
   convert with FSITOD. */
5336 case Iop_I32StoF64: {
5337 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5338 HReg f32 = newVRegF(env);
5339 HReg dst = newVRegD(env);
5340 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5342 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5343 /* FSITOD dst, f32 */
5344 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
/* Binary operators (e.g. sqrt, where arg1 is the rounding mode). */
5353 if (e->tag == Iex_Binop) {
5354 switch (e->Iex.Binop.op) {
5356 /* first arg is rounding mode; we ignore it. */
5357 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5358 HReg dst = newVRegD(env);
5359 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
/* Ternary operators: the four F64 arithmetic ops.  arg1 is the
   rounding mode (ignored here), arg2/arg3 are the operands. */
5367 if (e->tag == Iex_Triop) {
5368 switch (e->Iex.Triop.op) {
5373 ARMVfpOp op = 0; /*INVALID*/
5374 HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
5375 HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
5376 HReg dst = newVRegD(env);
5377 switch (e->Iex.Triop.op) {
5378 case Iop_DivF64: op = ARMvfp_DIV; break;
5379 case Iop_MulF64: op = ARMvfp_MUL; break;
5380 case Iop_AddF64: op = ARMvfp_ADD; break;
5381 case Iop_SubF64: op = ARMvfp_SUB; break;
5382 default: vassert(0);
5384 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
/* Conditional select: copy exprX into dst, then TST the 8-bit
   condition against 0xFF and conditionally (EQ, i.e. cond == 0)
   overwrite dst with expr0. */
5392 if (e->tag == Iex_Mux0X) {
5394 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5396 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
5397 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
5398 HReg dst = newVRegD(env);
5399 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5400 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5401 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5402 ARMRI84_I84(0xFF,0)));
5403 addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
/* No arm matched: unimplementable F64 expression. */
5409 vpanic("iselDblExpr_wrk");
5413 /*---------------------------------------------------------*/
5414 /*--- ISEL: Floating point expressions (32 bit) ---*/
5415 /*---------------------------------------------------------*/
5417 /* Compute a 32-bit floating point value into a register, the identity
5418 of which is returned. As with iselIntExpr_R, the reg may be either
5419 real or virtual; in any case it must not be changed by subsequent
5420 code emitted by the caller. */
/* Public entry point: delegates to iselFltExpr_wrk, then sanity-checks
   that the worker produced a virtual 32-bit FP register. */
5422 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5424 HReg r = iselFltExpr_wrk( env, e );
/* Debug aid: dump the expression just selected (normally disabled). */
5426 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
/* The result must be a virtual F32-class register. */
5428 vassert(hregClass(r) == HRcFlt32);
5429 vassert(hregIsVirtual(r));
5433 /* DO NOT CALL THIS DIRECTLY */
/* Worker for iselFltExpr: emits code computing an Ity_F32 expression
   into a single-precision register, one arm per IRExpr tag; falls
   through to vpanic for any unhandled form.  Mirrors the structure of
   iselDblExpr_wrk above. */
5434 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5436 IRType ty = typeOfIRExpr(env->type_env,e);
5438 vassert(ty == Ity_F32);
/* IR temporary: already bound to a register -- just look it up. */
5440 if (e->tag == Iex_RdTmp) {
5441 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
/* Little-endian F32 load through a VFP-capable address mode. */
5444 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5446 HReg res = newVRegF(env);
5447 vassert(e->Iex.Load.ty == Ity_F32);
5448 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5449 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
/* Read F32 from the guest state, at a fixed offset from r8 (used as
   the guest-state base register throughout this file). */
5453 if (e->tag == Iex_Get) {
5454 // XXX This won't work if offset > 1020 or is not 0 % 4.
5455 // In which case we'll have to generate more longwinded code.
5456 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5457 HReg res = newVRegF(env);
5458 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
/* Unary operators. */
5462 if (e->tag == Iex_Unop) {
5463 switch (e->Iex.Unop.op) {
/* I32 -> F32 bit reinterpretation: plain core->VFP register move. */
5464 case Iop_ReinterpI32asF32: {
5465 HReg dst = newVRegF(env);
5466 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5467 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
/* FP negate. */
5471 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5472 HReg dst = newVRegF(env);
5473 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
/* FP absolute value. */
5477 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5478 HReg dst = newVRegF(env);
5479 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
/* Binary operators. */
5487 if (e->tag == Iex_Binop) {
5488 switch (e->Iex.Binop.op) {
5490 /* first arg is rounding mode; we ignore it. */
5491 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5492 HReg dst = newVRegF(env);
5493 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
/* Narrow F64 -> F32: this one is rounding-sensitive, so the FPSCR
   rounding mode from arg1 is installed around the convert and then
   restored to the default. */
5496 case Iop_F64toF32: {
5497 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5498 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5499 HReg valS = newVRegF(env);
5500 /* FCVTSD valS, valD */
5501 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5502 set_VFP_rounding_default(env);
/* Ternary operators: the four F32 arithmetic ops.  arg1 is the
   rounding mode (ignored here), arg2/arg3 are the operands. */
5510 if (e->tag == Iex_Triop) {
5511 switch (e->Iex.Triop.op) {
5516 ARMVfpOp op = 0; /*INVALID*/
5517 HReg argL = iselFltExpr(env, e->Iex.Triop.arg2);
5518 HReg argR = iselFltExpr(env, e->Iex.Triop.arg3);
5519 HReg dst = newVRegF(env);
5520 switch (e->Iex.Triop.op) {
5521 case Iop_DivF32: op = ARMvfp_DIV; break;
5522 case Iop_MulF32: op = ARMvfp_MUL; break;
5523 case Iop_AddF32: op = ARMvfp_ADD; break;
5524 case Iop_SubF32: op = ARMvfp_SUB; break;
5525 default: vassert(0);
5527 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
/* Conditional select: copy exprX into dst, then TST the 8-bit
   condition against 0xFF and conditionally (EQ, i.e. cond == 0)
   overwrite dst with expr0. */
5535 if (e->tag == Iex_Mux0X) {
5537 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5539 HReg rX = iselFltExpr(env, e->Iex.Mux0X.exprX);
5540 HReg r0 = iselFltExpr(env, e->Iex.Mux0X.expr0);
5541 HReg dst = newVRegF(env);
5542 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5543 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5544 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5545 ARMRI84_I84(0xFF,0)));
5546 addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
/* No arm matched: unimplementable F32 expression. */
5552 vpanic("iselFltExpr_wrk");
5556 /*---------------------------------------------------------*/
5557 /*--- ISEL: Statements ---*/
5558 /*---------------------------------------------------------*/
5560 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5562 if (vex_traceflags & VEX_TRACE_VCODE) {
5563 vex_printf("\n-- ");
5567 switch (stmt->tag) {
5569 /* --------- STORE --------- */
5570 /* little-endian write to memory */
5572 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5573 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5574 IREndness end = stmt->Ist.Store.end;
5576 if (tya != Ity_I32 || end != Iend_LE)
5579 if (tyd == Ity_I32) {
5580 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5581 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5582 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5585 if (tyd == Ity_I16) {
5586 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5587 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5588 addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
5589 False/*!isSignedLoad*/, rD, am));
5592 if (tyd == Ity_I8) {
5593 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5594 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5595 addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
5598 if (tyd == Ity_I64) {
5599 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5600 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5601 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5602 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5604 HReg rDhi, rDlo, rA;
5605 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5606 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5607 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
5608 ARMAMode1_RI(rA,4)));
5609 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
5610 ARMAMode1_RI(rA,0)));
5614 if (tyd == Ity_F64) {
5615 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5616 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5617 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5620 if (tyd == Ity_F32) {
5621 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5622 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5623 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5626 if (tyd == Ity_V128) {
5627 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5628 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5629 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5636 /* --------- PUT --------- */
5637 /* write guest state, fixed offset */
5639 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5641 if (tyd == Ity_I32) {
5642 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5643 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5644 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5647 if (tyd == Ity_I64) {
5648 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5649 HReg addr = newVRegI(env);
5650 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5651 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5652 stmt->Ist.Put.offset));
5653 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5656 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5657 stmt->Ist.Put.offset + 0);
5658 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5659 stmt->Ist.Put.offset + 4);
5660 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5661 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
5662 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
5666 if (tyd == Ity_F64) {
5667 // XXX This won't work if offset > 1020 or is not 0 % 4.
5668 // In which case we'll have to generate more longwinded code.
5669 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5670 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
5671 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5674 if (tyd == Ity_F32) {
5675 // XXX This won't work if offset > 1020 or is not 0 % 4.
5676 // In which case we'll have to generate more longwinded code.
5677 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5678 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
5679 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5682 if (tyd == Ity_V128) {
5683 HReg addr = newVRegI(env);
5684 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5685 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5686 stmt->Ist.Put.offset));
5687 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5693 //zz /* --------- Indexed PUT --------- */
5694 //zz /* write guest state, run-time offset */
5695 //zz case Ist_PutI: {
5697 //zz = genGuestArrayOffset(
5698 //zz env, stmt->Ist.PutI.descr,
5699 //zz stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
5701 //zz IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
5703 //zz if (tyd == Ity_I8) {
5704 //zz HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
5705 //zz addInstr(env, ARMInstr_StoreB(reg, am2));
5708 //zz// CAB: Ity_I32, Ity_I16 ?
5712 /* --------- TMP --------- */
5713 /* assign value to temporary */
5715 IRTemp tmp = stmt->Ist.WrTmp.tmp;
5716 IRType ty = typeOfIRTemp(env->type_env, tmp);
5718 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5719 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5720 env, stmt->Ist.WrTmp.data);
5721 HReg dst = lookupIRTemp(env, tmp);
5722 addInstr(env, ARMInstr_Mov(dst,ri84));
5726 HReg dst = lookupIRTemp(env, tmp);
5727 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5728 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5729 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5732 if (ty == Ity_I64) {
5733 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5734 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5735 HReg dst = lookupIRTemp(env, tmp);
5736 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5738 HReg rHi, rLo, dstHi, dstLo;
5739 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5740 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5741 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5742 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5746 if (ty == Ity_F64) {
5747 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5748 HReg dst = lookupIRTemp(env, tmp);
5749 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5752 if (ty == Ity_F32) {
5753 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5754 HReg dst = lookupIRTemp(env, tmp);
5755 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5758 if (ty == Ity_V128) {
5759 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5760 HReg dst = lookupIRTemp(env, tmp);
5761 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5767 /* --------- Call to DIRTY helper --------- */
5768 /* call complex ("dirty") helper function */
5771 IRDirty* d = stmt->Ist.Dirty.details;
5772 Bool passBBP = False;
5774 if (d->nFxState == 0)
5775 vassert(!d->needsBBP);
5777 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
5779 /* Marshal args, do the call, clear stack. */
5780 Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
5782 break; /* will go to stmt_fail: */
5784 /* Now figure out what to do with the returned value, if any. */
5785 if (d->tmp == IRTemp_INVALID)
5786 /* No return value. Nothing to do. */
5789 retty = typeOfIRTemp(env->type_env, d->tmp);
5791 if (retty == Ity_I64) {
5792 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5793 HReg tmp = lookupIRTemp(env, d->tmp);
5794 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5798 /* The returned value is in r1:r0. Park it in the
5799 register-pair associated with tmp. */
5800 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5801 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5802 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5806 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5807 /* The returned value is in r0. Park it in the register
5808 associated with tmp. */
5809 HReg dst = lookupIRTemp(env, d->tmp);
5810 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5817 /* --------- Load Linked and Store Conditional --------- */
5819 if (stmt->Ist.LLSC.storedata == NULL) {
5821 IRTemp res = stmt->Ist.LLSC.result;
5822 IRType ty = typeOfIRTemp(env->type_env, res);
5823 if (ty == Ity_I32 || ty == Ity_I8) {
5825 HReg r_dst = lookupIRTemp(env, res);
5826 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5828 case Ity_I8: szB = 1; break;
5829 case Ity_I32: szB = 4; break;
5830 default: vassert(0);
5832 addInstr(env, mk_iMOVds_RR(hregARM_R1(), raddr));
5833 addInstr(env, ARMInstr_LdrEX(szB));
5834 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R0()));
5837 /* else fall thru; is unhandled */
5840 IRTemp res = stmt->Ist.LLSC.result;
5841 IRType ty = typeOfIRTemp(env->type_env, res);
5842 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
5843 vassert(ty == Ity_I1);
5844 if (tyd == Ity_I32 || tyd == Ity_I8) {
5846 HReg r_res = lookupIRTemp(env, res);
5847 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
5848 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5849 ARMRI84* one = ARMRI84_I84(1,0);
5851 case Ity_I8: szB = 1; break;
5852 case Ity_I32: szB = 4; break;
5853 default: vassert(0);
5855 addInstr(env, mk_iMOVds_RR(hregARM_R1(), rD));
5856 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rA));
5857 addInstr(env, ARMInstr_StrEX(szB));
5858 /* now r0 is 1 if failed, 0 if success. Change to IR
5859 conventions (0 is fail, 1 is success). Also transfer
5861 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
5862 /* And be conservative -- mask off all but the lowest bit */
5863 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5866 /* else fall thru; is unhandled */
5871 /* --------- MEM FENCE --------- */
5873 switch (stmt->Ist.MBE.event) {
5875 addInstr(env,ARMInstr_MFence());
5882 /* --------- INSTR MARK --------- */
5883 /* Doesn't generate any executable code ... */
5887 /* --------- NO-OP --------- */
5891 /* --------- EXIT --------- */
5895 if (stmt->Ist.Exit.dst->tag != Ico_U32)
5896 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
5897 gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
5898 cc = iselCondCode(env, stmt->Ist.Exit.guard);
5899 addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
5900 addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext));
5912 /*---------------------------------------------------------*/
5913 /*--- ISEL: Basic block terminators (Nexts) ---*/
5914 /*---------------------------------------------------------*/
/* Generate code for the terminator of an IRSB: evaluate the guest
   address of the successor ('next') into a register, restore the
   link register from the copy saved at block entry (helper calls
   trash r14, and it is needed to get back to the dispatcher), and
   emit an always-taken (ARMcc_AL) goto carrying jump-kind 'jk'.
   NOTE(review): this file view is sampled -- the remainder of the
   trace-print body and the function's closing lines are not
   visible here. */
5916 static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
   /* Optionally dump the generated code when vcode tracing is on. */
5919    if (vex_traceflags & VEX_TRACE_VCODE) {
5920       vex_printf("\n-- goto {");
   /* Compute the successor guest address into a virtual register. */
5926    rDst = iselIntExpr_R(env, next);
   /* Restore r14 (LR) from the copy taken at block entry, then jump. */
5927    addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
5928    addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, rDst));
5932 /*---------------------------------------------------------*/
5933 /*--- Insn selector top-level ---*/
5934 /*---------------------------------------------------------*/
5936 /* Translate an entire SB to arm code. */
/* Top-level entry point: translate an entire IRSB ('bb') to ARM host
   instructions.  Builds an ISelEnv (type env, IRTemp -> virtual-HReg
   maps, output instruction array), walks the statements via
   iselStmt, handles the block terminator via iselNext, and records
   the virtual-register count for the register allocator.
   NOTE(review): this file view is sampled -- several lines of this
   function (including the final 'return' and closing brace) are not
   visible here; comments below describe only what is shown. */
5938 HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host,
5939 VexArchInfo* archinfo_host,
5940 VexAbiInfo* vbi/*UNUSED*/ )
5945 UInt hwcaps_host = archinfo_host->hwcaps;
5946 static UInt counter = 0;
   /* Sanity check: this selector only handles the ARM target. */
5949 vassert(arch_host == VexArchARM);
5951 /* hwcaps should not change from one ISEL call to another. */
5952 arm_hwcaps = hwcaps_host;
5954 /* Make up an initial environment to use. */
5955 env = LibVEX_Alloc(sizeof(ISelEnv));
5958 /* Set up output code array. */
5959 env->code = newHInstrArray();
5961 /* Copy BB's type env. */
5962 env->type_env = bb->tyenv;
5964 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
5965 change as we go along. */
5966 env->n_vregmap = bb->tyenv->types_used;
5967 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
5968 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
5970 /* For each IR temporary, allocate a suitably-kinded virtual
   register (or pair): I32 -> one int vreg; I64 -> one NEON D vreg
   when NEON is available, else an int-vreg pair (HI:LO, tracked in
   vregmapHI); F32/F64/V128 -> FP/vector vregs. */
5973 for (i = 0; i < env->n_vregmap; i++) {
5974 hregHI = hreg = INVALID_HREG;
5975 switch (bb->tyenv->types[i]) {
5979 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
   /* Ity_I64 case (label in a sampling gap): one D-reg with NEON,
      otherwise a 32-bit register pair. */
5981 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5982 hreg = mkHReg(j++, HRcFlt64, True);
5984 hregHI = mkHReg(j++, HRcInt32, True);
5985 hreg = mkHReg(j++, HRcInt32, True);
5988 case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break;
5989 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
5990 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
   /* Any other IRTemp type is unsupported on this target. */
5991 default: ppIRType(bb->tyenv->types[i]);
5992 vpanic("iselBB: IRTemp type");
5994 env->vregmap[i] = hreg;
5995 env->vregmapHI[i] = hregHI;
5999 /* Keep a copy of the link reg, since any call to a helper function
6000 will trash it, and we can't get back to the dispatcher once that
   happens (continuation of comment is in a sampling gap). */
6002 env->savedLR = newVRegI(env);
6003 addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14()));
6005 /* Ok, finally we can iterate over the statements. */
6006 for (i = 0; i < bb->stmts_used; i++)
6007 iselStmt(env,bb->stmts[i]);
   /* Emit the block terminator (goto next, restoring LR). */
6009 iselNext(env,bb->next,bb->jumpkind);
6011 /* record the number of vregs we used. */
6012 env->code->n_vregs = env->vreg_ctr;
6018 /*---------------------------------------------------------------*/
6019 /*--- end host_arm_isel.c ---*/
6020 /*---------------------------------------------------------------*/