/*---------------------------------------------------------------*/
/*--- begin                                   host_arm_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info@open-works.net

   NEON support is
   Copyright (C) 2010-2010 Samsung Electronics
   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
   and Kirill Batuzov <batuzovk@ispras.ru>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/
36 #include "libvex_basictypes.h"
38 #include "libvex_trc_values.h"
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_arm_defs.h"
47 /* --------- Registers. --------- */
49 /* The usual HReg abstraction.
50 There are 16 general purpose regs.
53 void ppHRegARM ( HReg reg ) {
55 /* Be generic for all virtual regs. */
56 if (hregIsVirtual(reg)) {
60 /* But specific for real regs. */
61 switch (hregClass(reg)) {
64 vassert(r >= 0 && r < 16);
69 vassert(r >= 0 && r < 32);
74 vassert(r >= 0 && r < 32);
79 vassert(r >= 0 && r < 16);
87 HReg hregARM_R0 ( void ) { return mkHReg(0, HRcInt32, False); }
88 HReg hregARM_R1 ( void ) { return mkHReg(1, HRcInt32, False); }
89 HReg hregARM_R2 ( void ) { return mkHReg(2, HRcInt32, False); }
90 HReg hregARM_R3 ( void ) { return mkHReg(3, HRcInt32, False); }
91 HReg hregARM_R4 ( void ) { return mkHReg(4, HRcInt32, False); }
92 HReg hregARM_R5 ( void ) { return mkHReg(5, HRcInt32, False); }
93 HReg hregARM_R6 ( void ) { return mkHReg(6, HRcInt32, False); }
94 HReg hregARM_R7 ( void ) { return mkHReg(7, HRcInt32, False); }
95 HReg hregARM_R8 ( void ) { return mkHReg(8, HRcInt32, False); }
96 HReg hregARM_R9 ( void ) { return mkHReg(9, HRcInt32, False); }
97 HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32, False); }
98 HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32, False); }
99 HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32, False); }
100 HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32, False); }
101 HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32, False); }
102 HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32, False); }
103 HReg hregARM_D8 ( void ) { return mkHReg(8, HRcFlt64, False); }
104 HReg hregARM_D9 ( void ) { return mkHReg(9, HRcFlt64, False); }
105 HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
106 HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
107 HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
108 HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
109 HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
110 HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
111 HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
112 HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
113 HReg hregARM_Q8 ( void ) { return mkHReg(8, HRcVec128, False); }
114 HReg hregARM_Q9 ( void ) { return mkHReg(9, HRcVec128, False); }
115 HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
116 HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
117 HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
118 HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
119 HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
120 HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
122 void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
126 *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
127 // callee saves ones are listed first, since we prefer them
128 // if they're available
129 (*arr)[i++] = hregARM_R4();
130 (*arr)[i++] = hregARM_R5();
131 (*arr)[i++] = hregARM_R6();
132 (*arr)[i++] = hregARM_R7();
133 (*arr)[i++] = hregARM_R10();
134 (*arr)[i++] = hregARM_R11();
135 // otherwise we'll have to slum it out with caller-saves ones
136 (*arr)[i++] = hregARM_R0();
137 (*arr)[i++] = hregARM_R1();
138 (*arr)[i++] = hregARM_R2();
139 (*arr)[i++] = hregARM_R3();
140 (*arr)[i++] = hregARM_R9();
141 // FP hreegisters. Note: these are all callee-save. Yay!
142 // Hence we don't need to mention them as trashed in
143 // getHRegUsage for ARMInstr_Call.
144 (*arr)[i++] = hregARM_D8();
145 (*arr)[i++] = hregARM_D9();
146 (*arr)[i++] = hregARM_D10();
147 (*arr)[i++] = hregARM_D11();
148 (*arr)[i++] = hregARM_D12();
149 (*arr)[i++] = hregARM_S26();
150 (*arr)[i++] = hregARM_S27();
151 (*arr)[i++] = hregARM_S28();
152 (*arr)[i++] = hregARM_S29();
153 (*arr)[i++] = hregARM_S30();
155 (*arr)[i++] = hregARM_Q8();
156 (*arr)[i++] = hregARM_Q9();
157 (*arr)[i++] = hregARM_Q10();
158 (*arr)[i++] = hregARM_Q11();
159 (*arr)[i++] = hregARM_Q12();
161 //(*arr)[i++] = hregARM_Q13();
162 //(*arr)[i++] = hregARM_Q14();
163 //(*arr)[i++] = hregARM_Q15();
165 // unavail: r8 as GSP
166 // r12 is used as a spill/reload temporary
171 // All in all, we have 11 allocatable integer registers:
172 // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
173 // and r12 dedicated as a spill temporary.
174 // 13 14 and 15 are not under the allocator's control.
176 // Hence for the allocatable registers we have:
178 // callee-saved: 4 5 6 7 (8) 9 10 11
179 // caller-saved: 0 1 2 3
180 // Note 9 is ambiguous: the base EABI does not give an e/r-saved
181 // designation for it, but the Linux instantiation of the ABI
182 // specifies it as callee-saved.
184 // If the set of available registers changes or if the e/r status
185 // changes, be sure to re-check/sync the definition of
186 // getHRegUsage for ARMInstr_Call too.
187 vassert(i == *nregs);
192 /* --------- Condition codes, ARM encoding. --------- */
194 HChar* showARMCondCode ( ARMCondCode cond ) {
196 case ARMcc_EQ: return "eq";
197 case ARMcc_NE: return "ne";
198 case ARMcc_HS: return "hs";
199 case ARMcc_LO: return "lo";
200 case ARMcc_MI: return "mi";
201 case ARMcc_PL: return "pl";
202 case ARMcc_VS: return "vs";
203 case ARMcc_VC: return "vc";
204 case ARMcc_HI: return "hi";
205 case ARMcc_LS: return "ls";
206 case ARMcc_GE: return "ge";
207 case ARMcc_LT: return "lt";
208 case ARMcc_GT: return "gt";
209 case ARMcc_LE: return "le";
210 case ARMcc_AL: return "al"; // default
211 case ARMcc_NV: return "nv";
212 default: vpanic("showARMCondCode");
217 /* --------- Mem AModes: Addressing Mode 1 --------- */
219 ARMAMode1* ARMAMode1_RI ( HReg reg, Int simm13 ) {
220 ARMAMode1* am = LibVEX_Alloc(sizeof(ARMAMode1));
222 am->ARMam1.RI.reg = reg;
223 am->ARMam1.RI.simm13 = simm13;
224 vassert(-4095 <= simm13 && simm13 <= 4095);
227 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
228 ARMAMode1* am = LibVEX_Alloc(sizeof(ARMAMode1));
229 am->tag = ARMam1_RRS;
230 am->ARMam1.RRS.base = base;
231 am->ARMam1.RRS.index = index;
232 am->ARMam1.RRS.shift = shift;
233 vassert(0 <= shift && shift <= 3);
237 void ppARMAMode1 ( ARMAMode1* am ) {
240 vex_printf("%d(", am->ARMam1.RI.simm13);
241 ppHRegARM(am->ARMam1.RI.reg);
246 ppHRegARM(am->ARMam1.RRS.base);
248 ppHRegARM(am->ARMam1.RRS.index);
249 vex_printf(",%u)", am->ARMam1.RRS.shift);
256 static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
259 addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
262 // addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
263 // addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
266 vpanic("addRegUsage_ARMAmode1");
270 static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
273 am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
276 //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
277 //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
280 vpanic("mapRegs_ARMAmode1");
285 /* --------- Mem AModes: Addressing Mode 2 --------- */
287 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
288 ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2));
290 am->ARMam2.RI.reg = reg;
291 am->ARMam2.RI.simm9 = simm9;
292 vassert(-255 <= simm9 && simm9 <= 255);
295 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
296 ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2));
298 am->ARMam2.RR.base = base;
299 am->ARMam2.RR.index = index;
303 void ppARMAMode2 ( ARMAMode2* am ) {
306 vex_printf("%d(", am->ARMam2.RI.simm9);
307 ppHRegARM(am->ARMam2.RI.reg);
312 ppHRegARM(am->ARMam2.RR.base);
314 ppHRegARM(am->ARMam2.RR.index);
322 static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
325 addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
328 // addHRegUse(u, HRmRead, am->ARMam2.RR.base);
329 // addHRegUse(u, HRmRead, am->ARMam2.RR.index);
332 vpanic("addRegUsage_ARMAmode2");
336 static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
339 am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
342 //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
343 //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
346 vpanic("mapRegs_ARMAmode2");
351 /* --------- Mem AModes: Addressing Mode VFP --------- */
353 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
354 ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
355 vassert(simm11 >= -1020 && simm11 <= 1020);
356 vassert(0 == (simm11 & 3));
362 void ppARMAModeV ( ARMAModeV* am ) {
363 vex_printf("%d(", am->simm11);
368 static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
369 addHRegUse(u, HRmRead, am->reg);
372 static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
373 am->reg = lookupHRegRemap(m, am->reg);
377 /* --------- Mem AModes: Addressing Mode Neon ------- */
379 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
380 ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
382 am->ARMamN.RR.rN = rN;
383 am->ARMamN.RR.rM = rM;
387 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
388 ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
390 am->ARMamN.R.rN = rN;
394 static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
395 if (am->tag == ARMamN_R) {
396 addHRegUse(u, HRmRead, am->ARMamN.R.rN);
398 addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
399 addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
403 static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
404 if (am->tag == ARMamN_R) {
405 am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
407 am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
408 am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
412 void ppARMAModeN ( ARMAModeN* am ) {
414 if (am->tag == ARMamN_R) {
415 ppHRegARM(am->ARMamN.R.rN);
417 ppHRegARM(am->ARMamN.RR.rN);
420 if (am->tag == ARMamN_RR) {
422 ppHRegARM(am->ARMamN.RR.rM);
427 /* --------- Reg or imm-8x4 operands --------- */
429 static UInt ROR32 ( UInt x, UInt sh ) {
430 vassert(sh >= 0 && sh < 32);
434 return (x << (32-sh)) | (x >> sh);
437 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
438 ARMRI84* ri84 = LibVEX_Alloc(sizeof(ARMRI84));
439 ri84->tag = ARMri84_I84;
440 ri84->ARMri84.I84.imm8 = imm8;
441 ri84->ARMri84.I84.imm4 = imm4;
442 vassert(imm8 >= 0 && imm8 <= 255);
443 vassert(imm4 >= 0 && imm4 <= 15);
446 ARMRI84* ARMRI84_R ( HReg reg ) {
447 ARMRI84* ri84 = LibVEX_Alloc(sizeof(ARMRI84));
448 ri84->tag = ARMri84_R;
449 ri84->ARMri84.R.reg = reg;
453 void ppARMRI84 ( ARMRI84* ri84 ) {
456 vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
457 2 * ri84->ARMri84.I84.imm4));
460 ppHRegARM(ri84->ARMri84.R.reg);
467 static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
472 addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
475 vpanic("addRegUsage_ARMRI84");
479 static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
484 ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
487 vpanic("mapRegs_ARMRI84");
492 /* --------- Reg or imm5 operands --------- */
494 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
495 ARMRI5* ri5 = LibVEX_Alloc(sizeof(ARMRI5));
496 ri5->tag = ARMri5_I5;
497 ri5->ARMri5.I5.imm5 = imm5;
498 vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
501 ARMRI5* ARMRI5_R ( HReg reg ) {
502 ARMRI5* ri5 = LibVEX_Alloc(sizeof(ARMRI5));
504 ri5->ARMri5.R.reg = reg;
508 void ppARMRI5 ( ARMRI5* ri5 ) {
511 vex_printf("%u", ri5->ARMri5.I5.imm5);
514 ppHRegARM(ri5->ARMri5.R.reg);
521 static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
526 addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
529 vpanic("addRegUsage_ARMRI5");
533 static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
538 ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
541 vpanic("mapRegs_ARMRI5");
545 /* -------- Neon Immediate operatnd --------- */
547 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
548 ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
554 ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
556 ULong y, x = imm->imm8;
565 return (x << 32) | x;
574 return (x << 32) | x;
579 return (x << 32) | x;
582 for (i = 7; i >= 0; i--) {
583 y = ((ULong)imm->imm8 >> i) & 1;
584 for (j = 0; j < 8; j++) {
590 x |= (x & 0x80) << 5;
591 x |= ~(x & 0x40) << 5;
592 x &= 0x187F; /* 0001 1000 0111 1111 */
593 x |= (x & 0x40) << 4;
594 x |= (x & 0x40) << 3;
595 x |= (x & 0x40) << 2;
596 x |= (x & 0x40) << 1;
601 vpanic("ARMNImm_to_Imm64");
605 ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
607 if ((x & 0xFFFFFFFF) == (x >> 32)) {
608 if ((x & 0xFFFFFF00) == 0)
609 return ARMNImm_TI(0, x & 0xFF);
610 if ((x & 0xFFFF00FF) == 0)
611 return ARMNImm_TI(1, (x >> 8) & 0xFF);
612 if ((x & 0xFF00FFFF) == 0)
613 return ARMNImm_TI(2, (x >> 16) & 0xFF);
614 if ((x & 0x00FFFFFF) == 0)
615 return ARMNImm_TI(3, (x >> 24) & 0xFF);
616 if ((x & 0xFFFF00FF) == 0xFF)
617 return ARMNImm_TI(7, (x >> 8) & 0xFF);
618 if ((x & 0xFF00FFFF) == 0xFFFF)
619 return ARMNImm_TI(8, (x >> 16) & 0xFF);
620 if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
621 if ((x & 0xFF00) == 0)
622 return ARMNImm_TI(4, x & 0xFF);
623 if ((x & 0x00FF) == 0)
624 return ARMNImm_TI(5, (x >> 8) & 0xFF);
625 if ((x & 0xFF) == ((x >> 8) & 0xFF))
626 return ARMNImm_TI(6, x & 0xFF);
628 if ((x & 0x7FFFF) == 0) {
630 tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
631 if (ARMNImm_to_Imm64(&tmp) == x)
632 return ARMNImm_TI(tmp.type, tmp.imm8);
635 /* This can only be type 9. */
636 tmp.imm8 = (((x >> 56) & 1) << 7)
637 | (((x >> 48) & 1) << 6)
638 | (((x >> 40) & 1) << 5)
639 | (((x >> 32) & 1) << 4)
640 | (((x >> 24) & 1) << 3)
641 | (((x >> 16) & 1) << 2)
642 | (((x >> 8) & 1) << 1)
643 | (((x >> 0) & 1) << 0);
645 if (ARMNImm_to_Imm64 (&tmp) == x)
646 return ARMNImm_TI(tmp.type, tmp.imm8);
651 void ppARMNImm (ARMNImm* i) {
652 ULong x = ARMNImm_to_Imm64(i);
653 vex_printf("0x%llX%llX", x, x);
656 /* -- Register or scalar operand --- */
658 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
660 ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
667 void ppARMNRS(ARMNRS *p)
670 if (p->tag == ARMNRS_Scalar) {
671 vex_printf("[%d]", p->index);
675 /* --------- Instructions. --------- */
677 HChar* showARMAluOp ( ARMAluOp op ) {
679 case ARMalu_ADD: return "add";
680 case ARMalu_ADDS: return "adds";
681 case ARMalu_ADC: return "adc";
682 case ARMalu_SUB: return "sub";
683 case ARMalu_SUBS: return "subs";
684 case ARMalu_SBC: return "sbc";
685 case ARMalu_AND: return "and";
686 case ARMalu_BIC: return "bic";
687 case ARMalu_OR: return "orr";
688 case ARMalu_XOR: return "xor";
689 default: vpanic("showARMAluOp");
693 HChar* showARMShiftOp ( ARMShiftOp op ) {
695 case ARMsh_SHL: return "shl";
696 case ARMsh_SHR: return "shr";
697 case ARMsh_SAR: return "sar";
698 default: vpanic("showARMShiftOp");
702 HChar* showARMUnaryOp ( ARMUnaryOp op ) {
704 case ARMun_NEG: return "neg";
705 case ARMun_NOT: return "not";
706 case ARMun_CLZ: return "clz";
707 default: vpanic("showARMUnaryOp");
711 HChar* showARMMulOp ( ARMMulOp op ) {
713 case ARMmul_PLAIN: return "mul";
714 case ARMmul_ZX: return "umull";
715 case ARMmul_SX: return "smull";
716 default: vpanic("showARMMulOp");
720 HChar* showARMVfpOp ( ARMVfpOp op ) {
722 case ARMvfp_ADD: return "add";
723 case ARMvfp_SUB: return "sub";
724 case ARMvfp_MUL: return "mul";
725 case ARMvfp_DIV: return "div";
726 default: vpanic("showARMVfpOp");
730 HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
732 case ARMvfpu_COPY: return "cpy";
733 case ARMvfpu_NEG: return "neg";
734 case ARMvfpu_ABS: return "abs";
735 case ARMvfpu_SQRT: return "sqrt";
736 default: vpanic("showARMVfpUnaryOp");
740 HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
742 case ARMneon_VAND: return "vand";
743 case ARMneon_VORR: return "vorr";
744 case ARMneon_VXOR: return "veor";
745 case ARMneon_VADD: return "vadd";
746 case ARMneon_VRHADDS: return "vrhadd";
747 case ARMneon_VRHADDU: return "vrhadd";
748 case ARMneon_VADDFP: return "vadd";
749 case ARMneon_VPADDFP: return "vpadd";
750 case ARMneon_VABDFP: return "vabd";
751 case ARMneon_VSUB: return "vsub";
752 case ARMneon_VSUBFP: return "vsub";
753 case ARMneon_VMINU: return "vmin";
754 case ARMneon_VMINS: return "vmin";
755 case ARMneon_VMINF: return "vmin";
756 case ARMneon_VMAXU: return "vmax";
757 case ARMneon_VMAXS: return "vmax";
758 case ARMneon_VMAXF: return "vmax";
759 case ARMneon_VQADDU: return "vqadd";
760 case ARMneon_VQADDS: return "vqadd";
761 case ARMneon_VQSUBU: return "vqsub";
762 case ARMneon_VQSUBS: return "vqsub";
763 case ARMneon_VCGTU: return "vcgt";
764 case ARMneon_VCGTS: return "vcgt";
765 case ARMneon_VCGTF: return "vcgt";
766 case ARMneon_VCGEF: return "vcgt";
767 case ARMneon_VCGEU: return "vcge";
768 case ARMneon_VCGES: return "vcge";
769 case ARMneon_VCEQ: return "vceq";
770 case ARMneon_VCEQF: return "vceq";
771 case ARMneon_VPADD: return "vpadd";
772 case ARMneon_VPMINU: return "vpmin";
773 case ARMneon_VPMINS: return "vpmin";
774 case ARMneon_VPMINF: return "vpmin";
775 case ARMneon_VPMAXU: return "vpmax";
776 case ARMneon_VPMAXS: return "vpmax";
777 case ARMneon_VPMAXF: return "vpmax";
778 case ARMneon_VEXT: return "vext";
779 case ARMneon_VMUL: return "vmuli";
780 case ARMneon_VMULLU: return "vmull";
781 case ARMneon_VMULLS: return "vmull";
782 case ARMneon_VMULP: return "vmul";
783 case ARMneon_VMULFP: return "vmul";
784 case ARMneon_VMULLP: return "vmul";
785 case ARMneon_VQDMULH: return "vqdmulh";
786 case ARMneon_VQRDMULH: return "vqrdmulh";
787 case ARMneon_VQDMULL: return "vqdmull";
788 case ARMneon_VTBL: return "vtbl";
789 case ARMneon_VRECPS: return "vrecps";
790 case ARMneon_VRSQRTS: return "vrecps";
792 default: vpanic("showARMNeonBinOp");
796 HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
810 case ARMneon_VRHADDU:
821 case ARMneon_VRHADDS:
828 case ARMneon_VQDMULL:
832 case ARMneon_VQDMULH:
833 case ARMneon_VQRDMULH:
840 case ARMneon_VPADDFP:
851 case ARMneon_VRSQRTS:
854 default: vpanic("showARMNeonBinOpDataType");
858 HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
860 case ARMneon_COPY: return "vmov";
861 case ARMneon_COPYLS: return "vmov";
862 case ARMneon_COPYLU: return "vmov";
863 case ARMneon_COPYN: return "vmov";
864 case ARMneon_COPYQNSS: return "vqmovn";
865 case ARMneon_COPYQNUS: return "vqmovun";
866 case ARMneon_COPYQNUU: return "vqmovn";
867 case ARMneon_NOT: return "vmvn";
868 case ARMneon_EQZ: return "vceq";
869 case ARMneon_CNT: return "vcnt";
870 case ARMneon_CLS: return "vcls";
871 case ARMneon_CLZ: return "vclz";
872 case ARMneon_DUP: return "vdup";
873 case ARMneon_PADDLS: return "vpaddl";
874 case ARMneon_PADDLU: return "vpaddl";
875 case ARMneon_VQSHLNSS: return "vqshl";
876 case ARMneon_VQSHLNUU: return "vqshl";
877 case ARMneon_VQSHLNUS: return "vqshlu";
878 case ARMneon_REV16: return "vrev16";
879 case ARMneon_REV32: return "vrev32";
880 case ARMneon_REV64: return "vrev64";
881 case ARMneon_VCVTFtoU: return "vcvt";
882 case ARMneon_VCVTFtoS: return "vcvt";
883 case ARMneon_VCVTUtoF: return "vcvt";
884 case ARMneon_VCVTStoF: return "vcvt";
885 case ARMneon_VCVTFtoFixedU: return "vcvt";
886 case ARMneon_VCVTFtoFixedS: return "vcvt";
887 case ARMneon_VCVTFixedUtoF: return "vcvt";
888 case ARMneon_VCVTFixedStoF: return "vcvt";
889 case ARMneon_VCVTF32toF16: return "vcvt";
890 case ARMneon_VCVTF16toF32: return "vcvt";
891 case ARMneon_VRECIP: return "vrecip";
892 case ARMneon_VRECIPF: return "vrecipf";
893 case ARMneon_VNEGF: return "vneg";
894 case ARMneon_ABS: return "vabs";
895 case ARMneon_VABSFP: return "vabsfp";
896 case ARMneon_VRSQRTEFP: return "vrsqrtefp";
897 case ARMneon_VRSQRTE: return "vrsqrte";
899 default: vpanic("showARMNeonUnOp");
903 HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
918 case ARMneon_COPYQNUU:
919 case ARMneon_VQSHLNUU:
921 case ARMneon_VRSQRTE:
927 case ARMneon_COPYQNSS:
928 case ARMneon_COPYQNUS:
929 case ARMneon_VQSHLNSS:
930 case ARMneon_VQSHLNUS:
933 case ARMneon_VRECIPF:
936 case ARMneon_VRSQRTEFP:
938 case ARMneon_VCVTFtoU: return ".u32.f32";
939 case ARMneon_VCVTFtoS: return ".s32.f32";
940 case ARMneon_VCVTUtoF: return ".f32.u32";
941 case ARMneon_VCVTStoF: return ".f32.s32";
942 case ARMneon_VCVTF16toF32: return ".f32.f16";
943 case ARMneon_VCVTF32toF16: return ".f16.f32";
944 case ARMneon_VCVTFtoFixedU: return ".u32.f32";
945 case ARMneon_VCVTFtoFixedS: return ".s32.f32";
946 case ARMneon_VCVTFixedUtoF: return ".f32.u32";
947 case ARMneon_VCVTFixedStoF: return ".f32.s32";
949 default: vpanic("showARMNeonUnOpDataType");
953 HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
955 case ARMneon_SETELEM: return "vmov";
956 case ARMneon_GETELEMU: return "vmov";
957 case ARMneon_GETELEMS: return "vmov";
958 case ARMneon_VDUP: return "vdup";
960 default: vpanic("showARMNeonUnarySOp");
964 HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
966 case ARMneon_SETELEM:
969 case ARMneon_GETELEMS:
971 case ARMneon_GETELEMU:
974 default: vpanic("showARMNeonUnarySOp");
978 HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
980 case ARMneon_VSHL: return "vshl";
981 case ARMneon_VSAL: return "vshl";
982 case ARMneon_VQSHL: return "vqshl";
983 case ARMneon_VQSAL: return "vqshl";
985 default: vpanic("showARMNeonShiftOp");
989 HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
998 default: vpanic("showARMNeonShiftOpDataType");
1002 HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
1004 case ARMneon_TRN: return "vtrn";
1005 case ARMneon_ZIP: return "vzip";
1006 case ARMneon_UZP: return "vuzp";
1008 default: vpanic("showARMNeonDualOp");
1012 HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1019 default: vpanic("showARMNeonDualOp");
1023 static HChar* showARMNeonDataSize_wrk ( UInt size )
1027 case 1: return "16";
1028 case 2: return "32";
1029 case 3: return "64";
1030 default: vpanic("showARMNeonDataSize");
1034 static HChar* showARMNeonDataSize ( ARMInstr* i )
1038 if (i->ARMin.NBinary.op == ARMneon_VEXT)
1040 if (i->ARMin.NBinary.op == ARMneon_VAND ||
1041 i->ARMin.NBinary.op == ARMneon_VORR ||
1042 i->ARMin.NBinary.op == ARMneon_VXOR)
1044 return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1046 if (i->ARMin.NUnary.op == ARMneon_COPY ||
1047 i->ARMin.NUnary.op == ARMneon_NOT ||
1048 i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1049 i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1050 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1051 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1052 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1053 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1054 i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1055 i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1056 i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1057 i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1059 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1060 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1061 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1063 size = i->ARMin.NUnary.size;
1072 vpanic("showARMNeonDataSize");
1074 return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1076 if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1078 size = i->ARMin.NUnaryS.size;
1079 if ((size & 1) == 1)
1081 if ((size & 3) == 2)
1083 if ((size & 7) == 4)
1085 vpanic("showARMNeonDataSize");
1087 return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1089 return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1091 return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1093 vpanic("showARMNeonDataSize");
1097 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1098 HReg dst, HReg argL, ARMRI84* argR ) {
1099 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1101 i->ARMin.Alu.op = op;
1102 i->ARMin.Alu.dst = dst;
1103 i->ARMin.Alu.argL = argL;
1104 i->ARMin.Alu.argR = argR;
1107 ARMInstr* ARMInstr_Shift ( ARMShiftOp op,
1108 HReg dst, HReg argL, ARMRI5* argR ) {
1109 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1110 i->tag = ARMin_Shift;
1111 i->ARMin.Shift.op = op;
1112 i->ARMin.Shift.dst = dst;
1113 i->ARMin.Shift.argL = argL;
1114 i->ARMin.Shift.argR = argR;
1117 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1118 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1119 i->tag = ARMin_Unary;
1120 i->ARMin.Unary.op = op;
1121 i->ARMin.Unary.dst = dst;
1122 i->ARMin.Unary.src = src;
1125 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1126 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1127 i->tag = ARMin_CmpOrTst;
1128 i->ARMin.CmpOrTst.isCmp = isCmp;
1129 i->ARMin.CmpOrTst.argL = argL;
1130 i->ARMin.CmpOrTst.argR = argR;
1133 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1134 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1136 i->ARMin.Mov.dst = dst;
1137 i->ARMin.Mov.src = src;
1140 ARMInstr* ARMInstr_Imm32 ( HReg dst, UInt imm32 ) {
1141 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1142 i->tag = ARMin_Imm32;
1143 i->ARMin.Imm32.dst = dst;
1144 i->ARMin.Imm32.imm32 = imm32;
1147 ARMInstr* ARMInstr_LdSt32 ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
1148 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1149 i->tag = ARMin_LdSt32;
1150 i->ARMin.LdSt32.isLoad = isLoad;
1151 i->ARMin.LdSt32.rD = rD;
1152 i->ARMin.LdSt32.amode = amode;
1155 ARMInstr* ARMInstr_LdSt16 ( Bool isLoad, Bool signedLoad,
1156 HReg rD, ARMAMode2* amode ) {
1157 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1158 i->tag = ARMin_LdSt16;
1159 i->ARMin.LdSt16.isLoad = isLoad;
1160 i->ARMin.LdSt16.signedLoad = signedLoad;
1161 i->ARMin.LdSt16.rD = rD;
1162 i->ARMin.LdSt16.amode = amode;
1165 ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
1166 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1167 i->tag = ARMin_LdSt8U;
1168 i->ARMin.LdSt8U.isLoad = isLoad;
1169 i->ARMin.LdSt8U.rD = rD;
1170 i->ARMin.LdSt8U.amode = amode;
1173 //extern ARMInstr* ARMInstr_Ld8S ( HReg, ARMAMode2* );
1174 ARMInstr* ARMInstr_Goto ( IRJumpKind jk, ARMCondCode cond, HReg gnext ) {
1175 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1176 i->tag = ARMin_Goto;
1177 i->ARMin.Goto.jk = jk;
1178 i->ARMin.Goto.cond = cond;
1179 i->ARMin.Goto.gnext = gnext;
1182 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1183 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1184 i->tag = ARMin_CMov;
1185 i->ARMin.CMov.cond = cond;
1186 i->ARMin.CMov.dst = dst;
1187 i->ARMin.CMov.src = src;
1188 vassert(cond != ARMcc_AL);
1191 ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs ) {
1192 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1193 i->tag = ARMin_Call;
1194 i->ARMin.Call.cond = cond;
1195 i->ARMin.Call.target = target;
1196 i->ARMin.Call.nArgRegs = nArgRegs;
1199 ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1200 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1202 i->ARMin.Mul.op = op;
1205 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1206 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1207 i->tag = ARMin_LdrEX;
1208 i->ARMin.LdrEX.szB = szB;
1209 vassert(szB == 4 || szB == 1);
1212 ARMInstr* ARMInstr_StrEX ( Int szB ) {
1213 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1214 i->tag = ARMin_StrEX;
1215 i->ARMin.StrEX.szB = szB;
1216 vassert(szB == 4 || szB == 1);
1219 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1220 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1221 i->tag = ARMin_VLdStD;
1222 i->ARMin.VLdStD.isLoad = isLoad;
1223 i->ARMin.VLdStD.dD = dD;
1224 i->ARMin.VLdStD.amode = am;
1227 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1228 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1229 i->tag = ARMin_VLdStS;
1230 i->ARMin.VLdStS.isLoad = isLoad;
1231 i->ARMin.VLdStS.fD = fD;
1232 i->ARMin.VLdStS.amode = am;
1235 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1236 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1237 i->tag = ARMin_VAluD;
1238 i->ARMin.VAluD.op = op;
1239 i->ARMin.VAluD.dst = dst;
1240 i->ARMin.VAluD.argL = argL;
1241 i->ARMin.VAluD.argR = argR;
1244 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1245 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1246 i->tag = ARMin_VAluS;
1247 i->ARMin.VAluS.op = op;
1248 i->ARMin.VAluS.dst = dst;
1249 i->ARMin.VAluS.argL = argL;
1250 i->ARMin.VAluS.argR = argR;
1253 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1254 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1255 i->tag = ARMin_VUnaryD;
1256 i->ARMin.VUnaryD.op = op;
1257 i->ARMin.VUnaryD.dst = dst;
1258 i->ARMin.VUnaryD.src = src;
1261 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1262 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1263 i->tag = ARMin_VUnaryS;
1264 i->ARMin.VUnaryS.op = op;
1265 i->ARMin.VUnaryS.dst = dst;
1266 i->ARMin.VUnaryS.src = src;
1269 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1270 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1271 i->tag = ARMin_VCmpD;
1272 i->ARMin.VCmpD.argL = argL;
1273 i->ARMin.VCmpD.argR = argR;
1276 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1277 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1278 i->tag = ARMin_VCMovD;
1279 i->ARMin.VCMovD.cond = cond;
1280 i->ARMin.VCMovD.dst = dst;
1281 i->ARMin.VCMovD.src = src;
1282 vassert(cond != ARMcc_AL);
1285 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1286 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1287 i->tag = ARMin_VCMovS;
1288 i->ARMin.VCMovS.cond = cond;
1289 i->ARMin.VCMovS.dst = dst;
1290 i->ARMin.VCMovS.src = src;
1291 vassert(cond != ARMcc_AL);
1294 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1295 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1296 i->tag = ARMin_VCvtSD;
1297 i->ARMin.VCvtSD.sToD = sToD;
1298 i->ARMin.VCvtSD.dst = dst;
1299 i->ARMin.VCvtSD.src = src;
1302 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1303 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1304 i->tag = ARMin_VXferD;
1305 i->ARMin.VXferD.toD = toD;
1306 i->ARMin.VXferD.dD = dD;
1307 i->ARMin.VXferD.rHi = rHi;
1308 i->ARMin.VXferD.rLo = rLo;
1311 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1312 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1313 i->tag = ARMin_VXferS;
1314 i->ARMin.VXferS.toS = toS;
1315 i->ARMin.VXferS.fD = fD;
1316 i->ARMin.VXferS.rLo = rLo;
1319 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1320 HReg dst, HReg src ) {
1321 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1322 i->tag = ARMin_VCvtID;
1323 i->ARMin.VCvtID.iToD = iToD;
1324 i->ARMin.VCvtID.syned = syned;
1325 i->ARMin.VCvtID.dst = dst;
1326 i->ARMin.VCvtID.src = src;
1329 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1330 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1331 i->tag = ARMin_FPSCR;
1332 i->ARMin.FPSCR.toFPSCR = toFPSCR;
1333 i->ARMin.FPSCR.iReg = iReg;
1336 ARMInstr* ARMInstr_MFence ( void ) {
1337 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1338 i->tag = ARMin_MFence;
1342 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1343 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1344 i->tag = ARMin_NLdStQ;
1345 i->ARMin.NLdStQ.isLoad = isLoad;
1346 i->ARMin.NLdStQ.dQ = dQ;
1347 i->ARMin.NLdStQ.amode = amode;
1351 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1352 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1353 i->tag = ARMin_NLdStD;
1354 i->ARMin.NLdStD.isLoad = isLoad;
1355 i->ARMin.NLdStD.dD = dD;
1356 i->ARMin.NLdStD.amode = amode;
1360 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1361 UInt size, Bool Q ) {
1362 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1363 i->tag = ARMin_NUnary;
1364 i->ARMin.NUnary.op = op;
1365 i->ARMin.NUnary.src = nQ;
1366 i->ARMin.NUnary.dst = dQ;
1367 i->ARMin.NUnary.size = size;
1368 i->ARMin.NUnary.Q = Q;
1372 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1373 UInt size, Bool Q ) {
1374 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1375 i->tag = ARMin_NUnaryS;
1376 i->ARMin.NUnaryS.op = op;
1377 i->ARMin.NUnaryS.src = src;
1378 i->ARMin.NUnaryS.dst = dst;
1379 i->ARMin.NUnaryS.size = size;
1380 i->ARMin.NUnaryS.Q = Q;
1384 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1385 UInt size, Bool Q ) {
1386 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1387 i->tag = ARMin_NDual;
1388 i->ARMin.NDual.op = op;
1389 i->ARMin.NDual.arg1 = nQ;
1390 i->ARMin.NDual.arg2 = mQ;
1391 i->ARMin.NDual.size = size;
1392 i->ARMin.NDual.Q = Q;
1396 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1397 HReg dst, HReg argL, HReg argR,
1398 UInt size, Bool Q ) {
1399 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1400 i->tag = ARMin_NBinary;
1401 i->ARMin.NBinary.op = op;
1402 i->ARMin.NBinary.argL = argL;
1403 i->ARMin.NBinary.argR = argR;
1404 i->ARMin.NBinary.dst = dst;
1405 i->ARMin.NBinary.size = size;
1406 i->ARMin.NBinary.Q = Q;
1410 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1411 ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
1412 i->tag = ARMin_NeonImm;
1413 i->ARMin.NeonImm.dst = dst;
1414 i->ARMin.NeonImm.imm = imm;
1418 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1419 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1420 i->tag = ARMin_NCMovQ;
1421 i->ARMin.NCMovQ.cond = cond;
1422 i->ARMin.NCMovQ.dst = dst;
1423 i->ARMin.NCMovQ.src = src;
1424 vassert(cond != ARMcc_AL);
1428 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1429 HReg dst, HReg argL, HReg argR,
1430 UInt size, Bool Q ) {
1431 ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1432 i->tag = ARMin_NShift;
1433 i->ARMin.NShift.op = op;
1434 i->ARMin.NShift.argL = argL;
1435 i->ARMin.NShift.argR = argR;
1436 i->ARMin.NShift.dst = dst;
1437 i->ARMin.NShift.size = size;
1438 i->ARMin.NShift.Q = Q;
/* Helper copy-pasted from isel.c */
/* Test whether u is representable as an ARM "imm8 rotated" shifter
   constant; on success write the 8-bit payload to *u8 and the 4-bit
   rotate count to *u4 and return True.
   NOTE(review): the loop body is truncated in this view -- the
   per-iteration rotate of u and the success/failure returns are not
   visible; confirm against the copy in isel.c before modifying. */
static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
   for (i = 0; i < 16; i++) {
      /* succeed as soon as only the low 8 bits of u are set */
      if (0 == (u & 0xFFFFFF00)) {
1458 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1460 ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
1461 /* Try to generate single ADD if possible */
1462 if (fitsIn8x4(&u8, &u4, imm32)) {
1464 i->ARMin.Alu.op = ARMalu_ADD;
1465 i->ARMin.Alu.dst = rD;
1466 i->ARMin.Alu.argL = rN;
1467 i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1469 i->tag = ARMin_Add32;
1470 i->ARMin.Add32.rD = rD;
1471 i->ARMin.Add32.rN = rN;
1472 i->ARMin.Add32.imm32 = imm32;
/* Pretty-print host instruction i in (approximate) ARM assembly
   syntax via vex_printf.
   NOTE(review): in this view of the file the switch(i->tag)
   scaffolding -- most case labels, the ", " separator prints, else
   branches, breaks and closing braces -- appears to be elided.  All
   visible statements are preserved unchanged; the /* ARMin_... */
/* group comments below mark which instruction tag each cluster
   belongs to, as evidenced by the union fields it accesses. */
void ppARMInstr ( ARMInstr* i ) {
   /* ARMin_Alu */
   vex_printf("%-4s ", showARMAluOp(i->ARMin.Alu.op));
   ppHRegARM(i->ARMin.Alu.dst);
   ppHRegARM(i->ARMin.Alu.argL);
   ppARMRI84(i->ARMin.Alu.argR);
   /* ARMin_Shift */
   vex_printf("%s ", showARMShiftOp(i->ARMin.Shift.op));
   ppHRegARM(i->ARMin.Shift.dst);
   ppHRegARM(i->ARMin.Shift.argL);
   ppARMRI5(i->ARMin.Shift.argR);
   /* ARMin_Unary */
   vex_printf("%s ", showARMUnaryOp(i->ARMin.Unary.op));
   ppHRegARM(i->ARMin.Unary.dst);
   ppHRegARM(i->ARMin.Unary.src);
   case ARMin_CmpOrTst:
      vex_printf("%s ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
      ppHRegARM(i->ARMin.CmpOrTst.argL);
      ppARMRI84(i->ARMin.CmpOrTst.argR);
   /* ARMin_Mov */
   ppHRegARM(i->ARMin.Mov.dst);
   ppARMRI84(i->ARMin.Mov.src);
   /* ARMin_Imm32 */
   ppHRegARM(i->ARMin.Imm32.dst);
   vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
   /* ARMin_LdSt32: load prints "rD, amode", store prints "amode, rD" */
   if (i->ARMin.LdSt32.isLoad) {
      ppHRegARM(i->ARMin.LdSt32.rD);
      ppARMAMode1(i->ARMin.LdSt32.amode);
      ppARMAMode1(i->ARMin.LdSt32.amode);
      ppHRegARM(i->ARMin.LdSt32.rD);
   /* ARMin_LdSt16: signedLoad selects ldrsh vs ldrh */
   if (i->ARMin.LdSt16.isLoad) {
      vex_printf("%s", i->ARMin.LdSt16.signedLoad
                       ? "ldrsh " : "ldrh " );
      ppHRegARM(i->ARMin.LdSt16.rD);
      ppARMAMode2(i->ARMin.LdSt16.amode);
      vex_printf("strh ");
      ppARMAMode2(i->ARMin.LdSt16.amode);
      ppHRegARM(i->ARMin.LdSt16.rD);
   /* ARMin_LdSt8U */
   if (i->ARMin.LdSt8U.isLoad) {
      vex_printf("ldrb ");
      ppHRegARM(i->ARMin.LdSt8U.rD);
      ppARMAMode1(i->ARMin.LdSt8U.amode);
      vex_printf("strb ");
      ppARMAMode1(i->ARMin.LdSt8U.amode);
      ppHRegARM(i->ARMin.LdSt8U.rD);
   /* ARMin_Goto: prints a pseudo-code "if (...) { mov ...; bx r14 }" */
   if (i->ARMin.Goto.cond != ARMcc_AL) {
      vex_printf("if (%%cpsr.%s) { ",
                 showARMCondCode(i->ARMin.Goto.cond));
      vex_printf("if (1) { ");
   /* non-Boring/Call/Ret jump kinds are signalled through r8 */
   if (i->ARMin.Goto.jk != Ijk_Boring
       && i->ARMin.Goto.jk != Ijk_Call
       && i->ARMin.Goto.jk != Ijk_Ret) {
      vex_printf("mov r8, $");
      ppIRJumpKind(i->ARMin.Goto.jk);
   vex_printf("mov r0, ");
   ppHRegARM(i->ARMin.Goto.gnext);
   vex_printf(" ; bx r14");
   if (i->ARMin.Goto.cond != ARMcc_AL) {
   /* ARMin_CMov */
   vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
   ppHRegARM(i->ARMin.CMov.dst);
   ppARMRI84(i->ARMin.CMov.src);
   /* ARMin_Call */
   vex_printf("call%s ",
              i->ARMin.Call.cond==ARMcc_AL
                 ? "" : showARMCondCode(i->ARMin.Call.cond));
   vex_printf("0x%lx [nArgRegs=%d]",
              i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
   /* ARMin_Mul: fixed registers -- args r2/r3, result r0(:r1) */
   vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
   if (i->ARMin.Mul.op == ARMmul_PLAIN) {
      vex_printf("r0, r2, r3");
      vex_printf("r1:r0, r2, r3");
   /* ARMin_LdrEX: fixed registers r0 (data), r1 (address) */
   vex_printf("ldrex%s ", i->ARMin.LdrEX.szB == 1 ? "b"
                          : i->ARMin.LdrEX.szB == 2 ? "h" : "");
   vex_printf("r0, [r1]");
   /* ARMin_StrEX: fixed registers r0 (status), r1 (data), r2 (addr) */
   vex_printf("strex%s ", i->ARMin.StrEX.szB == 1 ? "b"
                          : i->ARMin.StrEX.szB == 2 ? "h" : "");
   vex_printf("r0, r1, [r2]");
   /* ARMin_VLdStD */
   if (i->ARMin.VLdStD.isLoad) {
      vex_printf("fldd ");
      ppHRegARM(i->ARMin.VLdStD.dD);
      ppARMAModeV(i->ARMin.VLdStD.amode);
      vex_printf("fstd ");
      ppARMAModeV(i->ARMin.VLdStD.amode);
      ppHRegARM(i->ARMin.VLdStD.dD);
   /* ARMin_VLdStS */
   if (i->ARMin.VLdStS.isLoad) {
      vex_printf("flds ");
      ppHRegARM(i->ARMin.VLdStS.fD);
      ppARMAModeV(i->ARMin.VLdStS.amode);
      vex_printf("fsts ");
      ppARMAModeV(i->ARMin.VLdStS.amode);
      ppHRegARM(i->ARMin.VLdStS.fD);
   /* ARMin_VAluD */
   vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
   ppHRegARM(i->ARMin.VAluD.dst);
   ppHRegARM(i->ARMin.VAluD.argL);
   ppHRegARM(i->ARMin.VAluD.argR);
   /* ARMin_VAluS */
   vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
   ppHRegARM(i->ARMin.VAluS.dst);
   ppHRegARM(i->ARMin.VAluS.argL);
   ppHRegARM(i->ARMin.VAluS.argR);
   /* ARMin_VUnaryD */
   vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
   ppHRegARM(i->ARMin.VUnaryD.dst);
   ppHRegARM(i->ARMin.VUnaryD.src);
   /* ARMin_VUnaryS */
   vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
   ppHRegARM(i->ARMin.VUnaryS.dst);
   ppHRegARM(i->ARMin.VUnaryS.src);
   /* ARMin_VCmpD: compare then copy flags with fmstat */
   vex_printf("fcmpd ");
   ppHRegARM(i->ARMin.VCmpD.argL);
   ppHRegARM(i->ARMin.VCmpD.argR);
   vex_printf(" ; fmstat");
   /* ARMin_VCMovD */
   vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
   ppHRegARM(i->ARMin.VCMovD.dst);
   ppHRegARM(i->ARMin.VCMovD.src);
   /* ARMin_VCMovS */
   vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
   ppHRegARM(i->ARMin.VCMovS.dst);
   ppHRegARM(i->ARMin.VCMovS.src);
   /* ARMin_VCvtSD */
   vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
   ppHRegARM(i->ARMin.VCvtSD.dst);
   ppHRegARM(i->ARMin.VCvtSD.src);
   /* ARMin_VXferD: operand order depends on direction */
   vex_printf("vmov ");
   if (i->ARMin.VXferD.toD) {
      ppHRegARM(i->ARMin.VXferD.dD);
      ppHRegARM(i->ARMin.VXferD.rLo);
      ppHRegARM(i->ARMin.VXferD.rHi);
      ppHRegARM(i->ARMin.VXferD.rLo);
      ppHRegARM(i->ARMin.VXferD.rHi);
      ppHRegARM(i->ARMin.VXferD.dD);
   /* ARMin_VXferS */
   vex_printf("vmov ");
   if (i->ARMin.VXferS.toS) {
      ppHRegARM(i->ARMin.VXferS.fD);
      ppHRegARM(i->ARMin.VXferS.rLo);
      ppHRegARM(i->ARMin.VXferS.rLo);
      ppHRegARM(i->ARMin.VXferS.fD);
   case ARMin_VCvtID: {
      /* pick mnemonic from direction and signedness */
      if (i->ARMin.VCvtID.iToD) {
         nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
         nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
      vex_printf("%s ", nm);
      ppHRegARM(i->ARMin.VCvtID.dst);
      ppHRegARM(i->ARMin.VCvtID.src);
   /* ARMin_FPSCR */
   if (i->ARMin.FPSCR.toFPSCR) {
      vex_printf("fmxr fpscr, ");
      ppHRegARM(i->ARMin.FPSCR.iReg);
      vex_printf("fmrx ");
      ppHRegARM(i->ARMin.FPSCR.iReg);
      vex_printf(", fpscr");
   /* ARMin_MFence */
   vex_printf("mfence (mcr 15,0,r0,c7,c10,4; 15,0,r0,c7,c10,5; "
              "15,0,r0,c7,c5,4)");
   /* ARMin_NLdStQ */
   if (i->ARMin.NLdStQ.isLoad)
      vex_printf("vld1.32 {");
      vex_printf("vst1.32 {");
   ppHRegARM(i->ARMin.NLdStQ.dQ);
   ppARMAModeN(i->ARMin.NLdStQ.amode);
   /* ARMin_NLdStD */
   if (i->ARMin.NLdStD.isLoad)
      vex_printf("vld1.32 {");
      vex_printf("vst1.32 {");
   ppHRegARM(i->ARMin.NLdStD.dD);
   ppARMAModeN(i->ARMin.NLdStD.amode);
   /* ARMin_NUnary */
   vex_printf("%s%s%s ",
              showARMNeonUnOp(i->ARMin.NUnary.op),
              showARMNeonUnOpDataType(i->ARMin.NUnary.op),
              showARMNeonDataSize(i));
   ppHRegARM(i->ARMin.NUnary.dst);
   ppHRegARM(i->ARMin.NUnary.src);
   if (i->ARMin.NUnary.op == ARMneon_EQZ)
   /* fixed-point conversions carry the #fbits immediate in .size */
   if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
       i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
       i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
       i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
      vex_printf(", #%d", i->ARMin.NUnary.size);
   /* VQSHLN*: shift amount is encoded into .size; decode by the
      highest set "width" bit */
   if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
       i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
       i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
      size = i->ARMin.NUnary.size;
         vex_printf(", #%d", size - 64);
      } else if (size & 0x20) {
         vex_printf(", #%d", size - 32);
      } else if (size & 0x10) {
         vex_printf(", #%d", size - 16);
      } else if (size & 0x08) {
         vex_printf(", #%d", size - 8);
   /* ARMin_NUnaryS -- NOTE(review): reads NUnary.op while the
      operands come from NUnaryS; presumably relies on the op field
      occupying the same union offset in both -- verify. */
   vex_printf("%s%s%s ",
              showARMNeonUnOp(i->ARMin.NUnary.op),
              showARMNeonUnOpDataType(i->ARMin.NUnary.op),
              showARMNeonDataSize(i));
   ppARMNRS(i->ARMin.NUnaryS.dst);
   ppARMNRS(i->ARMin.NUnaryS.src);
   /* ARMin_NShift */
   vex_printf("%s%s%s ",
              showARMNeonShiftOp(i->ARMin.NShift.op),
              showARMNeonShiftOpDataType(i->ARMin.NShift.op),
              showARMNeonDataSize(i));
   ppHRegARM(i->ARMin.NShift.dst);
   ppHRegARM(i->ARMin.NShift.argL);
   ppHRegARM(i->ARMin.NShift.argR);
   /* ARMin_NDual */
   vex_printf("%s%s%s ",
              showARMNeonDualOp(i->ARMin.NDual.op),
              showARMNeonDualOpDataType(i->ARMin.NDual.op),
              showARMNeonDataSize(i));
   ppHRegARM(i->ARMin.NDual.arg1);
   ppHRegARM(i->ARMin.NDual.arg2);
   /* ARMin_NBinary */
   vex_printf("%s%s%s",
              showARMNeonBinOp(i->ARMin.NBinary.op),
              showARMNeonBinOpDataType(i->ARMin.NBinary.op),
              showARMNeonDataSize(i));
   ppHRegARM(i->ARMin.NBinary.dst);
   ppHRegARM(i->ARMin.NBinary.argL);
   ppHRegARM(i->ARMin.NBinary.argR);
   /* ARMin_NeonImm */
   vex_printf("vmov ");
   ppHRegARM(i->ARMin.NeonImm.dst);
   ppARMNImm(i->ARMin.NeonImm.imm);
   /* ARMin_NCMovQ */
   vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
   ppHRegARM(i->ARMin.NCMovQ.dst);
   ppHRegARM(i->ARMin.NCMovQ.src);
   /* ARMin_Add32 */
   vex_printf("add32 ");
   ppHRegARM(i->ARMin.Add32.rD);
   ppHRegARM(i->ARMin.Add32.rN);
   /* NOTE(review): imm32 appears to be unsigned (constructor takes
      UInt); "%u" would be the matching format -- confirm before
      changing. */
   vex_printf("%d", i->ARMin.Add32.imm32);
   /* default: unhandled tag */
   vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
   vpanic("ppARMInstr(1)");
1870 /* --------- Helpers for register allocation. --------- */
/* Record in *u which registers instruction i reads and writes, for
   the register allocator.  ARM host is 32-bit only, hence the mode64
   check.
   NOTE(review): the switch(i->tag) scaffolding appears elided in
   this view; the /* ARMin_... */
/* group comments mark the tag each cluster belongs to. */
void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
   vassert(mode64 == False);
   /* ARMin_Alu */
   addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
   addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
   addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
   /* ARMin_Shift */
   addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
   addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
   addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
   /* ARMin_Unary */
   addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
   addHRegUse(u, HRmRead, i->ARMin.Unary.src);
   case ARMin_CmpOrTst:
      addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
      addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
   /* ARMin_Mov */
   addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
   addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
   /* ARMin_Imm32 */
   addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
   /* ARMin_LdSt32 */
   addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
   if (i->ARMin.LdSt32.isLoad) {
      addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
      addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
   /* ARMin_LdSt16 */
   addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
   if (i->ARMin.LdSt16.isLoad) {
      addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
      addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
   /* ARMin_LdSt8U */
   addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
   if (i->ARMin.LdSt8U.isLoad) {
      addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
      addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
   /* ARMin_Goto */
   /* reads the reg holding the next guest addr */
   addHRegUse(u, HRmRead, i->ARMin.Goto.gnext);
   /* writes it to the standard integer return register */
   addHRegUse(u, HRmWrite, hregARM_R0());
   /* possibly messes with the baseblock pointer */
   if (i->ARMin.Goto.jk != Ijk_Boring
       && i->ARMin.Goto.jk != Ijk_Call
       && i->ARMin.Goto.jk != Ijk_Ret)
      /* note, this is irrelevant since r8 is not actually
         available to the allocator.  But still .. */
      addHRegUse(u, HRmWrite, hregARM_R8());
   /* ARMin_CMov: conditional, so dst is read as well as written */
   addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
   addHRegUse(u, HRmRead, i->ARMin.CMov.dst);
   addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
   /* ARMin_Call */
   /* logic and comments copied/modified from x86 back end */
   /* This is a bit subtle. */
   /* First off, claim it trashes all the caller-saved regs
      which fall within the register allocator's jurisdiction.
      These I believe to be r0,1,2,3.  If it turns out that r9
      is also caller-saved, then we'll have to add that here
      too. */
   addHRegUse(u, HRmWrite, hregARM_R0());
   addHRegUse(u, HRmWrite, hregARM_R1());
   addHRegUse(u, HRmWrite, hregARM_R2());
   addHRegUse(u, HRmWrite, hregARM_R3());
   /* Now we have to state any parameter-carrying registers
      which might be read.  This depends on nArgRegs. */
   switch (i->ARMin.Call.nArgRegs) {
   case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
   case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
   case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
   case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
   default: vpanic("getRegUsage_ARM:Call:regparms");
   /* Finally, there is the issue that the insn trashes a
      register because the literal target address has to be
      loaded into a register.  Fortunately, for the nArgRegs=
      0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
      this does not cause any further damage.  For the
      nArgRegs=4 case, we'll have to choose another register
      arbitrarily since all the caller saved regs are used for
      parameters, and so we might as well choose r11. */
   if (i->ARMin.Call.nArgRegs == 4)
      addHRegUse(u, HRmWrite, hregARM_R11());
   /* Upshot of this is that the assembler really must observe
      the here-stated convention of which register to use as an
      address temporary, depending on nArgRegs: 0==r0,
      1==r1, 2==r2, 3==r3, 4==r11 */
   /* ARMin_Mul: fixed registers -- reads r2/r3, writes r0, and r1
      too for the non-PLAIN (widening) variants */
   addHRegUse(u, HRmRead, hregARM_R2());
   addHRegUse(u, HRmRead, hregARM_R3());
   addHRegUse(u, HRmWrite, hregARM_R0());
   if (i->ARMin.Mul.op != ARMmul_PLAIN)
      addHRegUse(u, HRmWrite, hregARM_R1());
   /* ARMin_LdrEX: fixed registers r0 (result), r1 (address) */
   addHRegUse(u, HRmWrite, hregARM_R0());
   addHRegUse(u, HRmRead, hregARM_R1());
   /* ARMin_StrEX: fixed registers r0 (status), r1 (data), r2 (addr) */
   addHRegUse(u, HRmWrite, hregARM_R0());
   addHRegUse(u, HRmRead, hregARM_R1());
   addHRegUse(u, HRmRead, hregARM_R2());
   /* ARMin_VLdStD */
   addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
   if (i->ARMin.VLdStD.isLoad) {
      addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
      addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
   /* ARMin_VLdStS */
   addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
   if (i->ARMin.VLdStS.isLoad) {
      addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
      addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
   /* ARMin_VAluD */
   addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
   addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
   addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
   /* ARMin_VAluS */
   addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
   addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
   addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
   /* ARMin_VUnaryD */
   addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
   addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
   /* ARMin_VUnaryS */
   addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
   addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
   /* ARMin_VCmpD */
   addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
   addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
   /* ARMin_VCMovD: conditional, so dst is read too */
   addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
   addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst);
   addHRegUse(u, HRmRead, i->ARMin.VCMovD.src);
   /* ARMin_VCMovS: conditional, so dst is read too */
   addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
   addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst);
   addHRegUse(u, HRmRead, i->ARMin.VCMovS.src);
   /* ARMin_VCvtSD */
   addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
   addHRegUse(u, HRmRead, i->ARMin.VCvtSD.src);
   /* ARMin_VXferD: direction decides which side is written */
   if (i->ARMin.VXferD.toD) {
      addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
      addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi);
      addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo);
      addHRegUse(u, HRmRead, i->ARMin.VXferD.dD);
      addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
      addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
   /* ARMin_VXferS */
   if (i->ARMin.VXferS.toS) {
      addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
      addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo);
      addHRegUse(u, HRmRead, i->ARMin.VXferS.fD);
      addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
   /* ARMin_VCvtID */
   addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
   addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
   /* ARMin_FPSCR */
   if (i->ARMin.FPSCR.toFPSCR)
      addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
      addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
   /* ARMin_NLdStQ */
   if (i->ARMin.NLdStQ.isLoad)
      addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
      addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
   addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
   /* ARMin_NLdStD */
   if (i->ARMin.NLdStD.isLoad)
      addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
      addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
   addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
   /* ARMin_NUnary */
   addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
   addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
   /* ARMin_NUnaryS */
   addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
   addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
   /* ARMin_NShift */
   addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
   addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
   addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
   /* ARMin_NDual: both args read AND written */
   addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
   addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
   addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
   addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
   /* ARMin_NBinary */
   addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
   /* TODO: sometimes dst is also being read! */
   addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
   addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
   /* ARMin_NeonImm */
   addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
   /* ARMin_NCMovQ: conditional, so dst is read too */
   addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
   addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
   addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
   /* ARMin_Add32 */
   addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
   addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
   /* default */
   vpanic("getRegUsage_ARMInstr");
/* Apply the register-allocator's virtual->real register mapping m to
   every register field of instruction i, in place.
   NOTE(review): switch/case scaffolding appears elided in this view;
   group comments mark the owning tag. */
void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
   vassert(mode64 == False);
   /* ARMin_Alu */
   i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
   i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
   mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
   /* ARMin_Shift */
   i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
   i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
   mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
   /* ARMin_Unary */
   i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
   i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
   case ARMin_CmpOrTst:
      i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
      mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
   /* ARMin_Mov */
   i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
   mapRegs_ARMRI84(m, i->ARMin.Mov.src);
   /* ARMin_Imm32 */
   i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
   /* ARMin_LdSt32 */
   i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
   mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
   /* ARMin_LdSt16 */
   i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
   mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
   /* ARMin_LdSt8U */
   i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
   mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
   /* ARMin_Goto */
   i->ARMin.Goto.gnext = lookupHRegRemap(m, i->ARMin.Goto.gnext);
   /* ARMin_CMov */
   i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
   mapRegs_ARMRI84(m, i->ARMin.CMov.src);
   /* ARMin_VLdStD */
   i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
   mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
   /* ARMin_VLdStS */
   i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
   mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
   /* ARMin_VAluD */
   i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
   i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
   i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
   /* ARMin_VAluS */
   i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
   i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
   i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
   /* ARMin_VUnaryD */
   i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
   i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
   /* ARMin_VUnaryS */
   i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
   i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
   /* ARMin_VCmpD */
   i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
   i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
   /* ARMin_VCMovD */
   i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
   i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
   /* ARMin_VCMovS */
   i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
   i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
   /* ARMin_VCvtSD */
   i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
   i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
   /* ARMin_VXferD */
   i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
   i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
   i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
   /* ARMin_VXferS */
   i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
   i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
   /* ARMin_VCvtID */
   i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
   i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
   /* ARMin_FPSCR */
   i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
   /* ARMin_NLdStQ */
   i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
   mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
   /* ARMin_NLdStD */
   i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
   mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
   /* ARMin_NUnary */
   i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
   i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
   /* ARMin_NUnaryS: operands are ARMNRS*, remap the embedded reg */
   i->ARMin.NUnaryS.src->reg
      = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
   i->ARMin.NUnaryS.dst->reg
      = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
   /* ARMin_NShift */
   i->ARMin.NShift.dst  = lookupHRegRemap(m, i->ARMin.NShift.dst);
   i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
   i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
   /* ARMin_NDual */
   i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
   i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
   /* ARMin_NBinary */
   i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
   i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
   i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
   /* ARMin_NeonImm */
   i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
   /* ARMin_NCMovQ */
   i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
   i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
   /* ARMin_Add32 */
   i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
   i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
   /* default */
   vpanic("mapRegs_ARMInstr");
/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing.
   NOTE(review): the tag tests and return statements appear elided in
   this view; the three clusters below correspond to integer Mov,
   VUnaryD copy and VUnaryS copy respectively. */
Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
   /* Moves between integer regs */
   if (i->ARMin.Mov.src->tag == ARMri84_R) {
      *src = i->ARMin.Mov.src->ARMri84.R.reg;
      *dst = i->ARMin.Mov.dst;
   /* a VFP double "copy" op is also a register move */
   if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
      *src = i->ARMin.VUnaryD.src;
      *dst = i->ARMin.VUnaryD.dst;
   /* likewise for VFP single */
   if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
      *src = i->ARMin.VUnaryS.src;
      *dst = i->ARMin.VUnaryS.dst;
   // todo: float, vector moves
/* Generate arm spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes (comment completed -- its tail is not visible in
   this view).
   NOTE(review): the rclass switch scaffolding and the declaration of
   rclass/base appear elided; clusters below are per register class. */
void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                    HReg rreg, Int offsetB, Bool mode64 )
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   rclass = hregClass(rreg);
   /* integer class, presumably: single str to baseblock (r8)+offset */
   vassert(offsetB <= 4095);
   *i1 = ARMInstr_LdSt32( False/*!isLoad*/,
                          ARMAMode1_RI(hregARM_R8(), offsetB) );
   /* VFP classes: fstd/fsts offsets reach only 1020 bytes, so step
      the base address into r12 first for large offsets */
   HReg r8 = hregARM_R8(); /* baseblock */
   HReg r12 = hregARM_R12(); /* spill temp */
   vassert(0 == (offsetB & 3));
   if (offsetB >= 1024) {
      Int offsetKB = offsetB / 1024;
      /* r12 = r8 + (1024 * offsetKB) */
      *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
                         ARMRI84_I84(offsetKB, 11));
      offsetB -= (1024 * offsetKB);
   vassert(offsetB <= 1020);
   if (rclass == HRcFlt32) {
      *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
                             mkARMAModeV(base, offsetB) );
      *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
                             mkARMAModeV(base, offsetB) );
   /* vector class: compute the address in r12, then NEON store */
   HReg r8 = hregARM_R8();
   HReg r12 = hregARM_R12();
   *i1 = ARMInstr_Add32(r12, r8, offsetB);
   *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
   /* default: unknown class */
   ppHRegClass(rclass);
   vpanic("genSpill_ARM: unimplemented regclass");
/* Mirror of genSpill_ARM: reload rreg from the baseblock slot at
   offsetB, using the same addressing strategy but with the load
   direction set.
   NOTE(review): as with genSpill_ARM, the rclass switch scaffolding
   and rclass/base declarations appear elided in this view. */
void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   rclass = hregClass(rreg);
   /* integer class, presumably: single ldr from (r8)+offset */
   vassert(offsetB <= 4095);
   *i1 = ARMInstr_LdSt32( True/*isLoad*/,
                          ARMAMode1_RI(hregARM_R8(), offsetB) );
   /* VFP classes: step base into r12 for offsets >= 1024 */
   HReg r8 = hregARM_R8(); /* baseblock */
   HReg r12 = hregARM_R12(); /* spill temp */
   vassert(0 == (offsetB & 3));
   if (offsetB >= 1024) {
      Int offsetKB = offsetB / 1024;
      /* r12 = r8 + (1024 * offsetKB) */
      *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
                         ARMRI84_I84(offsetKB, 11));
      offsetB -= (1024 * offsetKB);
   vassert(offsetB <= 1020);
   if (rclass == HRcFlt32) {
      *i2 = ARMInstr_VLdStS( True/*isLoad*/,
                             mkARMAModeV(base, offsetB) );
      *i2 = ARMInstr_VLdStD( True/*isLoad*/,
                             mkARMAModeV(base, offsetB) );
   /* vector class: address in r12, then NEON load */
   HReg r8 = hregARM_R8();
   HReg r12 = hregARM_R12();
   *i1 = ARMInstr_Add32(r12, r8, offsetB);
   *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
   /* default: unknown class */
   ppHRegClass(rclass);
   vpanic("genReload_ARM: unimplemented regclass");
2464 /* Emit an instruction into buf and return the number of bytes used.
2465 Note that buf is not the insn's final place, and therefore it is
2466 imperative to emit position-independent code. */
/* Extract the encoded register number of a real (non-virtual) 32-bit
   integer register.
   NOTE(review): the statements that fetch and return the number are
   not visible in this view -- only the class/realness checks remain. */
static inline UChar iregNo ( HReg r )
   vassert(hregClass(r) == HRcInt32);
   vassert(!hregIsVirtual(r));
/* Ditto for a 64-bit VFP (D) register.  On a class mismatch the
   offending class is printed before the vassert fires, to aid
   debugging.
   NOTE(review): number-extraction/return statements not visible in
   this view. */
static inline UChar dregNo ( HReg r )
   if (hregClass(r) != HRcFlt64)
      ppHRegClass(hregClass(r));
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
/* Ditto for a 32-bit VFP (S) register.
   NOTE(review): number-extraction/return statements not visible in
   this view. */
static inline UChar fregNo ( HReg r )
   vassert(hregClass(r) == HRcFlt32);
   vassert(!hregIsVirtual(r));
/* Ditto for a 128-bit NEON (Q) register.
   NOTE(review): number-extraction/return statements not visible in
   this view. */
static inline UChar qregNo ( HReg r )
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
/* BITS4 packs four single-bit values into a nibble; the X0000..X1111
   constants below name every possible nibble pattern, for readable
   instruction encodings. */
#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
#define X0000 BITS4(0,0,0,0)
#define X0001 BITS4(0,0,0,1)
#define X0010 BITS4(0,0,1,0)
#define X0011 BITS4(0,0,1,1)
#define X0100 BITS4(0,1,0,0)
#define X0101 BITS4(0,1,0,1)
#define X0110 BITS4(0,1,1,0)
#define X0111 BITS4(0,1,1,1)
#define X1000 BITS4(1,0,0,0)
#define X1001 BITS4(1,0,0,1)
#define X1010 BITS4(1,0,1,0)
#define X1011 BITS4(1,0,1,1)
#define X1100 BITS4(1,1,0,0)
#define X1101 BITS4(1,1,0,1)
#define X1110 BITS4(1,1,1,0)
#define X1111 BITS4(1,1,1,1)

/* The XXXX... macros assemble a 32-bit ARM instruction word from
   nibbles, MSB first.  Underscores in the macro name mark nibble
   positions that are left zero, for the caller to OR in later
   (e.g. a skeletal shifter operand). */
#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12))

#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))

#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))

#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
  ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
   (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
   (((zzx0) & 0xF) << 0))

#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
/* Generate a skeletal insn that involves an RI84 shifter operand.
   Returns a word which is all zeroes apart from bits 25 and 11..0,
   since it is those that encode the shifter operand (at least to the
   extent that we care about it.) */
/* NOTE(review): the declaration/initialisation of instr, the I-bit
   (bit 25) set, the else branch and the return are not visible in
   this view. */
static UInt skeletal_RI84 ( ARMRI84* ri )
   if (ri->tag == ARMri84_I84) {
      /* immediate form: 4-bit rotate in bits 11..8, 8-bit payload in
         bits 7..0 */
      vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
      vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
      instr |= (ri->ARMri84.I84.imm4 << 8);
      instr |= ri->ARMri84.I84.imm8;
      /* register form: register number in bits 3..0 */
      instr |= iregNo(ri->ARMri84.R.reg);
/* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
   [remainder of comment not visible in this view -- presumably the
   shift-amount field bits]. */
/* NOTE(review): instr declaration, immediate placement, else branch
   and return are not visible in this view. */
static UInt skeletal_RI5 ( ARMRI5* ri )
   if (ri->tag == ARMri5_I5) {
      /* immediate shift amount; 0 is not representable here */
      UInt imm5 = ri->ARMri5.I5.imm5;
      vassert(imm5 >= 1 && imm5 <= 31);
      /* register form: shift-by-register, reg number in bits 11..8 */
      instr |= iregNo(ri->ARMri5.R.reg) << 8;
/* Get an immediate into a register, using only that
   register.  (very lame..) */
/* NOTE(review): this function is heavily elided in this view --
   variable declarations (instr, imm, rot, op, rN and the emit
   pointer updates), several else branches and the final return are
   not visible.  Visible strategies, in order: single MOV for 8-bit
   immediates; a literal load (Dcache-polluting); MOVW/MOVT on
   ARMv7+; otherwise build the value byte-by-byte with ALU ops. */
static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
   vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
   if (0 == (imm32 & ~0xFF)) {
      /* mov with an immediate shifter operand of (0, imm32) (??) */
      instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
   // this is very bad; causes Dcache pollution
   instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
      /* Generate movw rD, #low16.  Then, if the high 16 are
         nonzero, generate movt rD, #high16. */
      UInt lo16 = imm32 & 0xFFFF;
      UInt hi16 = (imm32 >> 16) & 0xFFFF;
      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
      instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                       (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
   /* build the constant one rotated byte at a time */
   if ((imm32 & 0xFF) || (imm32 == 0)) {
      instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   if (imm32 & 0xFF000000) {
      imm = (imm32 >> 24) & 0xFF;
      instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   if (imm32 & 0xFF0000) {
      imm = (imm32 >> 16) & 0xFF;
      instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
   if (imm32 & 0xFF00) {
      imm = (imm32 >> 8) & 0xFF;
      instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2674 Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i,
2675 Bool mode64, void* dispatch )
2677 UInt* p = (UInt*)buf;
2678 vassert(nbuf >= 32);
2679 vassert(mode64 == False);
2680 vassert(0 == (((HWord)buf) & 3));
2681 /* since we branch to lr(r13) to get back to dispatch: */
2682 vassert(dispatch == NULL);
2687 UInt rD = iregNo(i->ARMin.Alu.dst);
2688 UInt rN = iregNo(i->ARMin.Alu.argL);
2689 ARMRI84* argR = i->ARMin.Alu.argR;
2690 switch (i->ARMin.Alu.op) {
2691 case ARMalu_ADDS: /* fallthru */
2692 case ARMalu_ADD: subopc = X0100; break;
2693 case ARMalu_ADC: subopc = X0101; break;
2694 case ARMalu_SUBS: /* fallthru */
2695 case ARMalu_SUB: subopc = X0010; break;
2696 case ARMalu_SBC: subopc = X0110; break;
2697 case ARMalu_AND: subopc = X0000; break;
2698 case ARMalu_BIC: subopc = X1110; break;
2699 case ARMalu_OR: subopc = X1100; break;
2700 case ARMalu_XOR: subopc = X0001; break;
2703 instr = skeletal_RI84(argR);
2704 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2705 (subopc << 1) & 0xF, rN, rD);
2706 if (i->ARMin.Alu.op == ARMalu_ADDS
2707 || i->ARMin.Alu.op == ARMalu_SUBS) {
2708 instr |= 1<<20; /* set the S bit */
2715 HReg rD = iregNo(i->ARMin.Shift.dst);
2716 HReg rM = iregNo(i->ARMin.Shift.argL);
2717 ARMRI5* argR = i->ARMin.Shift.argR;
2718 switch (i->ARMin.Shift.op) {
2719 case ARMsh_SHL: subopc = X0000; break;
2720 case ARMsh_SHR: subopc = X0001; break;
2721 case ARMsh_SAR: subopc = X0010; break;
2724 instr = skeletal_RI5(argR);
2725 instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
2726 instr |= (subopc & 3) << 5;
2732 HReg rDst = iregNo(i->ARMin.Unary.dst);
2733 HReg rSrc = iregNo(i->ARMin.Unary.src);
2734 switch (i->ARMin.Unary.op) {
2736 instr = XXXXXXXX(X1110,X0001,X0110,X1111,
2737 rDst,X1111,X0001,rSrc);
2740 case ARMun_NEG: /* RSB rD,rS,#0 */
2741 instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
2745 UInt subopc = X1111; /* MVN */
2747 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2748 (subopc << 1) & 0xF, 0, rDst);
2757 case ARMin_CmpOrTst: {
2758 UInt instr = skeletal_RI84(i->ARMin.CmpOrTst.argR);
2759 UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
2761 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2762 ((subopc << 1) & 0xF) | 1,
2763 i->ARMin.CmpOrTst.argL, SBZ );
2768 UInt instr = skeletal_RI84(i->ARMin.Mov.src);
2769 UInt subopc = X1101; /* MOV */
2771 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2772 (subopc << 1) & 0xF, SBZ, i->ARMin.Mov.dst);
2777 p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
2778 i->ARMin.Imm32.imm32 );
2782 case ARMin_LdSt8U: {
2786 if (i->tag == ARMin_LdSt32) {
2788 bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
2789 am = i->ARMin.LdSt32.amode;
2790 rD = i->ARMin.LdSt32.rD;
2793 bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
2794 am = i->ARMin.LdSt8U.amode;
2795 rD = i->ARMin.LdSt8U.rD;
2797 if (am->tag == ARMam1_RI) {
2800 if (am->ARMam1.RI.simm13 < 0) {
2802 simm12 = -am->ARMam1.RI.simm13;
2805 simm12 = am->ARMam1.RI.simm13;
2807 vassert(simm12 >= 0 && simm12 <= 4095);
2808 instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
2809 iregNo(am->ARMam1.RI.reg),
2819 case ARMin_LdSt16: {
2820 HReg rD = i->ARMin.LdSt16.rD;
2821 UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
2822 UInt bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
2823 ARMAMode2* am = i->ARMin.LdSt16.amode;
2824 if (am->tag == ARMam2_RI) {
2825 HReg rN = am->ARMam2.RI.reg;
2827 UInt bP, imm8hi, imm8lo, instr;
2828 if (am->ARMam2.RI.simm9 < 0) {
2830 simm8 = -am->ARMam2.RI.simm9;
2833 simm8 = am->ARMam2.RI.simm9;
2835 vassert(simm8 >= 0 && simm8 <= 255);
2836 imm8hi = (simm8 >> 4) & 0xF;
2837 imm8lo = simm8 & 0xF;
2838 vassert(!(bL == 0 && bS == 1)); // "! signed store"
2839 /**/ if (bL == 0 && bS == 0) {
2841 instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,0), iregNo(rN),
2842 iregNo(rD), imm8hi, X1011, imm8lo);
2846 else if (bL == 1 && bS == 0) {
2848 instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,1), iregNo(rN),
2849 iregNo(rD), imm8hi, X1011, imm8lo);
2853 else if (bL == 1 && bS == 1) {
2856 else vassert(0); // ill-constructed insn
2866 IRJumpKind jk = i->ARMin.Goto.jk;
2867 ARMCondCode cond = i->ARMin.Goto.cond;
2868 UInt rnext = iregNo(i->ARMin.Goto.gnext);
2871 case Ijk_Ret: case Ijk_Call: case Ijk_Boring:
2872 break; /* no need to set GST in these common cases */
2874 trc = VEX_TRC_JMP_CLIENTREQ; break;
2875 case Ijk_Sys_int128:
2876 case Ijk_Sys_int129:
2877 case Ijk_Sys_int130:
2883 trc = VEX_TRC_JMP_NODECODE; break;
2885 trc = VEX_TRC_JMP_TINVAL; break;
2887 trc = VEX_TRC_JMP_NOREDIR; break;
2888 case Ijk_Sys_sysenter:
2892 case Ijk_Sys_syscall:
2893 trc = VEX_TRC_JMP_SYS_SYSCALL; break;
2899 // mov{cond} r8, #trc
2900 vassert(trc >= 0 && trc <= 255);
2901 instr = (cond << 28) | 0x03A08000 | (0xFF & (UInt)trc);
2904 // mov{cond} r0, rnext
2906 instr = (cond << 28) | 0x01A00000 | rnext;
2910 instr =(cond << 28) | 0x012FFF1E;
2915 UInt instr = skeletal_RI84(i->ARMin.CMov.src);
2916 UInt subopc = X1101; /* MOV */
2918 instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
2919 (subopc << 1) & 0xF, SBZ, i->ARMin.CMov.dst);
2925 /* Decide on a scratch reg used to hold to the call address.
2926 This has to be done as per the comments in getRegUsage. */
2928 switch (i->ARMin.Call.nArgRegs) {
2929 case 0: scratchNo = 0; break;
2930 case 1: scratchNo = 1; break;
2931 case 2: scratchNo = 2; break;
2932 case 3: scratchNo = 3; break;
2933 case 4: scratchNo = 11; break;
2934 default: vassert(0);
2936 // r"scratchNo" = &target
2937 p = imm32_to_iregNo( (UInt*)p,
2938 scratchNo, (UInt)i->ARMin.Call.target );
2939 // blx{cond} r"scratchNo"
2940 instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
2942 instr |= 0xFFF << 8; // stick in the SBOnes
2947 /* E0000392 mul r0, r2, r3
2948 E0810392 umull r0(LO), r1(HI), r2, r3
2949 E0C10392 smull r0(LO), r1(HI), r2, r3
2951 switch (i->ARMin.Mul.op) {
2952 case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
2953 case ARMmul_ZX: *p++ = 0xE0810392; goto done;
2954 case ARMmul_SX: *p++ = 0xE0C10392; goto done;
2955 default: vassert(0);
2960 /* E1910F9F ldrex r0, [r1]
2961 E1F10F9F ldrexh r0, [r1]
2962 E1D10F9F ldrexb r0, [r1]
2964 switch (i->ARMin.LdrEX.szB) {
2965 case 4: *p++ = 0xE1910F9F; goto done;
2966 //case 2: *p++ = 0xE1F10F9F; goto done;
2967 case 1: *p++ = 0xE1D10F9F; goto done;
2973 /* E1820F91 strex r0, r1, [r2]
2974 E1E20F91 strexh r0, r1, [r2]
2975 E1C20F91 strexb r0, r1, [r2]
2977 switch (i->ARMin.StrEX.szB) {
2978 case 4: *p++ = 0xE1820F91; goto done;
2979 //case 2: *p++ = 0xE1E20F91; goto done;
2980 case 1: *p++ = 0xE1C20F91; goto done;
2985 case ARMin_VLdStD: {
2986 UInt dD = dregNo(i->ARMin.VLdStD.dD);
2987 UInt rN = iregNo(i->ARMin.VLdStD.amode->reg);
2988 Int simm11 = i->ARMin.VLdStD.amode->simm11;
2989 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
2990 UInt bU = simm11 >= 0 ? 1 : 0;
2991 UInt bL = i->ARMin.VLdStD.isLoad ? 1 : 0;
2993 vassert(0 == (off8 & 3));
2995 vassert(0 == (off8 & 0xFFFFFF00));
2996 insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3001 case ARMin_VLdStS: {
3002 UInt fD = fregNo(i->ARMin.VLdStS.fD);
3003 UInt rN = iregNo(i->ARMin.VLdStS.amode->reg);
3004 Int simm11 = i->ARMin.VLdStS.amode->simm11;
3005 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3006 UInt bU = simm11 >= 0 ? 1 : 0;
3007 UInt bL = i->ARMin.VLdStS.isLoad ? 1 : 0;
3010 vassert(0 == (off8 & 3));
3012 vassert(0 == (off8 & 0xFFFFFF00));
3013 insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3019 UInt dN = dregNo(i->ARMin.VAluD.argL);
3020 UInt dD = dregNo(i->ARMin.VAluD.dst);
3021 UInt dM = dregNo(i->ARMin.VAluD.argR);
3022 UInt pqrs = X1111; /* undefined */
3023 switch (i->ARMin.VAluD.op) {
3024 case ARMvfp_ADD: pqrs = X0110; break;
3025 case ARMvfp_SUB: pqrs = X0111; break;
3026 case ARMvfp_MUL: pqrs = X0100; break;
3027 case ARMvfp_DIV: pqrs = X1000; break;
3030 vassert(pqrs != X1111);
3031 UInt bP = (pqrs >> 3) & 1;
3032 UInt bQ = (pqrs >> 2) & 1;
3033 UInt bR = (pqrs >> 1) & 1;
3034 UInt bS = (pqrs >> 0) & 1;
3035 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3036 X1011, BITS4(0,bS,0,0), dM);
3041 UInt dN = fregNo(i->ARMin.VAluS.argL);
3042 UInt dD = fregNo(i->ARMin.VAluS.dst);
3043 UInt dM = fregNo(i->ARMin.VAluS.argR);
3047 UInt pqrs = X1111; /* undefined */
3048 switch (i->ARMin.VAluS.op) {
3049 case ARMvfp_ADD: pqrs = X0110; break;
3050 case ARMvfp_SUB: pqrs = X0111; break;
3051 case ARMvfp_MUL: pqrs = X0100; break;
3052 case ARMvfp_DIV: pqrs = X1000; break;
3055 vassert(pqrs != X1111);
3056 UInt bP = (pqrs >> 3) & 1;
3057 UInt bQ = (pqrs >> 2) & 1;
3058 UInt bR = (pqrs >> 1) & 1;
3059 UInt bS = (pqrs >> 0) & 1;
3060 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3061 (dN >> 1), (dD >> 1),
3062 X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3066 case ARMin_VUnaryD: {
3067 UInt dD = dregNo(i->ARMin.VUnaryD.dst);
3068 UInt dM = dregNo(i->ARMin.VUnaryD.src);
3070 switch (i->ARMin.VUnaryD.op) {
3072 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3075 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3078 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3081 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3089 case ARMin_VUnaryS: {
3090 UInt fD = fregNo(i->ARMin.VUnaryS.dst);
3091 UInt fM = fregNo(i->ARMin.VUnaryS.src);
3093 switch (i->ARMin.VUnaryS.op) {
3095 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3096 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3100 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3101 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3105 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3106 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3110 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3111 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3121 UInt dD = dregNo(i->ARMin.VCmpD.argL);
3122 UInt dM = dregNo(i->ARMin.VCmpD.argR);
3123 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3124 *p++ = insn; /* FCMPD dD, dM */
3125 *p++ = 0xEEF1FA10; /* FMSTAT */
3128 case ARMin_VCMovD: {
3129 UInt cc = (UInt)i->ARMin.VCMovD.cond;
3130 UInt dD = dregNo(i->ARMin.VCMovD.dst);
3131 UInt dM = dregNo(i->ARMin.VCMovD.src);
3132 vassert(cc < 16 && cc != ARMcc_AL);
3133 UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3137 case ARMin_VCMovS: {
3138 UInt cc = (UInt)i->ARMin.VCMovS.cond;
3139 UInt fD = fregNo(i->ARMin.VCMovS.dst);
3140 UInt fM = fregNo(i->ARMin.VCMovS.src);
3141 vassert(cc < 16 && cc != ARMcc_AL);
3142 UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3143 X0000,(fD >> 1),X1010,
3144 BITS4(0,1,(fM & 1),0), (fM >> 1));
3148 case ARMin_VCvtSD: {
3149 if (i->ARMin.VCvtSD.sToD) {
3150 UInt dD = dregNo(i->ARMin.VCvtSD.dst);
3151 UInt fM = fregNo(i->ARMin.VCvtSD.src);
3152 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3153 BITS4(1,1, (fM & 1), 0),
3158 UInt fD = fregNo(i->ARMin.VCvtSD.dst);
3159 UInt dM = dregNo(i->ARMin.VCvtSD.src);
3160 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3168 case ARMin_VXferD: {
3169 UInt dD = dregNo(i->ARMin.VXferD.dD);
3170 UInt rHi = iregNo(i->ARMin.VXferD.rHi);
3171 UInt rLo = iregNo(i->ARMin.VXferD.rLo);
3172 /* vmov dD, rLo, rHi is
3173 E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3174 vmov rLo, rHi, dD is
3175 E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3178 = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3180 BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3184 case ARMin_VXferS: {
3185 UInt fD = fregNo(i->ARMin.VXferS.fD);
3186 UInt rLo = iregNo(i->ARMin.VXferS.rLo);
3188 E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3190 E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3193 = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3194 (fD >> 1) & 0xF, rLo, 0xA,
3195 BITS4((fD & 1),0,0,1), 0);
3199 case ARMin_VCvtID: {
3200 Bool iToD = i->ARMin.VCvtID.iToD;
3201 Bool syned = i->ARMin.VCvtID.syned;
3202 if (iToD && syned) {
3203 // FSITOD: I32S-in-freg to F64-in-dreg
3204 UInt regF = fregNo(i->ARMin.VCvtID.src);
3205 UInt regD = dregNo(i->ARMin.VCvtID.dst);
3206 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3207 X1011, BITS4(1,1,(regF & 1),0),
3212 if (iToD && (!syned)) {
3213 // FUITOD: I32U-in-freg to F64-in-dreg
3214 UInt regF = fregNo(i->ARMin.VCvtID.src);
3215 UInt regD = dregNo(i->ARMin.VCvtID.dst);
3216 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3217 X1011, BITS4(0,1,(regF & 1),0),
3222 if ((!iToD) && syned) {
3223 // FTOSID: F64-in-dreg to I32S-in-freg
3224 UInt regD = dregNo(i->ARMin.VCvtID.src);
3225 UInt regF = fregNo(i->ARMin.VCvtID.dst);
3226 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3227 X1101, (regF >> 1) & 0xF,
3228 X1011, X0100, regD);
3232 if ((!iToD) && (!syned)) {
3233 // FTOUID: F64-in-dreg to I32U-in-freg
3234 UInt regD = dregNo(i->ARMin.VCvtID.src);
3235 UInt regF = fregNo(i->ARMin.VCvtID.dst);
3236 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3237 X1100, (regF >> 1) & 0xF,
3238 X1011, X0100, regD);
3246 Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3247 HReg iReg = iregNo(i->ARMin.FPSCR.iReg);
3249 /* fmxr fpscr, iReg is EEE1 iReg A10 */
3250 *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3253 goto bad; // FPSCR -> iReg case currently ATC
3255 case ARMin_MFence: {
3256 *p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3257 *p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3258 *p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
3261 case ARMin_NLdStQ: {
3262 UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
3265 UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
3267 vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
3269 if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
3270 regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
3271 regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
3273 regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
3276 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3277 regN, regD, X1010, X1000, regM);
3281 case ARMin_NLdStD: {
3282 UInt regD = dregNo(i->ARMin.NLdStD.dD);
3285 UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
3287 vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
3289 if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
3290 regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
3291 regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
3293 regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
3296 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3297 regN, regD, X0111, X1000, regM);
3301 case ARMin_NUnaryS: {
3302 UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
3305 UInt size = i->ARMin.NUnaryS.size;
3307 UInt opc, opc1, opc2;
3308 switch (i->ARMin.NUnaryS.op) {
3310 if (i->ARMin.NUnaryS.size >= 16)
3312 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
3314 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3316 regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
3317 ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
3318 : dregNo(i->ARMin.NUnaryS.dst->reg);
3319 regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
3320 ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
3321 : dregNo(i->ARMin.NUnaryS.src->reg);
3326 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
3327 (i->ARMin.NUnaryS.size & 0xf), regD,
3328 X1100, BITS4(0,Q,M,0), regM);
3331 case ARMneon_SETELEM:
3332 regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
3333 dregNo(i->ARMin.NUnaryS.dst->reg);
3334 regM = iregNo(i->ARMin.NUnaryS.src->reg);
3339 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
3343 if (i->ARMin.NUnaryS.dst->index > 7)
3345 opc = X1000 | i->ARMin.NUnaryS.dst->index;
3348 if (i->ARMin.NUnaryS.dst->index > 3)
3350 opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
3353 if (i->ARMin.NUnaryS.dst->index > 1)
3355 opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
3360 opc1 = (opc >> 2) & 3;
3362 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
3364 BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
3367 case ARMneon_GETELEMU:
3368 regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
3369 dregNo(i->ARMin.NUnaryS.src->reg);
3370 regD = iregNo(i->ARMin.NUnaryS.dst->reg);
3375 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3379 if (Q && i->ARMin.NUnaryS.src->index > 7) {
3381 i->ARMin.NUnaryS.src->index -= 8;
3383 if (i->ARMin.NUnaryS.src->index > 7)
3385 opc = X1000 | i->ARMin.NUnaryS.src->index;
3388 if (Q && i->ARMin.NUnaryS.src->index > 3) {
3390 i->ARMin.NUnaryS.src->index -= 4;
3392 if (i->ARMin.NUnaryS.src->index > 3)
3394 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3401 opc1 = (opc >> 2) & 3;
3403 insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
3405 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3408 case ARMneon_GETELEMS:
3409 regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
3410 dregNo(i->ARMin.NUnaryS.src->reg);
3411 regD = iregNo(i->ARMin.NUnaryS.dst->reg);
3416 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3420 if (Q && i->ARMin.NUnaryS.src->index > 7) {
3422 i->ARMin.NUnaryS.src->index -= 8;
3424 if (i->ARMin.NUnaryS.src->index > 7)
3426 opc = X1000 | i->ARMin.NUnaryS.src->index;
3429 if (Q && i->ARMin.NUnaryS.src->index > 3) {
3431 i->ARMin.NUnaryS.src->index -= 4;
3433 if (i->ARMin.NUnaryS.src->index > 3)
3435 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3438 if (Q && i->ARMin.NUnaryS.src->index > 1) {
3440 i->ARMin.NUnaryS.src->index -= 2;
3442 if (i->ARMin.NUnaryS.src->index > 1)
3444 opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
3449 opc1 = (opc >> 2) & 3;
3451 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
3453 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3460 case ARMin_NUnary: {
3461 UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
3462 UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
3463 ? (qregNo(i->ARMin.NUnary.dst) << 1)
3464 : dregNo(i->ARMin.NUnary.dst);
3467 UInt sz1 = i->ARMin.NUnary.size >> 1;
3468 UInt sz2 = i->ARMin.NUnary.size & 1;
3469 UInt sz = i->ARMin.NUnary.size;
3471 UInt F = 0; /* TODO: floating point EQZ ??? */
3472 if (i->ARMin.NUnary.op != ARMneon_DUP) {
3473 regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
3474 ? (qregNo(i->ARMin.NUnary.src) << 1)
3475 : dregNo(i->ARMin.NUnary.src);
3478 regM = iregNo(i->ARMin.NUnary.src);
3483 switch (i->ARMin.NUnary.op) {
3484 case ARMneon_COPY: /* VMOV reg, reg */
3485 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
3486 BITS4(M,Q,M,1), regM);
3488 case ARMneon_COPYN: /* VMOVN regD, regQ */
3489 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3490 regD, X0010, BITS4(0,0,M,0), regM);
3492 case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
3493 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3494 regD, X0010, BITS4(1,0,M,0), regM);
3496 case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
3497 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3498 regD, X0010, BITS4(0,1,M,0), regM);
3500 case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
3501 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3502 regD, X0010, BITS4(1,1,M,0), regM);
3504 case ARMneon_COPYLS: /* VMOVL regQ, regD */
3507 insn = XXXXXXXX(0xF, X0010,
3508 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
3509 BITS4((sz == 0) ? 1 : 0,0,0,0),
3510 regD, X1010, BITS4(0,0,M,1), regM);
3512 case ARMneon_COPYLU: /* VMOVL regQ, regD */
3515 insn = XXXXXXXX(0xF, X0011,
3516 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
3517 BITS4((sz == 0) ? 1 : 0,0,0,0),
3518 regD, X1010, BITS4(0,0,M,1), regM);
3520 case ARMneon_NOT: /* VMVN reg, reg*/
3521 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
3522 BITS4(1,Q,M,0), regM);
3525 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
3526 regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
3529 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
3530 BITS4(0,Q,M,0), regM);
3533 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3534 regD, X0100, BITS4(1,Q,M,0), regM);
3537 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3538 regD, X0100, BITS4(0,Q,M,0), regM);
3541 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
3542 regD, X0011, BITS4(0,Q,M,0), regM);
3545 sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
3546 sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
3547 vassert(sz1 + sz2 < 2);
3548 insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
3549 X1011, BITS4(D,0,sz2,1), X0000);
3552 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3553 regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
3556 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3557 regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
3560 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3561 regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
3563 case ARMneon_PADDLU:
3564 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3565 regD, X0010, BITS4(1,Q,M,0), regM);
3567 case ARMneon_PADDLS:
3568 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3569 regD, X0010, BITS4(0,Q,M,0), regM);
3571 case ARMneon_VQSHLNUU:
3572 insn = XXXXXXXX(0xF, X0011,
3573 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
3574 sz & 0xf, regD, X0111,
3575 BITS4(sz >> 6,Q,M,1), regM);
3577 case ARMneon_VQSHLNSS:
3578 insn = XXXXXXXX(0xF, X0010,
3579 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
3580 sz & 0xf, regD, X0111,
3581 BITS4(sz >> 6,Q,M,1), regM);
3583 case ARMneon_VQSHLNUS:
3584 insn = XXXXXXXX(0xF, X0011,
3585 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
3586 sz & 0xf, regD, X0110,
3587 BITS4(sz >> 6,Q,M,1), regM);
3589 case ARMneon_VCVTFtoS:
3590 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
3591 BITS4(0,Q,M,0), regM);
3593 case ARMneon_VCVTFtoU:
3594 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
3595 BITS4(1,Q,M,0), regM);
3597 case ARMneon_VCVTStoF:
3598 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
3599 BITS4(0,Q,M,0), regM);
3601 case ARMneon_VCVTUtoF:
3602 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
3603 BITS4(1,Q,M,0), regM);
3605 case ARMneon_VCVTFtoFixedU:
3606 sz1 = (sz >> 5) & 1;
3607 sz2 = (sz >> 4) & 1;
3609 insn = XXXXXXXX(0xF, X0011,
3610 BITS4(1,D,sz1,sz2), sz, regD, X1111,
3611 BITS4(0,Q,M,1), regM);
3613 case ARMneon_VCVTFtoFixedS:
3614 sz1 = (sz >> 5) & 1;
3615 sz2 = (sz >> 4) & 1;
3617 insn = XXXXXXXX(0xF, X0010,
3618 BITS4(1,D,sz1,sz2), sz, regD, X1111,
3619 BITS4(0,Q,M,1), regM);
3621 case ARMneon_VCVTFixedUtoF:
3622 sz1 = (sz >> 5) & 1;
3623 sz2 = (sz >> 4) & 1;
3625 insn = XXXXXXXX(0xF, X0011,
3626 BITS4(1,D,sz1,sz2), sz, regD, X1110,
3627 BITS4(0,Q,M,1), regM);
3629 case ARMneon_VCVTFixedStoF:
3630 sz1 = (sz >> 5) & 1;
3631 sz2 = (sz >> 4) & 1;
3633 insn = XXXXXXXX(0xF, X0010,
3634 BITS4(1,D,sz1,sz2), sz, regD, X1110,
3635 BITS4(0,Q,M,1), regM);
3637 case ARMneon_VCVTF32toF16:
3638 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
3639 BITS4(0,0,M,0), regM);
3641 case ARMneon_VCVTF16toF32:
3642 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
3643 BITS4(0,0,M,0), regM);
3645 case ARMneon_VRECIP:
3646 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
3647 BITS4(0,Q,M,0), regM);
3649 case ARMneon_VRECIPF:
3650 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
3651 BITS4(0,Q,M,0), regM);
3653 case ARMneon_VABSFP:
3654 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
3655 BITS4(0,Q,M,0), regM);
3657 case ARMneon_VRSQRTEFP:
3658 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
3659 BITS4(1,Q,M,0), regM);
3661 case ARMneon_VRSQRTE:
3662 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
3663 BITS4(1,Q,M,0), regM);
3666 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
3667 BITS4(1,Q,M,0), regM);
3677 UInt Q = i->ARMin.NDual.Q ? 1 : 0;
3678 UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
3679 ? (qregNo(i->ARMin.NDual.arg1) << 1)
3680 : dregNo(i->ARMin.NDual.arg1);
3681 UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
3682 ? (qregNo(i->ARMin.NDual.arg2) << 1)
3683 : dregNo(i->ARMin.NDual.arg2);
3686 UInt sz1 = i->ARMin.NDual.size >> 1;
3687 UInt sz2 = i->ARMin.NDual.size & 1;
3691 switch (i->ARMin.NDual.op) {
3692 case ARMneon_TRN: /* VTRN reg, reg */
3693 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3694 regD, X0000, BITS4(1,Q,M,0), regM);
3696 case ARMneon_ZIP: /* VZIP reg, reg */
3697 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3698 regD, X0001, BITS4(1,Q,M,0), regM);
3700 case ARMneon_UZP: /* VUZP reg, reg */
3701 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3702 regD, X0001, BITS4(0,Q,M,0), regM);
3710 case ARMin_NBinary: {
3711 UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
3712 UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
3713 ? (qregNo(i->ARMin.NBinary.dst) << 1)
3714 : dregNo(i->ARMin.NBinary.dst);
3715 UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
3716 ? (qregNo(i->ARMin.NBinary.argL) << 1)
3717 : dregNo(i->ARMin.NBinary.argL);
3718 UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
3719 ? (qregNo(i->ARMin.NBinary.argR) << 1)
3720 : dregNo(i->ARMin.NBinary.argR);
3721 UInt sz1 = i->ARMin.NBinary.size >> 1;
3722 UInt sz2 = i->ARMin.NBinary.size & 1;
3730 switch (i->ARMin.NBinary.op) {
3731 case ARMneon_VAND: /* VAND reg, reg, reg */
3732 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
3733 BITS4(N,Q,M,1), regM);
3735 case ARMneon_VORR: /* VORR reg, reg, reg*/
3736 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
3737 BITS4(N,Q,M,1), regM);
3739 case ARMneon_VXOR: /* VEOR reg, reg, reg */
3740 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
3741 BITS4(N,Q,M,1), regM);
3743 case ARMneon_VADD: /* VADD reg, reg, reg */
3744 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3745 X1000, BITS4(N,Q,M,0), regM);
3747 case ARMneon_VSUB: /* VSUB reg, reg, reg */
3748 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3749 X1000, BITS4(N,Q,M,0), regM);
3751 case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
3752 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3753 X0110, BITS4(N,Q,M,1), regM);
3755 case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
3756 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3757 X0110, BITS4(N,Q,M,1), regM);
3759 case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
3760 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3761 X0110, BITS4(N,Q,M,0), regM);
3763 case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
3764 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3765 X0110, BITS4(N,Q,M,0), regM);
3767 case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
3768 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3769 X0001, BITS4(N,Q,M,0), regM);
3771 case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
3772 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3773 X0001, BITS4(N,Q,M,0), regM);
3775 case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
3776 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3777 X0000, BITS4(N,Q,M,1), regM);
3779 case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
3780 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3781 X0000, BITS4(N,Q,M,1), regM);
3783 case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
3784 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3785 X0010, BITS4(N,Q,M,1), regM);
3787 case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
3788 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3789 X0010, BITS4(N,Q,M,1), regM);
3791 case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
3792 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3793 X0011, BITS4(N,Q,M,0), regM);
3795 case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
3796 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3797 X0011, BITS4(N,Q,M,0), regM);
3799 case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
3800 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3801 X0011, BITS4(N,Q,M,1), regM);
3803 case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
3804 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3805 X0011, BITS4(N,Q,M,1), regM);
3807 case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
3808 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3809 X1000, BITS4(N,Q,M,1), regM);
3811 case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
3812 if (i->ARMin.NBinary.size >= 16)
3814 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
3815 i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
3819 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3820 X1001, BITS4(N,Q,M,1), regM);
3822 case ARMneon_VMULLU:
3823 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
3824 X1100, BITS4(N,0,M,0), regM);
3826 case ARMneon_VMULLS:
3827 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
3828 X1100, BITS4(N,0,M,0), regM);
3831 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3832 X1001, BITS4(N,Q,M,1), regM);
3834 case ARMneon_VMULFP:
3835 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
3836 X1101, BITS4(N,Q,M,1), regM);
3838 case ARMneon_VMULLP:
3839 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
3840 X1110, BITS4(N,0,M,0), regM);
3842 case ARMneon_VQDMULH:
3843 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3844 X1011, BITS4(N,Q,M,0), regM);
3846 case ARMneon_VQRDMULH:
3847 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3848 X1011, BITS4(N,Q,M,0), regM);
3850 case ARMneon_VQDMULL:
3851 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
3852 X1101, BITS4(N,0,M,0), regM);
3855 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
3856 X1000, BITS4(N,0,M,0), regM);
3859 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3860 X1011, BITS4(N,Q,M,1), regM);
3862 case ARMneon_VPADDFP:
3863 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
3864 X1101, BITS4(N,Q,M,0), regM);
3866 case ARMneon_VPMINU:
3867 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3868 X1010, BITS4(N,Q,M,1), regM);
3870 case ARMneon_VPMINS:
3871 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3872 X1010, BITS4(N,Q,M,1), regM);
3874 case ARMneon_VPMAXU:
3875 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3876 X1010, BITS4(N,Q,M,0), regM);
3878 case ARMneon_VPMAXS:
3879 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3880 X1010, BITS4(N,Q,M,0), regM);
3882 case ARMneon_VADDFP: /* VADD reg, reg, reg */
3883 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
3884 X1101, BITS4(N,Q,M,0), regM);
3886 case ARMneon_VSUBFP: /* VADD reg, reg, reg */
3887 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
3888 X1101, BITS4(N,Q,M,0), regM);
3890 case ARMneon_VABDFP: /* VABD reg, reg, reg */
3891 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
3892 X1101, BITS4(N,Q,M,0), regM);
3895 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
3896 X1111, BITS4(N,Q,M,0), regM);
3899 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
3900 X1111, BITS4(N,Q,M,0), regM);
3902 case ARMneon_VPMINF:
3903 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
3904 X1111, BITS4(N,Q,M,0), regM);
3906 case ARMneon_VPMAXF:
3907 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
3908 X1111, BITS4(N,Q,M,0), regM);
3910 case ARMneon_VRECPS:
3911 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
3912 BITS4(N,Q,M,1), regM);
3915 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
3916 BITS4(N,Q,M,0), regM);
3919 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
3920 BITS4(N,Q,M,0), regM);
3923 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
3924 BITS4(N,Q,M,0), regM);
3926 case ARMneon_VRSQRTS:
3927 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
3928 BITS4(N,Q,M,1), regM);
3936 case ARMin_NShift: {
3937 UInt Q = i->ARMin.NShift.Q ? 1 : 0;
3938 UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
3939 ? (qregNo(i->ARMin.NShift.dst) << 1)
3940 : dregNo(i->ARMin.NShift.dst);
3941 UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
3942 ? (qregNo(i->ARMin.NShift.argL) << 1)
3943 : dregNo(i->ARMin.NShift.argL);
3944 UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
3945 ? (qregNo(i->ARMin.NShift.argR) << 1)
3946 : dregNo(i->ARMin.NShift.argR);
3947 UInt sz1 = i->ARMin.NShift.size >> 1;
3948 UInt sz2 = i->ARMin.NShift.size & 1;
3956 switch (i->ARMin.NShift.op) {
3958 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3959 X0100, BITS4(N,Q,M,0), regM);
3962 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3963 X0100, BITS4(N,Q,M,0), regM);
3966 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3967 X0100, BITS4(N,Q,M,1), regM);
3970 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3971 X0100, BITS4(N,Q,M,1), regM);
3979 case ARMin_NeonImm: {
3980 UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
3981 UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
3982 dregNo(i->ARMin.NeonImm.dst);
3984 UInt imm = i->ARMin.NeonImm.imm->imm8;
3985 UInt tp = i->ARMin.NeonImm.imm->type;
3987 UInt imm3 = (imm >> 4) & 0x7;
3988 UInt imm4 = imm & 0xF;
4019 vpanic("ARMin_NeonImm");
4022 insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4023 cmode, BITS4(0,Q,op,1), imm4);
4027 case ARMin_NCMovQ: {
4028 UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4029 UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
4030 UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
4033 UInt M = (qM >> 4) & 1;
4034 UInt D = (qD >> 4) & 1;
4035 vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4036 /* b!cc here+8: !cc A00 0000 */
4037 UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4040 insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4041 vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4046 UInt regD = iregNo(i->ARMin.Add32.rD);
4047 UInt regN = iregNo(i->ARMin.Add32.rN);
4048 UInt imm32 = i->ARMin.Add32.imm32;
4049 vassert(regD != regN);
4050 /* MOV regD, imm32 */
4051 p = imm32_to_iregNo((UInt *)p, regD, imm32);
4052 /* ADD regD, regN, regD */
4053 UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4064 vpanic("emit_ARMInstr");
4068 vassert(((UChar*)p) - &buf[0] <= 32);
4069 return ((UChar*)p) - &buf[0];
4095 /*---------------------------------------------------------------*/
4096 /*--- end host_arm_defs.c ---*/
4097 /*---------------------------------------------------------------*/