/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_amd64_defs.h"
/* --------- Registers. --------- */

void ppHRegAMD64 ( HReg reg )
{
   Int r;
   static HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%s", ireg64_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegAMD64");
   }
}
static void ppHRegAMD64_lo32 ( HReg reg )
{
   Int r;
   static HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx",  "%ebx",  "%esp",  "%ebp",  "%esi",  "%edi",
         "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      vex_printf("d");
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%s", ireg32_names[r]);
         return;
      default:
         vpanic("ppHRegAMD64_lo32: invalid regclass");
   }
}
HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); }
HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); }
HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); }
HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); }
HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); }
HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); }
HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); }
HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); }
HReg hregAMD64_R8  ( void ) { return mkHReg( 8, HRcInt64, False); }
HReg hregAMD64_R9  ( void ) { return mkHReg( 9, HRcInt64, False); }
HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); }
HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); }
HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); }
HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); }
HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }
//.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }
HReg hregAMD64_XMM0  ( void ) { return mkHReg( 0, HRcVec128, False); }
HReg hregAMD64_XMM1  ( void ) { return mkHReg( 1, HRcVec128, False); }
HReg hregAMD64_XMM2  ( void ) { return mkHReg( 2, HRcVec128, False); }
HReg hregAMD64_XMM3  ( void ) { return mkHReg( 3, HRcVec128, False); }
HReg hregAMD64_XMM4  ( void ) { return mkHReg( 4, HRcVec128, False); }
HReg hregAMD64_XMM5  ( void ) { return mkHReg( 5, HRcVec128, False); }
HReg hregAMD64_XMM6  ( void ) { return mkHReg( 6, HRcVec128, False); }
HReg hregAMD64_XMM7  ( void ) { return mkHReg( 7, HRcVec128, False); }
HReg hregAMD64_XMM8  ( void ) { return mkHReg( 8, HRcVec128, False); }
HReg hregAMD64_XMM9  ( void ) { return mkHReg( 9, HRcVec128, False); }
HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }
HReg hregAMD64_XMM13 ( void ) { return mkHReg(13, HRcVec128, False); }
HReg hregAMD64_XMM14 ( void ) { return mkHReg(14, HRcVec128, False); }
HReg hregAMD64_XMM15 ( void ) { return mkHReg(15, HRcVec128, False); }
void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
{
#if 0
   *nregs = 6;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_RBX();

   (*arr)[ 3] = hregAMD64_XMM7();
   (*arr)[ 4] = hregAMD64_XMM8();
   (*arr)[ 5] = hregAMD64_XMM9();
#endif
#if 1
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_R8();
   (*arr)[ 3] = hregAMD64_R9();
   (*arr)[ 4] = hregAMD64_R12();
   (*arr)[ 5] = hregAMD64_R13();
   (*arr)[ 6] = hregAMD64_R14();
   (*arr)[ 7] = hregAMD64_R15();
   (*arr)[ 8] = hregAMD64_RBX();

   (*arr)[ 9] = hregAMD64_XMM3();
   (*arr)[10] = hregAMD64_XMM4();
   (*arr)[11] = hregAMD64_XMM5();
   (*arr)[12] = hregAMD64_XMM6();
   (*arr)[13] = hregAMD64_XMM7();
   (*arr)[14] = hregAMD64_XMM8();
   (*arr)[15] = hregAMD64_XMM9();
   (*arr)[16] = hregAMD64_XMM10();
   (*arr)[17] = hregAMD64_XMM11();
   (*arr)[18] = hregAMD64_XMM12();
   (*arr)[19] = hregAMD64_R10();
#endif
}
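
/* Note (a reading aid, not from the original source): %rsp and %rbp
   are deliberately withheld from the allocator (host stack pointer
   and guest-state pointer -- spills below are %rbp-relative), %rcx is
   implicitly read by variable-amount shifts (Ain_Sh64 with src==0),
   %rax/%rdx are clobbered by Ain_MulL/Ain_Div and the Ain_Goto
   dispatch sequence, and %r11 is the scratch register used for call
   targets (see getRegUsage for Ain_Call below). */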
/* --------- Condition codes, Intel encoding. --------- */

HChar* showAMD64CondCode ( AMD64CondCode cond )
{
   switch (cond) {
      case Acc_O:      return "o";
      case Acc_NO:     return "no";
      case Acc_B:      return "b";
      case Acc_NB:     return "nb";
      case Acc_Z:      return "z";
      case Acc_NZ:     return "nz";
      case Acc_BE:     return "be";
      case Acc_NBE:    return "nbe";
      case Acc_S:      return "s";
      case Acc_NS:     return "ns";
      case Acc_P:      return "p";
      case Acc_NP:     return "np";
      case Acc_L:      return "l";
      case Acc_NL:     return "nl";
      case Acc_LE:     return "le";
      case Acc_NLE:    return "nle";
      case Acc_ALWAYS: return "ALWAYS";
      default: vpanic("ppAMD64CondCode");
   }
}
/* --------- AMD64AMode: memory address expressions. --------- */

AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
   AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
   am->tag        = Aam_IR;
   am->Aam.IR.imm = imm32;
   am->Aam.IR.reg = reg;
   return am;
}
AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
   am->tag = Aam_IRRS;
   am->Aam.IRRS.imm   = imm32;
   am->Aam.IRRS.base  = base;
   am->Aam.IRRS.index = indEx;
   am->Aam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}
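
/* Illustrative usage (not from the original source): the amode for
   "16(%rbp)" is built as AMD64AMode_IR(16, hregAMD64_RBP()), and
   "8(%rax,%rcx,4)" as AMD64AMode_IRRS(8, hregAMD64_RAX(),
   hregAMD64_RCX(), 2) -- the shift field holds log2 of the scale,
   hence the vassert(shift >= 0 && shift <= 3) above. */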
//.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) {
//..    switch (am->tag) {
//..          return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
//..          return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
//..                                  am->Xam.IRRS.index, am->Xam.IRRS.shift );
//..          vpanic("dopyAMD64AMode");
void ppAMD64AMode ( AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         if (am->Aam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Aam.IR.imm);
         ppHRegAMD64(am->Aam.IR.reg);
         vex_printf(")");
         return;
      case Aam_IRRS:
         vex_printf("0x%x(", am->Aam.IRRS.imm);
         ppHRegAMD64(am->Aam.IRRS.base);
         vex_printf(",");
         ppHRegAMD64(am->Aam.IRRS.index);
         vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
         return;
      default:
         vpanic("ppAMD64AMode");
   }
}
static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         addHRegUse(u, HRmRead, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         addHRegUse(u, HRmRead, am->Aam.IRRS.base);
         addHRegUse(u, HRmRead, am->Aam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_AMD64AMode");
   }
}

static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
         am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_AMD64AMode");
   }
}
/* --------- Operand, which can be reg, immediate or memory. --------- */

AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
   AMD64RMI* op       = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag            = Armi_Imm;
   op->Armi.Imm.imm32 = imm32;
   return op;
}
AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
   AMD64RMI* op     = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag          = Armi_Reg;
   op->Armi.Reg.reg = reg;
   return op;
}
AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
   AMD64RMI* op    = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag         = Armi_Mem;
   op->Armi.Mem.am = am;
   return op;
}

void ppAMD64RMI ( AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         vex_printf("$0x%x", op->Armi.Imm.imm32);
         return;
      case Armi_Reg:
         ppHRegAMD64(op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         ppAMD64AMode(op->Armi.Mem.am);
         return;
      default:
         vpanic("ppAMD64RMI");
   }
}

/* An AMD64RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         addHRegUse(u, HRmRead, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_AMD64RMI");
   }
}

static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         mapRegs_AMD64AMode(m, op->Armi.Mem.am);
         return;
      default:
         vpanic("mapRegs_AMD64RMI");
   }
}
/* --------- Operand, which can be reg or immediate only. --------- */

AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
   AMD64RI* op       = LibVEX_Alloc(sizeof(AMD64RI));
   op->tag           = Ari_Imm;
   op->Ari.Imm.imm32 = imm32;
   return op;
}
AMD64RI* AMD64RI_Reg ( HReg reg ) {
   AMD64RI* op     = LibVEX_Alloc(sizeof(AMD64RI));
   op->tag         = Ari_Reg;
   op->Ari.Reg.reg = reg;
   return op;
}

void ppAMD64RI ( AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         vex_printf("$0x%x", op->Ari.Imm.imm32);
         return;
      case Ari_Reg:
         ppHRegAMD64(op->Ari.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RI");
   }
}

/* An AMD64RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         addHRegUse(u, HRmRead, op->Ari.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RI");
   }
}

static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RI");
   }
}
/* --------- Operand, which can be reg or memory only. --------- */

AMD64RM* AMD64RM_Reg ( HReg reg ) {
   AMD64RM* op     = LibVEX_Alloc(sizeof(AMD64RM));
   op->tag         = Arm_Reg;
   op->Arm.Reg.reg = reg;
   return op;
}
AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
   AMD64RM* op    = LibVEX_Alloc(sizeof(AMD64RM));
   op->tag        = Arm_Mem;
   op->Arm.Mem.am = am;
   return op;
}

void ppAMD64RM ( AMD64RM* op ) {
   switch (op->tag) {
      case Arm_Mem:
         ppAMD64AMode(op->Arm.Mem.am);
         return;
      case Arm_Reg:
         ppHRegAMD64(op->Arm.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RM");
   }
}

/* Because an AMD64RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Arm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Arm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RM");
   }
}

static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
{
   switch (op->tag) {
      case Arm_Mem:
         mapRegs_AMD64AMode(m, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RM");
   }
}
/* --------- Instructions. --------- */

static HChar* showAMD64ScalarSz ( Int sz ) {
   switch (sz) {
      case 2: return "w";
      case 4: return "l";
      case 8: return "q";
      default: vpanic("showAMD64ScalarSz");
   }
}

HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
   switch (op) {
      case Aun_NOT: return "not";
      case Aun_NEG: return "neg";
      default: vpanic("showAMD64UnaryOp");
   }
}

HChar* showAMD64AluOp ( AMD64AluOp op ) {
   switch (op) {
      case Aalu_MOV:  return "mov";
      case Aalu_CMP:  return "cmp";
      case Aalu_ADD:  return "add";
      case Aalu_SUB:  return "sub";
      case Aalu_ADC:  return "adc";
      case Aalu_SBB:  return "sbb";
      case Aalu_AND:  return "and";
      case Aalu_OR:   return "or";
      case Aalu_XOR:  return "xor";
      case Aalu_MUL:  return "imul";
      default: vpanic("showAMD64AluOp");
   }
}

HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
   switch (op) {
      case Ash_SHL: return "shl";
      case Ash_SHR: return "shr";
      case Ash_SAR: return "sar";
      default: vpanic("showAMD64ShiftOp");
   }
}
HChar* showA87FpOp ( A87FpOp op ) {
   switch (op) {
//..       case Xfp_ADD:    return "add";
//..       case Xfp_SUB:    return "sub";
//..       case Xfp_MUL:    return "mul";
//..       case Xfp_DIV:    return "div";
      case Afp_SCALE:  return "scale";
      case Afp_ATAN:   return "atan";
      case Afp_YL2X:   return "yl2x";
      case Afp_YL2XP1: return "yl2xp1";
      case Afp_PREM:   return "prem";
      case Afp_PREM1:  return "prem1";
      case Afp_SQRT:   return "sqrt";
//..       case Xfp_ABS:    return "abs";
//..       case Xfp_NEG:    return "chs";
//..       case Xfp_MOV:    return "mov";
      case Afp_SIN:    return "sin";
      case Afp_COS:    return "cos";
      case Afp_TAN:    return "tan";
      case Afp_ROUND:  return "round";
      case Afp_2XM1:   return "2xm1";
      default: vpanic("showA87FpOp");
   }
}
HChar* showAMD64SseOp ( AMD64SseOp op ) {
   switch (op) {
      case Asse_MOV:      return "movups";
      case Asse_ADDF:     return "add";
      case Asse_SUBF:     return "sub";
      case Asse_MULF:     return "mul";
      case Asse_DIVF:     return "div";
      case Asse_MAXF:     return "max";
      case Asse_MINF:     return "min";
      case Asse_CMPEQF:   return "cmpFeq";
      case Asse_CMPLTF:   return "cmpFlt";
      case Asse_CMPLEF:   return "cmpFle";
      case Asse_CMPUNF:   return "cmpFun";
      case Asse_RCPF:     return "rcp";
      case Asse_RSQRTF:   return "rsqrt";
      case Asse_SQRTF:    return "sqrt";
      case Asse_AND:      return "and";
      case Asse_OR:       return "or";
      case Asse_XOR:      return "xor";
      case Asse_ANDN:     return "andn";
      case Asse_ADD8:     return "paddb";
      case Asse_ADD16:    return "paddw";
      case Asse_ADD32:    return "paddd";
      case Asse_ADD64:    return "paddq";
      case Asse_QADD8U:   return "paddusb";
      case Asse_QADD16U:  return "paddusw";
      case Asse_QADD8S:   return "paddsb";
      case Asse_QADD16S:  return "paddsw";
      case Asse_SUB8:     return "psubb";
      case Asse_SUB16:    return "psubw";
      case Asse_SUB32:    return "psubd";
      case Asse_SUB64:    return "psubq";
      case Asse_QSUB8U:   return "psubusb";
      case Asse_QSUB16U:  return "psubusw";
      case Asse_QSUB8S:   return "psubsb";
      case Asse_QSUB16S:  return "psubsw";
      case Asse_MUL16:    return "pmullw";
      case Asse_MULHI16U: return "pmulhuw";
      case Asse_MULHI16S: return "pmulhw";
      case Asse_AVG8U:    return "pavgb";
      case Asse_AVG16U:   return "pavgw";
      case Asse_MAX16S:   return "pmaxw";
      case Asse_MAX8U:    return "pmaxub";
      case Asse_MIN16S:   return "pminw";
      case Asse_MIN8U:    return "pminub";
      case Asse_CMPEQ8:   return "pcmpeqb";
      case Asse_CMPEQ16:  return "pcmpeqw";
      case Asse_CMPEQ32:  return "pcmpeqd";
      case Asse_CMPGT8S:  return "pcmpgtb";
      case Asse_CMPGT16S: return "pcmpgtw";
      case Asse_CMPGT32S: return "pcmpgtd";
      case Asse_SHL16:    return "psllw";
      case Asse_SHL32:    return "pslld";
      case Asse_SHL64:    return "psllq";
      case Asse_SHR16:    return "psrlw";
      case Asse_SHR32:    return "psrld";
      case Asse_SHR64:    return "psrlq";
      case Asse_SAR16:    return "psraw";
      case Asse_SAR32:    return "psrad";
      case Asse_PACKSSD:  return "packssdw";
      case Asse_PACKSSW:  return "packsswb";
      case Asse_PACKUSW:  return "packuswb";
      case Asse_UNPCKHB:  return "punpckhb";
      case Asse_UNPCKHW:  return "punpckhw";
      case Asse_UNPCKHD:  return "punpckhd";
      case Asse_UNPCKHQ:  return "punpckhq";
      case Asse_UNPCKLB:  return "punpcklb";
      case Asse_UNPCKLW:  return "punpcklw";
      case Asse_UNPCKLD:  return "punpckld";
      case Asse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showAMD64SseOp");
   }
}
AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_Imm64;
   i->Ain.Imm64.imm64 = imm64;
   i->Ain.Imm64.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu64R;
   i->Ain.Alu64R.op  = op;
   i->Ain.Alu64R.src = src;
   i->Ain.Alu64R.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu64M;
   i->Ain.Alu64M.op  = op;
   i->Ain.Alu64M.src = src;
   i->Ain.Alu64M.dst = dst;
   vassert(op != Aalu_MUL);
   return i;
}
AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
   AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag          = Ain_Sh64;
   i->Ain.Sh64.op  = op;
   i->Ain.Sh64.src = src;
   i->Ain.Sh64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Test64;
   i->Ain.Test64.imm32 = imm32;
   i->Ain.Test64.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_Unary64;
   i->Ain.Unary64.op  = op;
   i->Ain.Unary64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Lea64;
   i->Ain.Lea64.am  = am;
   i->Ain.Lea64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_MulL;
   i->Ain.MulL.syned = syned;
   i->Ain.MulL.src   = src;
   return i;
}
AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Div;
   i->Ain.Div.syned = syned;
   i->Ain.Div.sz    = sz;
   i->Ain.Div.src   = src;
   vassert(sz == 4 || sz == 8);
   return i;
}
//.. AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) {
//..    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag            = Xin_Sh3232;
//..    i->Xin.Sh3232.op  = op;
//..    i->Xin.Sh3232.amt = amt;
//..    i->Xin.Sh3232.src = src;
//..    i->Xin.Sh3232.dst = dst;
//..    vassert(op == Xsh_SHL || op == Xsh_SHR);
AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
   AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag          = Ain_Push;
   i->Ain.Push.src = src;
   return i;
}
AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_Call;
   i->Ain.Call.cond     = cond;
   i->Ain.Call.target   = target;
   i->Ain.Call.regparms = regparms;
   vassert(regparms >= 0 && regparms <= 6);
   return i;
}
AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Goto;
   i->Ain.Goto.cond = cond;
   i->Ain.Goto.dst  = dst;
   i->Ain.Goto.jk   = jk;
   return i;
}
AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_CMov64;
   i->Ain.CMov64.cond = cond;
   i->Ain.CMov64.src  = src;
   i->Ain.CMov64.dst  = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_MovxLQ;
   i->Ain.MovxLQ.syned = syned;
   i->Ain.MovxLQ.src   = src;
   i->Ain.MovxLQ.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
                                AMD64AMode* src, HReg dst ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_LoadEX;
   i->Ain.LoadEX.szSmall = szSmall;
   i->Ain.LoadEX.syned   = syned;
   i->Ain.LoadEX.src     = src;
   i->Ain.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
   return i;
}
AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Store;
   i->Ain.Store.sz  = sz;
   i->Ain.Store.src = src;
   i->Ain.Store.dst = dst;
   vassert(sz == 1 || sz == 2 || sz == 4);
   return i;
}
AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Set64;
   i->Ain.Set64.cond = cond;
   i->Ain.Set64.dst  = dst;
   return i;
}
AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_Bsfr64;
   i->Ain.Bsfr64.isFwds = isFwds;
   i->Ain.Bsfr64.src    = src;
   i->Ain.Bsfr64.dst    = dst;
   return i;
}
AMD64Instr* AMD64Instr_MFence ( void ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag        = Ain_MFence;
   return i;
}
AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_ACAS;
   i->Ain.ACAS.addr = addr;
   i->Ain.ACAS.sz   = sz;
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return i;
}
AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_DACAS;
   i->Ain.DACAS.addr = addr;
   i->Ain.DACAS.sz   = sz;
   vassert(sz == 8 || sz == 4);
   return i;
}
AMD64Instr* AMD64Instr_A87Free ( Int nregs )
{
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_A87Free;
   i->Ain.A87Free.nregs = nregs;
   vassert(nregs >= 1 && nregs <= 7);
   return i;
}
AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
{
   AMD64Instr* i            = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                   = Ain_A87PushPop;
   i->Ain.A87PushPop.addr   = addr;
   i->Ain.A87PushPop.isPush = isPush;
   i->Ain.A87PushPop.szB    = szB;
   vassert(szB == 8 || szB == 4);
   return i;
}
AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
{
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_A87FpOp;
   i->Ain.A87FpOp.op = op;
   return i;
}
AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
{
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_A87LdCW;
   i->Ain.A87LdCW.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
{
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_A87StSW;
   i->Ain.A87StSW.addr = addr;
   return i;
}
//.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) {
//..    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag             = Xin_FpUnary;
//..    i->Xin.FpUnary.op  = op;
//..    i->Xin.FpUnary.src = src;
//..    i->Xin.FpUnary.dst = dst;
//.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) {
//..    AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag               = Xin_FpBinary;
//..    i->Xin.FpBinary.op   = op;
//..    i->Xin.FpBinary.srcL = srcL;
//..    i->Xin.FpBinary.srcR = srcR;
//..    i->Xin.FpBinary.dst  = dst;
//.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) {
//..    AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag               = Xin_FpLdSt;
//..    i->Xin.FpLdSt.isLoad = isLoad;
//..    i->Xin.FpLdSt.sz     = sz;
//..    i->Xin.FpLdSt.reg    = reg;
//..    i->Xin.FpLdSt.addr   = addr;
//..    vassert(sz == 4 || sz == 8);
//.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz,
//..                                  HReg reg, AMD64AMode* addr ) {
//..    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag                = Xin_FpLdStI;
//..    i->Xin.FpLdStI.isLoad = isLoad;
//..    i->Xin.FpLdStI.sz     = sz;
//..    i->Xin.FpLdStI.reg    = reg;
//..    i->Xin.FpLdStI.addr   = addr;
//..    vassert(sz == 2 || sz == 4 || sz == 8);
//.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) {
//..    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag              = Xin_Fp64to32;
//..    i->Xin.Fp64to32.src = src;
//..    i->Xin.Fp64to32.dst = dst;
//.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
//..    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag             = Xin_FpCMov;
//..    i->Xin.FpCMov.cond = cond;
//..    i->Xin.FpCMov.src  = src;
//..    i->Xin.FpCMov.dst  = dst;
//..    vassert(cond != Xcc_ALWAYS);
AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_LdMXCSR;
   i->Ain.LdMXCSR.addr = addr;
   return i;
}
//.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_FpStSW_AX;
AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseUComIS;
   i->Ain.SseUComIS.sz   = toUChar(sz);
   i->Ain.SseUComIS.srcL = srcL;
   i->Ain.SseUComIS.srcR = srcR;
   i->Ain.SseUComIS.dst  = dst;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseSI2SF;
   i->Ain.SseSI2SF.szS = toUChar(szS);
   i->Ain.SseSI2SF.szD = toUChar(szD);
   i->Ain.SseSI2SF.src = src;
   i->Ain.SseSI2SF.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseSF2SI;
   i->Ain.SseSF2SI.szS = toUChar(szS);
   i->Ain.SseSF2SI.szD = toUChar(szD);
   i->Ain.SseSF2SI.src = src;
   i->Ain.SseSF2SI.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
{
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseSDSS;
   i->Ain.SseSDSS.from64 = from64;
   i->Ain.SseSDSS.src    = src;
   i->Ain.SseSDSS.dst    = dst;
   return i;
}
//.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) {
//..    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag              = Xin_SseConst;
//..    i->Xin.SseConst.con = con;
//..    i->Xin.SseConst.dst = dst;
//..    vassert(hregClass(dst) == HRcVec128);
AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
                                 HReg reg, AMD64AMode* addr ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseLdSt;
   i->Ain.SseLdSt.isLoad = isLoad;
   i->Ain.SseLdSt.sz     = toUChar(sz);
   i->Ain.SseLdSt.reg    = reg;
   i->Ain.SseLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 16);
   return i;
}
AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
{
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_SseLdzLO;
   i->Ain.SseLdzLO.sz   = sz;
   i->Ain.SseLdzLO.reg  = reg;
   i->Ain.SseLdzLO.addr = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse32Fx4;
   i->Ain.Sse32Fx4.op  = op;
   i->Ain.Sse32Fx4.src = src;
   i->Ain.Sse32Fx4.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse32FLo;
   i->Ain.Sse32FLo.op  = op;
   i->Ain.Sse32FLo.src = src;
   i->Ain.Sse32FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse64Fx2;
   i->Ain.Sse64Fx2.op  = op;
   i->Ain.Sse64Fx2.src = src;
   i->Ain.Sse64Fx2.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse64FLo;
   i->Ain.Sse64FLo.op  = op;
   i->Ain.Sse64FLo.src = src;
   i->Ain.Sse64FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_SseReRg;
   i->Ain.SseReRg.op  = op;
   i->Ain.SseReRg.src = re;
   i->Ain.SseReRg.dst = rg;
   return i;
}
AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseCMov;
   i->Ain.SseCMov.cond = cond;
   i->Ain.SseCMov.src  = src;
   i->Ain.SseCMov.dst  = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_SseShuf;
   i->Ain.SseShuf.order = order;
   i->Ain.SseShuf.src   = src;
   i->Ain.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case Ain_Imm64:
         vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
         ppHRegAMD64(i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
         ppAMD64RMI(i->Ain.Alu64R.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
         ppAMD64RI(i->Ain.Alu64M.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
         if (i->Ain.Sh64.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Ain.Sh64.src);
         ppHRegAMD64(i->Ain.Sh64.dst);
         return;
      case Ain_Test64:
         vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
         ppHRegAMD64(i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
         ppHRegAMD64(i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         vex_printf("leaq ");
         ppAMD64AMode(i->Ain.Lea64.am);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Lea64.dst);
         return;
      case Ain_MulL:
         vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
         ppAMD64RM(i->Ain.MulL.src);
         return;
      case Ain_Div:
         vex_printf("%cdiv%s ",
                    i->Ain.Div.syned ? 's' : 'u',
                    showAMD64ScalarSz(i->Ain.Div.sz));
         ppAMD64RM(i->Ain.Div.src);
         return;
//..       case Xin_Sh3232:
//..          vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op));
//..          if (i->Xin.Sh3232.amt == 0)
//..             vex_printf(" %%cl,");
//..             vex_printf(" $%d,", i->Xin.Sh3232.amt);
//..          ppHRegAMD64(i->Xin.Sh3232.src);
//..          vex_printf(",");
//..          ppHRegAMD64(i->Xin.Sh3232.dst);
      case Ain_Push:
         vex_printf("pushq ");
         ppAMD64RMI(i->Ain.Push.src);
         return;
      case Ain_Call:
         vex_printf("call%s[%d] ",
                    i->Ain.Call.cond==Acc_ALWAYS
                       ? "" : showAMD64CondCode(i->Ain.Call.cond),
                    i->Ain.Call.regparms );
         vex_printf("0x%llx", i->Ain.Call.target);
         return;
      case Ain_Goto:
         if (i->Ain.Goto.cond != Acc_ALWAYS) {
            vex_printf("if (%%rflags.%s) { ",
                       showAMD64CondCode(i->Ain.Goto.cond));
         }
         if (i->Ain.Goto.jk != Ijk_Boring
             && i->Ain.Goto.jk != Ijk_Call
             && i->Ain.Goto.jk != Ijk_Ret) {
            vex_printf("movl $");
            ppIRJumpKind(i->Ain.Goto.jk);
            vex_printf(",%%ebp ; ");
         }
         vex_printf("movq ");
         ppAMD64RI(i->Ain.Goto.dst);
         vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx");
         if (i->Ain.Goto.cond != Acc_ALWAYS) {
            vex_printf(" }");
         }
         return;
      case Ain_CMov64:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
         ppAMD64RM(i->Ain.CMov64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.CMov64.dst);
         return;
      case Ain_MovxLQ:
         vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
         ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
            vex_printf("movl ");
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
         } else {
            vex_printf("mov%c%cq ",
                       i->Ain.LoadEX.syned ? 's' : 'z',
                       i->Ain.LoadEX.szSmall==1
                          ? 'b'
                          : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64(i->Ain.LoadEX.dst);
         }
         return;
      case Ain_Store:
         vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
                              : (i->Ain.Store.sz==2 ? 'w' : 'l'));
         ppHRegAMD64(i->Ain.Store.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Store.dst);
         return;
      case Ain_Set64:
         vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
         ppHRegAMD64(i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
         ppHRegAMD64(i->Ain.Bsfr64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         vex_printf("mfence" );
         return;
      case Ain_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
                    : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
         vex_printf("{%%rax->%%rbx},");
         ppAMD64AMode(i->Ain.ACAS.addr);
         return;
      case Ain_DACAS:
         vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
                    (Int)(2 * i->Ain.DACAS.sz));
         ppAMD64AMode(i->Ain.DACAS.addr);
         return;
      case Ain_A87Free:
         vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
         return;
      case Ain_A87PushPop:
         vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
                    i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
         ppAMD64AMode(i->Ain.A87PushPop.addr);
         return;
      case Ain_A87FpOp:
         vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
         return;
      case Ain_A87LdCW:
         vex_printf("fldcw ");
         ppAMD64AMode(i->Ain.A87LdCW.addr);
         return;
      case Ain_A87StSW:
         vex_printf("fstsw ");
         ppAMD64AMode(i->Ain.A87StSW.addr);
         return;
//..       case Xin_FpUnary:
//..          vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
//..          ppHRegAMD64(i->Xin.FpUnary.src);
//..          vex_printf(",");
//..          ppHRegAMD64(i->Xin.FpUnary.dst);
//..       case Xin_FpBinary:
//..          vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op));
//..          ppHRegAMD64(i->Xin.FpBinary.srcL);
//..          vex_printf(",");
//..          ppHRegAMD64(i->Xin.FpBinary.srcR);
//..          vex_printf(",");
//..          ppHRegAMD64(i->Xin.FpBinary.dst);
//..       case Xin_FpLdSt:
//..          if (i->Xin.FpLdSt.isLoad) {
//..             vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
//..             ppAMD64AMode(i->Xin.FpLdSt.addr);
//..             vex_printf(", ");
//..             ppHRegAMD64(i->Xin.FpLdSt.reg);
//..             vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
//..             ppHRegAMD64(i->Xin.FpLdSt.reg);
//..             vex_printf(", ");
//..             ppAMD64AMode(i->Xin.FpLdSt.addr);
//..       case Xin_FpLdStI:
//..          if (i->Xin.FpLdStI.isLoad) {
//..             vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
//..                                   i->Xin.FpLdStI.sz==4 ? "l" : "w");
//..             ppAMD64AMode(i->Xin.FpLdStI.addr);
//..             vex_printf(", ");
//..             ppHRegAMD64(i->Xin.FpLdStI.reg);
//..             vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
//..                                   i->Xin.FpLdStI.sz==4 ? "l" : "w");
//..             ppHRegAMD64(i->Xin.FpLdStI.reg);
//..             vex_printf(", ");
//..             ppAMD64AMode(i->Xin.FpLdStI.addr);
//..       case Xin_Fp64to32:
//..          vex_printf("gdtof ");
//..          ppHRegAMD64(i->Xin.Fp64to32.src);
//..          vex_printf(",");
//..          ppHRegAMD64(i->Xin.Fp64to32.dst);
//..       case Xin_FpCMov:
//..          vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond));
//..          ppHRegAMD64(i->Xin.FpCMov.src);
//..          vex_printf(",");
//..          ppHRegAMD64(i->Xin.FpCMov.dst);
//..       case Xin_FpLdStCW:
//..          vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw ");
//..          ppAMD64AMode(i->Xin.FpLdStCW.addr);
//..       case Xin_FpStSW_AX:
//..          vex_printf("fstsw %%ax");
      case Ain_LdMXCSR:
         vex_printf("ldmxcsr ");
         ppAMD64AMode(i->Ain.LdMXCSR.addr);
         return;
      case Ain_SseUComIS:
         vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseUComIS.srcL);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseUComIS.srcR);
         vex_printf(" ; pushfq ; popq ");
         ppHRegAMD64(i->Ain.SseUComIS.dst);
         return;
      case Ain_SseSI2SF:
         vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
         (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSI2SF.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSI2SF.dst);
         return;
      case Ain_SseSF2SI:
         vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseSF2SI.src);
         vex_printf(",");
         (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSF2SI.dst);
         return;
      case Ain_SseSDSS:
         vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
         ppHRegAMD64(i->Ain.SseSDSS.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSDSS.dst);
         return;
//..       case Xin_SseConst:
//..          vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
//..          ppHRegAMD64(i->Xin.SseConst.dst);
      case Ain_SseLdSt:
         switch (i->Ain.SseLdSt.sz) {
            case 4:  vex_printf("movss "); break;
            case 8:  vex_printf("movsd "); break;
            case 16: vex_printf("movups "); break;
            default: vassert(0);
         }
         if (i->Ain.SseLdSt.isLoad) {
            ppAMD64AMode(i->Ain.SseLdSt.addr);
            vex_printf(",");
            ppHRegAMD64(i->Ain.SseLdSt.reg);
         } else {
            ppHRegAMD64(i->Ain.SseLdSt.reg);
            vex_printf(",");
            ppAMD64AMode(i->Ain.SseLdSt.addr);
         }
         return;
      case Ain_SseLdzLO:
         vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
         ppAMD64AMode(i->Ain.SseLdzLO.addr);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseLdzLO.reg);
         return;
      case Ain_Sse32Fx4:
         vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
         ppHRegAMD64(i->Ain.Sse32Fx4.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
         ppHRegAMD64(i->Ain.Sse32FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
         ppHRegAMD64(i->Ain.Sse64Fx2.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
         ppHRegAMD64(i->Ain.Sse64FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
         ppHRegAMD64(i->Ain.SseReRg.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseReRg.dst);
         return;
      case Ain_SseCMov:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
         ppHRegAMD64(i->Ain.SseCMov.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order);
         ppHRegAMD64(i->Ain.SseShuf.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseShuf.dst);
         return;
      default:
         vpanic("ppAMD64Instr");
   }
}
/* --------- Helpers for register allocation. --------- */

void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
{
   Bool unary;
   vassert(mode64 == True);
   initHRegUsage(u);
   switch (i->tag) {
      case Ain_Imm64:
         addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
         if (i->Ain.Alu64R.op == Aalu_MOV) {
            addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
            return;
         }
         if (i->Ain.Alu64R.op == Aalu_CMP) {
            addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
         addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
         if (i->Ain.Sh64.src == 0)
            addHRegUse(u, HRmRead, hregAMD64_RCX());
         return;
      case Ain_Test64:
         addHRegUse(u, HRmRead, i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
         addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
         return;
      case Ain_MulL:
         addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         addHRegUse(u, HRmWrite, hregAMD64_RDX());
         return;
      case Ain_Div:
         addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         addHRegUse(u, HRmModify, hregAMD64_RDX());
         return;
//..       case Xin_Sh3232:
//..          addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
//..          addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
//..          if (i->Xin.Sh3232.amt == 0)
//..             addHRegUse(u, HRmRead, hregAMD64_ECX());
      case Ain_Push:
         addRegUsage_AMD64RMI(u, i->Ain.Push.src);
         addHRegUse(u, HRmModify, hregAMD64_RSP());
         return;
      case Ain_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
            and all the xmm registers.
         */
         addHRegUse(u, HRmWrite, hregAMD64_RAX());
         addHRegUse(u, HRmWrite, hregAMD64_RCX());
         addHRegUse(u, HRmWrite, hregAMD64_RDX());
         addHRegUse(u, HRmWrite, hregAMD64_RSI());
         addHRegUse(u, HRmWrite, hregAMD64_RDI());
         addHRegUse(u, HRmWrite, hregAMD64_R8());
         addHRegUse(u, HRmWrite, hregAMD64_R9());
         addHRegUse(u, HRmWrite, hregAMD64_R10());
         addHRegUse(u, HRmWrite, hregAMD64_R11());
         addHRegUse(u, HRmWrite, hregAMD64_XMM0());
         addHRegUse(u, HRmWrite, hregAMD64_XMM1());
         addHRegUse(u, HRmWrite, hregAMD64_XMM2());
         addHRegUse(u, HRmWrite, hregAMD64_XMM3());
         addHRegUse(u, HRmWrite, hregAMD64_XMM4());
         addHRegUse(u, HRmWrite, hregAMD64_XMM5());
         addHRegUse(u, HRmWrite, hregAMD64_XMM6());
         addHRegUse(u, HRmWrite, hregAMD64_XMM7());
         addHRegUse(u, HRmWrite, hregAMD64_XMM8());
         addHRegUse(u, HRmWrite, hregAMD64_XMM9());
         addHRegUse(u, HRmWrite, hregAMD64_XMM10());
         addHRegUse(u, HRmWrite, hregAMD64_XMM11());
         addHRegUse(u, HRmWrite, hregAMD64_XMM12());
         addHRegUse(u, HRmWrite, hregAMD64_XMM13());
         addHRegUse(u, HRmWrite, hregAMD64_XMM14());
         addHRegUse(u, HRmWrite, hregAMD64_XMM15());

         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Ain.Call.regparms) {
            case 6: addHRegUse(u, HRmRead, hregAMD64_R9());  /*fallthru*/
            case 5: addHRegUse(u, HRmRead, hregAMD64_R8());  /*fallthru*/
            case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
            case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
            case 0: break;
            default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, r11 is stated in the
            ABI as a scratch register, and so seems a suitable victim. */
         addHRegUse(u, HRmWrite, hregAMD64_R11());
         /* Upshot of this is that the assembler really must use r11,
            and no other, as a destination temporary. */
         return;
      case Ain_Goto:
         addRegUsage_AMD64RI(u, i->Ain.Goto.dst);
         addHRegUse(u, HRmWrite, hregAMD64_RAX()); /* used for next guest addr */
         addHRegUse(u, HRmWrite, hregAMD64_RDX()); /* used for dispatcher addr */
         if (i->Ain.Goto.jk != Ijk_Boring
             && i->Ain.Goto.jk != Ijk_Call
             && i->Ain.Goto.jk != Ijk_Ret)
            /* note, this is irrelevant since rbp is not actually
               available to the allocator.  But still .. */
            addHRegUse(u, HRmWrite, hregAMD64_RBP());
         return;
      case Ain_CMov64:
         addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
         addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
         return;
      case Ain_MovxLQ:
         addHRegUse(u, HRmRead,  i->Ain.MovxLQ.src);
         addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
         return;
      case Ain_Store:
         addHRegUse(u, HRmRead, i->Ain.Store.src);
         addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
         return;
      case Ain_Set64:
         addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
         addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         return;
      case Ain_ACAS:
         addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
         addHRegUse(u, HRmRead, hregAMD64_RBX());
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         return;
      case Ain_DACAS:
         addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
         addHRegUse(u, HRmRead, hregAMD64_RCX());
         addHRegUse(u, HRmRead, hregAMD64_RBX());
         addHRegUse(u, HRmModify, hregAMD64_RDX());
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         return;
      case Ain_A87Free:
         return;
      case Ain_A87PushPop:
         addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
         return;
      case Ain_A87FpOp:
         return;
      case Ain_A87LdCW:
         addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
         return;
      case Ain_A87StSW:
         addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
         return;
//..       case Xin_FpUnary:
//..          addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
//..          addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
//..       case Xin_FpBinary:
//..          addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
//..          addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
//..          addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
//..       case Xin_FpLdSt:
//..          addRegUsage_AMD64AMode(u, i->Xin.FpLdSt.addr);
//..          addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
//..                        i->Xin.FpLdSt.reg);
//..       case Xin_FpLdStI:
//..          addRegUsage_AMD64AMode(u, i->Xin.FpLdStI.addr);
//..          addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
//..                        i->Xin.FpLdStI.reg);
//..       case Xin_Fp64to32:
//..          addHRegUse(u, HRmRead,  i->Xin.Fp64to32.src);
//..          addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
//..       case Xin_FpCMov:
//..          addHRegUse(u, HRmRead,   i->Xin.FpCMov.src);
//..          addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
      case Ain_LdMXCSR:
         addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
         return;
//..       case Xin_FpStSW_AX:
//..          addHRegUse(u, HRmWrite, hregAMD64_EAX());
      case Ain_SseUComIS:
         addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcL);
         addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcR);
         addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
         return;
      case Ain_SseSI2SF:
         addHRegUse(u, HRmRead,  i->Ain.SseSI2SF.src);
         addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
         return;
      case Ain_SseSF2SI:
         addHRegUse(u, HRmRead,  i->Ain.SseSF2SI.src);
         addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
         return;
      case Ain_SseSDSS:
         addHRegUse(u, HRmRead,  i->Ain.SseSDSS.src);
         addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
         return;
      case Ain_SseLdSt:
         addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
         addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Ain.SseLdSt.reg);
         return;
      case Ain_SseLdzLO:
         addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
         return;
//..       case Xin_SseConst:
//..          addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
      case Ain_Sse32Fx4:
         vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
         unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
                         || i->Ain.Sse32Fx4.op == Asse_RSQRTF
                         || i->Ain.Sse32Fx4.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         vassert(i->Ain.Sse32FLo.op != Asse_MOV);
         unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
                         || i->Ain.Sse32FLo.op == Asse_RSQRTF
                         || i->Ain.Sse32FLo.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
         unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
                         || i->Ain.Sse64Fx2.op == Asse_RSQRTF
                         || i->Ain.Sse64Fx2.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         vassert(i->Ain.Sse64FLo.op != Asse_MOV);
         unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
                         || i->Ain.Sse64FLo.op == Asse_RSQRTF
                         || i->Ain.Sse64FLo.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         if ( (i->Ain.SseReRg.op == Asse_XOR
               || i->Ain.SseReRg.op == Asse_CMPEQ32)
              && i->Ain.SseReRg.src == i->Ain.SseReRg.dst) {
            /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
               r,r' as a write of a value to r, and independent of any
               previous value in r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
            addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
                             ? HRmWrite : HRmModify,
                          i->Ain.SseReRg.dst);
         }
         return;
      case Ain_SseCMov:
         addHRegUse(u, HRmRead,   i->Ain.SseCMov.src);
         addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         addHRegUse(u, HRmRead,  i->Ain.SseShuf.src);
         addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
         return;
      default:
         ppAMD64Instr(i, mode64);
         vpanic("getRegUsage_AMD64Instr");
   }
}
static inline void mapReg(HRegRemap* m, HReg* r)
{
   *r = lookupHRegRemap(m, *r);
}

void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case Ain_Imm64:
         mapReg(m, &i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
         mapReg(m, &i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
         mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         mapReg(m, &i->Ain.Sh64.dst);
         return;
      case Ain_Test64:
         mapReg(m, &i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         mapReg(m, &i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
         mapReg(m, &i->Ain.Lea64.dst);
         return;
      case Ain_MulL:
         mapRegs_AMD64RM(m, i->Ain.MulL.src);
         return;
      case Ain_Div:
         mapRegs_AMD64RM(m, i->Ain.Div.src);
         return;
//..       case Xin_Sh3232:
//..          mapReg(m, &i->Xin.Sh3232.src);
//..          mapReg(m, &i->Xin.Sh3232.dst);
      case Ain_Push:
         mapRegs_AMD64RMI(m, i->Ain.Push.src);
         return;
      case Ain_Call:
         return;
      case Ain_Goto:
         mapRegs_AMD64RI(m, i->Ain.Goto.dst);
         return;
      case Ain_CMov64:
         mapRegs_AMD64RM(m, i->Ain.CMov64.src);
         mapReg(m, &i->Ain.CMov64.dst);
         return;
      case Ain_MovxLQ:
         mapReg(m, &i->Ain.MovxLQ.src);
         mapReg(m, &i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
         mapReg(m, &i->Ain.LoadEX.dst);
         return;
      case Ain_Store:
         mapReg(m, &i->Ain.Store.src);
         mapRegs_AMD64AMode(m, i->Ain.Store.dst);
         return;
      case Ain_Set64:
         mapReg(m, &i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         mapReg(m, &i->Ain.Bsfr64.src);
         mapReg(m, &i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         return;
      case Ain_ACAS:
         mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
         return;
      case Ain_DACAS:
         mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
         return;
      case Ain_A87Free:
         return;
      case Ain_A87PushPop:
         mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
         return;
      case Ain_A87FpOp:
         return;
      case Ain_A87LdCW:
         mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
         return;
      case Ain_A87StSW:
         mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
         return;
//..       case Xin_FpUnary:
//..          mapReg(m, &i->Xin.FpUnary.src);
//..          mapReg(m, &i->Xin.FpUnary.dst);
//..       case Xin_FpBinary:
//..          mapReg(m, &i->Xin.FpBinary.srcL);
//..          mapReg(m, &i->Xin.FpBinary.srcR);
//..          mapReg(m, &i->Xin.FpBinary.dst);
//..       case Xin_FpLdSt:
//..          mapRegs_AMD64AMode(m, i->Xin.FpLdSt.addr);
//..          mapReg(m, &i->Xin.FpLdSt.reg);
//..       case Xin_FpLdStI:
//..          mapRegs_AMD64AMode(m, i->Xin.FpLdStI.addr);
//..          mapReg(m, &i->Xin.FpLdStI.reg);
//..       case Xin_Fp64to32:
//..          mapReg(m, &i->Xin.Fp64to32.src);
//..          mapReg(m, &i->Xin.Fp64to32.dst);
//..       case Xin_FpCMov:
//..          mapReg(m, &i->Xin.FpCMov.src);
//..          mapReg(m, &i->Xin.FpCMov.dst);
      case Ain_LdMXCSR:
         mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
         return;
//..       case Xin_FpStSW_AX:
      case Ain_SseUComIS:
         mapReg(m, &i->Ain.SseUComIS.srcL);
         mapReg(m, &i->Ain.SseUComIS.srcR);
         mapReg(m, &i->Ain.SseUComIS.dst);
         return;
      case Ain_SseSI2SF:
         mapReg(m, &i->Ain.SseSI2SF.src);
         mapReg(m, &i->Ain.SseSI2SF.dst);
         return;
      case Ain_SseSF2SI:
         mapReg(m, &i->Ain.SseSF2SI.src);
         mapReg(m, &i->Ain.SseSF2SI.dst);
         return;
      case Ain_SseSDSS:
         mapReg(m, &i->Ain.SseSDSS.src);
         mapReg(m, &i->Ain.SseSDSS.dst);
         return;
//..       case Xin_SseConst:
//..          mapReg(m, &i->Xin.SseConst.dst);
      case Ain_SseLdSt:
         mapReg(m, &i->Ain.SseLdSt.reg);
         mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
         return;
      case Ain_SseLdzLO:
         mapReg(m, &i->Ain.SseLdzLO.reg);
         mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
         return;
      case Ain_Sse32Fx4:
         mapReg(m, &i->Ain.Sse32Fx4.src);
         mapReg(m, &i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         mapReg(m, &i->Ain.Sse32FLo.src);
         mapReg(m, &i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         mapReg(m, &i->Ain.Sse64Fx2.src);
         mapReg(m, &i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         mapReg(m, &i->Ain.Sse64FLo.src);
         mapReg(m, &i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         mapReg(m, &i->Ain.SseReRg.src);
         mapReg(m, &i->Ain.SseReRg.dst);
         return;
      case Ain_SseCMov:
         mapReg(m, &i->Ain.SseCMov.src);
         mapReg(m, &i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         mapReg(m, &i->Ain.SseShuf.src);
         mapReg(m, &i->Ain.SseShuf.dst);
         return;
      default:
         ppAMD64Instr(i, mode64);
         vpanic("mapRegs_AMD64Instr");
   }
}
/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing.
*/
Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst )
{
   /* Moves between integer regs */
   if (i->tag == Ain_Alu64R) {
      if (i->Ain.Alu64R.op != Aalu_MOV)
         return False;
      if (i->Ain.Alu64R.src->tag != Armi_Reg)
         return False;
      *src = i->Ain.Alu64R.src->Armi.Reg.reg;
      *dst = i->Ain.Alu64R.dst;
      return True;
   }
   /* Moves between vector regs */
   if (i->tag == Ain_SseReRg) {
      if (i->Ain.SseReRg.op != Asse_MOV)
         return False;
      *src = i->Ain.SseReRg.src;
      *dst = i->Ain.SseReRg.dst;
      return True;
   }
   return False;
}
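
/* Illustrative note (not from the original source): a "movq %rsi,%rdi"
   arrives here as Ain_Alu64R(Aalu_MOV, AMD64RMI_Reg(rsi), rdi), so the
   first test fires and the allocator may coalesce the two registers.
   A mov whose source is an immediate or a memory operand falls through
   and is reported as not-a-move. */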
/* Generate amd64 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                      HReg rreg, Int offsetB, Bool mode64 )
{
   AMD64AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   switch (hregClass(rreg)) {
      case HRcInt64:
         *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
         return;
      case HRcVec128:
         *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_AMD64: unimplemented regclass");
   }
}

void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                       HReg rreg, Int offsetB, Bool mode64 )
{
   AMD64AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   switch (hregClass(rreg)) {
      case HRcInt64:
         *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
         return;
      case HRcVec128:
         *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_AMD64: unimplemented regclass");
   }
}
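
/* Illustrative note (not from the original source): for an Int64-class
   real reg spilled at offset 48, genSpill_AMD64 produces the single
   instruction "movq %reg,48(%rbp)" and genReload_AMD64 the matching
   "movq 48(%rbp),%reg".  Both are MOV forms, which leave %rflags
   untouched, satisfying the requirement stated above. */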
/* --------- The amd64 assembler (bleh.) --------- */

/* Produce the low three bits of an integer register number. */
static UChar iregBits210 ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar(n & 7);
}

/* Produce bit 3 of an integer register number. */
static UChar iregBit3 ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar((n >> 3) & 1);
}

/* Produce a complete 4-bit integer register number. */
static UChar iregBits3210 ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar(n);
}

/* Given an xmm (128bit V-class) register number, produce the
   equivalent numbered register in 64-bit I-class.  This is a bit of
   fakery which facilitates using functions that work on integer
   register numbers to be used when assembling SSE instructions
   too. */
static HReg vreg2ireg ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return mkHReg(n, HRcInt64, False);
}
static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
{
   return toUChar( ((mod & 3) << 6)
                   | ((reg & 7) << 3)
                   | (regmem & 7) );
}

static UChar mkSIB ( Int shift, Int regindex, Int regbase )
{
   return toUChar( ((shift & 3) << 6)
                   | ((regindex & 7) << 3)
                   | (regbase & 7) );
}

static UChar* emit32 ( UChar* p, UInt w32 )
{
   *p++ = toUChar((w32)       & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   return p;
}

static UChar* emit64 ( UChar* p, ULong w64 )
{
   p = emit32(p, toUInt(w64         & 0xFFFFFFFF));
   p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
   return p;
}
/* Does a sign-extend of the lowest 8 bits give
   the original number? */
static Bool fits8bits ( UInt w32 )
{
   Int i32 = (Int)w32;
   return toBool(i32 == ((i32 << 24) >> 24));
}
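
/* Sanity examples (illustrative, not in the original): fits8bits(0x7F)
   is True, since 0x7F sign-extends from 8 bits back to 0x7F; whereas
   fits8bits(0x80) is False, because 0x80 sign-extends to 0xFFFFFF80.
   This decides between the d8 and d32 amode encodings below. */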
/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
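
/* Sanity examples (illustrative, not in the original):
   fitsIn32Bits(0xFFFFFFFF80000000ULL) is True (top 33 bits all 1),
   while fitsIn32Bits(0x0000000080000000ULL) is False -- bit 31 is set
   but the top 32 bits are zero, so sign-widening the low half would
   change the value. */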
/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg is not any of: RSP RBP R12 R13
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg is neither of: RSP R12
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg is neither of: RSP R12
                       =  10 greg ereg, d32

     greg,  d8(ereg)   |  ereg is either: RSP R12
                       =  01 greg 100, 0x24, d8
                       (lowest bit of rex distinguishes R12/RSP)

     greg,  d32(ereg)  |  ereg is either: RSP R12
                       =  10 greg 100, 0x24, d32
                       (lowest bit of rex distinguishes R12/RSP)

     -----------------------------------------------

     greg,  d8(base,index,scale)
               |  index != RSP
               =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
               |  index != RSP
               =  10 greg 100, scale index base, d32
*/
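
/* Worked example (illustrative, not in the original): for
   "movq %rax, 8(%rbx)" the bytes are 48 89 43 08 -- REX.W (0x48),
   opcode 0x89, then doAMode_M emits mod=01 reg=000(rax) rm=011(rbx)
   = 0x43 followed by the disp8 0x08.  Had the base been %rsp or %r12,
   the rm field would be 100 and a 0x24 SIB byte would follow, per the
   table above. */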
static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
{
   if (am->tag == Aam_IR) {
      if (am->Aam.IR.imm == 0
          && am->Aam.IR.reg != hregAMD64_RSP()
          && am->Aam.IR.reg != hregAMD64_RBP()
          && am->Aam.IR.reg != hregAMD64_R12()
          && am->Aam.IR.reg != hregAMD64_R13()
         ) {
         *p++ = mkModRegRM(0, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         return p;
      }
      if (fits8bits(am->Aam.IR.imm)
          && am->Aam.IR.reg != hregAMD64_RSP()
          && am->Aam.IR.reg != hregAMD64_R12()
         ) {
         *p++ = mkModRegRM(1, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      if (am->Aam.IR.reg != hregAMD64_RSP()
          && am->Aam.IR.reg != hregAMD64_R12()
         ) {
         *p++ = mkModRegRM(2, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      if ((am->Aam.IR.reg == hregAMD64_RSP()
           || am->Aam.IR.reg == hregAMD64_R12())
          && fits8bits(am->Aam.IR.imm)) {
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      if (/* (am->Aam.IR.reg == hregAMD64_RSP()
             || wait for test case for RSP case */
          am->Aam.IR.reg == hregAMD64_R12()) {
         *p++ = mkModRegRM(2, iregBits210(greg), 4);
         *p++ = 0x24;
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      ppAMD64AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Aam_IRRS) {
      if (fits8bits(am->Aam.IRRS.imm)
          && am->Aam.IRRS.index != hregAMD64_RSP()) {
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
                                          am->Aam.IRRS.base);
         *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
         return p;
      }
      if (am->Aam.IRRS.index != hregAMD64_RSP()) {
         *p++ = mkModRegRM(2, iregBits210(greg), 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
                                          am->Aam.IRRS.base);
         p = emit32(p, am->Aam.IRRS.imm);
         return p;
      }
      ppAMD64AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}
2157 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2158 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2160 *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg));
2165 /* Clear the W bit on a REX byte, thereby changing the operand size
2166 back to whatever that instruction's default operand size is. */
2167 static inline UChar clearWBit ( UChar rex )
   return toUChar(rex & ~(1<<3));
}
2173 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2174 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2176 if (am->tag == Aam_IR) {
2177 UChar W = 1; /* we want 64-bit mode */
2178 UChar R = iregBit3(greg);
2179 UChar X = 0; /* not relevant */
2180 UChar B = iregBit3(am->Aam.IR.reg);
2181 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
2183 if (am->tag == Aam_IRRS) {
2184 UChar W = 1; /* we want 64-bit mode */
2185 UChar R = iregBit3(greg);
2186 UChar X = iregBit3(am->Aam.IRRS.index);
2187 UChar B = iregBit3(am->Aam.IRRS.base);
2188 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
   vpanic("rexAMode_M");
   return 0; /*NOTREACHED*/
}
2194 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2195 static UChar rexAMode_R ( HReg greg, HReg ereg )
2197 UChar W = 1; /* we want 64-bit mode */
2198 UChar R = iregBit3(greg);
2199 UChar X = 0; /* not relevant */
2200 UChar B = iregBit3(ereg);
   return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
}
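/* E.g. (a sketch): rexAMode_R(%r9, %rcx) computes
   0x40 + (W=1)<<3 + (R=1)<<2 + (X=0)<<1 + (B=0), i.e. 0x4C --
   greg %r9 needs the R extension bit, ereg %rcx does not need B. */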
/* Emit ffree %st(N) */
static UChar* do_ffree_st ( UChar* p, Int n )
{
   vassert(n >= 0 && n <= 7);
   *p++ = 0xDD;
   *p++ = toUChar(0xC0 + n);
   return p;
}
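/* So, for instance, do_ffree_st(p, 7) emits DD C7, i.e. "ffree %st(7)". */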
2214 //.. /* Emit fstp %st(i), 1 <= i <= 7 */
2215 //.. static UChar* do_fstp_st ( UChar* p, Int i )
2217 //.. vassert(1 <= i && i <= 7);
2223 //.. /* Emit fld %st(i), 0 <= i <= 6 */
2224 //.. static UChar* do_fld_st ( UChar* p, Int i )
2226 //.. vassert(0 <= i && i <= 6);
2232 //.. /* Emit f<op> %st(0) */
2233 //.. static UChar* do_fop1_st ( UChar* p, AMD64FpOp op )
2236 //.. case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
2237 //.. case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
2238 //.. case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
2239 //.. case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
2240 //.. case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
2241 //.. case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
2242 //.. case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
2243 //.. case Xfp_MOV: break;
2244 //.. case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */
2245 //.. *p++ = 0xD9; *p++ = 0xF2; /* fptan */
2246 //.. *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
2248 //.. default: vpanic("do_fop1_st: unknown op");
2253 //.. /* Emit f<op> %st(i), 1 <= i <= 5 */
2254 //.. static UChar* do_fop2_st ( UChar* p, AMD64FpOp op, Int i )
2256 //.. # define fake(_n) mkHReg((_n), HRcInt32, False)
2259 //.. case Xfp_ADD: subopc = 0; break;
2260 //.. case Xfp_SUB: subopc = 4; break;
2261 //.. case Xfp_MUL: subopc = 1; break;
2262 //.. case Xfp_DIV: subopc = 6; break;
2263 //.. default: vpanic("do_fop2_st: unknown op");
2266 //.. p = doAMode_R(p, fake(subopc), fake(i));
2271 //.. /* Push a 32-bit word on the stack. The word depends on tags[3:0];
2272 //.. each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
2274 //.. static UChar* push_word_from_tags ( UChar* p, UShort tags )
2277 //.. vassert(0 == (tags & ~0xF));
2278 //.. if (tags == 0) {
2279 //.. /* pushl $0x00000000 */
2284 //.. /* pushl $0xFFFFFFFF */
2285 //.. if (tags == 0xF) {
2289 //.. vassert(0); /* awaiting test case */
2291 //.. if (tags & 1) w |= 0x000000FF;
2292 //.. if (tags & 2) w |= 0x0000FF00;
2293 //.. if (tags & 4) w |= 0x00FF0000;
2294 //.. if (tags & 8) w |= 0xFF000000;
2296 //.. p = emit32(p, w);
2301 /* Emit an instruction into buf and return the number of bytes used.
2302 Note that buf is not the insn's final place, and therefore it is
2303 imperative to emit position-independent code. */
2305 Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
2306 Bool mode64, void* dispatch )
{
   UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
   UInt   xtra, reg;
   UChar  rex, *p = &buf[0], *ptmp;
   Int    j;
   vassert(nbuf >= 32);
   vassert(mode64 == True);
   /* Wrap an integer as an int register, for use assembling
      GrpN insns, in which the greg field is used as a sub-opcode
      and does not really contain a register. */
2321 # define fake(_n) mkHReg((_n), HRcInt64, False)
2323 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
   switch (i->tag) {

      case Ain_Imm64:
         if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
            /* Use the short form (load into 32 bit reg, + default
               widening rule) for constants up to 0xFFFFF.  We could
               use this form for the range 0 to 0x7FFFFFFF inclusive, but
               limit it to a smaller range for verifiability purposes. */
            if (1 & iregBit3(i->Ain.Imm64.dst))
               *p++ = 0x41;
            *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst);
            p = emit32(p, (UInt)i->Ain.Imm64.imm64);
         } else {
            *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
            *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
            p = emit64(p, i->Ain.Imm64.imm64);
         }
         goto done;
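         /* Two worked encodings, as a sketch: a small constant such as
            $0x1234 into %rcx takes the short form B9 34 12 00 00
            ("movl $0x1234, %ecx"; the 32-bit write zeroes the top half),
            whereas a full 64-bit constant into %r11 becomes
            49 BB <imm64> ("movabsq"). */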
2345 /* Deal specially with MOV */
2346 if (i->Ain.Alu64R.op == Aalu_MOV) {
2347 switch (i->Ain.Alu64R.src->tag) {
               case Armi_Imm:
                  if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
                     /* Actually we could use this form for constants in
                        the range 0 through 0x7FFFFFFF inclusive, but
                        limit it to a small range for verifiability
                        purposes. */
                     /* Generate "movl $imm32, 32-bit-register" and let
                        the default zero-extend rule cause the upper half
                        of the dst to be zeroed out too.  This saves 1
                        and sometimes 2 bytes compared to the more
                        obvious encoding in the 'else' branch. */
                     if (1 & iregBit3(i->Ain.Alu64R.dst))
                        *p++ = 0x41;
                     *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst);
                     p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
                  } else {
                     *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst)));
                     *p++ = 0xC7;
                     *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst));
                     p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
                  }
                  goto done;
               case Armi_Reg:
                  *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
                                     i->Ain.Alu64R.dst );
                  *p++ = 0x89;
                  p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
                                   i->Ain.Alu64R.dst);
                  goto done;
               case Armi_Mem:
                  *p++ = rexAMode_M(i->Ain.Alu64R.dst,
                                    i->Ain.Alu64R.src->Armi.Mem.am);
                  *p++ = 0x8B;
                  p = doAMode_M(p, i->Ain.Alu64R.dst,
                                   i->Ain.Alu64R.src->Armi.Mem.am);
                  goto done;
               default:
                  goto bad;
            }
         }
         if (i->Ain.Alu64R.op == Aalu_MUL) {
            switch (i->Ain.Alu64R.src->tag) {
               case Armi_Reg:
                  *p++ = rexAMode_R( i->Ain.Alu64R.dst,
                                     i->Ain.Alu64R.src->Armi.Reg.reg);
                  *p++ = 0x0F; *p++ = 0xAF;
                  p = doAMode_R(p, i->Ain.Alu64R.dst,
                                   i->Ain.Alu64R.src->Armi.Reg.reg);
                  goto done;
               case Armi_Mem:
                  *p++ = rexAMode_M(i->Ain.Alu64R.dst,
                                    i->Ain.Alu64R.src->Armi.Mem.am);
                  *p++ = 0x0F; *p++ = 0xAF;
                  p = doAMode_M(p, i->Ain.Alu64R.dst,
                                   i->Ain.Alu64R.src->Armi.Mem.am);
                  goto done;
               case Armi_Imm:
                  if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
                     *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                     *p++ = 0x6B;
                     p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                     *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
                  } else {
                     *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                     *p++ = 0x69;
                     p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                     p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
                  }
                  goto done;
               default:
                  goto bad;
            }
         }
2424 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2425 opc = opc_rr = subopc_imm = opc_imma = 0;
2426 switch (i->Ain.Alu64R.op) {
2427 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2428 subopc_imm = 2; opc_imma = 0x15; break;
2429 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2430 subopc_imm = 0; opc_imma = 0x05; break;
2431 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2432 subopc_imm = 5; opc_imma = 0x2D; break;
2433 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2434 subopc_imm = 3; opc_imma = 0x1D; break;
2435 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2436 subopc_imm = 4; opc_imma = 0x25; break;
2437 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2438 subopc_imm = 6; opc_imma = 0x35; break;
2439 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2440 subopc_imm = 1; opc_imma = 0x0D; break;
2441 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2442 subopc_imm = 7; opc_imma = 0x3D; break;
         switch (i->Ain.Alu64R.src->tag) {
            case Armi_Imm:
               if (i->Ain.Alu64R.dst == hregAMD64_RAX()
                   && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
                  goto bad; /* FIXME: awaiting test case */
                  *p++ = toUChar(opc_imma);
                  p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
               } else
               if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
                  *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst );
                  *p++ = 0x83;
                  p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
                  *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
               } else {
                  *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst);
                  *p++ = 0x81;
                  p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
                  p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
               }
               goto done;
            case Armi_Reg:
               *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
                                  i->Ain.Alu64R.dst);
               *p++ = toUChar(opc_rr);
               p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
                                i->Ain.Alu64R.dst);
               goto done;
            case Armi_Mem:
               *p++ = rexAMode_M( i->Ain.Alu64R.dst,
                                  i->Ain.Alu64R.src->Armi.Mem.am);
               *p++ = toUChar(opc);
               p = doAMode_M(p, i->Ain.Alu64R.dst,
                                i->Ain.Alu64R.src->Armi.Mem.am);
               goto done;
            default:
               goto bad;
         }
         break;

      case Ain_Alu64M:
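         /* Worked example (a sketch): ADD of $1 into %rdx goes down the
            fits8bits path above: rex 48, opc 83, modrm C2 (mod=3,
            subopc_imm=0, rm=%rdx), imm8 01 -- that is,
            "addq $1, %rdx" = 48 83 C2 01. */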
2485 /* Deal specially with MOV */
2486 if (i->Ain.Alu64M.op == Aalu_MOV) {
2487 switch (i->Ain.Alu64M.src->tag) {
2489 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2492 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2496 *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst);
2498 p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst);
2499 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2505 //.. /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2506 //.. allowed here. */
2507 //.. opc = subopc_imm = opc_imma = 0;
2508 //.. switch (i->Xin.Alu32M.op) {
2509 //.. case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
2510 //.. case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
2511 //.. default: goto bad;
2513 //.. switch (i->Xin.Alu32M.src->tag) {
2516 //.. p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2517 //.. i->Xin.Alu32M.dst);
2520 //.. if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
2522 //.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
2523 //.. *p++ = 0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32;
2527 //.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
2528 //.. p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
         opc_cl = opc_imm = subopc = 0;
         switch (i->Ain.Sh64.op) {
            case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
            case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
            case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
            default: goto bad;
         }
         if (i->Ain.Sh64.src == 0) {
            /* shift by %cl */
            *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
            *p++ = toUChar(opc_cl);
            p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
            goto done;
         } else {
            /* shift by an immediate */
            *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
            *p++ = toUChar(opc_imm);
            p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
            *p++ = (UChar)(i->Ain.Sh64.src);
            goto done;
         }
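         /* E.g. (sketch) "shlq $2, %rdi" uses the immediate form:
            rex 48, opc_imm C1, modrm E7 (mod=3, subopc=4, rm=%rdi),
            then imm8 02 -- i.e. 48 C1 E7 02. */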
         /* testq sign-extend($imm32), %reg */
         *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst);
         *p++ = 0xF7;
         p = doAMode_R(p, fake(0), i->Ain.Test64.dst);
         p = emit32(p, i->Ain.Test64.imm32);
         goto done;
         if (i->Ain.Unary64.op == Aun_NOT) {
            *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
            *p++ = 0xF7;
            p = doAMode_R(p, fake(2), i->Ain.Unary64.dst);
            goto done;
         }
         if (i->Ain.Unary64.op == Aun_NEG) {
            *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
            *p++ = 0xF7;
            p = doAMode_R(p, fake(3), i->Ain.Unary64.dst);
            goto done;
         }
         *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
         *p++ = 0x8D;
         p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
         goto done;
         subopc = i->Ain.MulL.syned ? 5 : 4;
         switch (i->Ain.MulL.src->tag) {
            case Arm_Mem:
               *p++ = rexAMode_M( fake(0),
                                  i->Ain.MulL.src->Arm.Mem.am);
               *p++ = 0xF7;
               p = doAMode_M(p, fake(subopc),
                                i->Ain.MulL.src->Arm.Mem.am);
               goto done;
            case Arm_Reg:
               *p++ = rexAMode_R(fake(0),
                                 i->Ain.MulL.src->Arm.Reg.reg);
               *p++ = 0xF7;
               p = doAMode_R(p, fake(subopc),
                                i->Ain.MulL.src->Arm.Reg.reg);
               goto done;
            default:
               goto bad;
         }
2610 subopc = i->Ain.Div.syned ? 7 : 6;
2611 if (i->Ain.Div.sz == 4) {
2612 switch (i->Ain.Div.src->tag) {
2617 p = doAMode_M(p, fake(subopc),
2618 i->Ain.Div.src->Arm.Mem.am);
2622 rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg));
2624 p = doAMode_R(p, fake(subopc),
2625 i->Ain.Div.src->Arm.Reg.reg);
2631 if (i->Ain.Div.sz == 8) {
2632 switch (i->Ain.Div.src->tag) {
2634 *p++ = rexAMode_M( fake(0),
2635 i->Ain.Div.src->Arm.Mem.am);
2637 p = doAMode_M(p, fake(subopc),
2638 i->Ain.Div.src->Arm.Mem.am);
2641 *p++ = rexAMode_R( fake(0),
2642 i->Ain.Div.src->Arm.Reg.reg);
2644 p = doAMode_R(p, fake(subopc),
2645 i->Ain.Div.src->Arm.Reg.reg);
2653 //.. case Xin_Sh3232:
2654 //.. vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
2655 //.. if (i->Xin.Sh3232.amt == 0) {
2656 //.. /* shldl/shrdl by %cl */
2658 //.. if (i->Xin.Sh3232.op == Xsh_SHL) {
2663 //.. p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
2669 switch (i->Ain.Push.src->tag) {
2672 rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am));
2674 p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
2678 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2681 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg)));
2682 *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg));
         /* As per detailed comment for Ain_Call in
            getRegUsage_AMD64Instr above, %r11 is used as an address
            temporary. */
         /* jump over the following two insns if the condition does not
            hold */
         Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
2695 if (i->Ain.Call.cond != Acc_ALWAYS) {
2696 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
            *p++ = shortImm ? 10 : 13;
            /* 10 or 13 bytes in the next two insns */
         }
         if (shortImm) {
            /* 7 bytes: movq sign-extend(imm32), %r11 */
            *p++ = 0x49; *p++ = 0xC7; *p++ = 0xC3;
            p = emit32(p, (UInt)i->Ain.Call.target);
         } else {
            /* 10 bytes: movabsq $target, %r11 */
            *p++ = 0x49; *p++ = 0xBB;
            p = emit64(p, i->Ain.Call.target);
         }
         /* 3 bytes: call *%r11 */
         *p++ = 0x41; *p++ = 0xFF; *p++ = 0xD3;
         goto done;
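         /* So, as a sketch, a conditional call to a target needing all
            64 bits emits: a jcc with inverted condition over 13 bytes
            (2 bytes), then 49 BB <imm64> ("movabsq $target, %r11",
            10 bytes), then 41 FF D3 ("call *%r11", 3 bytes). */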
2720 /* Use ptmp for backpatching conditional jumps. */
2723 /* First off, if this is conditional, create a conditional
2724 jump over the rest of it. */
2725 if (i->Ain.Goto.cond != Acc_ALWAYS) {
2726 /* jmp fwds if !condition */
2727 *p++ = toUChar(0x70 + (i->Ain.Goto.cond ^ 1));
2728 ptmp = p; /* fill in this bit later */
2729 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
         /* If non-boring, set %rbp (the guest state pointer)
            appropriately.  Since these numbers are all small positive
            integers, we can get away with "movl $N, %ebp" rather than
            the longer "movq $N, %rbp". */
2736 /* movl $magic_number, %ebp */
         switch (i->Ain.Goto.jk) {
            case Ijk_ClientReq:   *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_CLIENTREQ);   break;
            case Ijk_Sys_syscall: *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_SYS_SYSCALL); break;
            case Ijk_Sys_int32:   *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_SYS_INT32);   break;
            case Ijk_Yield:       *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_YIELD);       break;
            case Ijk_EmWarn:      *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_EMWARN);      break;
            case Ijk_MapFail:     *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_MAPFAIL);     break;
            case Ijk_NoDecode:    *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_NODECODE);    break;
            case Ijk_TInval:      *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_TINVAL);      break;
            case Ijk_NoRedir:     *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_NOREDIR);     break;
            case Ijk_SigTRAP:     *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_SIGTRAP);     break;
            case Ijk_SigSEGV:     *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_SIGSEGV);     break;
            case Ijk_Ret:
            case Ijk_Call:
            case Ijk_Boring:
               break;
            default:
               ppIRJumpKind(i->Ain.Goto.jk);
               vpanic("emit_AMD64Instr.Ain_Goto: unknown jump kind");
         }
         /* Get the destination address into %rax */
         if (i->Ain.Goto.dst->tag == Ari_Imm) {
            /* movl sign-ext($immediate), %rax */
            *p++ = 0x48; *p++ = 0xC7; *p++ = 0xC0;
            p = emit32(p, i->Ain.Goto.dst->Ari.Imm.imm32);
         } else {
            vassert(i->Ain.Goto.dst->tag == Ari_Reg);
            /* movq %reg, %rax */
            if (i->Ain.Goto.dst->Ari.Reg.reg != hregAMD64_RAX()) {
               *p++ = rexAMode_R(i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
               *p++ = 0x89;
               p = doAMode_R(p, i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
            }
         }
2797 /* Get the dispatcher address into %rdx. This has to happen
2798 after the load of %rax since %rdx might be carrying the value
2799 destined for %rax immediately prior to this Ain_Goto. */
2800 vassert(sizeof(ULong) == sizeof(void*));
2801 vassert(dispatch != NULL);
         if (fitsIn32Bits(Ptr_to_ULong(dispatch))) {
            /* movl sign-extend(imm32), %rdx */
            *p++ = 0x48; *p++ = 0xC7; *p++ = 0xC2;
            p = emit32(p, (UInt)Ptr_to_ULong(dispatch));
         } else {
            /* movabsq $imm64, %rdx */
            *p++ = 0x48; *p++ = 0xBA;
            p = emit64(p, Ptr_to_ULong(dispatch));
         }
         /* jmp *%rdx */
         *p++ = 0xFF; *p++ = 0xE2;
2819 /* Fix up the conditional jump, if there was one. */
2820 if (i->Ain.Goto.cond != Acc_ALWAYS) {
2821 Int delta = p - ptmp;
2822 vassert(delta > 0 && delta < 30);
2823 *ptmp = toUChar(delta-1);
         vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
         if (i->Ain.CMov64.src->tag == Arm_Reg) {
            *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
            *p++ = 0x0F; *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
            p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
            goto done;
         }
         if (i->Ain.CMov64.src->tag == Arm_Mem) {
            *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
            *p++ = 0x0F; *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
            p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
            goto done;
         }
         break;

      case Ain_MovxLQ:
2846 /* No, _don't_ ask me why the sense of the args has to be
2847 different in the S vs Z case. I don't know. */
         if (i->Ain.MovxLQ.syned) {
            /* Need REX.W = 1 here, but rexAMode_R does that for us. */
            *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
            *p++ = 0x63;
            p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
         } else {
            /* Produce a 32-bit reg-reg move, since the implicit
               zero-extend does what we want. */
            *p++ = clearWBit(
                   rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
            *p++ = 0x89;
            p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
         }
         goto done;
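         /* E.g. (sketch): the signed case for %edi -> %rax is
            "movslq %edi, %rax" = 48 63 C7, while the unsigned case is
            just "movl %edi, %eax" = 40 89 F8 as emitted here (the 40
            is the W-cleared REX byte, harmless though redundant). */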
         if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
            /* movzbq */
            *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
            *p++ = 0x0F; *p++ = 0xB6;
            p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
            goto done;
         }
         if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
            /* movzwq */
            *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
            *p++ = 0x0F; *p++ = 0xB7;
            p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
            goto done;
         }
         if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
            /* movzlq */
            /* This isn't really an existing AMD64 instruction per se.
               Rather, we have to do a 32-bit load.  Because a 32-bit
               write implicitly clears the upper 32 bits of the target
               register, we get what we want. */
            *p++ = clearWBit(
                   rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
            *p++ = 0x8B;
            p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
            goto done;
         }
      case Ain_Set64:
         /* Make the destination register be 1 or 0, depending on whether
            the relevant condition holds.  Complication: the top 56 bits
            of the destination should be forced to zero, but doing 'xorq
            %r,%r' kills the flag(s) we are about to read.  Sigh.  So
            start off by moving $0 into the dest. */
         reg = iregBits3210(i->Ain.Set64.dst);
         vassert(reg < 16);

         /* movq $0, %dst */
         *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
         *p++ = 0xC7;
         *p++ = toUChar(0xC0 + (reg & 7));
         p = emit32(p, 0);

         /* setb lo8(%dst) */
         /* note, 8-bit register rex trickiness.  Be careful here. */
         *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
         *p++ = toUChar(0xC0 + (reg & 7));
         goto done;
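         /* E.g. (a sketch, assuming Acc_Z encodes condition code 4 as
            in the usual x86 scheme): Set64(Acc_Z, %rax) comes out as
            48 C7 C0 00 00 00 00 ("movq $0, %rax") followed by
            40 0F 94 C0 ("setz %al"). */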
2918 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
2920 if (i->Ain.Bsfr64.isFwds) {
2925 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
2930 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
2936 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
         /* cmpxchg{b,w,l,q} %rbx,mem.  Expected-value in %rax, new value
            in %rbx.  The new-value register is hardwired to be %rbx,
            since dealing with byte integer registers is too much hassle,
            so we force the register operand to %rbx (it could equally
            be %rcx or %rdx). */
2942 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
2943 if (i->Ain.ACAS.sz != 8)
2944 rex = clearWBit(rex);
2946 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
2948 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
2949 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
2955 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
2956 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
2957 aren't encoded in the insn. */
         rex = rexAMode_M( fake(1), i->Ain.DACAS.addr );
         if (i->Ain.DACAS.sz != 8)
            rex = clearWBit(rex);
         *p++ = rex;
         *p++ = 0x0F;
         *p++ = 0xC7;
         p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
         goto done;
2968 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
2969 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
2970 p = do_ffree_st(p, 7-j);
2974 case Ain_A87PushPop:
2975 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
2976 if (i->Ain.A87PushPop.isPush) {
2977 /* Load from memory into %st(0): flds/fldl amode */
            *p++ = clearWBit(
                   rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
2980 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
2981 p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
2983 /* Dump %st(0) to memory: fstps/fstpl amode */
            *p++ = clearWBit(
                   rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
2986 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
2987 p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
2993 switch (i->Ain.A87FpOp.op) {
2994 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
2995 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
2996 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
2997 case Afp_TAN: *p++ = 0xD9; *p++ = 0xF2; break;
2998 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
2999 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3000 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3001 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3002 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3003 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
3004 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
3005 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
         *p++ = clearWBit(
                rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
         *p++ = 0xD9;
         p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
         *p++ = clearWBit(
                rexAMode_M(fake(7), i->Ain.A87StSW.addr) );
         *p++ = 0xDD;
         p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr);
         if (i->Ain.Store.sz == 2) {
            /* This just goes to show the craziness of the instruction
               set encoding.  We have to insert two prefix bytes, but be
               careful to avoid a conflict in what the size should be, by
               ensuring that REX.W = 0. */
            *p++ = 0x66; /* override to 16-bits */
            *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
            *p++ = 0x89;
            p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
            goto done;
         }
         if (i->Ain.Store.sz == 4) {
            *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
            *p++ = 0x89;
            p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
            goto done;
         }
         if (i->Ain.Store.sz == 1) {
            /* This is one place where it would be wrong to skip emitting
               a rex byte of 0x40, since the mere presence of rex changes
               the meaning of the byte register access.  Be careful. */
            *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
            *p++ = 0x88;
            p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
            goto done;
         }
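         /* E.g. (sketch) a 2-byte store of %cx to (%rdx) emits
            66 40 89 0A: the 66 selects 16-bit operand size and the 40
            is the W-cleared REX byte this routine always emits. */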
3053 //.. case Xin_FpUnary:
3054 //.. /* gop %src, %dst
3055 //.. --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
3057 //.. p = do_ffree_st7(p);
3058 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
3059 //.. p = do_fop1_st(p, i->Xin.FpUnary.op);
3060 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
3063 //.. case Xin_FpBinary:
3064 //.. if (i->Xin.FpBinary.op == Xfp_YL2X
3065 //.. || i->Xin.FpBinary.op == Xfp_YL2XP1) {
3066 //.. /* Have to do this specially. */
3067 //.. /* ffree %st7 ; fld %st(srcL) ;
3068 //.. ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
3069 //.. p = do_ffree_st7(p);
3070 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
3071 //.. p = do_ffree_st7(p);
3072 //.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
3074 //.. *p++ = i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9;
3075 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
3078 //.. if (i->Xin.FpBinary.op == Xfp_ATAN) {
3079 //.. /* Have to do this specially. */
3080 //.. /* ffree %st7 ; fld %st(srcL) ;
3081 //.. ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
3082 //.. p = do_ffree_st7(p);
3083 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
3084 //.. p = do_ffree_st7(p);
3085 //.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
3086 //.. *p++ = 0xD9; *p++ = 0xF3;
3087 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
3090 //.. if (i->Xin.FpBinary.op == Xfp_PREM
3091 //.. || i->Xin.FpBinary.op == Xfp_PREM1
3092 //.. || i->Xin.FpBinary.op == Xfp_SCALE) {
3093 //.. /* Have to do this specially. */
3094 //.. /* ffree %st7 ; fld %st(srcR) ;
3095 //.. ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
3096 //.. fincstp ; ffree %st7 */
3097 //.. p = do_ffree_st7(p);
3098 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
3099 //.. p = do_ffree_st7(p);
3100 //.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
3102 //.. switch (i->Xin.FpBinary.op) {
3103 //.. case Xfp_PREM: *p++ = 0xF8; break;
3104 //.. case Xfp_PREM1: *p++ = 0xF5; break;
3105 //.. case Xfp_SCALE: *p++ = 0xFD; break;
3106 //.. default: vpanic("emitAMD64Instr(FpBinary,PREM/PREM1/SCALE)");
3108 //.. p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
3109 //.. *p++ = 0xD9; *p++ = 0xF7;
3110 //.. p = do_ffree_st7(p);
3113 //.. /* General case */
3114 //.. /* gop %srcL, %srcR, %dst
3115 //.. --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
3117 //.. p = do_ffree_st7(p);
3118 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
3119 //.. p = do_fop2_st(p, i->Xin.FpBinary.op,
3120 //.. 1+hregNumber(i->Xin.FpBinary.srcR));
3121 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
3124 //.. case Xin_FpLdSt:
3125 //.. vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8);
3126 //.. if (i->Xin.FpLdSt.isLoad) {
3127 //.. /* Load from memory into %fakeN.
3128 //.. --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1)
3130 //.. p = do_ffree_st7(p);
3131 //.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
3132 //.. p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
3133 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
3136 //.. /* Store from %fakeN into memory.
3137 //.. --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
3139 //.. p = do_ffree_st7(p);
3140 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
3141 //.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
3142 //.. p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
3147 //.. case Xin_FpLdStI:
3148 //.. if (i->Xin.FpLdStI.isLoad) {
3149 //.. /* Load from memory into %fakeN, converting from an int.
3150 //.. --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
3152 //.. switch (i->Xin.FpLdStI.sz) {
3153 //.. case 8: opc = 0xDF; subopc_imm = 5; break;
3154 //.. case 4: opc = 0xDB; subopc_imm = 0; break;
3155 //.. case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
3156 //.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-load)");
3158 //.. p = do_ffree_st7(p);
3160 //.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
3161 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
3164 //.. /* Store from %fakeN into memory, converting to an int.
3165 //.. --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
3167 //.. switch (i->Xin.FpLdStI.sz) {
3168 //.. case 8: opc = 0xDF; subopc_imm = 7; break;
3169 //.. case 4: opc = 0xDB; subopc_imm = 3; break;
3170 //.. case 2: opc = 0xDF; subopc_imm = 3; break;
3171 //.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-store)");
3173 //.. p = do_ffree_st7(p);
3174 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
3176 //.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
3181 //.. case Xin_Fp64to32:
3182 //.. /* ffree %st7 ; fld %st(src) */
3183 //.. p = do_ffree_st7(p);
3184 //.. p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
3185 //.. /* subl $4, %esp */
3186 //.. *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
3187 //.. /* fstps (%esp) */
3188 //.. *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
3189 //.. /* flds (%esp) */
3190 //.. *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
3191 //.. /* addl $4, %esp */
3192 //.. *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
3193 //.. /* fstp %st(1+dst) */
3194 //.. p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
3197 //.. case Xin_FpCMov:
3198 //.. /* jmp fwds if !condition */
3199 //.. *p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1);
3200 //.. *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3203 //.. /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
3204 //.. p = do_ffree_st7(p);
3205 //.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
3206 //.. p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
3208 //.. /* Fill in the jump offset. */
3209 //.. *(ptmp-1) = p - ptmp;
         *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
         *p++ = 0x0F; *p++ = 0xAE;
         p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
         goto done;
3219 //.. case Xin_FpStSW_AX:
3220 //.. /* note, this emits fnstsw %ax, not fstsw %ax */
3226 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3227 /* ucomi[sd] %srcL, %srcR */
3228 if (i->Ain.SseUComIS.sz == 8) {
3232 vassert(i->Ain.SseUComIS.sz == 4);
3235 rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL),
3236 vreg2ireg(i->Ain.SseUComIS.srcR) ));
3239 p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL),
3240 vreg2ireg(i->Ain.SseUComIS.srcR) );
3244 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst)));
3245 *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst));
         /* cvtsi2s[sd] %src, %dst */
3250 rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
3251 i->Ain.SseSI2SF.src );
3252 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
         *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
         *p++ = 0x0F; *p++ = 0x2A;
3256 p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
3257 i->Ain.SseSI2SF.src );
         /* cvts[sd]2si %src, %dst */
3262 rex = rexAMode_R( i->Ain.SseSF2SI.dst,
3263 vreg2ireg(i->Ain.SseSF2SI.src) );
3264 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
         *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
         *p++ = 0x0F; *p++ = 0x2D;
3268 p = doAMode_R( p, i->Ain.SseSF2SI.dst,
3269 vreg2ireg(i->Ain.SseSF2SI.src) );
3273 /* cvtsd2ss/cvtss2sd %src, %dst */
3274 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
         *p++ = clearWBit(
                rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst),
                            vreg2ireg(i->Ain.SseSDSS.src) ));
         *p++ = 0x0F; *p++ = 0x5A;
3280 p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst),
3281 vreg2ireg(i->Ain.SseSDSS.src) );
3285 //.. case Xin_FpCmp:
3286 //.. /* gcmp %fL, %fR, %dst
3287 //.. -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
3288 //.. fnstsw %ax ; movl %eax, %dst
3290 //.. /* ffree %st7 */
3291 //.. p = do_ffree_st7(p);
3292 //.. /* fpush %fL */
3293 //.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
3294 //.. /* fucomp %(fR+1) */
3296 //.. *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR)));
3297 //.. /* fnstsw %ax */
3300 //.. /* movl %eax, %dst */
3302 //.. p = doAMode_R(p, hregAMD64_EAX(), i->Xin.FpCmp.dst);
3305 //.. case Xin_SseConst: {
3306 //.. UShort con = i->Xin.SseConst.con;
3307 //.. p = push_word_from_tags(p, (con >> 12) & 0xF);
3308 //.. p = push_word_from_tags(p, (con >> 8) & 0xF);
3309 //.. p = push_word_from_tags(p, (con >> 4) & 0xF);
3310 //.. p = push_word_from_tags(p, con & 0xF);
3311 //.. /* movl (%esp), %xmm-dst */
3314 //.. *p++ = 0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst));
3316 //.. /* addl $16, %esp */
3324 if (i->Ain.SseLdSt.sz == 8) {
3327 if (i->Ain.SseLdSt.sz == 4) {
3330 if (i->Ain.SseLdSt.sz != 16) {
3334 rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr));
3336 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
3337 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr);
3341 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3342 /* movs[sd] amode, %xmm-dst */
3343 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3345 rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg),
3346 i->Ain.SseLdzLO.addr));
3349 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg),
3350 i->Ain.SseLdzLO.addr);
         *p++ = clearWBit(
                rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst),
                            vreg2ireg(i->Ain.Sse32Fx4.src) ));
         *p++ = 0x0F;
3359 switch (i->Ain.Sse32Fx4.op) {
3360 case Asse_ADDF: *p++ = 0x58; break;
3361 case Asse_DIVF: *p++ = 0x5E; break;
3362 case Asse_MAXF: *p++ = 0x5F; break;
3363 case Asse_MINF: *p++ = 0x5D; break;
3364 case Asse_MULF: *p++ = 0x59; break;
3365 case Asse_RCPF: *p++ = 0x53; break;
3366 case Asse_RSQRTF: *p++ = 0x52; break;
3367 case Asse_SQRTF: *p++ = 0x51; break;
3368 case Asse_SUBF: *p++ = 0x5C; break;
3369 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3370 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3371 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3372 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3375 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst),
3376 vreg2ireg(i->Ain.Sse32Fx4.src) );
         if (xtra & 0x100)
            *p++ = toUChar(xtra & 0xFF);
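         /* The CMP forms carry an immediate selector byte, so e.g.
            (a sketch) "cmpltps %xmm2, %xmm1" comes out here as
            40 0F C2 CA 01, the trailing 01 ("less-than") being
            supplied via xtra. */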
3385 rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst),
3386 vreg2ireg(i->Ain.Sse64Fx2.src) ));
3388 switch (i->Ain.Sse64Fx2.op) {
3389 case Asse_ADDF: *p++ = 0x58; break;
3390 case Asse_DIVF: *p++ = 0x5E; break;
3391 case Asse_MAXF: *p++ = 0x5F; break;
3392 case Asse_MINF: *p++ = 0x5D; break;
3393 case Asse_MULF: *p++ = 0x59; break;
3394 //.. case Xsse_RCPF: *p++ = 0x53; break;
3395 //.. case Xsse_RSQRTF: *p++ = 0x52; break;
3396 case Asse_SQRTF: *p++ = 0x51; break;
3397 case Asse_SUBF: *p++ = 0x5C; break;
3398 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3399 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3400 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3401 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3404 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
3405 vreg2ireg(i->Ain.Sse64Fx2.src) );
         if (xtra & 0x100)
            *p++ = toUChar(xtra & 0xFF);
3414 rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst),
3415 vreg2ireg(i->Ain.Sse32FLo.src) ));
3417 switch (i->Ain.Sse32FLo.op) {
3418 case Asse_ADDF: *p++ = 0x58; break;
3419 case Asse_DIVF: *p++ = 0x5E; break;
3420 case Asse_MAXF: *p++ = 0x5F; break;
3421 case Asse_MINF: *p++ = 0x5D; break;
3422 case Asse_MULF: *p++ = 0x59; break;
3423 case Asse_RCPF: *p++ = 0x53; break;
3424 case Asse_RSQRTF: *p++ = 0x52; break;
3425 case Asse_SQRTF: *p++ = 0x51; break;
3426 case Asse_SUBF: *p++ = 0x5C; break;
3427 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3428 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3429 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3430 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3433 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst),
3434 vreg2ireg(i->Ain.Sse32FLo.src) );
         if (xtra & 0x100)
            *p++ = toUChar(xtra & 0xFF);
3443 rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst),
3444 vreg2ireg(i->Ain.Sse64FLo.src) ));
3446 switch (i->Ain.Sse64FLo.op) {
3447 case Asse_ADDF: *p++ = 0x58; break;
3448 case Asse_DIVF: *p++ = 0x5E; break;
3449 case Asse_MAXF: *p++ = 0x5F; break;
3450 case Asse_MINF: *p++ = 0x5D; break;
3451 case Asse_MULF: *p++ = 0x59; break;
3452 //.. case Xsse_RCPF: *p++ = 0x53; break;
3453 //.. case Xsse_RSQRTF: *p++ = 0x52; break;
3454 case Asse_SQRTF: *p++ = 0x51; break;
3455 case Asse_SUBF: *p++ = 0x5C; break;
3456 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3457 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3458 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3459 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3462 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst),
3463 vreg2ireg(i->Ain.Sse64FLo.src) );
         if (xtra & 0x100)
            *p++ = toUChar(xtra & 0xFF);
3469 # define XX(_n) *p++ = (_n)
3472 rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst),
3473 vreg2ireg(i->Ain.SseReRg.src) ));
3475 switch (i->Ain.SseReRg.op) {
3476 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
3477 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
3478 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
3479 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
3480 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
3481 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3482 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3483 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3484 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
3485 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
3486 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
3487 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
3488 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3489 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3490 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3491 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3492 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3493 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3494 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3495 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
3496 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
3497 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3498 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3499 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
3500 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3501 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3502 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3503 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3504 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3505 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3506 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3507 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3508 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
3509 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
3510 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
3511 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
3512 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
3513 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
3514 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
3515 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
3516 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
3517 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
3518 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
3519 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
3520 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
3521 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
3522 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
3523 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
3524 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
3525 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
3526 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
3527 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
3528 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
3529 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
3530 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
3533 p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst),
3534 vreg2ireg(i->Ain.SseReRg.src) );
3539 /* jmp fwds if !condition */
3540 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
3541 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3544 /* movaps %src, %dst */
3546 rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst),
3547 vreg2ireg(i->Ain.SseCMov.src) ));
3550 p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst),
3551 vreg2ireg(i->Ain.SseCMov.src) );
3553 /* Fill in the jump offset. */
3554 *(ptmp-1) = toUChar(p - ptmp);
3560 rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst),
3561 vreg2ireg(i->Ain.SseShuf.src) ));
3564 p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst),
3565 vreg2ireg(i->Ain.SseShuf.src) );
3566 *p++ = (UChar)(i->Ain.SseShuf.order);
  bad:
   ppAMD64Instr(i, mode64);
   vpanic("emit_AMD64Instr");

  done:
   vassert(p - &buf[0] <= 32);
   return p - &buf[0];
}
3585 /*---------------------------------------------------------------*/
3586 /*--- end host_amd64_defs.c ---*/
3587 /*---------------------------------------------------------------*/