/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/

   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_amd64_defs.h"
/* --------- Registers. --------- */

void ppHRegAMD64 ( HReg reg )
{
   Int r;
   static HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%s", ireg64_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegAMD64");
   }
}
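
/* Editor's sketch (an addition, not in the original file): how the
   printer behaves on a couple of registers --

      ppHRegAMD64(hregAMD64_RAX());            // prints "%rax"
      ppHRegAMD64(hregAMD64_XMM3());           // prints "%xmm3"
      ppHRegAMD64(mkHReg(5, HRcInt64, True));  // virtual: printed
                                               //   generically by ppHReg

   Only real registers reach the class-specific names above; virtual
   registers take the generic path. */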
static void ppHRegAMD64_lo32 ( HReg reg )
{
   Int r;
   static HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx",  "%ebx",  "%esp",  "%ebp",  "%esi",  "%edi",
         "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      vex_printf("d");
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%s", ireg32_names[r]);
         return;
      default:
         vpanic("ppHRegAMD64_lo32: invalid regclass");
   }
}
HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); }
HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); }
HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); }
HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); }
HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); }
HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); }
HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); }
HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); }
HReg hregAMD64_R8  ( void ) { return mkHReg( 8, HRcInt64, False); }
HReg hregAMD64_R9  ( void ) { return mkHReg( 9, HRcInt64, False); }
HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); }
HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); }
HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); }
HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); }
HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }

//.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
//.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }

HReg hregAMD64_XMM0  ( void ) { return mkHReg( 0, HRcVec128, False); }
HReg hregAMD64_XMM1  ( void ) { return mkHReg( 1, HRcVec128, False); }
HReg hregAMD64_XMM2  ( void ) { return mkHReg( 2, HRcVec128, False); }
HReg hregAMD64_XMM3  ( void ) { return mkHReg( 3, HRcVec128, False); }
HReg hregAMD64_XMM4  ( void ) { return mkHReg( 4, HRcVec128, False); }
HReg hregAMD64_XMM5  ( void ) { return mkHReg( 5, HRcVec128, False); }
HReg hregAMD64_XMM6  ( void ) { return mkHReg( 6, HRcVec128, False); }
HReg hregAMD64_XMM7  ( void ) { return mkHReg( 7, HRcVec128, False); }
HReg hregAMD64_XMM8  ( void ) { return mkHReg( 8, HRcVec128, False); }
HReg hregAMD64_XMM9  ( void ) { return mkHReg( 9, HRcVec128, False); }
HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }
HReg hregAMD64_XMM13 ( void ) { return mkHReg(13, HRcVec128, False); }
HReg hregAMD64_XMM14 ( void ) { return mkHReg(14, HRcVec128, False); }
HReg hregAMD64_XMM15 ( void ) { return mkHReg(15, HRcVec128, False); }
void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
{
#if 0
   *nregs = 6;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_RBX();
   (*arr)[ 3] = hregAMD64_XMM7();
   (*arr)[ 4] = hregAMD64_XMM8();
   (*arr)[ 5] = hregAMD64_XMM9();
#endif
#if 1
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_R8();
   (*arr)[ 3] = hregAMD64_R9();
   (*arr)[ 4] = hregAMD64_R12();
   (*arr)[ 5] = hregAMD64_R13();
   (*arr)[ 6] = hregAMD64_R14();
   (*arr)[ 7] = hregAMD64_R15();
   (*arr)[ 8] = hregAMD64_RBX();
   (*arr)[ 9] = hregAMD64_XMM3();
   (*arr)[10] = hregAMD64_XMM4();
   (*arr)[11] = hregAMD64_XMM5();
   (*arr)[12] = hregAMD64_XMM6();
   (*arr)[13] = hregAMD64_XMM7();
   (*arr)[14] = hregAMD64_XMM8();
   (*arr)[15] = hregAMD64_XMM9();
   (*arr)[16] = hregAMD64_XMM10();
   (*arr)[17] = hregAMD64_XMM11();
   (*arr)[18] = hregAMD64_XMM12();
   (*arr)[19] = hregAMD64_R10();
#endif
}
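
/* Editor's note (an addition): the two blocks above are alternative
   register sets -- presumably the small 6-register set is a debug aid
   for stressing the spiller, the 20-register set the real one (the
   #if split and the two "*nregs = ..." lines are an editorial
   reconstruction matching the array sizes).  A caller would use it
   roughly as:

      Int   nregs;
      HReg* arr;
      getAllocableRegs_AMD64(&nregs, &arr);
      // arr[0..nregs-1] are now the registers the allocator may use

   Note which registers are deliberately absent: %rax/%rdx (clobbered
   by MulL/Div), %rcx (implicit shift count), %rsp/%rbp, and %r11
   (trashed by Ain_Call; see getRegUsage_AMD64Instr below). */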
/* --------- Condition codes, Intel encoding. --------- */

HChar* showAMD64CondCode ( AMD64CondCode cond )
{
   switch (cond) {
      case Acc_O:      return "o";
      case Acc_NO:     return "no";
      case Acc_B:      return "b";
      case Acc_NB:     return "nb";
      case Acc_Z:      return "z";
      case Acc_NZ:     return "nz";
      case Acc_BE:     return "be";
      case Acc_NBE:    return "nbe";
      case Acc_S:      return "s";
      case Acc_NS:     return "ns";
      case Acc_P:      return "p";
      case Acc_NP:     return "np";
      case Acc_L:      return "l";
      case Acc_NL:     return "nl";
      case Acc_LE:     return "le";
      case Acc_NLE:    return "nle";
      case Acc_ALWAYS: return "ALWAYS";
      default: vpanic("showAMD64CondCode");
   }
}
/* --------- AMD64AMode: memory address expressions. --------- */

AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
   AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
   am->tag        = Aam_IR;
   am->Aam.IR.imm = imm32;
   am->Aam.IR.reg = reg;
   return am;
}
AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
   am->tag            = Aam_IRRS;
   am->Aam.IRRS.imm   = imm32;
   am->Aam.IRRS.base  = base;
   am->Aam.IRRS.index = indEx;
   am->Aam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}
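
/* Editor's sketch (an addition, not in the original file): building
   amodes for typical operands --

      // 8(%rdi): base register plus displacement
      AMD64AMode* am1 = AMD64AMode_IR(8, hregAMD64_RDI());

      // 0x10(%rbp,%rcx,8): shift is log2 of the scale, hence the
      // 0..3 assertion above
      AMD64AMode* am2
         = AMD64AMode_IRRS(0x10, hregAMD64_RBP(), hregAMD64_RCX(), 3);

   ppAMD64AMode below prints the scale as 1 << shift, i.e. 8 here. */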
//.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) {
//..    switch (am->tag) {
//..          return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
//..          return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
//..                                  am->Xam.IRRS.index, am->Xam.IRRS.shift );
//..          vpanic("dopyAMD64AMode");
void ppAMD64AMode ( AMD64AMode* am ) {
      if (am->Aam.IR.imm == 0)
         vex_printf("(");
      else
         vex_printf("0x%x(", am->Aam.IR.imm);
      ppHRegAMD64(am->Aam.IR.reg);

      vex_printf("0x%x(", am->Aam.IRRS.imm);
      ppHRegAMD64(am->Aam.IRRS.base);
      ppHRegAMD64(am->Aam.IRRS.index);
      vex_printf(",%d)", 1 << am->Aam.IRRS.shift);

      vpanic("ppAMD64AMode");

static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
      addHRegUse(u, HRmRead, am->Aam.IR.reg);

      addHRegUse(u, HRmRead, am->Aam.IRRS.base);
      addHRegUse(u, HRmRead, am->Aam.IRRS.index);

      vpanic("addRegUsage_AMD64AMode");

static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
      am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);

      am->Aam.IRRS.base  = lookupHRegRemap(m, am->Aam.IRRS.base);
      am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);

      vpanic("mapRegs_AMD64AMode");
/* --------- Operand, which can be reg, immediate or memory. --------- */

AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
   AMD64RMI* op       = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag            = Armi_Imm;
   op->Armi.Imm.imm32 = imm32;
   return op;
}
AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
   AMD64RMI* op     = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag          = Armi_Reg;
   op->Armi.Reg.reg = reg;
   return op;
}
AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
   AMD64RMI* op    = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag         = Armi_Mem;
   op->Armi.Mem.am = am;
   return op;
}

void ppAMD64RMI ( AMD64RMI* op ) {
      vex_printf("$0x%x", op->Armi.Imm.imm32);

      ppHRegAMD64(op->Armi.Reg.reg);

      ppAMD64AMode(op->Armi.Mem.am);

      vpanic("ppAMD64RMI");

/* An AMD64RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
      addHRegUse(u, HRmRead, op->Armi.Reg.reg);

      addRegUsage_AMD64AMode(u, op->Armi.Mem.am);

      vpanic("addRegUsage_AMD64RMI");

static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
      op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);

      mapRegs_AMD64AMode(m, op->Armi.Mem.am);

      vpanic("mapRegs_AMD64RMI");
/* --------- Operand, which can be reg or immediate only. --------- */

AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
   AMD64RI* op       = LibVEX_Alloc(sizeof(AMD64RI));
   op->tag           = Ari_Imm;
   op->Ari.Imm.imm32 = imm32;
   return op;
}
AMD64RI* AMD64RI_Reg ( HReg reg ) {
   AMD64RI* op     = LibVEX_Alloc(sizeof(AMD64RI));
   op->tag         = Ari_Reg;
   op->Ari.Reg.reg = reg;
   return op;
}

void ppAMD64RI ( AMD64RI* op ) {
      vex_printf("$0x%x", op->Ari.Imm.imm32);

      ppHRegAMD64(op->Ari.Reg.reg);

/* An AMD64RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
      addHRegUse(u, HRmRead, op->Ari.Reg.reg);

      vpanic("addRegUsage_AMD64RI");

static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
      op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);

      vpanic("mapRegs_AMD64RI");
/* --------- Operand, which can be reg or memory only. --------- */

AMD64RM* AMD64RM_Reg ( HReg reg ) {
   AMD64RM* op     = LibVEX_Alloc(sizeof(AMD64RM));
   op->tag         = Arm_Reg;
   op->Arm.Reg.reg = reg;
   return op;
}
AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
   AMD64RM* op    = LibVEX_Alloc(sizeof(AMD64RM));
   op->tag        = Arm_Mem;
   op->Arm.Mem.am = am;
   return op;
}

void ppAMD64RM ( AMD64RM* op ) {
      ppAMD64AMode(op->Arm.Mem.am);

      ppHRegAMD64(op->Arm.Reg.reg);

/* Because an AMD64RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
      /* Memory is read, written or modified.  So we just want to
         know the regs read by the amode. */
      addRegUsage_AMD64AMode(u, op->Arm.Mem.am);

      /* reg is read, written or modified.  Add it in the
         appropriate way. */
      addHRegUse(u, mode, op->Arm.Reg.reg);

      vpanic("addRegUsage_AMD64RM");

static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
{
      mapRegs_AMD64AMode(m, op->Arm.Mem.am);

      op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);

      vpanic("mapRegs_AMD64RM");
/* --------- Instructions. --------- */

static HChar* showAMD64ScalarSz ( Int sz ) {
   switch (sz) {
      case 4: return "l";
      case 8: return "q";
      default: vpanic("showAMD64ScalarSz");
   }
}

HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
   switch (op) {
      case Aun_NOT: return "not";
      case Aun_NEG: return "neg";
      default: vpanic("showAMD64UnaryOp");
   }
}

HChar* showAMD64AluOp ( AMD64AluOp op ) {
   switch (op) {
      case Aalu_MOV: return "mov";
      case Aalu_CMP: return "cmp";
      case Aalu_ADD: return "add";
      case Aalu_SUB: return "sub";
      case Aalu_ADC: return "adc";
      case Aalu_SBB: return "sbb";
      case Aalu_AND: return "and";
      case Aalu_OR:  return "or";
      case Aalu_XOR: return "xor";
      case Aalu_MUL: return "imul";
      default: vpanic("showAMD64AluOp");
   }
}

HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
   switch (op) {
      case Ash_SHL: return "shl";
      case Ash_SHR: return "shr";
      case Ash_SAR: return "sar";
      default: vpanic("showAMD64ShiftOp");
   }
}

HChar* showA87FpOp ( A87FpOp op ) {
   switch (op) {
//..       case Xfp_ADD:    return "add";
//..       case Xfp_SUB:    return "sub";
//..       case Xfp_MUL:    return "mul";
//..       case Xfp_DIV:    return "div";
      case Afp_SCALE:  return "scale";
      case Afp_ATAN:   return "atan";
      case Afp_YL2X:   return "yl2x";
      case Afp_YL2XP1: return "yl2xp1";
      case Afp_PREM:   return "prem";
      case Afp_PREM1:  return "prem1";
      case Afp_SQRT:   return "sqrt";
//..       case Xfp_ABS:    return "abs";
//..       case Xfp_NEG:    return "chs";
//..       case Xfp_MOV:    return "mov";
      case Afp_SIN:    return "sin";
      case Afp_COS:    return "cos";
      case Afp_TAN:    return "tan";
      case Afp_ROUND:  return "round";
      case Afp_2XM1:   return "2xm1";
      default: vpanic("showA87FpOp");
   }
}
HChar* showAMD64SseOp ( AMD64SseOp op ) {
   switch (op) {
      case Asse_MOV:      return "movups";
      case Asse_ADDF:     return "add";
      case Asse_SUBF:     return "sub";
      case Asse_MULF:     return "mul";
      case Asse_DIVF:     return "div";
      case Asse_MAXF:     return "max";
      case Asse_MINF:     return "min";
      case Asse_CMPEQF:   return "cmpFeq";
      case Asse_CMPLTF:   return "cmpFlt";
      case Asse_CMPLEF:   return "cmpFle";
      case Asse_CMPUNF:   return "cmpFun";
      case Asse_RCPF:     return "rcp";
      case Asse_RSQRTF:   return "rsqrt";
      case Asse_SQRTF:    return "sqrt";
      case Asse_AND:      return "and";
      case Asse_OR:       return "or";
      case Asse_XOR:      return "xor";
      case Asse_ANDN:     return "andn";
      case Asse_ADD8:     return "paddb";
      case Asse_ADD16:    return "paddw";
      case Asse_ADD32:    return "paddd";
      case Asse_ADD64:    return "paddq";
      case Asse_QADD8U:   return "paddusb";
      case Asse_QADD16U:  return "paddusw";
      case Asse_QADD8S:   return "paddsb";
      case Asse_QADD16S:  return "paddsw";
      case Asse_SUB8:     return "psubb";
      case Asse_SUB16:    return "psubw";
      case Asse_SUB32:    return "psubd";
      case Asse_SUB64:    return "psubq";
      case Asse_QSUB8U:   return "psubusb";
      case Asse_QSUB16U:  return "psubusw";
      case Asse_QSUB8S:   return "psubsb";
      case Asse_QSUB16S:  return "psubsw";
      case Asse_MUL16:    return "pmullw";
      case Asse_MULHI16U: return "pmulhuw";
      case Asse_MULHI16S: return "pmulhw";
      case Asse_AVG8U:    return "pavgb";
      case Asse_AVG16U:   return "pavgw";
      case Asse_MAX16S:   return "pmaxw";
      case Asse_MAX8U:    return "pmaxub";
      case Asse_MIN16S:   return "pminw";
      case Asse_MIN8U:    return "pminub";
      case Asse_CMPEQ8:   return "pcmpeqb";
      case Asse_CMPEQ16:  return "pcmpeqw";
      case Asse_CMPEQ32:  return "pcmpeqd";
      case Asse_CMPGT8S:  return "pcmpgtb";
      case Asse_CMPGT16S: return "pcmpgtw";
      case Asse_CMPGT32S: return "pcmpgtd";
      case Asse_SHL16:    return "psllw";
      case Asse_SHL32:    return "pslld";
      case Asse_SHL64:    return "psllq";
      case Asse_SHR16:    return "psrlw";
      case Asse_SHR32:    return "psrld";
      case Asse_SHR64:    return "psrlq";
      case Asse_SAR16:    return "psraw";
      case Asse_SAR32:    return "psrad";
      case Asse_PACKSSD:  return "packssdw";
      case Asse_PACKSSW:  return "packsswb";
      case Asse_PACKUSW:  return "packuswb";
      case Asse_UNPCKHB:  return "punpckhb";
      case Asse_UNPCKHW:  return "punpckhw";
      case Asse_UNPCKHD:  return "punpckhd";
      case Asse_UNPCKHQ:  return "punpckhq";
      case Asse_UNPCKLB:  return "punpcklb";
      case Asse_UNPCKLW:  return "punpcklw";
      case Asse_UNPCKLD:  return "punpckld";
      case Asse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showAMD64SseOp");
   }
}
AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_Imm64;
   i->Ain.Imm64.imm64 = imm64;
   i->Ain.Imm64.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu64R;
   i->Ain.Alu64R.op  = op;
   i->Ain.Alu64R.src = src;
   i->Ain.Alu64R.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu64M;
   i->Ain.Alu64M.op  = op;
   i->Ain.Alu64M.src = src;
   i->Ain.Alu64M.dst = dst;
   vassert(op != Aalu_MUL);
   return i;
}
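
/* Editor's note (an addition): Aalu_MUL is ruled out of Alu64M
   because imul (its rendering in showAMD64AluOp above) has no
   encoding with a memory destination; a multiply must target a
   register, i.e. go via Alu64R. */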
AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
   AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag          = Ain_Sh64;
   i->Ain.Sh64.op  = op;
   i->Ain.Sh64.src = src;
   i->Ain.Sh64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Test64;
   i->Ain.Test64.imm32 = imm32;
   i->Ain.Test64.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_Unary64;
   i->Ain.Unary64.op  = op;
   i->Ain.Unary64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Lea64;
   i->Ain.Lea64.am  = am;
   i->Ain.Lea64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_MulL;
   i->Ain.MulL.syned = syned;
   i->Ain.MulL.src   = src;
   return i;
}
AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Div;
   i->Ain.Div.syned = syned;
   i->Ain.Div.sz    = sz;
   i->Ain.Div.src   = src;
   vassert(sz == 4 || sz == 8);
   return i;
}
//.. AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_Sh3232;
//..    i->Xin.Sh3232.op  = op;
//..    i->Xin.Sh3232.amt = amt;
//..    i->Xin.Sh3232.src = src;
//..    i->Xin.Sh3232.dst = dst;
//..    vassert(op == Xsh_SHL || op == Xsh_SHR);
AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
   AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag          = Ain_Push;
   i->Ain.Push.src = src;
   return i;
}
AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_Call;
   i->Ain.Call.cond     = cond;
   i->Ain.Call.target   = target;
   i->Ain.Call.regparms = regparms;
   vassert(regparms >= 0 && regparms <= 6);
   return i;
}
AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Goto;
   i->Ain.Goto.cond = cond;
   i->Ain.Goto.dst  = dst;
   i->Ain.Goto.jk   = jk;
   return i;
}
AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_CMov64;
   i->Ain.CMov64.cond = cond;
   i->Ain.CMov64.src  = src;
   i->Ain.CMov64.dst  = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_MovZLQ;
   i->Ain.MovZLQ.src = src;
   i->Ain.MovZLQ.dst = dst;
   return i;
}
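
/* Editor's note (an addition): on amd64, writing a 32-bit register
   implicitly zeroes the upper 32 bits of the containing 64-bit
   register, so MovZLQ needs no zero-extension instruction of its
   own -- a plain "movl src,dst" suffices. */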
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
                                AMD64AMode* src, HReg dst ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_LoadEX;
   i->Ain.LoadEX.szSmall = szSmall;
   i->Ain.LoadEX.syned   = syned;
   i->Ain.LoadEX.src     = src;
   i->Ain.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
   return i;
}
AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Store;
   i->Ain.Store.sz  = sz;
   i->Ain.Store.src = src;
   i->Ain.Store.dst = dst;
   vassert(sz == 1 || sz == 2 || sz == 4);
   return i;
}
AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Set64;
   i->Ain.Set64.cond = cond;
   i->Ain.Set64.dst  = dst;
   return i;
}
AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_Bsfr64;
   i->Ain.Bsfr64.isFwds = isFwds;
   i->Ain.Bsfr64.src    = src;
   i->Ain.Bsfr64.dst    = dst;
   return i;
}
AMD64Instr* AMD64Instr_MFence ( void ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag        = Ain_MFence;
   return i;
}
AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_ACAS;
   i->Ain.ACAS.addr = addr;
   i->Ain.ACAS.sz   = sz;
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return i;
}
AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_DACAS;
   i->Ain.DACAS.addr = addr;
   i->Ain.DACAS.sz   = sz;
   vassert(sz == 8 || sz == 4);
   return i;
}
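
/* Editor's note (an addition), following the printer and reg-usage
   code below: ACAS is "lock cmpxchg" -- the expected old value lives
   in %rax, the new value in %rbx, and %rax is modified to reflect
   what was found in memory (hence "{%rax->%rbx}" in the printer);
   DACAS is the double-width "lock cmpxchg8b/16b", with %rdx:%rax as
   the expected pair and %rcx:%rbx as the new pair. */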
AMD64Instr* AMD64Instr_A87Free ( Int nregs )
{
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_A87Free;
   i->Ain.A87Free.nregs = nregs;
   vassert(nregs >= 1 && nregs <= 7);
   return i;
}
AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush )
{
   AMD64Instr* i            = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                   = Ain_A87PushPop;
   i->Ain.A87PushPop.addr   = addr;
   i->Ain.A87PushPop.isPush = isPush;
   return i;
}
AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
{
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_A87FpOp;
   i->Ain.A87FpOp.op = op;
   return i;
}
AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
{
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_A87LdCW;
   i->Ain.A87LdCW.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
{
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_A87StSW;
   i->Ain.A87StSW.addr = addr;
   return i;
}

//.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_FpUnary;
//..    i->Xin.FpUnary.op  = op;
//..    i->Xin.FpUnary.src = src;
//..    i->Xin.FpUnary.dst = dst;
//.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_FpBinary;
//..    i->Xin.FpBinary.op   = op;
//..    i->Xin.FpBinary.srcL = srcL;
//..    i->Xin.FpBinary.srcR = srcR;
//..    i->Xin.FpBinary.dst  = dst;
//.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_FpLdSt;
//..    i->Xin.FpLdSt.isLoad = isLoad;
//..    i->Xin.FpLdSt.sz     = sz;
//..    i->Xin.FpLdSt.reg    = reg;
//..    i->Xin.FpLdSt.addr   = addr;
//..    vassert(sz == 4 || sz == 8);
//.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz,
//..                                  HReg reg, AMD64AMode* addr ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_FpLdStI;
//..    i->Xin.FpLdStI.isLoad = isLoad;
//..    i->Xin.FpLdStI.sz     = sz;
//..    i->Xin.FpLdStI.reg    = reg;
//..    i->Xin.FpLdStI.addr   = addr;
//..    vassert(sz == 2 || sz == 4 || sz == 8);
//.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_Fp64to32;
//..    i->Xin.Fp64to32.src = src;
//..    i->Xin.Fp64to32.dst = dst;
//.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_FpCMov;
//..    i->Xin.FpCMov.cond = cond;
//..    i->Xin.FpCMov.src  = src;
//..    i->Xin.FpCMov.dst  = dst;
//..    vassert(cond != Xcc_ALWAYS);
AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_LdMXCSR;
   i->Ain.LdMXCSR.addr = addr;
   return i;
}
//.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_FpStSW_AX;
AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseUComIS;
   i->Ain.SseUComIS.sz   = toUChar(sz);
   i->Ain.SseUComIS.srcL = srcL;
   i->Ain.SseUComIS.srcR = srcR;
   i->Ain.SseUComIS.dst  = dst;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseSI2SF;
   i->Ain.SseSI2SF.szS = toUChar(szS);
   i->Ain.SseSI2SF.szD = toUChar(szD);
   i->Ain.SseSI2SF.src = src;
   i->Ain.SseSI2SF.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseSF2SI;
   i->Ain.SseSF2SI.szS = toUChar(szS);
   i->Ain.SseSF2SI.szD = toUChar(szD);
   i->Ain.SseSF2SI.src = src;
   i->Ain.SseSF2SI.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
{
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseSDSS;
   i->Ain.SseSDSS.from64 = from64;
   i->Ain.SseSDSS.src    = src;
   i->Ain.SseSDSS.dst    = dst;
   return i;
}
//.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) {
//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//..    i->tag        = Xin_SseConst;
//..    i->Xin.SseConst.con = con;
//..    i->Xin.SseConst.dst = dst;
//..    vassert(hregClass(dst) == HRcVec128);
AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
                                 HReg reg, AMD64AMode* addr ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseLdSt;
   i->Ain.SseLdSt.isLoad = isLoad;
   i->Ain.SseLdSt.sz     = toUChar(sz);
   i->Ain.SseLdSt.reg    = reg;
   i->Ain.SseLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 16);
   return i;
}
AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
{
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_SseLdzLO;
   i->Ain.SseLdzLO.sz   = sz;
   i->Ain.SseLdzLO.reg  = reg;
   i->Ain.SseLdzLO.addr = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse32Fx4;
   i->Ain.Sse32Fx4.op  = op;
   i->Ain.Sse32Fx4.src = src;
   i->Ain.Sse32Fx4.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse32FLo;
   i->Ain.Sse32FLo.op  = op;
   i->Ain.Sse32FLo.src = src;
   i->Ain.Sse32FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse64Fx2;
   i->Ain.Sse64Fx2.op  = op;
   i->Ain.Sse64Fx2.src = src;
   i->Ain.Sse64Fx2.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse64FLo;
   i->Ain.Sse64FLo.op  = op;
   i->Ain.Sse64FLo.src = src;
   i->Ain.Sse64FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_SseReRg;
   i->Ain.SseReRg.op  = op;
   i->Ain.SseReRg.src = re;
   i->Ain.SseReRg.dst = rg;
   return i;
}
AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseCMov;
   i->Ain.SseCMov.cond = cond;
   i->Ain.SseCMov.src  = src;
   i->Ain.SseCMov.dst  = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_SseShuf;
   i->Ain.SseShuf.order = order;
   i->Ain.SseShuf.src   = src;
   i->Ain.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
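
/* Editor's note (an addition): 'order' is the pshufd immediate byte;
   each 2-bit field selects which of the four 32-bit source lanes
   feeds the corresponding destination lane, hence the 0..0xFF
   range check. */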
void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
      vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
      ppHRegAMD64(i->Ain.Imm64.dst);

      vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
      ppAMD64RMI(i->Ain.Alu64R.src);
      ppHRegAMD64(i->Ain.Alu64R.dst);

      vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
      ppAMD64RI(i->Ain.Alu64M.src);
      ppAMD64AMode(i->Ain.Alu64M.dst);

      vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
      if (i->Ain.Sh64.src == 0)
         vex_printf("%%cl,");
      else
         vex_printf("$%d,", (Int)i->Ain.Sh64.src);
      ppHRegAMD64(i->Ain.Sh64.dst);

      vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
      ppHRegAMD64(i->Ain.Test64.dst);

      vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
      ppHRegAMD64(i->Ain.Unary64.dst);

      vex_printf("leaq ");
      ppAMD64AMode(i->Ain.Lea64.am);
      ppHRegAMD64(i->Ain.Lea64.dst);

      vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
      ppAMD64RM(i->Ain.MulL.src);

      vex_printf("%cdiv%s ",
                 i->Ain.Div.syned ? 's' : 'u',
                 showAMD64ScalarSz(i->Ain.Div.sz));
      ppAMD64RM(i->Ain.Div.src);

//.. case Xin_Sh3232:
//..    vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op));
//..    if (i->Xin.Sh3232.amt == 0)
//..       vex_printf(" %%cl,");
//..       vex_printf(" $%d,", i->Xin.Sh3232.amt);
//..    ppHRegAMD64(i->Xin.Sh3232.src);
//..    vex_printf(",");
//..    ppHRegAMD64(i->Xin.Sh3232.dst);

      vex_printf("pushq ");
      ppAMD64RMI(i->Ain.Push.src);

      vex_printf("call%s[%d] ",
                 i->Ain.Call.cond==Acc_ALWAYS
                    ? "" : showAMD64CondCode(i->Ain.Call.cond),
                 i->Ain.Call.regparms );
      vex_printf("0x%llx", i->Ain.Call.target);

      if (i->Ain.Goto.cond != Acc_ALWAYS) {
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.Goto.cond));
      }
      if (i->Ain.Goto.jk != Ijk_Boring
          && i->Ain.Goto.jk != Ijk_Call
          && i->Ain.Goto.jk != Ijk_Ret) {
         vex_printf("movl $");
         ppIRJumpKind(i->Ain.Goto.jk);
         vex_printf(",%%ebp ; ");
      }
      vex_printf("movq ");
      ppAMD64RI(i->Ain.Goto.dst);
      vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx");
      if (i->Ain.Goto.cond != Acc_ALWAYS) {
         vex_printf(" }");
      }

      vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
      ppAMD64RM(i->Ain.CMov64.src);
      ppHRegAMD64(i->Ain.CMov64.dst);

      vex_printf("movzlq ");
      ppHRegAMD64_lo32(i->Ain.MovZLQ.src);
      ppHRegAMD64(i->Ain.MovZLQ.dst);

      if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
         vex_printf("movl ");
         ppAMD64AMode(i->Ain.LoadEX.src);
         ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
      } else {
         vex_printf("mov%c%cq ",
                    i->Ain.LoadEX.syned ? 's' : 'z',
                    i->Ain.LoadEX.szSmall==1
                       ? 'b'
                       : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
         ppAMD64AMode(i->Ain.LoadEX.src);
         ppHRegAMD64(i->Ain.LoadEX.dst);
      }

      vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
                           : (i->Ain.Store.sz==2 ? 'w' : 'l'));
      ppHRegAMD64(i->Ain.Store.src);
      ppAMD64AMode(i->Ain.Store.dst);

      vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
      ppHRegAMD64(i->Ain.Set64.dst);

      vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
      ppHRegAMD64(i->Ain.Bsfr64.src);
      ppHRegAMD64(i->Ain.Bsfr64.dst);

      vex_printf("mfence" );

      vex_printf("lock cmpxchg%c ",
                 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
                 : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
      vex_printf("{%%rax->%%rbx},");
      ppAMD64AMode(i->Ain.ACAS.addr);

      vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
                 (Int)(2 * i->Ain.DACAS.sz));
      ppAMD64AMode(i->Ain.DACAS.addr);

      vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );

   case Ain_A87PushPop:
      vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl ");
      ppAMD64AMode(i->Ain.A87PushPop.addr);

      vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));

      vex_printf("fldcw ");
      ppAMD64AMode(i->Ain.A87LdCW.addr);

      vex_printf("fstsw ");
      ppAMD64AMode(i->Ain.A87StSW.addr);
//.. case Xin_FpUnary:
//..    vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
//..    ppHRegAMD64(i->Xin.FpUnary.src);
//..    vex_printf(",");
//..    ppHRegAMD64(i->Xin.FpUnary.dst);
//.. case Xin_FpBinary:
//..    vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op));
//..    ppHRegAMD64(i->Xin.FpBinary.srcL);
//..    vex_printf(",");
//..    ppHRegAMD64(i->Xin.FpBinary.srcR);
//..    vex_printf(",");
//..    ppHRegAMD64(i->Xin.FpBinary.dst);
//.. case Xin_FpLdSt:
//..    if (i->Xin.FpLdSt.isLoad) {
//..       vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
//..       ppAMD64AMode(i->Xin.FpLdSt.addr);
//..       vex_printf(", ");
//..       ppHRegAMD64(i->Xin.FpLdSt.reg);
//..       vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
//..       ppHRegAMD64(i->Xin.FpLdSt.reg);
//..       vex_printf(", ");
//..       ppAMD64AMode(i->Xin.FpLdSt.addr);
//.. case Xin_FpLdStI:
//..    if (i->Xin.FpLdStI.isLoad) {
//..       vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
//..                             i->Xin.FpLdStI.sz==4 ? "l" : "w");
//..       ppAMD64AMode(i->Xin.FpLdStI.addr);
//..       vex_printf(", ");
//..       ppHRegAMD64(i->Xin.FpLdStI.reg);
//..       vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
//..                             i->Xin.FpLdStI.sz==4 ? "l" : "w");
//..       ppHRegAMD64(i->Xin.FpLdStI.reg);
//..       vex_printf(", ");
//..       ppAMD64AMode(i->Xin.FpLdStI.addr);
//.. case Xin_Fp64to32:
//..    vex_printf("gdtof ");
//..    ppHRegAMD64(i->Xin.Fp64to32.src);
//..    vex_printf(",");
//..    ppHRegAMD64(i->Xin.Fp64to32.dst);
//.. case Xin_FpCMov:
//..    vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond));
//..    ppHRegAMD64(i->Xin.FpCMov.src);
//..    vex_printf(",");
//..    ppHRegAMD64(i->Xin.FpCMov.dst);
//.. case Xin_FpLdStCW:
//..    vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw ");
//..    ppAMD64AMode(i->Xin.FpLdStCW.addr);
//.. case Xin_FpStSW_AX:
//..    vex_printf("fstsw %%ax");
      vex_printf("ldmxcsr ");
      ppAMD64AMode(i->Ain.LdMXCSR.addr);

      vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
      ppHRegAMD64(i->Ain.SseUComIS.srcL);
      ppHRegAMD64(i->Ain.SseUComIS.srcR);
      vex_printf(" ; pushfq ; popq ");
      ppHRegAMD64(i->Ain.SseUComIS.dst);

      vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
      (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
         (i->Ain.SseSI2SF.src);
      ppHRegAMD64(i->Ain.SseSI2SF.dst);

      vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
      ppHRegAMD64(i->Ain.SseSF2SI.src);
      (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
         (i->Ain.SseSF2SI.dst);

      vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
      ppHRegAMD64(i->Ain.SseSDSS.src);
      ppHRegAMD64(i->Ain.SseSDSS.dst);

//.. case Xin_SseConst:
//..    vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
//..    ppHRegAMD64(i->Xin.SseConst.dst);

      switch (i->Ain.SseLdSt.sz) {
         case 4:  vex_printf("movss "); break;
         case 8:  vex_printf("movsd "); break;
         case 16: vex_printf("movups "); break;
         default: vassert(0);
      }
      if (i->Ain.SseLdSt.isLoad) {
         ppAMD64AMode(i->Ain.SseLdSt.addr);
         ppHRegAMD64(i->Ain.SseLdSt.reg);
      } else {
         ppHRegAMD64(i->Ain.SseLdSt.reg);
         ppAMD64AMode(i->Ain.SseLdSt.addr);
      }

      vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
      ppAMD64AMode(i->Ain.SseLdzLO.addr);
      ppHRegAMD64(i->Ain.SseLdzLO.reg);

      vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
      ppHRegAMD64(i->Ain.Sse32Fx4.src);
      ppHRegAMD64(i->Ain.Sse32Fx4.dst);

      vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
      ppHRegAMD64(i->Ain.Sse32FLo.src);
      ppHRegAMD64(i->Ain.Sse32FLo.dst);

      vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
      ppHRegAMD64(i->Ain.Sse64Fx2.src);
      ppHRegAMD64(i->Ain.Sse64Fx2.dst);

      vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
      ppHRegAMD64(i->Ain.Sse64FLo.src);
      ppHRegAMD64(i->Ain.Sse64FLo.dst);

      vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
      ppHRegAMD64(i->Ain.SseReRg.src);
      ppHRegAMD64(i->Ain.SseReRg.dst);

      vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
      ppHRegAMD64(i->Ain.SseCMov.src);
      ppHRegAMD64(i->Ain.SseCMov.dst);

      vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order);
      ppHRegAMD64(i->Ain.SseShuf.src);
      ppHRegAMD64(i->Ain.SseShuf.dst);

   default:
      vpanic("ppAMD64Instr");
/* --------- Helpers for register allocation. --------- */

void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
      addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);

      addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
      if (i->Ain.Alu64R.op == Aalu_MOV) {
         addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
         return;
      }
      if (i->Ain.Alu64R.op == Aalu_CMP) {
         addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
         return;
      }
      addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);

      addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
      addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);

      addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
      if (i->Ain.Sh64.src == 0)
         addHRegUse(u, HRmRead, hregAMD64_RCX());

      addHRegUse(u, HRmRead, i->Ain.Test64.dst);

      addHRegUse(u, HRmModify, i->Ain.Unary64.dst);

      addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
      addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);

      addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
      addHRegUse(u, HRmModify, hregAMD64_RAX());
      addHRegUse(u, HRmWrite, hregAMD64_RDX());

      addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
      addHRegUse(u, HRmModify, hregAMD64_RAX());
      addHRegUse(u, HRmModify, hregAMD64_RDX());
//.. case Xin_Sh3232:
//..    addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
//..    addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
//..    if (i->Xin.Sh3232.amt == 0)
//..       addHRegUse(u, HRmRead, hregAMD64_ECX());

      addRegUsage_AMD64RMI(u, i->Ain.Push.src);
      addHRegUse(u, HRmModify, hregAMD64_RSP());

      /* This is a bit subtle. */
      /* First off, claim it trashes all the caller-saved regs
         which fall within the register allocator's jurisdiction.
         These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
         and all the xmm registers. */
      addHRegUse(u, HRmWrite, hregAMD64_RAX());
      addHRegUse(u, HRmWrite, hregAMD64_RCX());
      addHRegUse(u, HRmWrite, hregAMD64_RDX());
      addHRegUse(u, HRmWrite, hregAMD64_RSI());
      addHRegUse(u, HRmWrite, hregAMD64_RDI());
      addHRegUse(u, HRmWrite, hregAMD64_R8());
      addHRegUse(u, HRmWrite, hregAMD64_R9());
      addHRegUse(u, HRmWrite, hregAMD64_R10());
      addHRegUse(u, HRmWrite, hregAMD64_R11());
      addHRegUse(u, HRmWrite, hregAMD64_XMM0());
      addHRegUse(u, HRmWrite, hregAMD64_XMM1());
      addHRegUse(u, HRmWrite, hregAMD64_XMM2());
      addHRegUse(u, HRmWrite, hregAMD64_XMM3());
      addHRegUse(u, HRmWrite, hregAMD64_XMM4());
      addHRegUse(u, HRmWrite, hregAMD64_XMM5());
      addHRegUse(u, HRmWrite, hregAMD64_XMM6());
      addHRegUse(u, HRmWrite, hregAMD64_XMM7());
      addHRegUse(u, HRmWrite, hregAMD64_XMM8());
      addHRegUse(u, HRmWrite, hregAMD64_XMM9());
      addHRegUse(u, HRmWrite, hregAMD64_XMM10());
      addHRegUse(u, HRmWrite, hregAMD64_XMM11());
      addHRegUse(u, HRmWrite, hregAMD64_XMM12());
      addHRegUse(u, HRmWrite, hregAMD64_XMM13());
      addHRegUse(u, HRmWrite, hregAMD64_XMM14());
      addHRegUse(u, HRmWrite, hregAMD64_XMM15());

      /* Now we have to state any parameter-carrying registers
         which might be read.  This depends on the regparmness. */
      switch (i->Ain.Call.regparms) {
         case 6: addHRegUse(u, HRmRead, hregAMD64_R9());  /*fallthru*/
         case 5: addHRegUse(u, HRmRead, hregAMD64_R8());  /*fallthru*/
         case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
         case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
         case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
         case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
         case 0: break;
         default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
      }
      /* Finally, there is the issue that the insn trashes a
         register because the literal target address has to be
         loaded into a register.  Fortunately, r11 is stated in the
         ABI as a scratch register, and so seems a suitable victim. */
      addHRegUse(u, HRmWrite, hregAMD64_R11());
      /* Upshot of this is that the assembler really must use r11,
         and no other, as a destination temporary. */

      addRegUsage_AMD64RI(u, i->Ain.Goto.dst);
      addHRegUse(u, HRmWrite, hregAMD64_RAX()); /* used for next guest addr */
      addHRegUse(u, HRmWrite, hregAMD64_RDX()); /* used for dispatcher addr */
      if (i->Ain.Goto.jk != Ijk_Boring
          && i->Ain.Goto.jk != Ijk_Call
          && i->Ain.Goto.jk != Ijk_Ret)
         /* note, this is irrelevant since rbp is not actually
            available to the allocator.  But still.. */
         addHRegUse(u, HRmWrite, hregAMD64_RBP());

      addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
      addHRegUse(u, HRmModify, i->Ain.CMov64.dst);

      addHRegUse(u, HRmRead,  i->Ain.MovZLQ.src);
      addHRegUse(u, HRmWrite, i->Ain.MovZLQ.dst);

      addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
      addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);

      addHRegUse(u, HRmRead, i->Ain.Store.src);
      addRegUsage_AMD64AMode(u, i->Ain.Store.dst);

      addHRegUse(u, HRmWrite, i->Ain.Set64.dst);

      addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
      addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);

      addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
      addHRegUse(u, HRmRead, hregAMD64_RBX());
      addHRegUse(u, HRmModify, hregAMD64_RAX());

      addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
      addHRegUse(u, HRmRead, hregAMD64_RCX());
      addHRegUse(u, HRmRead, hregAMD64_RBX());
      addHRegUse(u, HRmModify, hregAMD64_RDX());
      addHRegUse(u, HRmModify, hregAMD64_RAX());
   case Ain_A87PushPop:
      addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);

      addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);

      addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);

//.. case Xin_FpUnary:
//..    addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
//..    addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
//.. case Xin_FpBinary:
//..    addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
//..    addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
//..    addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
//.. case Xin_FpLdSt:
//..    addRegUsage_AMD64AMode(u, i->Xin.FpLdSt.addr);
//..    addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
//..               i->Xin.FpLdSt.reg);
//.. case Xin_FpLdStI:
//..    addRegUsage_AMD64AMode(u, i->Xin.FpLdStI.addr);
//..    addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
//..               i->Xin.FpLdStI.reg);
//.. case Xin_Fp64to32:
//..    addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
//..    addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
//.. case Xin_FpCMov:
//..    addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
//..    addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);

      addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);

//.. case Xin_FpStSW_AX:
//..    addHRegUse(u, HRmWrite, hregAMD64_EAX());

      addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcL);
      addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcR);
      addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);

      addHRegUse(u, HRmRead,  i->Ain.SseSI2SF.src);
      addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);

      addHRegUse(u, HRmRead,  i->Ain.SseSF2SI.src);
      addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);

      addHRegUse(u, HRmRead,  i->Ain.SseSDSS.src);
      addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);

      addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
      addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
                 i->Ain.SseLdSt.reg);

      addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
      addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);

//.. case Xin_SseConst:
//..    addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
      vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
      unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
                      || i->Ain.Sse32Fx4.op == Asse_RSQRTF
                      || i->Ain.Sse32Fx4.op == Asse_SQRTF );
      addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
      addHRegUse(u, unary ? HRmWrite : HRmModify,
                 i->Ain.Sse32Fx4.dst);

      vassert(i->Ain.Sse32FLo.op != Asse_MOV);
      unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
                      || i->Ain.Sse32FLo.op == Asse_RSQRTF
                      || i->Ain.Sse32FLo.op == Asse_SQRTF );
      addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
      addHRegUse(u, unary ? HRmWrite : HRmModify,
                 i->Ain.Sse32FLo.dst);

      vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
      unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
                      || i->Ain.Sse64Fx2.op == Asse_RSQRTF
                      || i->Ain.Sse64Fx2.op == Asse_SQRTF );
      addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
      addHRegUse(u, unary ? HRmWrite : HRmModify,
                 i->Ain.Sse64Fx2.dst);

      vassert(i->Ain.Sse64FLo.op != Asse_MOV);
      unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
                      || i->Ain.Sse64FLo.op == Asse_RSQRTF
                      || i->Ain.Sse64FLo.op == Asse_SQRTF );
      addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
      addHRegUse(u, unary ? HRmWrite : HRmModify,
                 i->Ain.Sse64FLo.dst);

      if ( (i->Ain.SseReRg.op == Asse_XOR
            || i->Ain.SseReRg.op == Asse_CMPEQ32)
           && i->Ain.SseReRg.src == i->Ain.SseReRg.dst) {
         /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
            r,r' as a write of a value to r, and independent of any
            previous value in r */
         /* (as opposed to a rite of passage :-) */
         addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
      } else {
         addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
         addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
                       ? HRmWrite : HRmModify,
                    i->Ain.SseReRg.dst);
      }

      addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
      addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);

      addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
      addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);

   default:
      ppAMD64Instr(i, mode64);
      vpanic("getRegUsage_AMD64Instr");
static inline void mapReg(HRegRemap* m, HReg* r)
{
   *r = lookupHRegRemap(m, *r);
}

void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
      mapReg(m, &i->Ain.Imm64.dst);

      mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
      mapReg(m, &i->Ain.Alu64R.dst);

      mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
      mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);

      mapReg(m, &i->Ain.Sh64.dst);

      mapReg(m, &i->Ain.Test64.dst);

      mapReg(m, &i->Ain.Unary64.dst);

      mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
      mapReg(m, &i->Ain.Lea64.dst);

      mapRegs_AMD64RM(m, i->Ain.MulL.src);

      mapRegs_AMD64RM(m, i->Ain.Div.src);

//.. case Xin_Sh3232:
//..    mapReg(m, &i->Xin.Sh3232.src);
//..    mapReg(m, &i->Xin.Sh3232.dst);

      mapRegs_AMD64RMI(m, i->Ain.Push.src);

      mapRegs_AMD64RI(m, i->Ain.Goto.dst);

      mapRegs_AMD64RM(m, i->Ain.CMov64.src);
      mapReg(m, &i->Ain.CMov64.dst);

      mapReg(m, &i->Ain.MovZLQ.src);
      mapReg(m, &i->Ain.MovZLQ.dst);

      mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
      mapReg(m, &i->Ain.LoadEX.dst);

      mapReg(m, &i->Ain.Store.src);
      mapRegs_AMD64AMode(m, i->Ain.Store.dst);

      mapReg(m, &i->Ain.Set64.dst);

      mapReg(m, &i->Ain.Bsfr64.src);
      mapReg(m, &i->Ain.Bsfr64.dst);

      mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);

      mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);

   case Ain_A87PushPop:
      mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);

      mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);

      mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);

//.. case Xin_FpUnary:
//..    mapReg(m, &i->Xin.FpUnary.src);
//..    mapReg(m, &i->Xin.FpUnary.dst);
//.. case Xin_FpBinary:
//..    mapReg(m, &i->Xin.FpBinary.srcL);
//..    mapReg(m, &i->Xin.FpBinary.srcR);
//..    mapReg(m, &i->Xin.FpBinary.dst);
//.. case Xin_FpLdSt:
//..    mapRegs_AMD64AMode(m, i->Xin.FpLdSt.addr);
//..    mapReg(m, &i->Xin.FpLdSt.reg);
//.. case Xin_FpLdStI:
//..    mapRegs_AMD64AMode(m, i->Xin.FpLdStI.addr);
//..    mapReg(m, &i->Xin.FpLdStI.reg);
//.. case Xin_Fp64to32:
//..    mapReg(m, &i->Xin.Fp64to32.src);
//..    mapReg(m, &i->Xin.Fp64to32.dst);
//.. case Xin_FpCMov:
//..    mapReg(m, &i->Xin.FpCMov.src);
//..    mapReg(m, &i->Xin.FpCMov.dst);

      mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);

//.. case Xin_FpStSW_AX:

      mapReg(m, &i->Ain.SseUComIS.srcL);
      mapReg(m, &i->Ain.SseUComIS.srcR);
      mapReg(m, &i->Ain.SseUComIS.dst);

      mapReg(m, &i->Ain.SseSI2SF.src);
      mapReg(m, &i->Ain.SseSI2SF.dst);

      mapReg(m, &i->Ain.SseSF2SI.src);
      mapReg(m, &i->Ain.SseSF2SI.dst);

      mapReg(m, &i->Ain.SseSDSS.src);
      mapReg(m, &i->Ain.SseSDSS.dst);

//.. case Xin_SseConst:
//..    mapReg(m, &i->Xin.SseConst.dst);

      mapReg(m, &i->Ain.SseLdSt.reg);
      mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);

      mapReg(m, &i->Ain.SseLdzLO.reg);
      mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);

      mapReg(m, &i->Ain.Sse32Fx4.src);
      mapReg(m, &i->Ain.Sse32Fx4.dst);

      mapReg(m, &i->Ain.Sse32FLo.src);
      mapReg(m, &i->Ain.Sse32FLo.dst);

      mapReg(m, &i->Ain.Sse64Fx2.src);
      mapReg(m, &i->Ain.Sse64Fx2.dst);

      mapReg(m, &i->Ain.Sse64FLo.src);
      mapReg(m, &i->Ain.Sse64FLo.dst);

      mapReg(m, &i->Ain.SseReRg.src);
      mapReg(m, &i->Ain.SseReRg.dst);

      mapReg(m, &i->Ain.SseCMov.src);
      mapReg(m, &i->Ain.SseCMov.dst);

      mapReg(m, &i->Ain.SseShuf.src);
      mapReg(m, &i->Ain.SseShuf.dst);

   default:
      ppAMD64Instr(i, mode64);
      vpanic("mapRegs_AMD64Instr");
/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing.
*/
Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst )
{
   /* Moves between integer regs */
   if (i->tag == Ain_Alu64R) {
      if (i->Ain.Alu64R.op != Aalu_MOV)
         return False;
      if (i->Ain.Alu64R.src->tag != Armi_Reg)
         return False;
      *src = i->Ain.Alu64R.src->Armi.Reg.reg;
      *dst = i->Ain.Alu64R.dst;
      return True;
   }
   /* Moves between vector regs */
   if (i->tag == Ain_SseReRg) {
      if (i->Ain.SseReRg.op != Asse_MOV)
         return False;
      *src = i->Ain.SseReRg.src;
      *dst = i->Ain.SseReRg.dst;
      return True;
   }
   return False;
}
/* Generate amd64 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                      HReg rreg, Int offsetB, Bool mode64 )
{
   AMD64AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   switch (hregClass(rreg)) {
      case HRcInt64:
         *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
         return;
      case HRcVec128:
         *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_AMD64: unimplemented regclass");
   }
}

void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                       HReg rreg, Int offsetB, Bool mode64 )
{
   AMD64AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   switch (hregClass(rreg)) {
      case HRcInt64:
         *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
         return;
      case HRcVec128:
         *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_AMD64: unimplemented regclass");
   }
}
/* --------- The amd64 assembler (bleh.) --------- */

/* Produce the low three bits of an integer register number. */
static UChar iregBits210 ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   return toUChar(n & 7);
}

/* Produce bit 3 of an integer register number. */
static UChar iregBit3 ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   return toUChar((n >> 3) & 1);
}

/* Produce a complete 4-bit integer register number. */
static UChar iregBits3210 ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   return toUChar(n & 15);
}

/* Given an xmm (128bit V-class) register number, produce the
   equivalent numbered register in 64-bit I-class.  This is a bit of
   fakery which facilitates using functions that work on integer
   register numbers to be used when assembling SSE instructions. */
static HReg vreg2ireg ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   return mkHReg(n, HRcInt64, False);
}

static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
{
   return toUChar( ((mod & 3) << 6)
                   | ((reg & 7) << 3)
                   | (regmem & 7) );
}

static UChar mkSIB ( Int shift, Int regindex, Int regbase )
{
   return toUChar( ((shift & 3) << 6)
                   | ((regindex & 7) << 3)
                   | (regbase & 7) );
}

static UChar* emit32 ( UChar* p, UInt w32 )
{
   *p++ = toUChar((w32)       & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   return p;
}

static UChar* emit64 ( UChar* p, ULong w64 )
{
   p = emit32(p, toUInt(w64         & 0xFFFFFFFF));
   p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
   return p;
}

/* Does a sign-extend of the lowest 8 bits give
   the original number? */
static Bool fits8bits ( UInt w32 )
{
   Int i32 = (Int)w32;
   return toBool(i32 == ((i32 << 24) >> 24));
}

/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
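
/* Editor's sketch (an addition, not in the original file): a few
   worked values for the two predicates --

      fits8bits(0x7F)        == True    // 0x7F sign-extends to itself
      fits8bits(0xFFFFFF80U) == True    // == (Int)-128
      fits8bits(0x80)        == False   // sign-extends to 0xFFFFFF80

      fitsIn32Bits(0x7FFFFFFFULL)         == True
      fitsIn32Bits(0xFFFFFFFF80000000ULL) == True   // top 33 bits all 1
      fitsIn32Bits(0x80000000ULL)         == False  // top 33 bits mixed

   They choose between the short forms (disp8, imm32) and the long
   forms in the emitters below. */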
/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg is not any of: RSP RBP R12 R13
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg is neither of: RSP R12
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg is neither of: RSP R12
                       =  10 greg ereg, d32

     greg,  d8(ereg)   |  ereg is either: RSP R12
                       =  01 greg 100, 0x24, d8
                       (lowest bit of rex distinguishes R12/RSP)

     greg,  d32(ereg)  |  ereg is either: RSP R12
                       =  10 greg 100, 0x24, d32
                       (lowest bit of rex distinguishes R12/RSP)

     -----------------------------------------------

     greg,  d8(base,index,scale)
                       =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
                       =  10 greg 100, scale index base, d32
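
   Editor's worked example (an addition): for greg = %rax (reg field
   000) and the amode 8(%rbx) -- ereg %rbx = 011, displacement fits
   in 8 bits -- the second rule above applies and doAMode_M emits

      0x43   (mod=01, reg=000 (%rax), rm=011 (%rbx))
      0x08   (d8)
*/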
static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
{
   if (am->tag == Aam_IR) {
      if (am->Aam.IR.imm == 0
          && am->Aam.IR.reg != hregAMD64_RSP()
          && am->Aam.IR.reg != hregAMD64_RBP()
          && am->Aam.IR.reg != hregAMD64_R12()
          && am->Aam.IR.reg != hregAMD64_R13()
         ) {
         *p++ = mkModRegRM(0, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         return p;
      }
      if (fits8bits(am->Aam.IR.imm)
          && am->Aam.IR.reg != hregAMD64_RSP()
          && am->Aam.IR.reg != hregAMD64_R12()
         ) {
         *p++ = mkModRegRM(1, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      if (am->Aam.IR.reg != hregAMD64_RSP()
          && am->Aam.IR.reg != hregAMD64_R12()
         ) {
         *p++ = mkModRegRM(2, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      if ((am->Aam.IR.reg == hregAMD64_RSP()
           || am->Aam.IR.reg == hregAMD64_R12())
          && fits8bits(am->Aam.IR.imm)) {
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      if (/* (am->Aam.IR.reg == hregAMD64_RSP()
             || wait for test case for RSP case */
          am->Aam.IR.reg == hregAMD64_R12()) {
         *p++ = mkModRegRM(2, iregBits210(greg), 4);
         *p++ = 0x24;
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      vpanic("doAMode_M: can't emit amode IR");
   }
   if (am->tag == Aam_IRRS) {
      if (fits8bits(am->Aam.IRRS.imm)
          && am->Aam.IRRS.index != hregAMD64_RSP()) {
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
                      am->Aam.IRRS.base);
         *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
         return p;
      }
      if (am->Aam.IRRS.index != hregAMD64_RSP()) {
         *p++ = mkModRegRM(2, iregBits210(greg), 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
                      am->Aam.IRRS.base);
         p = emit32(p, am->Aam.IRRS.imm);
         return p;
      }
      vpanic("doAMode_M: can't emit amode IRRS");
   }
   vpanic("doAMode_M: unknown amode");
}
/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
{
   *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg));
   return p;
}

/* Clear the W bit on a REX byte, thereby changing the operand size
   back to whatever that instruction's default operand size is. */
static inline UChar clearWBit ( UChar rex )
{
   return toUChar(rex & ~(1<<3));
}
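
/* Editor's sketch (an addition): a typical 64-bit-sized REX prefix is
   0x48 (0100WRXB with W=1, R=X=B=0); clearing W gives

      clearWBit(0x48) == 0x40

   which drops the instruction back to its default operand size. */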
/* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
{
   if (am->tag == Aam_IR) {
      UChar W = 1;  /* we want 64-bit mode */
      UChar R = iregBit3(greg);
      UChar X = 0;  /* not relevant */
      UChar B = iregBit3(am->Aam.IR.reg);
      return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
   }
   if (am->tag == Aam_IRRS) {
      UChar W = 1;  /* we want 64-bit mode */
      UChar R = iregBit3(greg);
      UChar X = iregBit3(am->Aam.IRRS.index);
      UChar B = iregBit3(am->Aam.IRRS.base);
      return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
   }
   vassert(0);
   return 0; /*NOTREACHED*/
}
/* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
static UChar rexAMode_R ( HReg greg, HReg ereg )
{
   UChar W = 1;  /* we want 64-bit mode */
   UChar R = iregBit3(greg);
   UChar X = 0;  /* not relevant */
   UChar B = iregBit3(ereg);
   return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
}
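/* Illustrative sketch, not part of the original file: the helpers
   above always compose in the same order -- REX byte first, then
   opcode byte(s), then the mod-reg-rm/SIB/displacement bytes.  The
   helper name below is an assumption, added for illustration only
   (the emitter does not use it); the byte pattern is the one the
   Ain_Alu64M MOV case emits further down, with opcode 0x89. */
static UChar* sketch_emit_movq_reg_to_mem ( UChar* p, HReg greg,
                                            AMD64AMode* am )
{
   *p++ = rexAMode_M(greg, am);    /* REX.W=1 plus R/X/B extension bits */
   *p++ = 0x89;                    /* opcode: MOV r/m64, r64 */
   p = doAMode_M(p, greg, am);     /* mod-reg-rm, SIB, displacement */
   return p;
}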
/* Emit ffree %st(N) */
static UChar* do_ffree_st ( UChar* p, Int n )
{
   vassert(n >= 0 && n <= 7);
   *p++ = 0xDD;
   *p++ = toUChar(0xC0 + n);
   return p;
}
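/* For example (illustrative, not in the original source):
   do_ffree_st(p, 7) emits DD C7, i.e. "ffree %st(7)". */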
//.. /* Emit fstp %st(i), 1 <= i <= 7 */
//.. static UChar* do_fstp_st ( UChar* p, Int i )
//.. vassert(1 <= i && i <= 7);
//.. /* Emit fld %st(i), 0 <= i <= 6 */
//.. static UChar* do_fld_st ( UChar* p, Int i )
//.. vassert(0 <= i && i <= 6);
//.. /* Emit f<op> %st(0) */
//.. static UChar* do_fop1_st ( UChar* p, AMD64FpOp op )
//.. case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
//.. case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
//.. case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
//.. case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
//.. case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
//.. case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
//.. case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
//.. case Xfp_MOV: break;
//.. case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */
//.. *p++ = 0xD9; *p++ = 0xF2; /* fptan */
//.. *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
//.. default: vpanic("do_fop1_st: unknown op");
//.. /* Emit f<op> %st(i), 1 <= i <= 5 */
//.. static UChar* do_fop2_st ( UChar* p, AMD64FpOp op, Int i )
//.. # define fake(_n) mkHReg((_n), HRcInt32, False)
//.. case Xfp_ADD: subopc = 0; break;
//.. case Xfp_SUB: subopc = 4; break;
//.. case Xfp_MUL: subopc = 1; break;
//.. case Xfp_DIV: subopc = 6; break;
//.. default: vpanic("do_fop2_st: unknown op");
//.. p = doAMode_R(p, fake(subopc), fake(i));
//.. /* Push a 32-bit word on the stack.  The word depends on tags[3:0];
//.. each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
//.. static UChar* push_word_from_tags ( UChar* p, UShort tags )
//.. vassert(0 == (tags & ~0xF));
//.. if (tags == 0) {
//.. /* pushl $0x00000000 */
//.. /* pushl $0xFFFFFFFF */
//.. if (tags == 0xF) {
//.. vassert(0); /* awaiting test case */
//.. if (tags & 1) w |= 0x000000FF;
//.. if (tags & 2) w |= 0x0000FF00;
//.. if (tags & 4) w |= 0x00FF0000;
//.. if (tags & 8) w |= 0xFF000000;
//.. p = emit32(p, w);
/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code. */

Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
                      Bool mode64, void* dispatch )
{
   UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
   UInt   xtra;
   UInt   reg;
   UChar  rex;
   UChar* p = &buf[0];
   UChar* ptmp;
   Int    j;
   vassert(nbuf >= 32);
   vassert(mode64 == True);

   /* Wrap an integer as an int register, for use assembling
      GrpN insns, in which the greg field is used as a sub-opcode
      and does not really contain a register. */
#  define fake(_n) mkHReg((_n), HRcInt64, False)

   /* vex_printf("asm  "); ppAMD64Instr(i, mode64); vex_printf("\n"); */

   switch (i->tag) {
   case Ain_Imm64:
      *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
      *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
      p = emit64(p, i->Ain.Imm64.imm64);
      goto done;
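   /* (Illustrative note on Ain_Imm64 above, not an original comment:
      for "movabsq $imm64, %rax" this emits 48 B8 followed by the 8
      immediate bytes; for a high register such as %r8, the REX byte
      becomes 0x49.) */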
   case Ain_Alu64R:
      /* Deal specially with MOV */
      if (i->Ain.Alu64R.op == Aalu_MOV) {
         switch (i->Ain.Alu64R.src->tag) {
            case Armi_Imm:
               if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFF)) {
                  /* Actually we could use this form for constants in
                     the range 0 through 0x7FFFFFFF inclusive, but
                     limit it to a small range for verifiability
                     purposes. */
                  /* Generate "movl $imm32, 32-bit-register" and let
                     the default zero-extend rule cause the upper half
                     of the dst to be zeroed out too.  This saves 1
                     and sometimes 2 bytes compared to the more
                     obvious encoding in the 'else' branch. */
                  if (1 & iregBit3(i->Ain.Alu64R.dst))
                     *p++ = 0x41;
                  *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst);
                  p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
               } else {
                  *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst)));
                  *p++ = 0xC7;
                  *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst));
                  p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
               }
               goto done;
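            /* (Byte-level example, illustrative and not an original
               comment: "movl $1, %eax" is B8 01 00 00 00 -- 5 bytes --
               while the else-branch form "movq $1, %rax" is
               48 C7 C0 01 00 00 00 -- 7 bytes.) */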
            case Armi_Reg:
               *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
                                  i->Ain.Alu64R.dst );
               *p++ = 0x89;
               p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
                                i->Ain.Alu64R.dst);
               goto done;
            case Armi_Mem:
               *p++ = rexAMode_M(i->Ain.Alu64R.dst,
                                 i->Ain.Alu64R.src->Armi.Mem.am);
               *p++ = 0x8B;
               p = doAMode_M(p, i->Ain.Alu64R.dst,
                                i->Ain.Alu64R.src->Armi.Mem.am);
               goto done;
            default:
               goto bad;
         }
      }
      /* MUL */
      if (i->Ain.Alu64R.op == Aalu_MUL) {
         switch (i->Ain.Alu64R.src->tag) {
            case Armi_Reg:
               *p++ = rexAMode_R( i->Ain.Alu64R.dst,
                                  i->Ain.Alu64R.src->Armi.Reg.reg);
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_R(p, i->Ain.Alu64R.dst,
                                i->Ain.Alu64R.src->Armi.Reg.reg);
               goto done;
            case Armi_Mem:
               *p++ = rexAMode_M(i->Ain.Alu64R.dst,
                                 i->Ain.Alu64R.src->Armi.Mem.am);
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_M(p, i->Ain.Alu64R.dst,
                                i->Ain.Alu64R.src->Armi.Mem.am);
               goto done;
            case Armi_Imm:
               if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
                  *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                  *p++ = 0x6B;
                  p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                  *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
               } else {
                  *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                  *p++ = 0x69;
                  p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                  p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
               }
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
      opc = opc_rr = subopc_imm = opc_imma = 0;
      switch (i->Ain.Alu64R.op) {
         case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
                        subopc_imm = 2; opc_imma = 0x15; break;
         case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
                        subopc_imm = 0; opc_imma = 0x05; break;
         case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
                        subopc_imm = 5; opc_imma = 0x2D; break;
         case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
                        subopc_imm = 3; opc_imma = 0x1D; break;
         case Aalu_AND: opc = 0x23; opc_rr = 0x21;
                        subopc_imm = 4; opc_imma = 0x25; break;
         case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
                        subopc_imm = 6; opc_imma = 0x35; break;
         case Aalu_OR:  opc = 0x0B; opc_rr = 0x09;
                        subopc_imm = 1; opc_imma = 0x0D; break;
         case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
                        subopc_imm = 7; opc_imma = 0x3D; break;
         default: goto bad;
      }
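      /* (Example of the table in use -- illustrative, not an original
         comment: for Aalu_ADD with a register source, opc_rr = 0x01,
         so "addq %rcx, %rdx" comes out as 48 01 CA: REX.W, opcode,
         then the mod-reg-rm byte from doAMode_R below.) */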
      switch (i->Ain.Alu64R.src->tag) {
         case Armi_Imm:
            if (i->Ain.Alu64R.dst == hregAMD64_RAX()
                && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
               goto bad; /* FIXME: awaiting test case */
               *p++ = toUChar(opc_imma);
               p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
            } else
            if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
               *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst );
               *p++ = 0x83;
               p    = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
               *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
            } else {
               *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst);
               *p++ = 0x81;
               p    = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
               p    = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
            }
            goto done;
         case Armi_Reg:
            *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
                               i->Ain.Alu64R.dst);
            *p++ = toUChar(opc_rr);
            p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
                             i->Ain.Alu64R.dst);
            goto done;
         case Armi_Mem:
            *p++ = rexAMode_M( i->Ain.Alu64R.dst,
                               i->Ain.Alu64R.src->Armi.Mem.am);
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Ain.Alu64R.dst,
                             i->Ain.Alu64R.src->Armi.Mem.am);
            goto done;
         default:
            goto bad;
      }
   case Ain_Alu64M:
      /* Deal specially with MOV */
      if (i->Ain.Alu64M.op == Aalu_MOV) {
         switch (i->Ain.Alu64M.src->tag) {
            case Ari_Reg:
               *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
                                 i->Ain.Alu64M.dst);
               *p++ = 0x89;
               p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
                                i->Ain.Alu64M.dst);
               goto done;
            case Ari_Imm:
               *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst);
               *p++ = 0xC7;
               p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst);
               p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
               goto done;
            default:
               goto bad;
         }
      }
//.. /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
//.. allowed here. */
//.. opc = subopc_imm = opc_imma = 0;
//.. switch (i->Xin.Alu32M.op) {
//.. case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
//.. case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
//.. default: goto bad;
//.. switch (i->Xin.Alu32M.src->tag) {
//.. p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
//.. i->Xin.Alu32M.dst);
//.. if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
//.. *p++ = 0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32;
//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
//.. p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
      break;

   case Ain_Sh64:
      opc_cl = opc_imm = subopc = 0;
      switch (i->Ain.Sh64.op) {
         case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
         case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
         case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
         default: goto bad;
      }
      if (i->Ain.Sh64.src == 0) {
         *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
         *p++ = toUChar(opc_cl);
         p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
         goto done;
      } else {
         *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
         *p++ = toUChar(opc_imm);
         p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
         *p++ = (UChar)(i->Ain.Sh64.src);
         goto done;
      }
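   /* (Worked example for the immediate form above -- illustrative,
      not an original comment: "shlq $2, %rax" is 48 C1 E0 02, i.e.
      REX.W, opc_imm 0xC1, mod-reg-rm 0xE0 with mod=11, subopc=4 and
      rm=%rax, then the shift count byte.) */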
   case Ain_Test64:
      /* testq sign-extend($imm32), %reg */
      *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst);
      *p++ = 0xF7;
      p = doAMode_R(p, fake(0), i->Ain.Test64.dst);
      p = emit32(p, i->Ain.Test64.imm32);
      goto done;
   case Ain_Unary64:
      if (i->Ain.Unary64.op == Aun_NOT) {
         *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
         *p++ = 0xF7;
         p = doAMode_R(p, fake(2), i->Ain.Unary64.dst);
         goto done;
      }
      if (i->Ain.Unary64.op == Aun_NEG) {
         *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
         *p++ = 0xF7;
         p = doAMode_R(p, fake(3), i->Ain.Unary64.dst);
         goto done;
      }
      break;
   case Ain_Lea64:
      *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
      *p++ = 0x8D;
      p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
      goto done;
   case Ain_MulL:
      subopc = i->Ain.MulL.syned ? 5 : 4;
      switch (i->Ain.MulL.src->tag) {
         case Arm_Mem:
            *p++ = rexAMode_M( fake(0),
                               i->Ain.MulL.src->Arm.Mem.am);
            *p++ = 0xF7;
            p = doAMode_M(p, fake(subopc),
                             i->Ain.MulL.src->Arm.Mem.am);
            goto done;
         case Arm_Reg:
            *p++ = rexAMode_R(fake(0),
                              i->Ain.MulL.src->Arm.Reg.reg);
            *p++ = 0xF7;
            p = doAMode_R(p, fake(subopc),
                             i->Ain.MulL.src->Arm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
   case Ain_Div:
      subopc = i->Ain.Div.syned ? 7 : 6;
      if (i->Ain.Div.sz == 4) {
         switch (i->Ain.Div.src->tag) {
            case Arm_Mem:
               goto bad; /*FIXME: awaiting test case*/
               *p++ = 0xF7;
               p = doAMode_M(p, fake(subopc),
                                i->Ain.Div.src->Arm.Mem.am);
               goto done;
            case Arm_Reg:
               *p++ = clearWBit(
                      rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg));
               *p++ = 0xF7;
               p = doAMode_R(p, fake(subopc),
                                i->Ain.Div.src->Arm.Reg.reg);
               goto done;
            default:
               goto bad;
         }
      }
      if (i->Ain.Div.sz == 8) {
         switch (i->Ain.Div.src->tag) {
            case Arm_Mem:
               *p++ = rexAMode_M( fake(0),
                                  i->Ain.Div.src->Arm.Mem.am);
               *p++ = 0xF7;
               p = doAMode_M(p, fake(subopc),
                                i->Ain.Div.src->Arm.Mem.am);
               goto done;
            case Arm_Reg:
               *p++ = rexAMode_R( fake(0),
                                  i->Ain.Div.src->Arm.Reg.reg);
               *p++ = 0xF7;
               p = doAMode_R(p, fake(subopc),
                                i->Ain.Div.src->Arm.Reg.reg);
               goto done;
            default:
               goto bad;
         }
      }
      break;
//.. case Xin_Sh3232:
//.. vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
//.. if (i->Xin.Sh3232.amt == 0) {
//.. /* shldl/shrdl by %cl */
//.. if (i->Xin.Sh3232.op == Xsh_SHL) {
//.. p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
   case Ain_Push:
      switch (i->Ain.Push.src->tag) {
         case Armi_Mem:
            *p++ = clearWBit(
                   rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am));
            *p++ = 0xFF;
            p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
            goto done;
         case Armi_Imm:
            *p++ = 0x68;
            p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
            goto done;
         case Armi_Reg:
            *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg)));
            *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg));
            goto done;
         default:
            goto bad;
      }
   case Ain_Call: {
      /* As per detailed comment for Ain_Call in
         getRegUsage_AMD64Instr above, %r11 is used as an address
         temporary. */
      /* jump over the following two insns if the condition does not
         hold */
      Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
      if (i->Ain.Call.cond != Acc_ALWAYS) {
         *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
         *p++ = shortImm ? 10 : 13;
         /* 10 or 13 bytes in the next two insns */
      }
      if (shortImm) {
         /* 7 bytes: movl sign-extend(imm32), %r11 */
         *p++ = 0x49;
         *p++ = 0xC7;
         *p++ = 0xC3;
         p = emit32(p, (UInt)i->Ain.Call.target);
      } else {
         /* 10 bytes: movabsq $target, %r11 */
         *p++ = 0x49;
         *p++ = 0xBB;
         p = emit64(p, i->Ain.Call.target);
      }
      /* 3 bytes: call *%r11 */
      *p++ = 0x41;
      *p++ = 0xFF;
      *p++ = 0xD3;
      goto done;
   }
   case Ain_Goto:
      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Ain.Goto.cond != Acc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (i->Ain.Goto.cond ^ 1));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* If a non-boring, set %rbp (the guest state pointer)
         appropriately.  Since these numbers are all small positive
         integers, we can get away with "movl $N, %ebp" rather than
         the longer "movq $N, %rbp". */
      /* movl $magic_number, %ebp */
      switch (i->Ain.Goto.jk) {
         case Ijk_ClientReq:   *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
         case Ijk_Sys_syscall: *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_SYSCALL); break;
         case Ijk_Sys_int32:   *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT32); break;
         case Ijk_Yield:       *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_YIELD); break;
         case Ijk_EmWarn:      *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_EMWARN); break;
         case Ijk_MapFail:     *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
         case Ijk_NoDecode:    *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_NODECODE); break;
         case Ijk_TInval:      *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_TINVAL); break;
         case Ijk_NoRedir:     *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
         case Ijk_SigTRAP:     *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
         case Ijk_SigSEGV:     *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
         case Ijk_Ret:
         case Ijk_Call:
         case Ijk_Boring:
            break;
         default:
            ppIRJumpKind(i->Ain.Goto.jk);
            vpanic("emit_AMD64Instr.Ain_Goto: unknown jump kind");
      }

      /* Get the destination address into %rax */
      if (i->Ain.Goto.dst->tag == Ari_Imm) {
         /* movl sign-ext($immediate), %rax ; ret */
         *p++ = 0x48;
         *p++ = 0xC7;
         *p++ = 0xC0;
         p = emit32(p, i->Ain.Goto.dst->Ari.Imm.imm32);
      } else {
         vassert(i->Ain.Goto.dst->tag == Ari_Reg);
         /* movq %reg, %rax ; ret */
         if (i->Ain.Goto.dst->Ari.Reg.reg != hregAMD64_RAX()) {
            *p++ = rexAMode_R(i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
            *p++ = 0x89;
            p = doAMode_R(p, i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
         }
      }

      /* Get the dispatcher address into %rdx.  This has to happen
         after the load of %rax since %rdx might be carrying the value
         destined for %rax immediately prior to this Ain_Goto. */
      vassert(sizeof(ULong) == sizeof(void*));
      vassert(dispatch != NULL);

      if (fitsIn32Bits(Ptr_to_ULong(dispatch))) {
         /* movl sign-extend(imm32), %rdx */
         *p++ = 0x48;
         *p++ = 0xC7;
         *p++ = 0xC2;
         p = emit32(p, (UInt)Ptr_to_ULong(dispatch));
      } else {
         /* movabsq $imm64, %rdx */
         *p++ = 0x48;
         *p++ = 0xBA;
         p = emit64(p, Ptr_to_ULong(dispatch));
      }

      /* jmp *%rdx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one. */
      if (i->Ain.Goto.cond != Acc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 30);
         *ptmp = toUChar(delta-1);
      }
      goto done;
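   /* (Illustrative note on the fixup above, not an original comment:
      the byte at *ptmp is the rel8 field of the Jcc emitted earlier.
      It is delta-1 rather than delta because the jump offset is
      measured from the end of the two-byte Jcc, i.e. from ptmp+1,
      whereas delta is measured from ptmp itself.) */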
   case Ain_CMov64:
      vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
      if (i->Ain.CMov64.src->tag == Arm_Reg) {
         *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
         *p++ = 0x0F;
         *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
         p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
         goto done;
      }
      if (i->Ain.CMov64.src->tag == Arm_Mem) {
         *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
         *p++ = 0x0F;
         *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
         p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
         goto done;
      }
      break;
   case Ain_MovZLQ:
      /* Produce a 32-bit reg-reg move, since the implicit zero-extend
         does what we want. */
      *p++ = clearWBit(
                rexAMode_R(i->Ain.MovZLQ.src, i->Ain.MovZLQ.dst));
      *p++ = 0x89;
      p = doAMode_R(p, i->Ain.MovZLQ.src, i->Ain.MovZLQ.dst);
      goto done;
   case Ain_LoadEX:
      if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
         /* movzbq */
         *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
         *p++ = 0x0F;
         *p++ = 0xB6;
         p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
         goto done;
      }
      if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
         /* movzwq */
         *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
         *p++ = 0x0F;
         *p++ = 0xB7;
         p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
         goto done;
      }
      if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
         /* movzlq */
         /* This isn't really an existing AMD64 instruction per se.
            Rather, we have to do a 32-bit load.  Because a 32-bit
            write implicitly clears the upper 32 bits of the target
            register, we get what we want. */
         *p++ = clearWBit(
                rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
         *p++ = 0x8B;
         p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
         goto done;
      }
      break;
   case Ain_Set64:
      /* Make the destination register be 1 or 0, depending on whether
         the relevant condition holds.  Complication: the top 56 bits
         of the destination should be forced to zero, but doing 'xorq
         %r,%r' kills the flag(s) we are about to read.  Sigh.  So
         start off by moving $0 into the dest. */
      reg = iregBits3210(i->Ain.Set64.dst);
      vassert(reg < 16);

      /* movq $0, %dst */
      *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
      *p++ = 0xC7;
      *p++ = toUChar(0xC0 + (reg & 7));
      p = emit32(p, 0);

      /* setb lo8(%dst) */
      /* note, 8-bit register rex trickiness.  Be careful here. */
      *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
      *p++ = 0x0F;
      *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
      *p++ = toUChar(0xC0 + (reg & 7));
      goto done;
   case Ain_Bsfr64:
      *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
      *p++ = 0x0F;
      if (i->Ain.Bsfr64.isFwds) {
         *p++ = 0xBC; /* bsf */
      } else {
         *p++ = 0xBD; /* bsr */
      }
      p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
      goto done;

   case Ain_MFence:
      /* mfence */
      *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
      goto done;
   case Ain_ACAS:
      /* lock */
      *p++ = 0xF0;
      if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
      /* cmpxchg{b,w,l,q} %rbx,mem.  Expected-value in %rax, new value
         in %rbx.  The new-value register is hardwired to be %rbx
         since dealing with byte integer registers is too much hassle,
         so we force the register operand to %rbx (could equally be
         %rcx or %rdx). */
      rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
      if (i->Ain.ACAS.sz != 8)
         rex = clearWBit(rex);

      *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
      *p++ = 0x0F;
      if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
      p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
      goto done;
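   /* (Concrete example, illustrative and not an original comment:
      for sz == 8 and amode (%rdi), the sequence is F0 48 0F B1 1F,
      i.e. "lock cmpxchgq %rbx, (%rdi)".) */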
   case Ain_DACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg{8,16}b m{64,128}.  Expected-value in %rdx:%rax, new
         value in %rcx:%rbx.  All 4 regs are hardwired in the ISA, so
         aren't encoded in the insn. */
      rex = rexAMode_M( fake(1), i->Ain.ACAS.addr );
      if (i->Ain.ACAS.sz != 8)
         rex = clearWBit(rex);
      *p++ = rex;
      *p++ = 0x0F;
      *p++ = 0xC7;
      p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
      goto done;
   case Ain_A87Free:
      vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
      for (j = 0; j < i->Ain.A87Free.nregs; j++) {
         p = do_ffree_st(p, 7-j);
      }
      goto done;
2950 case Ain_A87PushPop:
2951 if (i->Ain.A87PushPop.isPush) {
2952 /* Load from memory into %st(0): fldl amode */
2954 rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
2956 p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
2958 /* Dump %st(0) to memory: fstpl amode */
2960 rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
2962 p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
   case Ain_A87FpOp:
      switch (i->Ain.A87FpOp.op) {
         case Afp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
         case Afp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
         case Afp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
         case Afp_TAN:    *p++ = 0xD9; *p++ = 0xF2; break;
         case Afp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
         case Afp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
         case Afp_SCALE:  *p++ = 0xD9; *p++ = 0xFD; break;
         case Afp_ATAN:   *p++ = 0xD9; *p++ = 0xF3; break;
         case Afp_YL2X:   *p++ = 0xD9; *p++ = 0xF1; break;
         case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
         case Afp_PREM:   *p++ = 0xD9; *p++ = 0xF8; break;
         case Afp_PREM1:  *p++ = 0xD9; *p++ = 0xF5; break;
         default: goto bad;
      }
      goto done;
   case Ain_A87LdCW:
      *p++ = clearWBit(
                rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
      *p++ = 0xD9;
      p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
      goto done;

   case Ain_A87StSW:
      *p++ = clearWBit(
                rexAMode_M(fake(7), i->Ain.A87StSW.addr) );
      *p++ = 0xDD;
      p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr);
      goto done;
   case Ain_Store:
      if (i->Ain.Store.sz == 2) {
         /* This just goes to show the craziness of the instruction
            set encoding.  We have to insert two prefix bytes, but be
            careful to avoid a conflict in what the size should be, by
            ensuring that REX.W = 0. */
         *p++ = 0x66; /* override to 16-bits */
         *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
         *p++ = 0x89;
         p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
         goto done;
      }
      if (i->Ain.Store.sz == 4) {
         *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
         *p++ = 0x89;
         p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
         goto done;
      }
      if (i->Ain.Store.sz == 1) {
         /* This is one place where it would be wrong to skip emitting
            a rex byte of 0x40, since the mere presence of rex changes
            the meaning of the byte register access.  Be careful. */
         *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
         *p++ = 0x88;
         p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
         goto done;
      }
      break;
//.. case Xin_FpUnary:
//.. /* gop %src, %dst
//.. --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
//.. p = do_fop1_st(p, i->Xin.FpUnary.op);
//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
//.. case Xin_FpBinary:
//.. if (i->Xin.FpBinary.op == Xfp_YL2X
//.. || i->Xin.FpBinary.op == Xfp_YL2XP1) {
//.. /* Have to do this specially. */
//.. /* ffree %st7 ; fld %st(srcL) ;
//.. ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
//.. *p++ = i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9;
//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
//.. if (i->Xin.FpBinary.op == Xfp_ATAN) {
//.. /* Have to do this specially. */
//.. /* ffree %st7 ; fld %st(srcL) ;
//.. ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
//.. *p++ = 0xD9; *p++ = 0xF3;
//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
//.. if (i->Xin.FpBinary.op == Xfp_PREM
//.. || i->Xin.FpBinary.op == Xfp_PREM1
//.. || i->Xin.FpBinary.op == Xfp_SCALE) {
//.. /* Have to do this specially. */
//.. /* ffree %st7 ; fld %st(srcR) ;
//.. ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
//.. fincstp ; ffree %st7 */
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
//.. switch (i->Xin.FpBinary.op) {
//.. case Xfp_PREM: *p++ = 0xF8; break;
//.. case Xfp_PREM1: *p++ = 0xF5; break;
//.. case Xfp_SCALE: *p++ = 0xFD; break;
//.. default: vpanic("emitAMD64Instr(FpBinary,PREM/PREM1/SCALE)");
//.. p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
//.. *p++ = 0xD9; *p++ = 0xF7;
//.. p = do_ffree_st7(p);
//.. /* General case */
//.. /* gop %srcL, %srcR, %dst
//.. --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
//.. p = do_fop2_st(p, i->Xin.FpBinary.op,
//.. 1+hregNumber(i->Xin.FpBinary.srcR));
//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
//.. case Xin_FpLdSt:
//.. vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8);
//.. if (i->Xin.FpLdSt.isLoad) {
//.. /* Load from memory into %fakeN.
//.. --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1)
//.. p = do_ffree_st7(p);
//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
//.. p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
//.. /* Store from %fakeN into memory.
//.. --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
//.. p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
//.. case Xin_FpLdStI:
//.. if (i->Xin.FpLdStI.isLoad) {
//.. /* Load from memory into %fakeN, converting from an int.
//.. --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
//.. switch (i->Xin.FpLdStI.sz) {
//.. case 8: opc = 0xDF; subopc_imm = 5; break;
//.. case 4: opc = 0xDB; subopc_imm = 0; break;
//.. case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-load)");
//.. p = do_ffree_st7(p);
//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
//.. /* Store from %fakeN into memory, converting to an int.
//.. --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
//.. switch (i->Xin.FpLdStI.sz) {
//.. case 8: opc = 0xDF; subopc_imm = 7; break;
//.. case 4: opc = 0xDB; subopc_imm = 3; break;
//.. case 2: opc = 0xDF; subopc_imm = 3; break;
//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-store)");
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
//.. case Xin_Fp64to32:
//.. /* ffree %st7 ; fld %st(src) */
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
//.. /* subl $4, %esp */
//.. *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
//.. /* fstps (%esp) */
//.. *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
//.. /* flds (%esp) */
//.. *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
//.. /* addl $4, %esp */
//.. *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
//.. /* fstp %st(1+dst) */
//.. p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
//.. case Xin_FpCMov:
//.. /* jmp fwds if !condition */
//.. *p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1);
//.. *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
//.. /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
//.. p = do_ffree_st7(p);
//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
//.. p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
//.. /* Fill in the jump offset. */
//.. *(ptmp-1) = p - ptmp;
   case Ain_LdMXCSR:
      *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
      *p++ = 0x0F;
      *p++ = 0xAE;
      p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
      goto done;
//.. case Xin_FpStSW_AX:
//.. /* note, this emits fnstsw %ax, not fstsw %ax */
   case Ain_SseUComIS:
      /* ucomi[sd] %srcL, %srcR ;  pushfq ; popq %dst */
      /* ucomi[sd] %srcL, %srcR */
      if (i->Ain.SseUComIS.sz == 8) {
         *p++ = 0x66;
      } else {
         vassert(i->Ain.SseUComIS.sz == 4);
      }
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL),
                         vreg2ireg(i->Ain.SseUComIS.srcR) ));
      *p++ = 0x0F;
      *p++ = 0x2E;
      p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL),
                       vreg2ireg(i->Ain.SseUComIS.srcR) );
      /* pushfq */
      *p++ = 0x9C;
      /* popq %dst */
      *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst)));
      *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst));
      goto done;
   case Ain_SseSI2SF:
      /* cvtsi2s[sd] %src, %dst */
      rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
                        i->Ain.SseSI2SF.src );
      *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
      *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
      *p++ = 0x0F;
      *p++ = 0x2A;
      p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
                        i->Ain.SseSI2SF.src );
      goto done;
   case Ain_SseSF2SI:
      /* cvts[sd]2si %src, %dst */
      rex = rexAMode_R( i->Ain.SseSF2SI.dst,
                        vreg2ireg(i->Ain.SseSF2SI.src) );
      *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
      *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
      *p++ = 0x0F;
      *p++ = 0x2D;
      p = doAMode_R( p, i->Ain.SseSF2SI.dst,
                        vreg2ireg(i->Ain.SseSF2SI.src) );
      goto done;
   case Ain_SseSDSS:
      /* cvtsd2ss/cvtss2sd %src, %dst */
      *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst),
                         vreg2ireg(i->Ain.SseSDSS.src) ));
      *p++ = 0x0F;
      *p++ = 0x5A;
      p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst),
                        vreg2ireg(i->Ain.SseSDSS.src) );
      goto done;
//.. case Xin_FpCmp:
//.. /* gcmp %fL, %fR, %dst
//.. -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
//.. fnstsw %ax ; movl %eax, %dst
//.. /* ffree %st7 */
//.. p = do_ffree_st7(p);
//.. /* fpush %fL */
//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
//.. /* fucomp %(fR+1) */
//.. *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR)));
//.. /* fnstsw %ax */
//.. /* movl %eax, %dst */
//.. p = doAMode_R(p, hregAMD64_EAX(), i->Xin.FpCmp.dst);
//.. case Xin_SseConst: {
//.. UShort con = i->Xin.SseConst.con;
//.. p = push_word_from_tags(p, (con >> 12) & 0xF);
//.. p = push_word_from_tags(p, (con >> 8) & 0xF);
//.. p = push_word_from_tags(p, (con >> 4) & 0xF);
//.. p = push_word_from_tags(p, con & 0xF);
//.. /* movl (%esp), %xmm-dst */
//.. *p++ = 0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst));
//.. /* addl $16, %esp */
   case Ain_SseLdSt:
      if (i->Ain.SseLdSt.sz == 8) {
         *p++ = 0xF2;
      } else
      if (i->Ain.SseLdSt.sz == 4) {
         *p++ = 0xF3;
      } else
      if (i->Ain.SseLdSt.sz != 16) {
         vassert(0);
      }
      *p++ = clearWBit(
             rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr));
      *p++ = 0x0F;
      *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
      p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr);
      goto done;
   case Ain_SseLdzLO:
      vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
      /* movs[sd] amode, %xmm-dst */
      *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
      *p++ = clearWBit(
             rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg),
                        i->Ain.SseLdzLO.addr));
      *p++ = 0x0F;
      *p++ = 0x10;
      p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg),
                       i->Ain.SseLdzLO.addr);
      goto done;
   case Ain_Sse32Fx4:
      xtra = 0;
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst),
                         vreg2ireg(i->Ain.Sse32Fx4.src) ));
      *p++ = 0x0F;
      switch (i->Ain.Sse32Fx4.op) {
         case Asse_ADDF:   *p++ = 0x58; break;
         case Asse_DIVF:   *p++ = 0x5E; break;
         case Asse_MAXF:   *p++ = 0x5F; break;
         case Asse_MINF:   *p++ = 0x5D; break;
         case Asse_MULF:   *p++ = 0x59; break;
         case Asse_RCPF:   *p++ = 0x53; break;
         case Asse_RSQRTF: *p++ = 0x52; break;
         case Asse_SQRTF:  *p++ = 0x51; break;
         case Asse_SUBF:   *p++ = 0x5C; break;
         case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst),
                       vreg2ireg(i->Ain.Sse32Fx4.src) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
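   /* (Illustrative note on the xtra scheme above, not an original
      comment: for the 0xC2 "cmpps"-family forms, the low byte of
      xtra is the comparison predicate immediate -- 0 eq, 1 lt, 2 le,
      3 unord -- and the 0x100 bit merely flags that an immediate
      byte must be appended after the mod-reg-rm bytes.) */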
   case Ain_Sse64Fx2:
      xtra = 0;
      *p++ = 0x66;
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst),
                         vreg2ireg(i->Ain.Sse64Fx2.src) ));
      *p++ = 0x0F;
      switch (i->Ain.Sse64Fx2.op) {
         case Asse_ADDF:   *p++ = 0x58; break;
         case Asse_DIVF:   *p++ = 0x5E; break;
         case Asse_MAXF:   *p++ = 0x5F; break;
         case Asse_MINF:   *p++ = 0x5D; break;
         case Asse_MULF:   *p++ = 0x59; break;
//..     case Xsse_RCPF:   *p++ = 0x53; break;
//..     case Xsse_RSQRTF: *p++ = 0x52; break;
         case Asse_SQRTF:  *p++ = 0x51; break;
         case Asse_SUBF:   *p++ = 0x5C; break;
         case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
                       vreg2ireg(i->Ain.Sse64Fx2.src) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
   case Ain_Sse32FLo:
      xtra = 0;
      *p++ = 0xF3;
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst),
                         vreg2ireg(i->Ain.Sse32FLo.src) ));
      *p++ = 0x0F;
      switch (i->Ain.Sse32FLo.op) {
         case Asse_ADDF:   *p++ = 0x58; break;
         case Asse_DIVF:   *p++ = 0x5E; break;
         case Asse_MAXF:   *p++ = 0x5F; break;
         case Asse_MINF:   *p++ = 0x5D; break;
         case Asse_MULF:   *p++ = 0x59; break;
         case Asse_RCPF:   *p++ = 0x53; break;
         case Asse_RSQRTF: *p++ = 0x52; break;
         case Asse_SQRTF:  *p++ = 0x51; break;
         case Asse_SUBF:   *p++ = 0x5C; break;
         case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst),
                       vreg2ireg(i->Ain.Sse32FLo.src) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
   case Ain_Sse64FLo:
      xtra = 0;
      *p++ = 0xF2;
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst),
                         vreg2ireg(i->Ain.Sse64FLo.src) ));
      *p++ = 0x0F;
      switch (i->Ain.Sse64FLo.op) {
         case Asse_ADDF:   *p++ = 0x58; break;
         case Asse_DIVF:   *p++ = 0x5E; break;
         case Asse_MAXF:   *p++ = 0x5F; break;
         case Asse_MINF:   *p++ = 0x5D; break;
         case Asse_MULF:   *p++ = 0x59; break;
//..     case Xsse_RCPF:   *p++ = 0x53; break;
//..     case Xsse_RSQRTF: *p++ = 0x52; break;
         case Asse_SQRTF:  *p++ = 0x51; break;
         case Asse_SUBF:   *p++ = 0x5C; break;
         case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst),
                       vreg2ireg(i->Ain.Sse64FLo.src) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
   case Ain_SseReRg:
#     define XX(_n) *p++ = (_n)
      rex = clearWBit(
            rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst),
                        vreg2ireg(i->Ain.SseReRg.src) ));
      switch (i->Ain.SseReRg.op) {
         case Asse_MOV:      /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
         case Asse_OR:       XX(rex); XX(0x0F); XX(0x56); break;
         case Asse_XOR:      XX(rex); XX(0x0F); XX(0x57); break;
         case Asse_AND:      XX(rex); XX(0x0F); XX(0x54); break;
         case Asse_ANDN:     XX(rex); XX(0x0F); XX(0x55); break;
         case Asse_PACKSSD:  XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
         case Asse_PACKSSW:  XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
         case Asse_PACKUSW:  XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
         case Asse_ADD8:     XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
         case Asse_ADD16:    XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
         case Asse_ADD32:    XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
         case Asse_ADD64:    XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
         case Asse_QADD8S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
         case Asse_QADD16S:  XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
         case Asse_QADD8U:   XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
         case Asse_QADD16U:  XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
         case Asse_AVG8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
         case Asse_AVG16U:   XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
         case Asse_CMPEQ8:   XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
         case Asse_CMPEQ16:  XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
         case Asse_CMPEQ32:  XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
         case Asse_CMPGT8S:  XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
         case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
         case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
         case Asse_MAX16S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
         case Asse_MAX8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
         case Asse_MIN16S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
         case Asse_MIN8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
         case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
         case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
         case Asse_MUL16:    XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
         case Asse_SHL16:    XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
         case Asse_SHL32:    XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
         case Asse_SHL64:    XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
         case Asse_SAR16:    XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
         case Asse_SAR32:    XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
         case Asse_SHR16:    XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
         case Asse_SHR32:    XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
         case Asse_SHR64:    XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
         case Asse_SUB8:     XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
         case Asse_SUB16:    XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
         case Asse_SUB32:    XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
         case Asse_SUB64:    XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
         case Asse_QSUB8S:   XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
         case Asse_QSUB16S:  XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
         case Asse_QSUB8U:   XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
         case Asse_QSUB16U:  XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
         case Asse_UNPCKHB:  XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
         case Asse_UNPCKHW:  XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
         case Asse_UNPCKHD:  XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
         case Asse_UNPCKHQ:  XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
         case Asse_UNPCKLB:  XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
         case Asse_UNPCKLW:  XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
         case Asse_UNPCKLD:  XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
         case Asse_UNPCKLQ:  XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
         default: goto bad;
      }
      p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst),
                       vreg2ireg(i->Ain.SseReRg.src) );
#     undef XX
      goto done;
   case Ain_SseCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* movups %src, %dst */
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst),
                         vreg2ireg(i->Ain.SseCMov.src) ));
      *p++ = 0x0F;
      *p++ = 0x10;
      p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst),
                       vreg2ireg(i->Ain.SseCMov.src) );

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;
   case Ain_SseShuf:
      *p++ = 0x66;
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst),
                         vreg2ireg(i->Ain.SseShuf.src) ));
      *p++ = 0x0F;
      *p++ = 0x70;
      p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst),
                       vreg2ireg(i->Ain.SseShuf.src) );
      *p++ = (UChar)(i->Ain.SseShuf.order);
      goto done;
   default:
      goto bad;
   }

  bad:
   ppAMD64Instr(i, mode64);
   vpanic("emit_AMD64Instr");
   /*NOTREACHED*/

  done:
   vassert(p - &buf[0] <= 32);
   return p - &buf[0];

#  undef fake
}
/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/