2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_defs.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 #include "libvex_basictypes.h"
38 #include "libvex_trc_values.h"
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_x86_defs.h"
45 /* --------- Registers. --------- */
/* Print the x86 name of a host register with vex_printf.
   Virtual registers take a generic path; real registers are printed
   according to hregClass(reg), using one of three forms visible below:
   an entry of ireg32_names (register number 0..7), "%fake<n>" (0..5) or
   "%xmm<n>" (0..7).  The register number is range-checked with vassert
   before it is used.
   NOTE(review): the opening brace, the declaration/assignment of r, the
   body of the virtual-register branch, the case labels and the closing
   lines are not visible in this excerpt.  Presumably the three forms
   correspond to HRcInt32, HRcFlt64 and HRcVec128 -- confirm. */
47 void ppHRegX86 ( HReg reg )
50 static HChar* ireg32_names[8]
51 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
52 /* Be generic for all virtual regs. */
53 if (hregIsVirtual(reg)) {
57 /* But specific for real regs. */
58 switch (hregClass(reg)) {
61 vassert(r >= 0 && r < 8);
62 vex_printf("%s", ireg32_names[r]);
66 vassert(r >= 0 && r < 6);
67 vex_printf("%%fake%d", r);
71 vassert(r >= 0 && r < 8);
72 vex_printf("%%xmm%d", r);
79 HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
80 HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
81 HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
82 HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
83 HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
84 HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
85 HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
86 HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }
88 HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
89 HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
90 HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
91 HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
92 HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
93 HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }
95 HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
96 HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
97 HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
98 HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
99 HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
100 HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
101 HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
102 HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }
/* Produce the list of real registers available for allocation.
   *arr is set to a LibVEX_Alloc'd array of HReg and entries 0..19 are
   filled with: six integer registers (EAX, EBX, ECX, EDX, ESI, EDI),
   the six FAKE floating-point registers, and the eight XMM registers.
   ESP and EBP are not included in the list.
   NOTE(review): *nregs is read to size the allocation, but the statement
   that sets it (and the function's braces) is not visible in this
   excerpt; the stores below need it to be at least 20. */
105 void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
108 *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
/* Integer registers. */
109 (*arr)[0] = hregX86_EAX();
110 (*arr)[1] = hregX86_EBX();
111 (*arr)[2] = hregX86_ECX();
112 (*arr)[3] = hregX86_EDX();
113 (*arr)[4] = hregX86_ESI();
114 (*arr)[5] = hregX86_EDI();
/* Fake floating-point registers. */
115 (*arr)[6] = hregX86_FAKE0();
116 (*arr)[7] = hregX86_FAKE1();
117 (*arr)[8] = hregX86_FAKE2();
118 (*arr)[9] = hregX86_FAKE3();
119 (*arr)[10] = hregX86_FAKE4();
120 (*arr)[11] = hregX86_FAKE5();
/* SSE registers. */
121 (*arr)[12] = hregX86_XMM0();
122 (*arr)[13] = hregX86_XMM1();
123 (*arr)[14] = hregX86_XMM2();
124 (*arr)[15] = hregX86_XMM3();
125 (*arr)[16] = hregX86_XMM4();
126 (*arr)[17] = hregX86_XMM5();
127 (*arr)[18] = hregX86_XMM6();
128 (*arr)[19] = hregX86_XMM7();
132 /* --------- Condition codes, Intel encoding. --------- */
134 HChar* showX86CondCode ( X86CondCode cond )
137 case Xcc_O: return "o";
138 case Xcc_NO: return "no";
139 case Xcc_B: return "b";
140 case Xcc_NB: return "nb";
141 case Xcc_Z: return "z";
142 case Xcc_NZ: return "nz";
143 case Xcc_BE: return "be";
144 case Xcc_NBE: return "nbe";
145 case Xcc_S: return "s";
146 case Xcc_NS: return "ns";
147 case Xcc_P: return "p";
148 case Xcc_NP: return "np";
149 case Xcc_L: return "l";
150 case Xcc_NL: return "nl";
151 case Xcc_LE: return "le";
152 case Xcc_NLE: return "nle";
153 case Xcc_ALWAYS: return "ALWAYS";
154 default: vpanic("ppX86CondCode");
159 /* --------- X86AMode: memory address expressions. --------- */
/* Build an address mode of the "immediate + register" (IR) form:
   allocates an X86AMode with LibVEX_Alloc and stores imm32 and reg in
   its Xam.IR fields.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
161 X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
162 X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
164 am->Xam.IR.imm = imm32;
165 am->Xam.IR.reg = reg;
/* Build an address mode of the "immediate + base + scaled index" (IRRS)
   form.  shift is the log2 of the index scale (ppX86AMode prints the
   scale as 1 << shift) and must be in 0..3; this is asserted after the
   fields have been stored.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
168 X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
169 X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
171 am->Xam.IRRS.imm = imm32;
172 am->Xam.IRRS.base = base;
173 am->Xam.IRRS.index = indEx;
174 am->Xam.IRRS.shift = shift;
175 vassert(shift >= 0 && shift <= 3);
/* Return a freshly allocated copy of am, made by calling the constructor
   that matches its form (X86AMode_IR or X86AMode_IRRS) with am's own
   fields.  Falls through to vpanic("dopyX86AMode") otherwise.
   NOTE(review): the switch header and case labels selecting between the
   two forms are not visible in this excerpt. */
179 X86AMode* dopyX86AMode ( X86AMode* am ) {
182 return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
184 return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
185 am->Xam.IRRS.index, am->Xam.IRRS.shift );
187 vpanic("dopyX86AMode");
/* Print an address mode with vex_printf.
   IR form:   the immediate in hex followed by "(" and the register; a
              zero immediate is tested for and handled separately.
   IRRS form: the immediate in hex, "(", base, index, then the scale
              printed as 1 << shift and a closing ")".
   Anything else ends in vpanic("ppX86AMode").
   NOTE(review): the switch/case lines, the statement taken when the IR
   immediate is zero, and the separator/closing prints are not visible
   in this excerpt. */
191 void ppX86AMode ( X86AMode* am ) {
194 if (am->Xam.IR.imm == 0)
197 vex_printf("0x%x(", am->Xam.IR.imm);
198 ppHRegX86(am->Xam.IR.reg);
202 vex_printf("0x%x(", am->Xam.IRRS.imm);
203 ppHRegX86(am->Xam.IRRS.base);
205 ppHRegX86(am->Xam.IRRS.index);
206 vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
209 vpanic("ppX86AMode");
/* Record in u the registers an address mode reads: the single register
   of the IR form, or both base and index of the IRRS form.  All uses are
   HRmRead -- computing an address never writes its registers.
   NOTE(review): the switch/case lines are not visible in this excerpt. */
213 static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
216 addHRegUse(u, HRmRead, am->Xam.IR.reg);
219 addHRegUse(u, HRmRead, am->Xam.IRRS.base);
220 addHRegUse(u, HRmRead, am->Xam.IRRS.index);
223 vpanic("addRegUsage_X86AMode");
/* Rewrite, in place, every register mentioned by am with the result of
   lookupHRegRemap(m, ...): the IR register, or the IRRS base and index.
   NOTE(review): the switch/case lines are not visible in this excerpt. */
227 static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
230 am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
233 am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
234 am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
237 vpanic("mapRegs_X86AMode");
241 /* --------- Operand, which can be reg, immediate or memory. --------- */
/* Build a reg/mem/imm operand holding the 32-bit immediate imm32.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
243 X86RMI* X86RMI_Imm ( UInt imm32 ) {
244 X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
246 op->Xrmi.Imm.imm32 = imm32;
/* Build a reg/mem/imm operand that refers to register reg.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
249 X86RMI* X86RMI_Reg ( HReg reg ) {
250 X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
252 op->Xrmi.Reg.reg = reg;
/* Build a reg/mem/imm operand that refers to memory at address mode am.
   The pointer am is stored as-is, not copied.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
255 X86RMI* X86RMI_Mem ( X86AMode* am ) {
256 X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
258 op->Xrmi.Mem.am = am;
/* Print a reg/mem/imm operand: an immediate as "$0x<hex>", a register
   via ppHRegX86, or a memory operand via ppX86AMode.
   NOTE(review): the switch/case lines, the returns and the default arm
   are not visible in this excerpt. */
262 void ppX86RMI ( X86RMI* op ) {
265 vex_printf("$0x%x", op->Xrmi.Imm.imm32);
268 ppHRegX86(op->Xrmi.Reg.reg);
271 ppX86AMode(op->Xrmi.Mem.am);
278 /* An X86RMI can only be used in a "read" context (what would it mean
279 to write or modify a literal?) and so we enumerate its registers accordingly. */
/* Record in u the registers read by a reg/mem/imm operand: the register
   itself for the Reg form, or the address mode's registers (via
   addRegUsage_X86AMode) for the Mem form.  Every use is a read.
   NOTE(review): the switch/case lines -- including whatever is done for
   the Imm form -- are not visible in this excerpt. */
281 static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
286 addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
289 addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
292 vpanic("addRegUsage_X86RMI");
/* Apply the register remapping m to a reg/mem/imm operand in place:
   the Reg form's register is replaced by its lookupHRegRemap result and
   the Mem form's address mode is handed to mapRegs_X86AMode.
   NOTE(review): the switch/case lines are not visible in this excerpt. */
296 static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
301 op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
304 mapRegs_X86AMode(m, op->Xrmi.Mem.am);
307 vpanic("mapRegs_X86RMI");
312 /* --------- Operand, which can be reg or immediate only. --------- */
/* Build a reg/imm operand holding the 32-bit immediate imm32.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
314 X86RI* X86RI_Imm ( UInt imm32 ) {
315 X86RI* op = LibVEX_Alloc(sizeof(X86RI));
317 op->Xri.Imm.imm32 = imm32;
/* Build a reg/imm operand that refers to register reg.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
320 X86RI* X86RI_Reg ( HReg reg ) {
321 X86RI* op = LibVEX_Alloc(sizeof(X86RI));
323 op->Xri.Reg.reg = reg;
/* Print a reg/imm operand: an immediate as "$0x<hex>", or a register
   via ppHRegX86.
   NOTE(review): the switch/case lines, the returns and the default arm
   are not visible in this excerpt. */
327 void ppX86RI ( X86RI* op ) {
330 vex_printf("$0x%x", op->Xri.Imm.imm32);
333 ppHRegX86(op->Xri.Reg.reg);
340 /* An X86RI can only be used in a "read" context (what would it mean
341 to write or modify a literal?) and so we enumerate its registers accordingly. */
/* Record in u the register read by a reg/imm operand (Reg form only;
   the use is always HRmRead).
   NOTE(review): the switch/case lines -- including whatever is done for
   the Imm form -- are not visible in this excerpt. */
343 static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
348 addHRegUse(u, HRmRead, op->Xri.Reg.reg);
351 vpanic("addRegUsage_X86RI");
/* Apply the register remapping m to a reg/imm operand in place: the Reg
   form's register is replaced by its lookupHRegRemap result.
   NOTE(review): the switch/case lines are not visible in this excerpt. */
355 static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
360 op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
363 vpanic("mapRegs_X86RI");
368 /* --------- Operand, which can be reg or memory only. --------- */
/* Build a reg/mem operand that refers to register reg.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
370 X86RM* X86RM_Reg ( HReg reg ) {
371 X86RM* op = LibVEX_Alloc(sizeof(X86RM));
373 op->Xrm.Reg.reg = reg;
/* Build a reg/mem operand that refers to memory at address mode am.
   NOTE(review): only the allocation is visible in this excerpt; the tag
   store, the store of am into the operand, the return statement and the
   closing brace are all missing from view. */
376 X86RM* X86RM_Mem ( X86AMode* am ) {
377 X86RM* op = LibVEX_Alloc(sizeof(X86RM));
/* Print a reg/mem operand: a memory operand via ppX86AMode, or a
   register via ppHRegX86.
   NOTE(review): the switch/case lines, the returns and the default arm
   are not visible in this excerpt. */
383 void ppX86RM ( X86RM* op ) {
386 ppX86AMode(op->Xrm.Mem.am);
389 ppHRegX86(op->Xrm.Reg.reg);
396 /* Because an X86RM can be both a source or destination operand, we
397 have to supply a mode -- pertaining to the operand as a whole --
398 indicating how it's being used. */
/* Record in u the register uses of a reg/mem operand that is accessed
   in the given mode.  For the Mem form only the address mode's registers
   are recorded (always as reads, whatever mode is); for the Reg form the
   register itself is recorded with mode.
   NOTE(review): the switch/case lines are not visible in this excerpt,
   and the comment beginning "reg is read, written or modified" has lost
   its closing line here -- as shown it is unterminated and would swallow
   the addHRegUse call that follows.  Confirm against the full file. */
399 static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
402 /* Memory is read, written or modified. So we just want to
403 know the regs read by the amode. */
404 addRegUsage_X86AMode(u, op->Xrm.Mem.am);
407 /* reg is read, written or modified. Add it in the
409 addHRegUse(u, mode, op->Xrm.Reg.reg);
412 vpanic("addRegUsage_X86RM");
/* Apply the register remapping m to a reg/mem operand in place: the Mem
   form's address mode goes through mapRegs_X86AMode and the Reg form's
   register is replaced by its lookupHRegRemap result.
   NOTE(review): the braces and switch/case lines are not visible in this
   excerpt. */
416 static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
420 mapRegs_X86AMode(m, op->Xrm.Mem.am);
423 op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
426 vpanic("mapRegs_X86RM");
431 /* --------- Instructions. --------- */
433 HChar* showX86UnaryOp ( X86UnaryOp op ) {
435 case Xun_NOT: return "not";
436 case Xun_NEG: return "neg";
437 default: vpanic("showX86UnaryOp");
441 HChar* showX86AluOp ( X86AluOp op ) {
443 case Xalu_MOV: return "mov";
444 case Xalu_CMP: return "cmp";
445 case Xalu_ADD: return "add";
446 case Xalu_SUB: return "sub";
447 case Xalu_ADC: return "adc";
448 case Xalu_SBB: return "sbb";
449 case Xalu_AND: return "and";
450 case Xalu_OR: return "or";
451 case Xalu_XOR: return "xor";
452 case Xalu_MUL: return "mul";
453 default: vpanic("showX86AluOp");
457 HChar* showX86ShiftOp ( X86ShiftOp op ) {
459 case Xsh_SHL: return "shl";
460 case Xsh_SHR: return "shr";
461 case Xsh_SAR: return "sar";
462 default: vpanic("showX86ShiftOp");
466 HChar* showX86FpOp ( X86FpOp op ) {
468 case Xfp_ADD: return "add";
469 case Xfp_SUB: return "sub";
470 case Xfp_MUL: return "mul";
471 case Xfp_DIV: return "div";
472 case Xfp_SCALE: return "scale";
473 case Xfp_ATAN: return "atan";
474 case Xfp_YL2X: return "yl2x";
475 case Xfp_YL2XP1: return "yl2xp1";
476 case Xfp_PREM: return "prem";
477 case Xfp_PREM1: return "prem1";
478 case Xfp_SQRT: return "sqrt";
479 case Xfp_ABS: return "abs";
480 case Xfp_NEG: return "chs";
481 case Xfp_MOV: return "mov";
482 case Xfp_SIN: return "sin";
483 case Xfp_COS: return "cos";
484 case Xfp_TAN: return "tan";
485 case Xfp_ROUND: return "round";
486 case Xfp_2XM1: return "2xm1";
487 default: vpanic("showX86FpOp");
491 HChar* showX86SseOp ( X86SseOp op ) {
493 case Xsse_MOV: return "mov(?!)";
494 case Xsse_ADDF: return "add";
495 case Xsse_SUBF: return "sub";
496 case Xsse_MULF: return "mul";
497 case Xsse_DIVF: return "div";
498 case Xsse_MAXF: return "max";
499 case Xsse_MINF: return "min";
500 case Xsse_CMPEQF: return "cmpFeq";
501 case Xsse_CMPLTF: return "cmpFlt";
502 case Xsse_CMPLEF: return "cmpFle";
503 case Xsse_CMPUNF: return "cmpFun";
504 case Xsse_RCPF: return "rcp";
505 case Xsse_RSQRTF: return "rsqrt";
506 case Xsse_SQRTF: return "sqrt";
507 case Xsse_AND: return "and";
508 case Xsse_OR: return "or";
509 case Xsse_XOR: return "xor";
510 case Xsse_ANDN: return "andn";
511 case Xsse_ADD8: return "paddb";
512 case Xsse_ADD16: return "paddw";
513 case Xsse_ADD32: return "paddd";
514 case Xsse_ADD64: return "paddq";
515 case Xsse_QADD8U: return "paddusb";
516 case Xsse_QADD16U: return "paddusw";
517 case Xsse_QADD8S: return "paddsb";
518 case Xsse_QADD16S: return "paddsw";
519 case Xsse_SUB8: return "psubb";
520 case Xsse_SUB16: return "psubw";
521 case Xsse_SUB32: return "psubd";
522 case Xsse_SUB64: return "psubq";
523 case Xsse_QSUB8U: return "psubusb";
524 case Xsse_QSUB16U: return "psubusw";
525 case Xsse_QSUB8S: return "psubsb";
526 case Xsse_QSUB16S: return "psubsw";
527 case Xsse_MUL16: return "pmullw";
528 case Xsse_MULHI16U: return "pmulhuw";
529 case Xsse_MULHI16S: return "pmulhw";
530 case Xsse_AVG8U: return "pavgb";
531 case Xsse_AVG16U: return "pavgw";
532 case Xsse_MAX16S: return "pmaxw";
533 case Xsse_MAX8U: return "pmaxub";
534 case Xsse_MIN16S: return "pminw";
535 case Xsse_MIN8U: return "pminub";
536 case Xsse_CMPEQ8: return "pcmpeqb";
537 case Xsse_CMPEQ16: return "pcmpeqw";
538 case Xsse_CMPEQ32: return "pcmpeqd";
539 case Xsse_CMPGT8S: return "pcmpgtb";
540 case Xsse_CMPGT16S: return "pcmpgtw";
541 case Xsse_CMPGT32S: return "pcmpgtd";
542 case Xsse_SHL16: return "psllw";
543 case Xsse_SHL32: return "pslld";
544 case Xsse_SHL64: return "psllq";
545 case Xsse_SHR16: return "psrlw";
546 case Xsse_SHR32: return "psrld";
547 case Xsse_SHR64: return "psrlq";
548 case Xsse_SAR16: return "psraw";
549 case Xsse_SAR32: return "psrad";
550 case Xsse_PACKSSD: return "packssdw";
551 case Xsse_PACKSSW: return "packsswb";
552 case Xsse_PACKUSW: return "packuswb";
553 case Xsse_UNPCKHB: return "punpckhb";
554 case Xsse_UNPCKHW: return "punpckhw";
555 case Xsse_UNPCKHD: return "punpckhd";
556 case Xsse_UNPCKHQ: return "punpckhq";
557 case Xsse_UNPCKLB: return "punpcklb";
558 case Xsse_UNPCKLW: return "punpcklw";
559 case Xsse_UNPCKLD: return "punpckld";
560 case Xsse_UNPCKLQ: return "punpcklq";
561 default: vpanic("showX86SseOp");
/* Build a 32-bit ALU instruction with a register destination: stores
   op, src (reg/mem/imm) and dst in the Alu32R fields of a newly
   LibVEX_Alloc'd X86Instr.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
565 X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
566 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
568 i->Xin.Alu32R.op = op;
569 i->Xin.Alu32R.src = src;
570 i->Xin.Alu32R.dst = dst;
/* Build a 32-bit ALU instruction with a memory destination: stores op,
   src (reg/imm) and dst (address mode) in the Alu32M fields.  Xalu_MUL
   is rejected by the assertion at the end.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
573 X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
574 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
576 i->Xin.Alu32M.op = op;
577 i->Xin.Alu32M.src = src;
578 i->Xin.Alu32M.dst = dst;
579 vassert(op != Xalu_MUL);
/* Build a 32-bit shift of register dst by src.  A src of 0 is treated
   specially by the printer and by getRegUsage_X86Instr, which records a
   read of ECX in that case.
   NOTE(review): the tag store, the store of op, the return statement and
   the closing brace are not visible in this excerpt. */
582 X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
583 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
586 i->Xin.Sh32.src = src;
587 i->Xin.Sh32.dst = dst;
/* Build a 32-bit test of the reg/mem operand dst against the immediate
   imm32 (printed as "testl $imm,dst"; dst is only read).
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
590 X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
591 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
593 i->Xin.Test32.imm32 = imm32;
594 i->Xin.Test32.dst = dst;
597 X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
598 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
599 i->tag = Xin_Unary32;
600 i->Xin.Unary32.op = op;
601 i->Xin.Unary32.dst = dst;
/* Build a 32-bit load-effective-address instruction: the address
   described by am is computed into dst (am's registers are read, dst is
   written).
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
604 X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
605 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
607 i->Xin.Lea32.am = am;
608 i->Xin.Lea32.dst = dst;
/* Build a widening multiply by the reg/mem operand src; syned selects
   the signed ('s') or unsigned ('u') variant.  getRegUsage_X86Instr
   treats EAX as modified and EDX as written by this instruction.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
611 X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
612 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
614 i->Xin.MulL.syned = syned;
615 i->Xin.MulL.src = src;
/* Build a divide by the reg/mem operand src; syned selects the signed
   ('s') or unsigned ('u') variant.  getRegUsage_X86Instr treats both
   EAX and EDX as modified by this instruction.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
618 X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
619 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
621 i->Xin.Div.syned = syned;
622 i->Xin.Div.src = src;
/* Build a double-register shift (printed as "<op>dl amt,src,dst").
   Only Xsh_SHL and Xsh_SHR are accepted (asserted).  An amt of 0 means
   the count is taken from %cl: the printer shows %cl and
   getRegUsage_X86Instr records a read of ECX in that case.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
625 X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
626 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
628 i->Xin.Sh3232.op = op;
629 i->Xin.Sh3232.amt = amt;
630 i->Xin.Sh3232.src = src;
631 i->Xin.Sh3232.dst = dst;
632 vassert(op == Xsh_SHL || op == Xsh_SHR);
/* Build a push of the reg/mem/imm operand src (printed as "pushl src";
   getRegUsage_X86Instr records ESP as modified).
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
635 X86Instr* X86Instr_Push( X86RMI* src ) {
636 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
638 i->Xin.Push.src = src;
/* Build a (possibly conditional) call to the literal address target.
   regparms is the number of arguments passed in registers and must be
   in 0..3 (asserted); getRegUsage_X86Instr uses it to decide which of
   EAX/EDX/ECX are read.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
641 X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms ) {
642 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
644 i->Xin.Call.cond = cond;
645 i->Xin.Call.target = target;
646 i->Xin.Call.regparms = regparms;
647 vassert(regparms >= 0 && regparms <= 3);
/* Build a (possibly conditional) jump to the reg/imm destination dst,
   with jump kind jk.
   NOTE(review): only the stores of cond and dst are visible in this
   excerpt.  The tag store, the store of jk (which the printer and
   getRegUsage_X86Instr both read as Xin.Goto.jk), the return statement
   and the closing brace are missing from view. */
650 X86Instr* X86Instr_Goto ( IRJumpKind jk, X86CondCode cond, X86RI* dst ) {
651 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
653 i->Xin.Goto.cond = cond;
654 i->Xin.Goto.dst = dst;
/* Build a 32-bit conditional move of the reg/mem operand src into dst.
   cond must be a real condition: Xcc_ALWAYS is rejected (asserted).
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
658 X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
659 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
661 i->Xin.CMov32.cond = cond;
662 i->Xin.CMov32.src = src;
663 i->Xin.CMov32.dst = dst;
664 vassert(cond != Xcc_ALWAYS);
/* Build an extending load from memory src into dst.  szSmall is the
   size in bytes of the value loaded and must be 1 or 2 (asserted);
   syned selects sign extension ('s') over zero extension ('z') in the
   printed mnemonic.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
667 X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
668 X86AMode* src, HReg dst ) {
669 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
671 i->Xin.LoadEX.szSmall = szSmall;
672 i->Xin.LoadEX.syned = syned;
673 i->Xin.LoadEX.src = src;
674 i->Xin.LoadEX.dst = dst;
675 vassert(szSmall == 1 || szSmall == 2);
/* Build a narrow store of register src to memory dst.  sz is the store
   size in bytes and must be 1 or 2 (asserted).
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
678 X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
679 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
681 i->Xin.Store.sz = sz;
682 i->Xin.Store.src = src;
683 i->Xin.Store.dst = dst;
684 vassert(sz == 1 || sz == 2);
/* Build an instruction that sets dst according to condition cond
   (dst is written, not read).
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
687 X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
688 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
690 i->Xin.Set32.cond = cond;
691 i->Xin.Set32.dst = dst;
/* Build a 32-bit bit-scan from src into dst; isFwds selects the forward
   scan (printed "bsfl") over the reverse scan ("bsrl").
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
694 X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
695 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
697 i->Xin.Bsfr32.isFwds = isFwds;
698 i->Xin.Bsfr32.src = src;
699 i->Xin.Bsfr32.dst = dst;
/* Build a memory-fence instruction.  hwcaps records the host's
   capabilities; the assertion allows only the SSE1, SSE2 and SSE3
   capability bits to be set.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
702 X86Instr* X86Instr_MFence ( UInt hwcaps ) {
703 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
705 i->Xin.MFence.hwcaps = hwcaps;
706 vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1|VEX_HWCAPS_X86_SSE2
707 |VEX_HWCAPS_X86_SSE3)));
/* Build an atomic compare-and-swap on memory at addr (printed as
   "lock cmpxchg{b,w,l}").  sz is the operand size in bytes and must be
   1, 2 or 4 (asserted).  The registers are fixed: getRegUsage_X86Instr
   records EBX as read and EAX as modified.
   NOTE(review): the tag store, the store of sz (which the printer reads
   as Xin.ACAS.sz), the return statement and the closing brace are not
   visible in this excerpt. */
710 X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
711 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
713 i->Xin.ACAS.addr = addr;
715 vassert(sz == 4 || sz == 2 || sz == 1);
/* Build a double-width atomic compare-and-swap on memory at addr
   (printed as "lock cmpxchg8b").  The registers are fixed:
   getRegUsage_X86Instr records ECX and EBX as read, EDX and EAX as
   modified.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
718 X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
719 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
721 i->Xin.DACAS.addr = addr;
725 X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
726 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
727 i->tag = Xin_FpUnary;
728 i->Xin.FpUnary.op = op;
729 i->Xin.FpUnary.src = src;
730 i->Xin.FpUnary.dst = dst;
733 X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
734 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
735 i->tag = Xin_FpBinary;
736 i->Xin.FpBinary.op = op;
737 i->Xin.FpBinary.srcL = srcL;
738 i->Xin.FpBinary.srcR = srcR;
739 i->Xin.FpBinary.dst = dst;
/* Build a floating-point load (isLoad true: memory at addr into reg) or
   store (isLoad false: reg to memory at addr).  sz is the in-memory
   size in bytes and must be 4, 8 or 10 (asserted); the printer shows
   these as F, D and T respectively.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
742 X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
743 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
745 i->Xin.FpLdSt.isLoad = isLoad;
746 i->Xin.FpLdSt.sz = sz;
747 i->Xin.FpLdSt.reg = reg;
748 i->Xin.FpLdSt.addr = addr;
749 vassert(sz == 4 || sz == 8 || sz == 10);
752 X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
753 HReg reg, X86AMode* addr ) {
754 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
755 i->tag = Xin_FpLdStI;
756 i->Xin.FpLdStI.isLoad = isLoad;
757 i->Xin.FpLdStI.sz = sz;
758 i->Xin.FpLdStI.reg = reg;
759 i->Xin.FpLdStI.addr = addr;
760 vassert(sz == 2 || sz == 4 || sz == 8);
763 X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
764 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
765 i->tag = Xin_Fp64to32;
766 i->Xin.Fp64to32.src = src;
767 i->Xin.Fp64to32.dst = dst;
/* Build a floating-point conditional move of src into dst.  cond must
   be a real condition: Xcc_ALWAYS is rejected (asserted).
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
770 X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
771 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
773 i->Xin.FpCMov.cond = cond;
774 i->Xin.FpCMov.src = src;
775 i->Xin.FpCMov.dst = dst;
776 vassert(cond != Xcc_ALWAYS);
/* Build an instruction that loads the FPU control word from memory at
   addr (printed as "fldcw").
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
779 X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
780 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
782 i->Xin.FpLdCW.addr = addr;
785 X86Instr* X86Instr_FpStSW_AX ( void ) {
786 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
787 i->tag = Xin_FpStSW_AX;
/* Build a floating-point compare of srcL with srcR, result into dst.
   getRegUsage_X86Instr additionally records EAX as written by this
   instruction.
   NOTE(review): the tag store, the return statement and the closing
   brace are not visible in this excerpt. */
790 X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
791 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
793 i->Xin.FpCmp.srcL = srcL;
794 i->Xin.FpCmp.srcR = srcR;
795 i->Xin.FpCmp.dst = dst;
799 X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
800 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
801 i->tag = Xin_SseConst;
802 i->Xin.SseConst.con = con;
803 i->Xin.SseConst.dst = dst;
804 vassert(hregClass(dst) == HRcVec128);
807 X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
808 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
809 i->tag = Xin_SseLdSt;
810 i->Xin.SseLdSt.isLoad = isLoad;
811 i->Xin.SseLdSt.reg = reg;
812 i->Xin.SseLdSt.addr = addr;
815 X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
817 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
818 i->tag = Xin_SseLdzLO;
819 i->Xin.SseLdzLO.sz = toUChar(sz);
820 i->Xin.SseLdzLO.reg = reg;
821 i->Xin.SseLdzLO.addr = addr;
822 vassert(sz == 4 || sz == 8);
825 X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
826 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
827 i->tag = Xin_Sse32Fx4;
828 i->Xin.Sse32Fx4.op = op;
829 i->Xin.Sse32Fx4.src = src;
830 i->Xin.Sse32Fx4.dst = dst;
831 vassert(op != Xsse_MOV);
834 X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
835 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
836 i->tag = Xin_Sse32FLo;
837 i->Xin.Sse32FLo.op = op;
838 i->Xin.Sse32FLo.src = src;
839 i->Xin.Sse32FLo.dst = dst;
840 vassert(op != Xsse_MOV);
843 X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
844 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
845 i->tag = Xin_Sse64Fx2;
846 i->Xin.Sse64Fx2.op = op;
847 i->Xin.Sse64Fx2.src = src;
848 i->Xin.Sse64Fx2.dst = dst;
849 vassert(op != Xsse_MOV);
852 X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
853 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
854 i->tag = Xin_Sse64FLo;
855 i->Xin.Sse64FLo.op = op;
856 i->Xin.Sse64FLo.src = src;
857 i->Xin.Sse64FLo.dst = dst;
858 vassert(op != Xsse_MOV);
861 X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
862 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
863 i->tag = Xin_SseReRg;
864 i->Xin.SseReRg.op = op;
865 i->Xin.SseReRg.src = re;
866 i->Xin.SseReRg.dst = rg;
869 X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
870 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
871 i->tag = Xin_SseCMov;
872 i->Xin.SseCMov.cond = cond;
873 i->Xin.SseCMov.src = src;
874 i->Xin.SseCMov.dst = dst;
875 vassert(cond != Xcc_ALWAYS);
878 X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
879 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
880 i->tag = Xin_SseShuf;
881 i->Xin.SseShuf.order = order;
882 i->Xin.SseShuf.src = src;
883 i->Xin.SseShuf.dst = dst;
884 vassert(order >= 0 && order <= 0xFF);
/* Pretty-print one x86 host instruction with vex_printf.  mode64 must
   be False (asserted).  In the lines visible here, source operands are
   printed before destinations; operands go through ppHRegX86,
   ppX86AMode, ppX86RMI, ppX86RI and ppX86RM, and mnemonics come from
   the showX86* helpers.  An unrecognised instruction ends in
   vpanic("ppX86Instr").
   NOTE(review): the switch header, the case labels, the separator
   prints between operands, several else-branches and the per-case
   returns are not visible in this excerpt.  The grouping comments below
   are derived from which i->Xin.<variant> fields each run of lines
   reads. */
888 void ppX86Instr ( X86Instr* i, Bool mode64 ) {
889 vassert(mode64 == False);
/* Alu32R: ALU mnemonic plus 'l', then src (reg/mem/imm) and dst (reg). */
892 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
893 ppX86RMI(i->Xin.Alu32R.src);
895 ppHRegX86(i->Xin.Alu32R.dst);
/* Alu32M: ALU mnemonic plus 'l', then src (reg/imm) and dst (memory). */
898 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
899 ppX86RI(i->Xin.Alu32M.src);
901 ppX86AMode(i->Xin.Alu32M.dst);
/* Sh32: a src of 0 is tested for specially (its statement is not
   visible here); a count is printed as an immediate. */
904 vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
905 if (i->Xin.Sh32.src == 0)
908 vex_printf("$%d,", (Int)i->Xin.Sh32.src);
909 ppHRegX86(i->Xin.Sh32.dst);
/* Test32. */
912 vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
913 ppX86RM(i->Xin.Test32.dst);
/* Unary32. */
916 vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
917 ppHRegX86(i->Xin.Unary32.dst);
/* Lea32: address mode, then destination register. */
921 ppX86AMode(i->Xin.Lea32.am);
923 ppHRegX86(i->Xin.Lea32.dst);
/* MulL / Div: an 's' or 'u' prefix shows signedness. */
926 vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
927 ppX86RM(i->Xin.MulL.src);
930 vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
931 ppX86RM(i->Xin.Div.src);
/* Sh3232: an amt of 0 is shown as %cl, otherwise as an immediate. */
934 vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
935 if (i->Xin.Sh3232.amt == 0)
936 vex_printf(" %%cl,");
938 vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
939 ppHRegX86(i->Xin.Sh3232.src);
941 ppHRegX86(i->Xin.Sh3232.dst);
/* Push. */
944 vex_printf("pushl ");
945 ppX86RMI(i->Xin.Push.src);
/* Call: the condition suffix is omitted for Xcc_ALWAYS; the regparms
   count is shown in square brackets, followed by the target address. */
948 vex_printf("call%s[%d] ",
949 i->Xin.Call.cond==Xcc_ALWAYS
950 ? "" : showX86CondCode(i->Xin.Call.cond),
951 i->Xin.Call.regparms);
952 vex_printf("0x%x", i->Xin.Call.target);
/* Goto: shown as the sequence it stands for.  A conditional jump is
   wrapped in "if (%eflags.<cc>) { ... }"; jump kinds other than Boring,
   Call and Ret first load the kind into %ebp; then the destination goes
   to %eax and control transfers via %edx to the dispatcher. */
955 if (i->Xin.Goto.cond != Xcc_ALWAYS) {
956 vex_printf("if (%%eflags.%s) { ",
957 showX86CondCode(i->Xin.Goto.cond));
959 if (i->Xin.Goto.jk != Ijk_Boring
960 && i->Xin.Goto.jk != Ijk_Call
961 && i->Xin.Goto.jk != Ijk_Ret) {
962 vex_printf("movl $");
963 ppIRJumpKind(i->Xin.Goto.jk);
964 vex_printf(",%%ebp ; ");
967 ppX86RI(i->Xin.Goto.dst);
968 vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx");
969 if (i->Xin.Goto.cond != Xcc_ALWAYS) {
/* CMov32. */
974 vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
975 ppX86RM(i->Xin.CMov32.src);
977 ppHRegX86(i->Xin.CMov32.dst);
/* LoadEX: 's'/'z' for sign/zero extension, 'b'/'w' for the 1- or
   2-byte source size. */
980 vex_printf("mov%c%cl ",
981 i->Xin.LoadEX.syned ? 's' : 'z',
982 i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
983 ppX86AMode(i->Xin.LoadEX.src);
985 ppHRegX86(i->Xin.LoadEX.dst);
/* Store: 'b' for a 1-byte store, 'w' otherwise. */
988 vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
989 ppHRegX86(i->Xin.Store.src);
991 ppX86AMode(i->Xin.Store.dst);
/* Set32. */
994 vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
995 ppHRegX86(i->Xin.Set32.dst);
/* Bsfr32: "bsfl" when scanning forwards, "bsrl" otherwise. */
998 vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
999 ppHRegX86(i->Xin.Bsfr32.src);
1001 ppHRegX86(i->Xin.Bsfr32.dst);
/* MFence: shown together with the recorded hardware capabilities. */
1004 vex_printf("mfence(%s)",
1005 LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
/* ACAS: size suffix b/w/l for 1/2/other; fixed registers are shown
   literally. */
1008 vex_printf("lock cmpxchg%c ",
1009 i->Xin.ACAS.sz==1 ? 'b'
1010 : i->Xin.ACAS.sz==2 ? 'w' : 'l');
1011 vex_printf("{%%eax->%%ebx},");
1012 ppX86AMode(i->Xin.ACAS.addr);
/* DACAS. */
1015 vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
1016 ppX86AMode(i->Xin.DACAS.addr);
/* FpUnary / FpBinary: mnemonic is "g<op>D". */
1019 vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
1020 ppHRegX86(i->Xin.FpUnary.src);
1022 ppHRegX86(i->Xin.FpUnary.dst);
1025 vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
1026 ppHRegX86(i->Xin.FpBinary.srcL);
1028 ppHRegX86(i->Xin.FpBinary.srcR);
1030 ppHRegX86(i->Xin.FpBinary.dst);
/* FpLdSt: size letter is T for 10 bytes, D for 8, F otherwise.  A load
   prints address then register; a store prints register then address. */
1033 if (i->Xin.FpLdSt.isLoad) {
1034 vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
1035 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
1036 ppX86AMode(i->Xin.FpLdSt.addr);
1038 ppHRegX86(i->Xin.FpLdSt.reg);
1040 vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
1041 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
1042 ppHRegX86(i->Xin.FpLdSt.reg);
1044 ppX86AMode(i->Xin.FpLdSt.addr);
/* FpLdStI: size suffix is "ll" for 8 bytes, "l" for 4, "w" otherwise. */
1048 if (i->Xin.FpLdStI.isLoad) {
1049 vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
1050 i->Xin.FpLdStI.sz==4 ? "l" : "w");
1051 ppX86AMode(i->Xin.FpLdStI.addr);
1053 ppHRegX86(i->Xin.FpLdStI.reg);
1055 vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
1056 i->Xin.FpLdStI.sz==4 ? "l" : "w");
1057 ppHRegX86(i->Xin.FpLdStI.reg);
1059 ppX86AMode(i->Xin.FpLdStI.addr);
/* Fp64to32. */
1063 vex_printf("gdtof ");
1064 ppHRegX86(i->Xin.Fp64to32.src);
1066 ppHRegX86(i->Xin.Fp64to32.dst);
/* FpCMov. */
1069 vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
1070 ppHRegX86(i->Xin.FpCMov.src);
1072 ppHRegX86(i->Xin.FpCMov.dst);
/* FpLdCW. */
1075 vex_printf("fldcw ");
1076 ppX86AMode(i->Xin.FpLdCW.addr);
/* FpStSW_AX. */
1079 vex_printf("fstsw %%ax");
/* FpCmp. */
1082 vex_printf("gcmp ");
1083 ppHRegX86(i->Xin.FpCmp.srcL);
1085 ppHRegX86(i->Xin.FpCmp.srcR);
1087 ppHRegX86(i->Xin.FpCmp.dst);
/* SseConst: the 16-bit constant descriptor is shown in hex. */
1090 vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
1091 ppHRegX86(i->Xin.SseConst.dst);
/* SseLdSt: a load prints address then register, a store the reverse. */
1094 vex_printf("movups ");
1095 if (i->Xin.SseLdSt.isLoad) {
1096 ppX86AMode(i->Xin.SseLdSt.addr);
1098 ppHRegX86(i->Xin.SseLdSt.reg);
1100 ppHRegX86(i->Xin.SseLdSt.reg);
1102 ppX86AMode(i->Xin.SseLdSt.addr);
/* SseLdzLO: "movss" for a 4-byte load, "movsd" otherwise. */
1106 vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
1107 ppX86AMode(i->Xin.SseLdzLO.addr);
1109 ppHRegX86(i->Xin.SseLdzLO.reg);
/* Sse32Fx4 / Sse32FLo / Sse64Fx2 / Sse64FLo: the op stem gets a
   ps / ss / pd / sd suffix respectively. */
1112 vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
1113 ppHRegX86(i->Xin.Sse32Fx4.src);
1115 ppHRegX86(i->Xin.Sse32Fx4.dst);
1118 vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
1119 ppHRegX86(i->Xin.Sse32FLo.src);
1121 ppHRegX86(i->Xin.Sse32FLo.dst);
1124 vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
1125 ppHRegX86(i->Xin.Sse64Fx2.src);
1127 ppHRegX86(i->Xin.Sse64Fx2.dst);
1130 vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
1131 ppHRegX86(i->Xin.Sse64FLo.src);
1133 ppHRegX86(i->Xin.Sse64FLo.dst);
/* SseReRg: the op name is printed without a suffix. */
1136 vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
1137 ppHRegX86(i->Xin.SseReRg.src);
1139 ppHRegX86(i->Xin.SseReRg.dst);
/* SseCMov. */
1142 vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
1143 ppHRegX86(i->Xin.SseCMov.src);
1145 ppHRegX86(i->Xin.SseCMov.dst);
/* SseShuf. */
1148 vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
1149 ppHRegX86(i->Xin.SseShuf.src);
1151 ppHRegX86(i->Xin.SseShuf.dst);
/* No known instruction kind matched. */
1155 vpanic("ppX86Instr");
1159 /* --------- Helpers for register allocation. --------- */
1161 void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
1164 vassert(mode64 == False);
1168 addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
1169 if (i->Xin.Alu32R.op == Xalu_MOV) {
1170 addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
1173 if (i->Xin.Alu32R.op == Xalu_CMP) {
1174 addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
1177 addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
1180 addRegUsage_X86RI(u, i->Xin.Alu32M.src);
1181 addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
1184 addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
1185 if (i->Xin.Sh32.src == 0)
1186 addHRegUse(u, HRmRead, hregX86_ECX());
1189 addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
1192 addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
1195 addRegUsage_X86AMode(u, i->Xin.Lea32.am);
1196 addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
1199 addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
1200 addHRegUse(u, HRmModify, hregX86_EAX());
1201 addHRegUse(u, HRmWrite, hregX86_EDX());
1204 addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
1205 addHRegUse(u, HRmModify, hregX86_EAX());
1206 addHRegUse(u, HRmModify, hregX86_EDX());
1209 addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
1210 addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
1211 if (i->Xin.Sh3232.amt == 0)
1212 addHRegUse(u, HRmRead, hregX86_ECX());
1215 addRegUsage_X86RMI(u, i->Xin.Push.src);
1216 addHRegUse(u, HRmModify, hregX86_ESP());
1219 /* This is a bit subtle. */
1220 /* First off, claim it trashes all the caller-saved regs
1221 which fall within the register allocator's jurisdiction.
1222 These I believe to be %eax %ecx %edx and all the xmm
1224 addHRegUse(u, HRmWrite, hregX86_EAX());
1225 addHRegUse(u, HRmWrite, hregX86_ECX());
1226 addHRegUse(u, HRmWrite, hregX86_EDX());
1227 addHRegUse(u, HRmWrite, hregX86_XMM0());
1228 addHRegUse(u, HRmWrite, hregX86_XMM1());
1229 addHRegUse(u, HRmWrite, hregX86_XMM2());
1230 addHRegUse(u, HRmWrite, hregX86_XMM3());
1231 addHRegUse(u, HRmWrite, hregX86_XMM4());
1232 addHRegUse(u, HRmWrite, hregX86_XMM5());
1233 addHRegUse(u, HRmWrite, hregX86_XMM6());
1234 addHRegUse(u, HRmWrite, hregX86_XMM7());
1235 /* Now we have to state any parameter-carrying registers
1236 which might be read. This depends on the regparmness. */
1237 switch (i->Xin.Call.regparms) {
1238 case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
1239 case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
1240 case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
1242 default: vpanic("getRegUsage_X86Instr:Call:regparms");
1244 /* Finally, there is the issue that the insn trashes a
1245 register because the literal target address has to be
1246 loaded into a register. Fortunately, for the 0/1/2
1247 regparm case, we can use EAX, EDX and ECX respectively, so
1248 this does not cause any further damage. For the 3-regparm
1249 case, we'll have to choose another register arbitrarily --
1250 since A, D and C are used for parameters -- and so we might
1251 as well choose EDI. */
1252 if (i->Xin.Call.regparms == 3)
1253 addHRegUse(u, HRmWrite, hregX86_EDI());
1254 /* Upshot of this is that the assembler really must observe
1255 the here-stated convention of which register to use as an
1256 address temporary, depending on the regparmness: 0==EAX,
1257 1==EDX, 2==ECX, 3==EDI. */
1260 addRegUsage_X86RI(u, i->Xin.Goto.dst);
1261 addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */
1262 addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */
1263 if (i->Xin.Goto.jk != Ijk_Boring
1264 && i->Xin.Goto.jk != Ijk_Call
1265 && i->Xin.Goto.jk != Ijk_Ret)
1266 /* note, this is irrelevant since ebp is not actually
1267 available to the allocator. But still .. */
1268 addHRegUse(u, HRmWrite, hregX86_EBP());
1271 addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
1272 addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
1275 addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
1276 addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
1279 addHRegUse(u, HRmRead, i->Xin.Store.src);
1280 addRegUsage_X86AMode(u, i->Xin.Store.dst);
1283 addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
1286 addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
1287 addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
1292 addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
1293 addHRegUse(u, HRmRead, hregX86_EBX());
1294 addHRegUse(u, HRmModify, hregX86_EAX());
1297 addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
1298 addHRegUse(u, HRmRead, hregX86_ECX());
1299 addHRegUse(u, HRmRead, hregX86_EBX());
1300 addHRegUse(u, HRmModify, hregX86_EDX());
1301 addHRegUse(u, HRmModify, hregX86_EAX());
1304 addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
1305 addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
1308 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
1309 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
1310 addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
1313 addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
1314 addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
1318 addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
1319 addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
1320 i->Xin.FpLdStI.reg);
1323 addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
1324 addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
1327 addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
1328 addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
1331 addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
1334 addHRegUse(u, HRmWrite, hregX86_EAX());
1337 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
1338 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
1339 addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
1340 addHRegUse(u, HRmWrite, hregX86_EAX());
1343 addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
1344 addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
1345 i->Xin.SseLdSt.reg);
1348 addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
1349 addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
1352 addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
1355 vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
1356 unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
1357 || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
1358 || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
1359 addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
1360 addHRegUse(u, unary ? HRmWrite : HRmModify,
1361 i->Xin.Sse32Fx4.dst);
1364 vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
1365 unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
1366 || i->Xin.Sse32FLo.op == Xsse_RSQRTF
1367 || i->Xin.Sse32FLo.op == Xsse_SQRTF );
1368 addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
1369 addHRegUse(u, unary ? HRmWrite : HRmModify,
1370 i->Xin.Sse32FLo.dst);
1373 vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
1374 unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
1375 || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
1376 || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
1377 addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
1378 addHRegUse(u, unary ? HRmWrite : HRmModify,
1379 i->Xin.Sse64Fx2.dst);
1382 vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
1383 unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
1384 || i->Xin.Sse64FLo.op == Xsse_RSQRTF
1385 || i->Xin.Sse64FLo.op == Xsse_SQRTF );
1386 addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
1387 addHRegUse(u, unary ? HRmWrite : HRmModify,
1388 i->Xin.Sse64FLo.dst);
1391 if (i->Xin.SseReRg.op == Xsse_XOR
1392 && i->Xin.SseReRg.src == i->Xin.SseReRg.dst) {
1393 /* reg-alloc needs to understand 'xor r,r' as a write of r */
1394 /* (as opposed to a rite of passage :-) */
1395 addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
1397 addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
1398 addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
1399 ? HRmWrite : HRmModify,
1400 i->Xin.SseReRg.dst);
1404 addHRegUse(u, HRmRead, i->Xin.SseCMov.src);
1405 addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
1408 addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
1409 addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
1412 ppX86Instr(i, False);
1413 vpanic("getRegUsage_X86Instr");
/* Remap a single register in place: *r is overwritten with whatever
   lookupHRegRemap(m, *r) returns for its current value. */
1418 static void mapReg( HRegRemap* m, HReg* r )
1420 *r = lookupHRegRemap(m, *r);
/* Apply the register remapping m to the operands of instruction i,
   rewriting them in place.  Plain HReg fields go through mapReg;
   compound operands (RMI, RI, RM and AMode) are handed to their own
   mapRegs_X86* helpers.  mode64 must be False (asserted).
   NOTE(review): the per-tag case labels are not visible in this
   excerpt; the groupings noted below are read off the Xin.<kind>
   field names -- confirm against the full switch. */
1423 void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
1425 vassert(mode64 == False);
/* Integer ALU, shift, test, unary, lea, mul/div, double-shift, push. */
1428 mapRegs_X86RMI(m, i->Xin.Alu32R.src);
1429 mapReg(m, &i->Xin.Alu32R.dst);
1432 mapRegs_X86RI(m, i->Xin.Alu32M.src);
1433 mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
1436 mapReg(m, &i->Xin.Sh32.dst);
1439 mapRegs_X86RM(m, i->Xin.Test32.dst);
1442 mapReg(m, &i->Xin.Unary32.dst);
1445 mapRegs_X86AMode(m, i->Xin.Lea32.am);
1446 mapReg(m, &i->Xin.Lea32.dst);
1449 mapRegs_X86RM(m, i->Xin.MulL.src);
1452 mapRegs_X86RM(m, i->Xin.Div.src);
1455 mapReg(m, &i->Xin.Sh3232.src);
1456 mapReg(m, &i->Xin.Sh3232.dst);
1459 mapRegs_X86RMI(m, i->Xin.Push.src);
/* Control flow, conditional move, loads/stores, set, bit scan. */
1464 mapRegs_X86RI(m, i->Xin.Goto.dst);
1467 mapRegs_X86RM(m, i->Xin.CMov32.src);
1468 mapReg(m, &i->Xin.CMov32.dst);
1471 mapRegs_X86AMode(m, i->Xin.LoadEX.src);
1472 mapReg(m, &i->Xin.LoadEX.dst);
1475 mapReg(m, &i->Xin.Store.src);
1476 mapRegs_X86AMode(m, i->Xin.Store.dst);
1479 mapReg(m, &i->Xin.Set32.dst);
1482 mapReg(m, &i->Xin.Bsfr32.src);
1483 mapReg(m, &i->Xin.Bsfr32.dst);
/* ACAS / DACAS: only the address mode is remapped here. */
1488 mapRegs_X86AMode(m, i->Xin.ACAS.addr);
1491 mapRegs_X86AMode(m, i->Xin.DACAS.addr);
/* x87-style floating point operands. */
1494 mapReg(m, &i->Xin.FpUnary.src);
1495 mapReg(m, &i->Xin.FpUnary.dst);
1498 mapReg(m, &i->Xin.FpBinary.srcL);
1499 mapReg(m, &i->Xin.FpBinary.srcR);
1500 mapReg(m, &i->Xin.FpBinary.dst);
1503 mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
1504 mapReg(m, &i->Xin.FpLdSt.reg);
1507 mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
1508 mapReg(m, &i->Xin.FpLdStI.reg);
1511 mapReg(m, &i->Xin.Fp64to32.src);
1512 mapReg(m, &i->Xin.Fp64to32.dst);
1515 mapReg(m, &i->Xin.FpCMov.src);
1516 mapReg(m, &i->Xin.FpCMov.dst);
1519 mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
1524 mapReg(m, &i->Xin.FpCmp.srcL);
1525 mapReg(m, &i->Xin.FpCmp.srcR);
1526 mapReg(m, &i->Xin.FpCmp.dst);
/* SSE operands. */
1529 mapReg(m, &i->Xin.SseConst.dst);
1532 mapReg(m, &i->Xin.SseLdSt.reg);
1533 mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
1536 mapReg(m, &i->Xin.SseLdzLO.reg);
1537 mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
1540 mapReg(m, &i->Xin.Sse32Fx4.src);
1541 mapReg(m, &i->Xin.Sse32Fx4.dst);
1544 mapReg(m, &i->Xin.Sse32FLo.src);
1545 mapReg(m, &i->Xin.Sse32FLo.dst);
1548 mapReg(m, &i->Xin.Sse64Fx2.src);
1549 mapReg(m, &i->Xin.Sse64Fx2.dst);
1552 mapReg(m, &i->Xin.Sse64FLo.src);
1553 mapReg(m, &i->Xin.Sse64FLo.dst);
1556 mapReg(m, &i->Xin.SseReRg.src);
1557 mapReg(m, &i->Xin.SseReRg.dst);
1560 mapReg(m, &i->Xin.SseCMov.src);
1561 mapReg(m, &i->Xin.SseCMov.dst);
1564 mapReg(m, &i->Xin.SseShuf.src);
1565 mapReg(m, &i->Xin.SseShuf.dst);
/* Print the instruction, then panic.  NOTE(review): presumably the
   fall-through for an instruction tag with no case -- confirm. */
1568 ppX86Instr(i, mode64);
1569 vpanic("mapRegs_X86Instr");
1573 /* Figure out if i represents a reg-reg move, and if so assign the
1574 source and destination to *src and *dst. If in doubt say No. Used
1575 by the register allocator to do move coalescing.
/* Three move forms are examined, keyed on i->tag and the op field:
   Xin_Alu32R with Xalu_MOV and a register (Xrmi_Reg) source,
   Xin_FpUnary with Xfp_MOV, and Xin_SseReRg with Xsse_MOV.  For each
   form the source and destination registers are stored through src
   and dst.
   NOTE(review): the return statements are not visible in this
   excerpt; presumably True follows each pair of stores and False is
   returned on every other path -- confirm. */
1577 Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
1579 /* Moves between integer regs */
1580 if (i->tag == Xin_Alu32R) {
1581 if (i->Xin.Alu32R.op != Xalu_MOV)
1583 if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
1585 *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
1586 *dst = i->Xin.Alu32R.dst;
1589 /* Moves between FP regs */
1590 if (i->tag == Xin_FpUnary) {
1591 if (i->Xin.FpUnary.op != Xfp_MOV)
1593 *src = i->Xin.FpUnary.src;
1594 *dst = i->Xin.FpUnary.dst;
1597 if (i->tag == Xin_SseReRg) {
1598 if (i->Xin.SseReRg.op != Xsse_MOV)
1600 *src = i->Xin.SseReRg.src;
1601 *dst = i->Xin.SseReRg.dst;
1608 /* Generate x86 spill/reload instructions under the direction of the
1609 register allocator. Note it's critical these don't write the
/* Build, in *i1, the instruction that stores real register rreg to
   its spill slot.  Preconditions (asserted): offsetB >= 0, rreg is
   not virtual, mode64 is False.  The instruction is selected on
   hregClass(rreg): an Alu32M MOV of the register to memory, an FpLdSt
   store with size argument 10, or an SseLdSt store.  For any other
   class the class is printed and the function panics.
   NOTE(review): no write to *i2 is visible in this excerpt, and the
   case labels are on lines not shown -- confirm both. */
1612 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1613 HReg rreg, Int offsetB, Bool mode64 )
1616 vassert(offsetB >= 0);
1617 vassert(!hregIsVirtual(rreg));
1618 vassert(mode64 == False);
/* The spill slot is addressed as offsetB(%ebp). */
1620 am = X86AMode_IR(offsetB, hregX86_EBP());
1621 switch (hregClass(rreg)) {
1623 *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
1626 *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
1629 *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
1632 ppHRegClass(hregClass(rreg));
1633 vpanic("genSpill_X86: unimplemented regclass");
/* Build, in *i1, the instruction that loads real register rreg back
   from its spill slot.  Preconditions (asserted): offsetB >= 0, rreg
   is not virtual, mode64 is False.  The instruction is selected on
   hregClass(rreg): an Alu32R MOV from memory into the register, an
   FpLdSt load with size argument 10, or an SseLdSt load.  For any
   other class the class is printed and the function panics.
   NOTE(review): no write to *i2 is visible in this excerpt, and the
   case labels are on lines not shown -- confirm both. */
1637 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1638 HReg rreg, Int offsetB, Bool mode64 )
1641 vassert(offsetB >= 0);
1642 vassert(!hregIsVirtual(rreg));
1643 vassert(mode64 == False);
/* The spill slot is addressed as offsetB(%ebp). */
1645 am = X86AMode_IR(offsetB, hregX86_EBP());
1646 switch (hregClass(rreg)) {
1648 *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
1651 *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
1654 *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
1657 ppHRegClass(hregClass(rreg));
1658 vpanic("genReload_X86: unimplemented regclass");
1662 /* The given instruction reads the specified vreg exactly once, and
1663 that vreg is currently located at the given spill offset. If
1664 possible, return a variant of the instruction which instead
1665 references the spill slot directly.
   The slot is addressed as spill_off(%ebp); spill_off is asserted to
   lie in [0, 10000).  Forms rewritten:
     - Alu32R MOV/OR/XOR whose register source is vreg (dst must not
       be vreg, asserted): the source becomes a memory operand.
     - Alu32R CMP of an immediate against dst == vreg: becomes an
       Alu32M with the same immediate and the slot as destination.
     - Push of vreg: becomes a push from memory.
     - CMov32 whose register source is vreg (dst must not be vreg,
       asserted): the source becomes a memory operand.
     - Test32 on vreg: becomes a Test32 on the slot.
   NOTE(review): the value returned when no form matches is on a line
   not visible in this excerpt -- confirm. */
1667 X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
1669 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
1671 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
1672 Convert to: src=RMI_Mem, dst=Reg
1674 if (i->tag == Xin_Alu32R
1675 && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
1676 || i->Xin.Alu32R.op == Xalu_XOR)
1677 && i->Xin.Alu32R.src->tag == Xrmi_Reg
1678 && i->Xin.Alu32R.src->Xrmi.Reg.reg == vreg) {
1679 vassert(i->Xin.Alu32R.dst != vreg);
1680 return X86Instr_Alu32R(
1682 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
1687 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
1688 Convert to: src=RI_Imm, dst=Mem
1690 if (i->tag == Xin_Alu32R
1691 && (i->Xin.Alu32R.op == Xalu_CMP)
1692 && i->Xin.Alu32R.src->tag == Xrmi_Imm
1693 && i->Xin.Alu32R.dst == vreg) {
1694 return X86Instr_Alu32M(
1696 X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
1697 X86AMode_IR( spill_off, hregX86_EBP())
1701 /* Deal with form: Push(RMI_Reg)
1702 Convert to: Push(RMI_Mem)
1704 if (i->tag == Xin_Push
1705 && i->Xin.Push.src->tag == Xrmi_Reg
1706 && i->Xin.Push.src->Xrmi.Reg.reg == vreg) {
1707 return X86Instr_Push(
1708 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
1712 /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
1713 Convert to CMov32(RM_Mem, dst) */
1714 if (i->tag == Xin_CMov32
1715 && i->Xin.CMov32.src->tag == Xrm_Reg
1716 && i->Xin.CMov32.src->Xrm.Reg.reg == vreg) {
1717 vassert(i->Xin.CMov32.dst != vreg);
1718 return X86Instr_CMov32(
1720 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
1725 /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
1726 if (i->tag == Xin_Test32
1727 && i->Xin.Test32.dst->tag == Xrm_Reg
1728 && i->Xin.Test32.dst->Xrm.Reg.reg == vreg) {
1729 return X86Instr_Test32(
1730 i->Xin.Test32.imm32,
1731 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
1739 /* --------- The x86 assembler (bleh.) --------- */
/* Checks that r is a real (non-virtual) register of class HRcInt32.
   NOTE(review): the statements that compute and return the UChar
   result are on lines not visible in this excerpt; callers feed the
   result into mkModRegRM and opcode-plus-register bytes, so it is
   presumably the hardware register number -- confirm. */
1741 static UChar iregNo ( HReg r )
1744 vassert(hregClass(r) == HRcInt32);
1745 vassert(!hregIsVirtual(r));
/* Checks that r is a real (non-virtual) register of class HRcFlt64.
   NOTE(review): the statements that compute and return the UInt
   result are on lines not visible in this excerpt -- presumably the
   register's number; confirm. */
1751 static UInt fregNo ( HReg r )
1754 vassert(hregClass(r) == HRcFlt64);
1755 vassert(!hregIsVirtual(r));
/* Checks that r is a real (non-virtual) register of class HRcVec128.
   NOTE(review): the statements that compute and return the UInt
   result are on lines not visible in this excerpt -- presumably the
   register's number; confirm. */
1761 static UInt vregNo ( HReg r )
1764 vassert(hregClass(r) == HRcVec128);
1765 vassert(!hregIsVirtual(r));
/* Pack a mod-reg-rm byte.  mod is masked to two bits and placed in
   bits 7:6 of the result.
   NOTE(review): the terms that merge reg and regmem continue on lines
   not visible in this excerpt -- confirm their masks and positions. */
1771 static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
1773 return toUChar( ((mod & 3) << 6)
/* Pack a scale-index-base byte.  shift is masked to two bits and
   placed in bits 7:6; regindex is masked to three bits and placed in
   bits 5:3.
   NOTE(review): the term that merges regbase is on a line not visible
   in this excerpt -- confirm. */
1778 static UChar mkSIB ( Int shift, Int regindex, Int regbase )
1780 return toUChar( ((shift & 3) << 6)
1781 | ((regindex & 7) << 3)
/* Store the 32-bit value w32 at p as four bytes, least significant
   byte first, advancing p past each byte written.  Callers assign the
   result back to their own cursor (p = emit32(p, ...)).
   NOTE(review): the return statement is on a line not visible in this
   excerpt -- presumably it returns the advanced p; confirm. */
1785 static UChar* emit32 ( UChar* p, UInt w32 )
1787 *p++ = toUChar( w32 & 0x000000FF);
1788 *p++ = toUChar((w32 >> 8) & 0x000000FF);
1789 *p++ = toUChar((w32 >> 16) & 0x000000FF);
1790 *p++ = toUChar((w32 >> 24) & 0x000000FF);
1794 /* Does a sign-extend of the lowest 8 bits give
1795 the original number?  The test shifts the value left by 24 and
   back right by 24 and compares the result with the unshifted value.
   The emitters use the answer to choose between 8-bit and 32-bit
   immediate/displacement encodings.
   NOTE(review): the declaration of i32 is on a line not visible in
   this excerpt; presumably a signed copy of w32, so that the right
   shift sign-extends -- confirm. */
1796 static Bool fits8bits ( UInt w32 )
1799 return toBool(i32 == ((i32 << 24) >> 24));
1803 /* Forming mod-reg-rm bytes and scale-index-base bytes.
1805 greg, 0(ereg) | ereg != ESP && ereg != EBP
1808 greg, d8(ereg) | ereg != ESP
1811 greg, d32(ereg) | ereg != ESP
1814 greg, d8(%esp) = 01 greg 100, 0x24, d8
1816 -----------------------------------------------
1818 greg, d8(base,index,scale)
1820 = 01 greg 100, scale index base, d8
1822 greg, d32(base,index,scale)
1824 = 10 greg 100, scale index base, d32
/* Emit at p the mod-reg-rm byte for a memory operand am, plus any SIB
   byte and displacement, with greg in the reg field.  Two amode kinds
   are handled, Xam_IR (displacement + base register) and Xam_IRRS
   (displacement + base + scaled index); anything else panics.
   NOTE(review): the return statements and closing braces are on lines
   not visible in this excerpt; presumably each successful case
   returns the advanced p -- confirm. */
1826 static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
1828 if (am->tag == Xam_IR) {
/* Zero displacement, base neither %esp nor %ebp: mod == 0 and no
   displacement byte. */
1829 if (am->Xam.IR.imm == 0
1830 && am->Xam.IR.reg != hregX86_ESP()
1831 && am->Xam.IR.reg != hregX86_EBP() ) {
1832 *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
/* Displacement fits in 8 bits, base is not %esp: mod == 1 followed by
   a one-byte displacement. */
1835 if (fits8bits(am->Xam.IR.imm)
1836 && am->Xam.IR.reg != hregX86_ESP()) {
1837 *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
1838 *p++ = toUChar(am->Xam.IR.imm & 0xFF);
/* Any other displacement, base is not %esp: mod == 2 followed by a
   four-byte displacement. */
1841 if (am->Xam.IR.reg != hregX86_ESP()) {
1842 *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
1843 p = emit32(p, am->Xam.IR.imm);
/* Base is %esp with an 8-bit displacement: the rm field is 4.
   NOTE(review): a byte appears to be emitted between the two visible
   stores on a line not shown -- confirm. */
1846 if (am->Xam.IR.reg == hregX86_ESP()
1847 && fits8bits(am->Xam.IR.imm)) {
1848 *p++ = mkModRegRM(1, iregNo(greg), 4);
1850 *p++ = toUChar(am->Xam.IR.imm & 0xFF);
/* No visible case accepts an %esp base whose displacement does not
   fit in 8 bits. */
1854 vpanic("doAMode_M: can't emit amode IR");
1857 if (am->tag == Xam_IRRS) {
/* The rm field is 4 and a SIB byte follows.  Both visible cases
   require that the index register is not %esp. */
1858 if (fits8bits(am->Xam.IRRS.imm)
1859 && am->Xam.IRRS.index != hregX86_ESP()) {
1860 *p++ = mkModRegRM(1, iregNo(greg), 4);
1861 *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
1863 *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
1866 if (am->Xam.IRRS.index != hregX86_ESP()) {
1867 *p++ = mkModRegRM(2, iregNo(greg), 4);
1868 *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
1870 p = emit32(p, am->Xam.IRRS.imm);
1874 vpanic("doAMode_M: can't emit amode IRRS");
1877 vpanic("doAMode_M: unknown amode");
1882 /* Emit a mod-reg-rm byte when the rm bit denotes a reg.  The byte
   is built with mod == 3, greg in the reg field and ereg in the rm
   field, and is stored at p, which is then advanced.  Both registers
   go through iregNo, which asserts they are real HRcInt32 registers.
   NOTE(review): the return statement is on a line not visible in this
   excerpt -- presumably it returns the advanced p; confirm. */
1883 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
1885 *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
1890 /* Emit ffree %st(7) at p.  Callers assign the result back to
   their cursor (p = do_ffree_st7(p)).
   NOTE(review): the function body is on lines not visible in this
   excerpt -- confirm the bytes emitted. */
1891 static UChar* do_ffree_st7 ( UChar* p )
1898 /* Emit fstp %st(i), 1 <= i <= 7.  The range of i is asserted.  The
   visible store writes the byte 0xD8+i.
   NOTE(review): the leading opcode byte and the return statement are
   on lines not visible in this excerpt -- confirm. */
1899 static UChar* do_fstp_st ( UChar* p, Int i )
1901 vassert(1 <= i && i <= 7);
1903 *p++ = toUChar(0xD8+i);
1907 /* Emit fld %st(i), 0 <= i <= 6.  The range of i is asserted.  The
   visible store writes the byte 0xC0+i.
   NOTE(review): the leading opcode byte and the return statement are
   on lines not visible in this excerpt -- confirm. */
1908 static UChar* do_fld_st ( UChar* p, Int i )
1910 vassert(0 <= i && i <= 6);
1912 *p++ = toUChar(0xC0+i);
1916 /* Emit f<op> %st(0).  Every op with a visible encoding is two
   bytes, the first being 0xD9.  Xfp_MOV emits nothing.  Xfp_TAN is
   bracketed: %st(7) is freed first because fptan pushes 1.0, and
   fincstp is emitted after fptan.  An op with no case panics.
   NOTE(review): the switch header and the return statement are on
   lines not visible in this excerpt -- confirm. */
1917 static UChar* do_fop1_st ( UChar* p, X86FpOp op )
1920 case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
1921 case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
1922 case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
1923 case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
1924 case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
1925 case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
1926 case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
1927 case Xfp_MOV: break;
1928 case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */
1929 *p++ = 0xD9; *p++ = 0xF2; /* fptan */
1930 *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
1932 default: vpanic("do_fop2_st: unknown op");
1937 /* Emit f<op> %st(i), 1 <= i <= 5.  The op is translated to a
   sub-opcode (ADD 0, MUL 1, SUB 4, DIV 6); any other op panics.  The
   sub-opcode and i are then wrapped as integer registers by the local
   fake() macro so that doAMode_R can place them in the reg and rm
   fields of a mod == 3 byte.
   NOTE(review): the switch header, the leading opcode byte and the
   return statement are on lines not visible in this excerpt; no check
   of the stated range of i is visible either -- confirm. */
1938 static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
1940 # define fake(_n) mkHReg((_n), HRcInt32, False)
1943 case Xfp_ADD: subopc = 0; break;
1944 case Xfp_SUB: subopc = 4; break;
1945 case Xfp_MUL: subopc = 1; break;
1946 case Xfp_DIV: subopc = 6; break;
1947 default: vpanic("do_fop2_st: unknown op");
1950 p = doAMode_R(p, fake(subopc), fake(i));
1955 /* Push a 32-bit word on the stack. The word depends on tags[3:0];
1956 each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
/* tags must have no bits set above bit 3 (asserted).  In the general
   path each of tags bits 0..3 that is set turns the corresponding
   byte of w, from least to most significant, into 0xFF.  That path
   starts with vassert(0), marked as awaiting a test case.
   NOTE(review): the conditions selecting the all-zero and all-ones
   pushes, the emitted bytes and the return statement are on lines not
   visible in this excerpt -- confirm. */
1958 static UChar* push_word_from_tags ( UChar* p, UShort tags )
1961 vassert(0 == (tags & ~0xF));
1963 /* pushl $0x00000000 */
1968 /* pushl $0xFFFFFFFF */
1973 vassert(0); /* awaiting test case */
1975 if (tags & 1) w |= 0x000000FF;
1976 if (tags & 2) w |= 0x0000FF00;
1977 if (tags & 4) w |= 0x00FF0000;
1978 if (tags & 8) w |= 0xFF000000;
1985 /* Emit an instruction into buf and return the number of bytes used.
1986 Note that buf is not the insn's final place, and therefore it is
1987 imperative to emit position-independent code. */
1989 Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
1990 Bool mode64, void* dispatch )
1992 UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
1997 vassert(nbuf >= 32);
1998 vassert(mode64 == False);
2000 /* Wrap an integer as a int register, for use assembling
2001 GrpN insns, in which the greg field is used as a sub-opcode
2002 and does not really contain a register. */
2003 # define fake(_n) mkHReg((_n), HRcInt32, False)
2005 /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */
2010 /* Deal specially with MOV */
2011 if (i->Xin.Alu32R.op == Xalu_MOV) {
2012 switch (i->Xin.Alu32R.src->tag) {
2014 *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
2015 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2019 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2024 p = doAMode_M(p, i->Xin.Alu32R.dst,
2025 i->Xin.Alu32R.src->Xrmi.Mem.am);
2032 if (i->Xin.Alu32R.op == Xalu_MUL) {
2033 switch (i->Xin.Alu32R.src->tag) {
2037 p = doAMode_R(p, i->Xin.Alu32R.dst,
2038 i->Xin.Alu32R.src->Xrmi.Reg.reg);
2043 p = doAMode_M(p, i->Xin.Alu32R.dst,
2044 i->Xin.Alu32R.src->Xrmi.Mem.am);
2047 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2049 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2050 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2053 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2054 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2061 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2062 opc = opc_rr = subopc_imm = opc_imma = 0;
2063 switch (i->Xin.Alu32R.op) {
2064 case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
2065 subopc_imm = 2; opc_imma = 0x15; break;
2066 case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
2067 subopc_imm = 0; opc_imma = 0x05; break;
2068 case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
2069 subopc_imm = 5; opc_imma = 0x2D; break;
2070 case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
2071 subopc_imm = 3; opc_imma = 0x1D; break;
2072 case Xalu_AND: opc = 0x23; opc_rr = 0x21;
2073 subopc_imm = 4; opc_imma = 0x25; break;
2074 case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
2075 subopc_imm = 6; opc_imma = 0x35; break;
2076 case Xalu_OR: opc = 0x0B; opc_rr = 0x09;
2077 subopc_imm = 1; opc_imma = 0x0D; break;
2078 case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
2079 subopc_imm = 7; opc_imma = 0x3D; break;
2082 switch (i->Xin.Alu32R.src->tag) {
2084 if (i->Xin.Alu32R.dst == hregX86_EAX()
2085 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2086 *p++ = toUChar(opc_imma);
2087 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2089 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2091 p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
2092 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2095 p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
2096 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2100 *p++ = toUChar(opc_rr);
2101 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2105 *p++ = toUChar(opc);
2106 p = doAMode_M(p, i->Xin.Alu32R.dst,
2107 i->Xin.Alu32R.src->Xrmi.Mem.am);
2115 /* Deal specially with MOV */
2116 if (i->Xin.Alu32M.op == Xalu_MOV) {
2117 switch (i->Xin.Alu32M.src->tag) {
2120 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2125 p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
2126 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2132 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2134 opc = subopc_imm = opc_imma = 0;
2135 switch (i->Xin.Alu32M.op) {
2136 case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
2137 case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
2138 case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
2141 switch (i->Xin.Alu32M.src->tag) {
2143 *p++ = toUChar(opc);
2144 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2148 if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
2150 p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
2151 *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
2155 p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
2156 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2165 opc_cl = opc_imm = subopc = 0;
2166 switch (i->Xin.Sh32.op) {
2167 case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2168 case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2169 case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2172 if (i->Xin.Sh32.src == 0) {
2173 *p++ = toUChar(opc_cl);
2174 p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
2176 *p++ = toUChar(opc_imm);
2177 p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
2178 *p++ = (UChar)(i->Xin.Sh32.src);
2183 if (i->Xin.Test32.dst->tag == Xrm_Reg) {
2184 /* testl $imm32, %reg */
2186 p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
2187 p = emit32(p, i->Xin.Test32.imm32);
2190 /* testl $imm32, amode */
2192 p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
2193 p = emit32(p, i->Xin.Test32.imm32);
2198 if (i->Xin.Unary32.op == Xun_NOT) {
2200 p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
2203 if (i->Xin.Unary32.op == Xun_NEG) {
2205 p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
2212 p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
2216 subopc = i->Xin.MulL.syned ? 5 : 4;
2218 switch (i->Xin.MulL.src->tag) {
2220 p = doAMode_M(p, fake(subopc),
2221 i->Xin.MulL.src->Xrm.Mem.am);
2224 p = doAMode_R(p, fake(subopc),
2225 i->Xin.MulL.src->Xrm.Reg.reg);
2233 subopc = i->Xin.Div.syned ? 7 : 6;
2235 switch (i->Xin.Div.src->tag) {
2237 p = doAMode_M(p, fake(subopc),
2238 i->Xin.Div.src->Xrm.Mem.am);
2241 p = doAMode_R(p, fake(subopc),
2242 i->Xin.Div.src->Xrm.Reg.reg);
2250 vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
2251 if (i->Xin.Sh3232.amt == 0) {
2252 /* shldl/shrdl by %cl */
2254 if (i->Xin.Sh3232.op == Xsh_SHL) {
2259 p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
2265 switch (i->Xin.Push.src->tag) {
2268 p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
2272 p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
2275 *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
2282 /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
2283 for explanation of this. */
2284 switch (i->Xin.Call.regparms) {
2285 case 0: irno = iregNo(hregX86_EAX()); break;
2286 case 1: irno = iregNo(hregX86_EDX()); break;
2287 case 2: irno = iregNo(hregX86_ECX()); break;
2288 case 3: irno = iregNo(hregX86_EDI()); break;
2289 default: vpanic(" emit_X86Instr:call:regparms");
2291 /* jump over the following two insns if the condition does not
2293 if (i->Xin.Call.cond != Xcc_ALWAYS) {
2294 *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
2295 *p++ = 0x07; /* 7 bytes in the next two insns */
2297 /* movl $target, %tmp */
2298 *p++ = toUChar(0xB8 + irno);
2299 p = emit32(p, i->Xin.Call.target);
2302 *p++ = toUChar(0xD0 + irno);
2306 /* Use ptmp for backpatching conditional jumps. */
2309 /* First off, if this is conditional, create a conditional
2310 jump over the rest of it. */
2311 if (i->Xin.Goto.cond != Xcc_ALWAYS) {
2312 /* jmp fwds if !condition */
2313 *p++ = toUChar(0x70 + (0xF & (i->Xin.Goto.cond ^ 1)));
2314 ptmp = p; /* fill in this bit later */
2315 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2318 /* If a non-boring jump kind, set %ebp (the guest state pointer)
2320 /* movl $magic_number, %ebp */
2321 switch (i->Xin.Goto.jk) {
2324 p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
2327 p = emit32(p, VEX_TRC_JMP_SYS_INT48); break;
2330 p = emit32(p, VEX_TRC_JMP_SYS_INT50); break;
2331 case Ijk_Sys_int128:
2333 p = emit32(p, VEX_TRC_JMP_SYS_INT128); break;
2334 case Ijk_Sys_int129:
2336 p = emit32(p, VEX_TRC_JMP_SYS_INT129); break;
2337 case Ijk_Sys_int130:
2339 p = emit32(p, VEX_TRC_JMP_SYS_INT130); break;
2342 p = emit32(p, VEX_TRC_JMP_YIELD); break;
2345 p = emit32(p, VEX_TRC_JMP_EMWARN); break;
2348 p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
2351 p = emit32(p, VEX_TRC_JMP_NODECODE); break;
2354 p = emit32(p, VEX_TRC_JMP_TINVAL); break;
2357 p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
2358 case Ijk_Sys_sysenter:
2360 p = emit32(p, VEX_TRC_JMP_SYS_SYSENTER); break;
2363 p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
2366 p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
2367 case Ijk_l4_utcb_eax:
2369 p = emit32(p, VEX_TRC_JMP_L4_UTCB_EAX); break;
2370 case Ijk_l4_utcb_ebx:
2372 p = emit32(p, VEX_TRC_JMP_L4_UTCB_EBX); break;
2373 case Ijk_l4_utcb_ecx:
2375 p = emit32(p, VEX_TRC_JMP_L4_UTCB_ECX); break;
2376 case Ijk_l4_utcb_edx:
2378 p = emit32(p, VEX_TRC_JMP_L4_UTCB_EDX); break;
2379 case Ijk_l4_utcb_edi:
2381 p = emit32(p, VEX_TRC_JMP_L4_UTCB_EDI); break;
2382 case Ijk_l4_utcb_esi:
2384 p = emit32(p, VEX_TRC_JMP_L4_UTCB_ESI); break;
2387 p = emit32(p, VEX_TRC_JMP_L4_UD2); break;
2388 case Ijk_l4_artificial:
2390 p = emit32(p, VEX_TRC_JMP_L4_ARTIFICIAL); break;
2397 ppIRJumpKind(i->Xin.Goto.jk);
2398 vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
2401 /* Get the destination address into %eax */
2402 if (i->Xin.Goto.dst->tag == Xri_Imm) {
2403 /* movl $immediate, %eax */
2405 p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
2407 vassert(i->Xin.Goto.dst->tag == Xri_Reg);
2408 /* movl %reg, %eax */
2409 if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
2411 p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
2415 /* Get the dispatcher address into %edx. This has to happen
2416 after the load of %eax since %edx might be carrying the value
2417 destined for %eax immediately prior to this Xin_Goto. */
2418 vassert(sizeof(UInt) == sizeof(void*));
2419 vassert(dispatch != NULL);
2420 /* movl $imm32, %edx */
2422 p = emit32(p, (UInt)Ptr_to_ULong(dispatch));
2428 /* Fix up the conditional jump, if there was one. */
2429 if (i->Xin.Goto.cond != Xcc_ALWAYS) {
2430 Int delta = p - ptmp;
2431 vassert(delta > 0 && delta < 20);
2432 *ptmp = toUChar(delta-1);
2437 vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
2439 /* This generates cmov, which is illegal on P54/P55. */
2442 *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
2443 if (i->Xin.CMov32.src->tag == Xrm_Reg) {
2444 p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
2447 if (i->Xin.CMov32.src->tag == Xrm_Mem) {
2448 p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
2453 /* Alternative version which works on any x86 variant. */
2454 /* jmp fwds if !condition */
2455 *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
2456 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
2459 switch (i->Xin.CMov32.src->tag) {
2461 /* Big sigh. This is movl E -> G ... */
2463 p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
2468 /* ... whereas this is movl G -> E. That's why the args
2469 to doAMode_R appear to be the wrong way round in the
2472 p = doAMode_M(p, i->Xin.CMov32.dst,
2473 i->Xin.CMov32.src->Xrm.Mem.am);
2478 /* Fill in the jump offset. */
2479 *(ptmp-1) = toUChar(p - ptmp);
2485 if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
2489 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2492 if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
2496 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2499 if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
2503 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2509 /* Make the destination register be 1 or 0, depending on whether
2510 the relevant condition holds. We have to dodge and weave
2511 when the destination is %esi or %edi as we cannot directly
2512 emit the native 'setb %reg' for those. Further complication:
2513 the top 24 bits of the destination should be forced to zero,
2514 but doing 'xor %r,%r' kills the flag(s) we are about to read.
2515 Sigh. So start off by moving $0 into the dest. */
2517 /* Do we need to swap in %eax? */
2518 if (iregNo(i->Xin.Set32.dst) >= 4) {
2519 /* xchg %eax, %dst */
2520 *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
2522 *p++ =toUChar(0xB8 + iregNo(hregX86_EAX()));
2524 /* setb lo8(%eax) */
2526 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2527 p = doAMode_R(p, fake(0), hregX86_EAX());
2528 /* xchg %eax, %dst */
2529 *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
2532 *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
2534 /* setb lo8(%dst) */
2536 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2537 p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
2543 if (i->Xin.Bsfr32.isFwds) {
2548 p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
2552 /* see comment in hdefs.h re this insn */
2553 if (0) vex_printf("EMIT FENCE\n");
2554 if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
2555 |VEX_HWCAPS_X86_SSE2)) {
2557 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
2560 if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
2562 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
2563 /* lock addl $0,0(%esp) */
2564 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2565 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2568 if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
2569 /* lock addl $0,0(%esp) */
2570 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2571 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2574 vpanic("emit_X86Instr:mfence:hwcaps");
2581 /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value
2582 in %ebx. The new-value register is hardwired to be %ebx
2583 since letting it be any integer register gives the problem
2584 that %sil and %dil are unaddressable on x86 and hence we
2585 would have to resort to the same kind of trickery as with
2586 byte-sized Xin.Store, just below. Given that this isn't
2587 performance critical, it is simpler just to force the
2588 register operand to %ebx (could equally be %ecx or %edx).
2589 (Although %ebx is more consistent with cmpxchg8b.) */
2590 if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
2592 if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
2593 p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
2599 /* cmpxchg8b m64. Expected-value in %edx:%eax, new value
2600 in %ecx:%ebx. All 4 regs are hardwired in the ISA, so
2601 aren't encoded in the insn. */
2604 p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
2608 if (i->Xin.Store.sz == 2) {
2609 /* This case, at least, is simple, given that we can
2610 reference the low 16 bits of any integer register. */
2613 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2617 if (i->Xin.Store.sz == 1) {
2618 /* We have to do complex dodging and weaving if src is not
2619 the low 8 bits of %eax/%ebx/%ecx/%edx. */
2620 if (iregNo(i->Xin.Store.src) < 4) {
2621 /* we're OK, can do it directly */
2623 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2626 /* Bleh. This means the source is %edi or %esi. Since
2627 the address mode can only mention three registers, at
2628 least one of %eax/%ebx/%ecx/%edx must be available to
2629 temporarily swap the source into, so the store can
2630 happen. So we have to look at the regs mentioned
2632 HReg swap = INVALID_HREG;
2633 HReg eax = hregX86_EAX(), ebx = hregX86_EBX(),
2634 ecx = hregX86_ECX(), edx = hregX86_EDX();
2635 Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
2639 addRegUsage_X86AMode(&u, i->Xin.Store.dst);
2640 for (j = 0; j < u.n_used; j++) {
2642 if (r == eax) a_ok = False;
2643 if (r == ebx) b_ok = False;
2644 if (r == ecx) c_ok = False;
2645 if (r == edx) d_ok = False;
2647 if (a_ok) swap = eax;
2648 if (b_ok) swap = ebx;
2649 if (c_ok) swap = ecx;
2650 if (d_ok) swap = edx;
2651 vassert(swap != INVALID_HREG);
2652 /* xchgl %source, %swap. Could do better if swap is %eax. */
2654 p = doAMode_R(p, i->Xin.Store.src, swap);
2655 /* movb lo8{%swap}, (dst) */
2657 p = doAMode_M(p, swap, i->Xin.Store.dst);
2658 /* xchgl %source, %swap. Could do better if swap is %eax. */
2660 p = doAMode_R(p, i->Xin.Store.src, swap);
2663 } /* if (i->Xin.Store.sz == 1) */
2668 --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
2670 p = do_ffree_st7(p);
2671 p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
2672 p = do_fop1_st(p, i->Xin.FpUnary.op);
2673 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
2677 if (i->Xin.FpBinary.op == Xfp_YL2X
2678 || i->Xin.FpBinary.op == Xfp_YL2XP1) {
2679 /* Have to do this specially. */
2680 /* ffree %st7 ; fld %st(srcL) ;
2681 ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
2682 p = do_ffree_st7(p);
2683 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
2684 p = do_ffree_st7(p);
2685 p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
2687 *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
2688 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
2691 if (i->Xin.FpBinary.op == Xfp_ATAN) {
2692 /* Have to do this specially. */
2693 /* ffree %st7 ; fld %st(srcL) ;
2694 ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
2695 p = do_ffree_st7(p);
2696 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
2697 p = do_ffree_st7(p);
2698 p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
2699 *p++ = 0xD9; *p++ = 0xF3;
2700 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
2703 if (i->Xin.FpBinary.op == Xfp_PREM
2704 || i->Xin.FpBinary.op == Xfp_PREM1
2705 || i->Xin.FpBinary.op == Xfp_SCALE) {
2706 /* Have to do this specially. */
2707 /* ffree %st7 ; fld %st(srcR) ;
2708 ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
2709 fincstp ; ffree %st7 */
2710 p = do_ffree_st7(p);
2711 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
2712 p = do_ffree_st7(p);
2713 p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
2715 switch (i->Xin.FpBinary.op) {
2716 case Xfp_PREM: *p++ = 0xF8; break;
2717 case Xfp_PREM1: *p++ = 0xF5; break;
2718 case Xfp_SCALE: *p++ = 0xFD; break;
2719 default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
2721 p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
2722 *p++ = 0xD9; *p++ = 0xF7;
2723 p = do_ffree_st7(p);
2727 /* gop %srcL, %srcR, %dst
2728 --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
2730 p = do_ffree_st7(p);
2731 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
2732 p = do_fop2_st(p, i->Xin.FpBinary.op,
2733 1+hregNumber(i->Xin.FpBinary.srcR));
2734 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
2738 if (i->Xin.FpLdSt.isLoad) {
2739 /* Load from memory into %fakeN.
2740 --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
2742 p = do_ffree_st7(p);
2743 switch (i->Xin.FpLdSt.sz) {
2746 p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
2750 p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
2754 p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
2757 vpanic("emitX86Instr(FpLdSt,load)");
2759 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
2762 /* Store from %fakeN into memory.
2763 --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
2765 p = do_ffree_st7(p);
2766 p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
2767 switch (i->Xin.FpLdSt.sz) {
2770 p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
2774 p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
2778 p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
2781 vpanic("emitX86Instr(FpLdSt,store)");
2788 if (i->Xin.FpLdStI.isLoad) {
2789 /* Load from memory into %fakeN, converting from an int.
2790 --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
2792 switch (i->Xin.FpLdStI.sz) {
2793 case 8: opc = 0xDF; subopc_imm = 5; break;
2794 case 4: opc = 0xDB; subopc_imm = 0; break;
2795 case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
2796 default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
2798 p = do_ffree_st7(p);
2799 *p++ = toUChar(opc);
2800 p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
2801 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
2804 /* Store from %fakeN into memory, converting to an int.
2805 --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
2807 switch (i->Xin.FpLdStI.sz) {
2808 case 8: opc = 0xDF; subopc_imm = 7; break;
2809 case 4: opc = 0xDB; subopc_imm = 3; break;
2810 case 2: opc = 0xDF; subopc_imm = 3; break;
2811 default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
2813 p = do_ffree_st7(p);
2814 p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
2815 *p++ = toUChar(opc);
2816 p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
2822 /* ffree %st7 ; fld %st(src) */
2823 p = do_ffree_st7(p);
2824 p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
2826 *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
2828 *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
2830 *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
2832 *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
2833 /* fstp %st(1+dst) */
2834 p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
2838 /* jmp fwds if !condition */
2839 *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
2840 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
2843 /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
2844 p = do_ffree_st7(p);
2845 p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
2846 p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
2848 /* Fill in the jump offset. */
2849 *(ptmp-1) = toUChar(p - ptmp);
2854 p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
2858 /* note, this emits fnstsw %ax, not fstsw %ax */
2864 /* gcmp %fL, %fR, %dst
2865 -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
2866 fnstsw %ax ; movl %eax, %dst
2869 p = do_ffree_st7(p);
2871 p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
2872 /* fucomp %(fR+1) */
2874 *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
2878 /* movl %eax, %dst */
2880 p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
2883 case Xin_SseConst: {
2884 UShort con = i->Xin.SseConst.con;
2885 p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
2886 p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
2887 p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
2888 p = push_word_from_tags(p, toUShort(con & 0xF));
2889 /* movl (%esp), %xmm-dst */
2892 *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
2894 /* addl $16, %esp */
2903 *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
2904 p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
2908 vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
2909 /* movs[sd] amode, %xmm-dst */
2910 *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
2913 p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
2914 i->Xin.SseLdzLO.addr);
2920 switch (i->Xin.Sse32Fx4.op) {
2921 case Xsse_ADDF: *p++ = 0x58; break;
2922 case Xsse_DIVF: *p++ = 0x5E; break;
2923 case Xsse_MAXF: *p++ = 0x5F; break;
2924 case Xsse_MINF: *p++ = 0x5D; break;
2925 case Xsse_MULF: *p++ = 0x59; break;
2926 case Xsse_RCPF: *p++ = 0x53; break;
2927 case Xsse_RSQRTF: *p++ = 0x52; break;
2928 case Xsse_SQRTF: *p++ = 0x51; break;
2929 case Xsse_SUBF: *p++ = 0x5C; break;
2930 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
2931 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
2932 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
2933 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
2936 p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
2937 fake(vregNo(i->Xin.Sse32Fx4.src)) );
2939 *p++ = toUChar(xtra & 0xFF);
2946 switch (i->Xin.Sse64Fx2.op) {
2947 case Xsse_ADDF: *p++ = 0x58; break;
2948 case Xsse_DIVF: *p++ = 0x5E; break;
2949 case Xsse_MAXF: *p++ = 0x5F; break;
2950 case Xsse_MINF: *p++ = 0x5D; break;
2951 case Xsse_MULF: *p++ = 0x59; break;
2952 case Xsse_RCPF: *p++ = 0x53; break;
2953 case Xsse_RSQRTF: *p++ = 0x52; break;
2954 case Xsse_SQRTF: *p++ = 0x51; break;
2955 case Xsse_SUBF: *p++ = 0x5C; break;
2956 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
2957 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
2958 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
2959 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
2962 p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
2963 fake(vregNo(i->Xin.Sse64Fx2.src)) );
2965 *p++ = toUChar(xtra & 0xFF);
2972 switch (i->Xin.Sse32FLo.op) {
2973 case Xsse_ADDF: *p++ = 0x58; break;
2974 case Xsse_DIVF: *p++ = 0x5E; break;
2975 case Xsse_MAXF: *p++ = 0x5F; break;
2976 case Xsse_MINF: *p++ = 0x5D; break;
2977 case Xsse_MULF: *p++ = 0x59; break;
2978 case Xsse_RCPF: *p++ = 0x53; break;
2979 case Xsse_RSQRTF: *p++ = 0x52; break;
2980 case Xsse_SQRTF: *p++ = 0x51; break;
2981 case Xsse_SUBF: *p++ = 0x5C; break;
2982 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
2983 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
2984 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
2985 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
2988 p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
2989 fake(vregNo(i->Xin.Sse32FLo.src)) );
2991 *p++ = toUChar(xtra & 0xFF);
2998 switch (i->Xin.Sse64FLo.op) {
2999 case Xsse_ADDF: *p++ = 0x58; break;
3000 case Xsse_DIVF: *p++ = 0x5E; break;
3001 case Xsse_MAXF: *p++ = 0x5F; break;
3002 case Xsse_MINF: *p++ = 0x5D; break;
3003 case Xsse_MULF: *p++ = 0x59; break;
3004 case Xsse_RCPF: *p++ = 0x53; break;
3005 case Xsse_RSQRTF: *p++ = 0x52; break;
3006 case Xsse_SQRTF: *p++ = 0x51; break;
3007 case Xsse_SUBF: *p++ = 0x5C; break;
3008 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3009 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3010 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3011 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3014 p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
3015 fake(vregNo(i->Xin.Sse64FLo.src)) );
3017 *p++ = toUChar(xtra & 0xFF);
3021 # define XX(_n) *p++ = (_n)
3022 switch (i->Xin.SseReRg.op) {
3023 case Xsse_MOV: /*movups*/ XX(0x0F); XX(0x10); break;
3024 case Xsse_OR: XX(0x0F); XX(0x56); break;
3025 case Xsse_XOR: XX(0x0F); XX(0x57); break;
3026 case Xsse_AND: XX(0x0F); XX(0x54); break;
3027 case Xsse_PACKSSD: XX(0x66); XX(0x0F); XX(0x6B); break;
3028 case Xsse_PACKSSW: XX(0x66); XX(0x0F); XX(0x63); break;
3029 case Xsse_PACKUSW: XX(0x66); XX(0x0F); XX(0x67); break;
3030 case Xsse_ADD8: XX(0x66); XX(0x0F); XX(0xFC); break;
3031 case Xsse_ADD16: XX(0x66); XX(0x0F); XX(0xFD); break;
3032 case Xsse_ADD32: XX(0x66); XX(0x0F); XX(0xFE); break;
3033 case Xsse_ADD64: XX(0x66); XX(0x0F); XX(0xD4); break;
3034 case Xsse_QADD8S: XX(0x66); XX(0x0F); XX(0xEC); break;
3035 case Xsse_QADD16S: XX(0x66); XX(0x0F); XX(0xED); break;
3036 case Xsse_QADD8U: XX(0x66); XX(0x0F); XX(0xDC); break;
3037 case Xsse_QADD16U: XX(0x66); XX(0x0F); XX(0xDD); break;
3038 case Xsse_AVG8U: XX(0x66); XX(0x0F); XX(0xE0); break;
3039 case Xsse_AVG16U: XX(0x66); XX(0x0F); XX(0xE3); break;
3040 case Xsse_CMPEQ8: XX(0x66); XX(0x0F); XX(0x74); break;
3041 case Xsse_CMPEQ16: XX(0x66); XX(0x0F); XX(0x75); break;
3042 case Xsse_CMPEQ32: XX(0x66); XX(0x0F); XX(0x76); break;
3043 case Xsse_CMPGT8S: XX(0x66); XX(0x0F); XX(0x64); break;
3044 case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
3045 case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
3046 case Xsse_MAX16S: XX(0x66); XX(0x0F); XX(0xEE); break;
3047 case Xsse_MAX8U: XX(0x66); XX(0x0F); XX(0xDE); break;
3048 case Xsse_MIN16S: XX(0x66); XX(0x0F); XX(0xEA); break;
3049 case Xsse_MIN8U: XX(0x66); XX(0x0F); XX(0xDA); break;
3050 case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
3051 case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
3052 case Xsse_MUL16: XX(0x66); XX(0x0F); XX(0xD5); break;
3053 case Xsse_SHL16: XX(0x66); XX(0x0F); XX(0xF1); break;
3054 case Xsse_SHL32: XX(0x66); XX(0x0F); XX(0xF2); break;
3055 case Xsse_SHL64: XX(0x66); XX(0x0F); XX(0xF3); break;
3056 case Xsse_SAR16: XX(0x66); XX(0x0F); XX(0xE1); break;
3057 case Xsse_SAR32: XX(0x66); XX(0x0F); XX(0xE2); break;
3058 case Xsse_SHR16: XX(0x66); XX(0x0F); XX(0xD1); break;
3059 case Xsse_SHR32: XX(0x66); XX(0x0F); XX(0xD2); break;
3060 case Xsse_SHR64: XX(0x66); XX(0x0F); XX(0xD3); break;
3061 case Xsse_SUB8: XX(0x66); XX(0x0F); XX(0xF8); break;
3062 case Xsse_SUB16: XX(0x66); XX(0x0F); XX(0xF9); break;
3063 case Xsse_SUB32: XX(0x66); XX(0x0F); XX(0xFA); break;
3064 case Xsse_SUB64: XX(0x66); XX(0x0F); XX(0xFB); break;
3065 case Xsse_QSUB8S: XX(0x66); XX(0x0F); XX(0xE8); break;
3066 case Xsse_QSUB16S: XX(0x66); XX(0x0F); XX(0xE9); break;
3067 case Xsse_QSUB8U: XX(0x66); XX(0x0F); XX(0xD8); break;
3068 case Xsse_QSUB16U: XX(0x66); XX(0x0F); XX(0xD9); break;
3069 case Xsse_UNPCKHB: XX(0x66); XX(0x0F); XX(0x68); break;
3070 case Xsse_UNPCKHW: XX(0x66); XX(0x0F); XX(0x69); break;
3071 case Xsse_UNPCKHD: XX(0x66); XX(0x0F); XX(0x6A); break;
3072 case Xsse_UNPCKHQ: XX(0x66); XX(0x0F); XX(0x6D); break;
3073 case Xsse_UNPCKLB: XX(0x66); XX(0x0F); XX(0x60); break;
3074 case Xsse_UNPCKLW: XX(0x66); XX(0x0F); XX(0x61); break;
3075 case Xsse_UNPCKLD: XX(0x66); XX(0x0F); XX(0x62); break;
3076 case Xsse_UNPCKLQ: XX(0x66); XX(0x0F); XX(0x6C); break;
3079 p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
3080 fake(vregNo(i->Xin.SseReRg.src)) );
3085 /* jmp fwds if !condition */
3086 *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
3087 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3090 /* movaps %src, %dst */
3093 p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
3094 fake(vregNo(i->Xin.SseCMov.src)) );
3096 /* Fill in the jump offset. */
3097 *(ptmp-1) = toUChar(p - ptmp);
3104 p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
3105 fake(vregNo(i->Xin.SseShuf.src)) );
3106 *p++ = (UChar)(i->Xin.SseShuf.order);
3114 ppX86Instr(i, mode64);
3115 vpanic("emit_X86Instr");
3119 vassert(p - &buf[0] <= 32);
3125 /*---------------------------------------------------------------*/
3126 /*--- end host_x86_defs.c ---*/
3127 /*---------------------------------------------------------------*/