2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_x86_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 /* Translates x86 code to IR. */
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 32-bit value is being written.
43 FUCOMI(P): what happens to A and S flags? Currently are forced
48 * all arithmetic done at 64 bits
50 * no FP exceptions, except for handling stack over/underflow
52 * FP rounding mode observed only for float->int conversions
53 and int->float conversions which could lose accuracy, and
54 for float-to-float rounding. For all other operations,
55 round-to-nearest is used, regardless.
57 * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
58 simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
61 * some of the FCOM cases could do with testing -- not convinced
62 that the args are the right way round.
64 * FSAVE does not re-initialise the FPU; it should do
66 * FINIT not only initialises the FPU environment, it also
67 zeroes all the FP registers. It should leave the registers
70 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
71 per Intel docs this bit has no meaning anyway. Since PUSHF is the
72 only way to observe eflags[1], a proper fix would be to make that
75 The state of %eflags.AC (alignment check, bit 18) is recorded by
76 the simulation (viz, if you set it with popf then a pushf produces
77 the value you set it to), but it is otherwise ignored. In
78 particular, setting it to 1 does NOT cause alignment checking to
79 happen. Programs that set it to 1 and then rely on the resulting
80 SIGBUSs to inform them of misaligned accesses will not work.
82 Implementation of sysenter is necessarily partial. sysenter is a
83 kind of system call entry. When doing a sysenter, the return
84 address is not known -- that is something that is beyond Vex's
85 knowledge. So the generated IR forces a return to the scheduler,
86 which can do what it likes to simulate the systenter, but it MUST
87 set this thread's guest_EIP field with the continuation address
88 before resuming execution. If that doesn't happen, the thread will
89 jump to address zero, which is probably fatal.
91 This module uses global variables and so is not MT-safe (if that
92 should ever become relevant).
94 The delta values are 32-bit ints, not 64-bit ints. That means
95 this module may not work right if run on a 64-bit host. That should
96 be fixed properly, really -- if anyone ever wants to use Vex to
97 translate x86 code for execution on a 64-bit host.
99 casLE (implementation of lock-prefixed insns) and rep-prefixed
100 insns: the side-exit back to the start of the insn is done with
101 Ijk_Boring. This is quite wrong, it should be done with
102 Ijk_NoRedir, since otherwise the side exit, which is intended to
103 restart the instruction for whatever reason, could go somewhere
104 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
105 no-redir jumps performance critical, at least for rep-prefixed
106 instructions, since all iterations thereof would involve such a
107 jump. It's not such a big deal with casLE since the side exit is
108 only taken if the CAS fails, that is, the location is contended,
109 which is relatively unlikely.
111 XXXX: Nov 2009: handling of SWP on ARM suffers from the same
114 Note also, the test for CAS success vs failure is done using
115 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
116 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
117 shouldn't definedness-check these comparisons. See
118 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
119 background/rationale.
122 /* Performance holes:
124 - fcom ; fstsw %ax ; sahf
125 sahf does not update the O flag (sigh) and so O needs to
126 be computed. This is done expensively; it would be better
127 to have a calculate_eflags_o helper.
129 - emwarns; some FP codes can generate huge numbers of these
130 if the fpucw is changed in an inner loop. It would be
131 better for the guest state to have an emwarn-enable reg
132 which can be set zero or nonzero. If it is zero, emwarns
133 are not flagged, and instead control just flows all the
134 way through bbs as usual.
137 /* "Special" instructions.
139 This instruction decoder can decode three special instructions
140 which mean nothing natively (are no-ops as far as regs/mem are
141 concerned) but have meaning for supporting Valgrind. A special
142 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
143 C1C713 (in the standard interpretation, that means: roll $3, %edi;
144 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
145 one of the following 3 are allowed (standard interpretation in
148 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
149 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
150 87D2 (xchgl %edx,%edx) call-noredir *%EAX
152 Any other bytes following the 12-byte preamble are illegal and
153 constitute a failure in instruction decoding. This all assumes
154 that the preamble will never occur except in specific code
155 fragments designed for Valgrind to catch.
157 No prefixes may precede a "Special" instruction.
160 /* LOCK prefixed instructions. These are translated using IR-level
161 CAS statements (IRCAS) and are believed to preserve atomicity, even
162 from the point of view of some other process racing against a
163 simulated one (presumably they communicate via a shared memory
166 Handlers which are aware of LOCK prefixes are:
167 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
168 dis_cmpxchg_G_E (cmpxchg)
169 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
173 dis_Grp8_Imm (bts, btc, btr)
174 dis_bt_G_E (bts, btc, btr)
179 #include "libvex_basictypes.h"
180 #include "libvex_ir.h"
182 #include "libvex_guest_x86.h"
184 #include "main_util.h"
185 #include "main_globals.h"
186 #include "guest_generic_bb_to_IR.h"
187 #include "guest_generic_x87.h"
188 #include "guest_x86_defs.h"
191 /*------------------------------------------------------------*/
193 /*------------------------------------------------------------*/
195 /* These are set at the start of the translation of an insn, right
196 down in disInstr_X86, so that we don't have to pass them around
197 endlessly. They are all constant during the translation of any
200 /* We need to know this to do sub-register accesses correctly. */
201 static Bool host_is_bigendian;
203 /* Pointer to the guest code area (points to start of BB, not to the
204 insn being processed). */
205 static UChar* guest_code;
207 /* The guest address corresponding to guest_code[0]. */
208 static Addr32 guest_EIP_bbstart;
210 /* The guest address for the instruction currently being
212 static Addr32 guest_EIP_curr_instr;
214 /* The IRSB* into which we're generating code. */
218 /*------------------------------------------------------------*/
219 /*--- Debugging output ---*/
220 /*------------------------------------------------------------*/
/* Print a disassembly-trace line, but only when front-end tracing is
   enabled (vex_traceflags has VEX_TRACE_FE set).  Uses GCC's named
   variadic-macro syntax; '## args' swallows the comma when no
   varargs are supplied. */
#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

/* Ditto, but sprintf into 'buf' instead of printing. */
#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
231 /*------------------------------------------------------------*/
232 /*--- Offsets of various parts of the x86 guest state. ---*/
233 /*------------------------------------------------------------*/
/* Byte offsets into VexGuestX86State of the parts of the guest state
   this front end reads and writes.  Kept as macros so they can be
   used in IRStmt_Put/IRExpr_Get without clutter. */

#define OFFB_EAX       offsetof(VexGuestX86State,guest_EAX)
#define OFFB_EBX       offsetof(VexGuestX86State,guest_EBX)
#define OFFB_ECX       offsetof(VexGuestX86State,guest_ECX)
#define OFFB_EDX       offsetof(VexGuestX86State,guest_EDX)
#define OFFB_ESP       offsetof(VexGuestX86State,guest_ESP)
#define OFFB_EBP       offsetof(VexGuestX86State,guest_EBP)
#define OFFB_ESI       offsetof(VexGuestX86State,guest_ESI)
#define OFFB_EDI       offsetof(VexGuestX86State,guest_EDI)

#define OFFB_EIP       offsetof(VexGuestX86State,guest_EIP)

/* The four fields of the lazily-evaluated condition-code thunk. */
#define OFFB_CC_OP     offsetof(VexGuestX86State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestX86State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestX86State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestX86State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestX86State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestX86State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestX86State,guest_DFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestX86State,guest_IDFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestX86State,guest_ACFLAG)
#define OFFB_FTOP      offsetof(VexGuestX86State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestX86State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestX86State,guest_FPROUND)

/* Segment registers and descriptor-table base pointers. */
#define OFFB_CS        offsetof(VexGuestX86State,guest_CS)
#define OFFB_DS        offsetof(VexGuestX86State,guest_DS)
#define OFFB_ES        offsetof(VexGuestX86State,guest_ES)
#define OFFB_FS        offsetof(VexGuestX86State,guest_FS)
#define OFFB_GS        offsetof(VexGuestX86State,guest_GS)
#define OFFB_SS        offsetof(VexGuestX86State,guest_SS)
#define OFFB_LDT       offsetof(VexGuestX86State,guest_LDT)
#define OFFB_GDT       offsetof(VexGuestX86State,guest_GDT)

#define OFFB_SSEROUND  offsetof(VexGuestX86State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestX86State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestX86State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestX86State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestX86State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestX86State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestX86State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestX86State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestX86State,guest_XMM7)

#define OFFB_EMWARN    offsetof(VexGuestX86State,guest_EMWARN)

#define OFFB_TISTART   offsetof(VexGuestX86State,guest_TISTART)
#define OFFB_TILEN     offsetof(VexGuestX86State,guest_TILEN)
#define OFFB_NRADDR    offsetof(VexGuestX86State,guest_NRADDR)

#define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)
288 /*------------------------------------------------------------*/
289 /*--- Helper bits and pieces for deconstructing the ---*/
290 /*--- x86 insn stream. ---*/
291 /*------------------------------------------------------------*/
293 /* This is the Intel register encoding -- integer regs. */
/* Encodings for the 8-bit low/high halves of %eax: %al aliases the
   low byte (archreg 0..3 range), %ah the high byte (archreg 4..7
   range) -- see integerGuestRegOffset. */
#define R_AL (0+R_EAX)
#define R_AH (4+R_EAX)
306 /* This is the Intel register encoding -- segment regs. */
315 /* Add a statement to the list held by "irbb". */
316 static void stmt ( IRStmt* st )
318 addStmtToIRSB( irsb, st );
321 /* Generate a new temporary of the given type. */
322 static IRTemp newTemp ( IRType ty )
324 vassert(isPlausibleIRType(ty));
325 return newIRTemp( irsb->tyenv, ty );
328 /* Various simple conversions */
330 static UInt extend_s_8to32( UInt x )
332 return (UInt)((((Int)x) << 24) >> 24);
335 static UInt extend_s_16to32 ( UInt x )
337 return (UInt)((((Int)x) << 16) >> 16);
340 /* Fetch a byte from the guest insn stream. */
341 static UChar getIByte ( Int delta )
343 return guest_code[delta];
346 /* Extract the reg field from a modRM byte. */
347 static Int gregOfRM ( UChar mod_reg_rm )
349 return (Int)( (mod_reg_rm >> 3) & 7 );
352 /* Figure out whether the mod and rm parts of a modRM byte refer to a
353 register or memory. If so, the byte will have the form 11XXXYYY,
354 where YYY is the register number. */
355 static Bool epartIsReg ( UChar mod_reg_rm )
357 return toBool(0xC0 == (mod_reg_rm & 0xC0));
360 /* ... and extract the register number ... */
361 static Int eregOfRM ( UChar mod_reg_rm )
363 return (Int)(mod_reg_rm & 0x7);
366 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
368 static UChar getUChar ( Int delta )
370 UChar v = guest_code[delta+0];
374 static UInt getUDisp16 ( Int delta )
376 UInt v = guest_code[delta+1]; v <<= 8;
377 v |= guest_code[delta+0];
381 static UInt getUDisp32 ( Int delta )
383 UInt v = guest_code[delta+3]; v <<= 8;
384 v |= guest_code[delta+2]; v <<= 8;
385 v |= guest_code[delta+1]; v <<= 8;
386 v |= guest_code[delta+0];
390 static UInt getUDisp ( Int size, Int delta )
393 case 4: return getUDisp32(delta);
394 case 2: return getUDisp16(delta);
395 case 1: return (UInt)getUChar(delta);
396 default: vpanic("getUDisp(x86)");
398 return 0; /*notreached*/
402 /* Get a byte value out of the insn stream and sign-extend to 32
404 static UInt getSDisp8 ( Int delta )
406 return extend_s_8to32( (UInt) (guest_code[delta]) );
409 static UInt getSDisp16 ( Int delta0 )
411 UChar* eip = (UChar*)(&guest_code[delta0]);
413 d |= ((*eip++) << 8);
414 return extend_s_16to32(d);
417 static UInt getSDisp ( Int size, Int delta )
420 case 4: return getUDisp32(delta);
421 case 2: return getSDisp16(delta);
422 case 1: return getSDisp8(delta);
423 default: vpanic("getSDisp(x86)");
425 return 0; /*notreached*/
429 /*------------------------------------------------------------*/
430 /*--- Helpers for constructing IR. ---*/
431 /*------------------------------------------------------------*/
433 /* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit
434 register references, we need to take the host endianness into
435 account. Supplied value is 0 .. 7 and in the Intel instruction
438 static IRType szToITy ( Int n )
441 case 1: return Ity_I8;
442 case 2: return Ity_I16;
443 case 4: return Ity_I32;
444 default: vpanic("szToITy(x86)");
448 /* On a little-endian host, less significant bits of the guest
449 registers are at lower addresses. Therefore, if a reference to a
450 register low half has the safe guest state offset as a reference to
453 static Int integerGuestRegOffset ( Int sz, UInt archreg )
455 vassert(archreg < 8);
457 /* Correct for little-endian host only. */
458 vassert(!host_is_bigendian);
460 if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
462 case R_EAX: return OFFB_EAX;
463 case R_EBX: return OFFB_EBX;
464 case R_ECX: return OFFB_ECX;
465 case R_EDX: return OFFB_EDX;
466 case R_ESI: return OFFB_ESI;
467 case R_EDI: return OFFB_EDI;
468 case R_ESP: return OFFB_ESP;
469 case R_EBP: return OFFB_EBP;
470 default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
474 vassert(archreg >= 4 && archreg < 8 && sz == 1);
476 case R_EAX: return 1+ OFFB_EAX;
477 case R_EBX: return 1+ OFFB_EBX;
478 case R_ECX: return 1+ OFFB_ECX;
479 case R_EDX: return 1+ OFFB_EDX;
480 default: vpanic("integerGuestRegOffset(x86,le)(1h)");
484 vpanic("integerGuestRegOffset(x86,le)");
487 static Int segmentGuestRegOffset ( UInt sreg )
490 case R_ES: return OFFB_ES;
491 case R_CS: return OFFB_CS;
492 case R_SS: return OFFB_SS;
493 case R_DS: return OFFB_DS;
494 case R_FS: return OFFB_FS;
495 case R_GS: return OFFB_GS;
496 default: vpanic("segmentGuestRegOffset(x86)");
500 static Int xmmGuestRegOffset ( UInt xmmreg )
503 case 0: return OFFB_XMM0;
504 case 1: return OFFB_XMM1;
505 case 2: return OFFB_XMM2;
506 case 3: return OFFB_XMM3;
507 case 4: return OFFB_XMM4;
508 case 5: return OFFB_XMM5;
509 case 6: return OFFB_XMM6;
510 case 7: return OFFB_XMM7;
511 default: vpanic("xmmGuestRegOffset");
515 /* Lanes of vector registers are always numbered from zero being the
516 least significant lane (rightmost in the register). */
518 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
520 /* Correct for little-endian host only. */
521 vassert(!host_is_bigendian);
522 vassert(laneno >= 0 && laneno < 8);
523 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
526 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
528 /* Correct for little-endian host only. */
529 vassert(!host_is_bigendian);
530 vassert(laneno >= 0 && laneno < 4);
531 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
534 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
536 /* Correct for little-endian host only. */
537 vassert(!host_is_bigendian);
538 vassert(laneno >= 0 && laneno < 2);
539 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
542 static IRExpr* getIReg ( Int sz, UInt archreg )
544 vassert(sz == 1 || sz == 2 || sz == 4);
545 vassert(archreg < 8);
546 return IRExpr_Get( integerGuestRegOffset(sz,archreg),
550 /* Ditto, but write to a reg instead. */
551 static void putIReg ( Int sz, UInt archreg, IRExpr* e )
553 IRType ty = typeOfIRExpr(irsb->tyenv, e);
555 case 1: vassert(ty == Ity_I8); break;
556 case 2: vassert(ty == Ity_I16); break;
557 case 4: vassert(ty == Ity_I32); break;
558 default: vpanic("putIReg(x86)");
560 vassert(archreg < 8);
561 stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
564 static IRExpr* getSReg ( UInt sreg )
566 return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
569 static void putSReg ( UInt sreg, IRExpr* e )
571 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
572 stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
575 static IRExpr* getXMMReg ( UInt xmmreg )
577 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
580 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
582 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
585 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
587 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
590 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
592 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
595 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
597 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
600 static void putXMMReg ( UInt xmmreg, IRExpr* e )
602 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
603 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
606 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
608 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
609 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
612 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
614 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
615 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
618 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
620 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
621 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
624 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
626 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
627 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
630 static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
632 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
633 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
636 static void assign ( IRTemp dst, IRExpr* e )
638 stmt( IRStmt_WrTmp(dst, e) );
641 static void storeLE ( IRExpr* addr, IRExpr* data )
643 stmt( IRStmt_Store(Iend_LE, addr, data) );
646 static IRExpr* unop ( IROp op, IRExpr* a )
648 return IRExpr_Unop(op, a);
651 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
653 return IRExpr_Binop(op, a1, a2);
656 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
658 return IRExpr_Triop(op, a1, a2, a3);
661 static IRExpr* mkexpr ( IRTemp tmp )
663 return IRExpr_RdTmp(tmp);
666 static IRExpr* mkU8 ( UInt i )
669 return IRExpr_Const(IRConst_U8( (UChar)i ));
672 static IRExpr* mkU16 ( UInt i )
675 return IRExpr_Const(IRConst_U16( (UShort)i ));
678 static IRExpr* mkU32 ( UInt i )
680 return IRExpr_Const(IRConst_U32(i));
683 static IRExpr* mkU64 ( ULong i )
685 return IRExpr_Const(IRConst_U64(i));
688 static IRExpr* mkU ( IRType ty, UInt i )
690 if (ty == Ity_I8) return mkU8(i);
691 if (ty == Ity_I16) return mkU16(i);
692 if (ty == Ity_I32) return mkU32(i);
693 /* If this panics, it usually means you passed a size (1,2,4)
694 value as the IRType, rather than a real IRType. */
698 static IRExpr* mkV128 ( UShort mask )
700 return IRExpr_Const(IRConst_V128(mask));
703 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
705 return IRExpr_Load(Iend_LE, ty, addr);
708 static IROp mkSizedOp ( IRType ty, IROp op8 )
711 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
712 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
714 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
715 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
716 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
717 || op8 == Iop_CasCmpNE8
719 adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
723 static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
725 if (szSmall == 1 && szBig == 4) {
726 return signd ? Iop_8Sto32 : Iop_8Uto32;
728 if (szSmall == 1 && szBig == 2) {
729 return signd ? Iop_8Sto16 : Iop_8Uto16;
731 if (szSmall == 2 && szBig == 4) {
732 return signd ? Iop_16Sto32 : Iop_16Uto32;
734 vpanic("mkWidenOp(x86,guest)");
737 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
739 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
740 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
741 return unop(Iop_32to1,
744 unop(Iop_1Uto32,y)));
747 /* Generate a compare-and-swap operation, operating on memory at
748 'addr'. The expected value is 'expVal' and the new value is
749 'newVal'. If the operation fails, then transfer control (with a
750 no-redir jump (XXX no -- see comment at top of this file)) to
751 'restart_point', which is presumably the address of the guest
752 instruction again -- retrying, essentially. */
753 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
754 Addr32 restart_point )
757 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
758 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
759 IRTemp oldTmp = newTemp(tyE);
760 IRTemp expTmp = newTemp(tyE);
762 vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
763 assign(expTmp, expVal);
764 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
765 NULL, mkexpr(expTmp), NULL, newVal );
766 stmt( IRStmt_CAS(cas) );
768 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
769 mkexpr(oldTmp), mkexpr(expTmp) ),
770 Ijk_Boring, /*Ijk_NoRedir*/
771 IRConst_U32( restart_point )
776 /*------------------------------------------------------------*/
777 /*--- Helpers for %eflags. ---*/
778 /*------------------------------------------------------------*/
780 /* -------------- Evaluating the flags-thunk. -------------- */
782 /* Build IR to calculate all the eflags from stored
783 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
785 static IRExpr* mk_x86g_calculate_eflags_all ( void )
788 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
789 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
790 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
791 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
796 "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
799 /* Exclude OP and NDEP from definedness checking. We're only
800 interested in DEP1 and DEP2. */
801 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
805 /* Build IR to calculate some particular condition from stored
806 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
808 static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
811 = mkIRExprVec_5( mkU32(cond),
812 IRExpr_Get(OFFB_CC_OP, Ity_I32),
813 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
814 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
815 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
820 "x86g_calculate_condition", &x86g_calculate_condition,
823 /* Exclude the requested condition, OP and NDEP from definedness
824 checking. We're only interested in DEP1 and DEP2. */
825 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
826 return unop(Iop_32to1, call);
829 /* Build IR to calculate just the carry flag from stored
830 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */
831 static IRExpr* mk_x86g_calculate_eflags_c ( void )
834 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
835 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
836 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
837 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
842 "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
845 /* Exclude OP and NDEP from definedness checking. We're only
846 interested in DEP1 and DEP2. */
847 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
852 /* -------------- Building the flags-thunk. -------------- */
854 /* The machinery in this section builds the flag-thunk following a
855 flag-setting operation. Hence the various setFlags_* functions.
858 static Bool isAddSub ( IROp op8 )
860 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
863 static Bool isLogic ( IROp op8 )
865 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
868 /* U-widen 8/16/32 bit int expr to 32. */
869 static IRExpr* widenUto32 ( IRExpr* e )
871 switch (typeOfIRExpr(irsb->tyenv,e)) {
872 case Ity_I32: return e;
873 case Ity_I16: return unop(Iop_16Uto32,e);
874 case Ity_I8: return unop(Iop_8Uto32,e);
875 default: vpanic("widenUto32");
879 /* S-widen 8/16/32 bit int expr to 32. */
880 static IRExpr* widenSto32 ( IRExpr* e )
882 switch (typeOfIRExpr(irsb->tyenv,e)) {
883 case Ity_I32: return e;
884 case Ity_I16: return unop(Iop_16Sto32,e);
885 case Ity_I8: return unop(Iop_8Sto32,e);
886 default: vpanic("widenSto32");
890 /* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some
891 of these combinations make sense. */
892 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
894 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
895 if (src_ty == dst_ty)
897 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
898 return unop(Iop_32to16, e);
899 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
900 return unop(Iop_32to8, e);
902 vex_printf("\nsrc, dst tys are: ");
907 vpanic("narrowTo(x86)");
911 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
912 auto-sized up to the real op. */
915 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
917 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
919 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
922 case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
923 case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
924 default: ppIROp(op8);
925 vpanic("setFlags_DEP1_DEP2(x86)");
927 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
928 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
929 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
930 /* Set NDEP even though it isn't used. This makes redundant-PUT
931 elimination of previous stores to this field work better. */
932 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
936 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
939 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
941 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
943 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
948 case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
949 default: ppIROp(op8);
950 vpanic("setFlags_DEP1(x86)");
952 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
953 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
954 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
955 /* Set NDEP even though it isn't used. This makes redundant-PUT
956 elimination of previous stores to this field work better. */
957 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
961 /* For shift operations, we put in the result and the undershifted
962 result. Except if the shift amount is zero, the thunk is left
965 static void setFlags_DEP1_DEP2_shift ( IROp op32,
971 Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);
973 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
976 /* Both kinds of right shifts are handled by the same thunk
980 case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
981 case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
982 default: ppIROp(op32);
983 vpanic("setFlags_DEP1_DEP2_shift(x86)");
986 /* DEP1 contains the result, DEP2 contains the undershifted value. */
987 stmt( IRStmt_Put( OFFB_CC_OP,
988 IRExpr_Mux0X( mkexpr(guard),
989 IRExpr_Get(OFFB_CC_OP,Ity_I32),
991 stmt( IRStmt_Put( OFFB_CC_DEP1,
992 IRExpr_Mux0X( mkexpr(guard),
993 IRExpr_Get(OFFB_CC_DEP1,Ity_I32),
994 widenUto32(mkexpr(res)))) );
995 stmt( IRStmt_Put( OFFB_CC_DEP2,
996 IRExpr_Mux0X( mkexpr(guard),
997 IRExpr_Get(OFFB_CC_DEP2,Ity_I32),
998 widenUto32(mkexpr(resUS)))) );
999 /* Set NDEP even though it isn't used. This makes redundant-PUT
1000 elimination of previous stores to this field work better. */
1001 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
1005 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1006 the former value of the carry flag, which unfortunately we have to
1009 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1011 Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;
1013 ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
1014 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
1016 /* This has to come first, because calculating the C flag
1017 may require reading all four thunk fields. */
1018 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
1019 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
1020 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
1021 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
1025 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1029 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
1033 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
1036 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
1039 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
1042 vpanic("setFlags_MUL(x86)");
1044 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
1045 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
1046 /* Set NDEP even though it isn't used. This makes redundant-PUT
1047 elimination of previous stores to this field work better. */
1048 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
1052 /* -------------- Condition codes. -------------- */
1054 /* Condition codes, using the Intel encoding. */
1056 static HChar* name_X86Condcode ( X86Condcode cond )
1059 case X86CondO: return "o";
1060 case X86CondNO: return "no";
1061 case X86CondB: return "b";
1062 case X86CondNB: return "nb";
1063 case X86CondZ: return "z";
1064 case X86CondNZ: return "nz";
1065 case X86CondBE: return "be";
1066 case X86CondNBE: return "nbe";
1067 case X86CondS: return "s";
1068 case X86CondNS: return "ns";
1069 case X86CondP: return "p";
1070 case X86CondNP: return "np";
1071 case X86CondL: return "l";
1072 case X86CondNL: return "nl";
1073 case X86CondLE: return "le";
1074 case X86CondNLE: return "nle";
1075 case X86CondAlways: return "ALWAYS";
1076 default: vpanic("name_X86Condcode");
1081 X86Condcode positiveIse_X86Condcode ( X86Condcode cond,
1084 vassert(cond >= X86CondO && cond <= X86CondNLE);
1089 *needInvert = False;
1095 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1097 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1100 Optionally, generate a store for the 'tres' value. This can either
1101 be a normal store, or it can be a cas-with-possible-failure style
1104 if taddr is IRTemp_INVALID, then no store is generated.
1106 if taddr is not IRTemp_INVALID, then a store (using taddr as
1107 the address) is generated:
1109 if texpVal is IRTemp_INVALID then a normal store is
1110 generated, and restart_point must be zero (it is irrelevant).
1112 if texpVal is not IRTemp_INVALID then a cas-style store is
1113 generated. texpVal is the expected value, restart_point
1114 is the restart point if the store fails, and texpVal must
1115 have the same type as tres.
/* Compute tres = ta1 + ta2 + old_carry at size 'sz' (1/2/4), set the
   ADC flags thunk, and optionally store the result (plain store or
   cas-with-restart; the contract is described in the comment block
   preceding this function).
   NOTE(review): listing is gappy — the 'UInt thunkOp;' declaration,
   the else-branch keyword before line 1152, and closing braces are
   elided here. */
1117 static void helper_ADC ( Int sz,
1118 IRTemp tres, IRTemp ta1, IRTemp ta2,
1119 /* info about optional store: */
1120 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1123 IRType ty = szToITy(sz);
1124 IRTemp oldc = newTemp(Ity_I32);
1125 IRTemp oldcn = newTemp(ty);
1126 IROp plus = mkSizedOp(ty, Iop_Add8);
1127 IROp xor = mkSizedOp(ty, Iop_Xor8);
1129 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1130 vassert(sz == 1 || sz == 2 || sz == 4);
1131 thunkOp = sz==4 ? X86G_CC_OP_ADCL
1132 : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);
1134 /* oldc = old carry flag, 0 or 1 */
1135 assign( oldc, binop(Iop_And32,
1136 mk_x86g_calculate_eflags_c(),
1139 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
1141 assign( tres, binop(plus,
1142 binop(plus,mkexpr(ta1),mkexpr(ta2)),
1145 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1146 start of this function. */
1147 if (taddr != IRTemp_INVALID) {
1148 if (texpVal == IRTemp_INVALID) {
1149 vassert(restart_point == 0);
1150 storeLE( mkexpr(taddr), mkexpr(tres) );
1152 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1153 /* .. and hence 'texpVal' has the same type as 'tres'. */
1154 casLE( mkexpr(taddr),
1155 mkexpr(texpVal), mkexpr(tres), restart_point );
/* Flags thunk: DEP2 = ta2 XOR oldcn folds the old carry into the
   second dependency; NDEP preserves the raw old carry value. */
1159 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
1160 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
1161 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
1162 mkexpr(oldcn)) )) );
1163 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1167 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
1168 appropriately. As with helper_ADC, possibly generate a store of
1169 the result -- see comments on helper_ADC for details.
/* Compute tres = ta1 - ta2 - old_carry at size 'sz' (1/2/4), set the
   SBB flags thunk, and optionally store the result (plain store or
   cas-with-restart), mirroring helper_ADC.
   NOTE(review): listing is gappy — the 'UInt thunkOp;' declaration,
   the else-branch keyword before line 1206, and closing braces are
   elided here. */
1171 static void helper_SBB ( Int sz,
1172 IRTemp tres, IRTemp ta1, IRTemp ta2,
1173 /* info about optional store: */
1174 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1177 IRType ty = szToITy(sz);
1178 IRTemp oldc = newTemp(Ity_I32);
1179 IRTemp oldcn = newTemp(ty);
1180 IROp minus = mkSizedOp(ty, Iop_Sub8);
1181 IROp xor = mkSizedOp(ty, Iop_Xor8);
1183 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1184 vassert(sz == 1 || sz == 2 || sz == 4);
1185 thunkOp = sz==4 ? X86G_CC_OP_SBBL
1186 : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);
1188 /* oldc = old carry flag, 0 or 1 */
1189 assign( oldc, binop(Iop_And32,
1190 mk_x86g_calculate_eflags_c(),
1193 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
1195 assign( tres, binop(minus,
1196 binop(minus,mkexpr(ta1),mkexpr(ta2)),
1199 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1200 start of this function. */
1201 if (taddr != IRTemp_INVALID) {
1202 if (texpVal == IRTemp_INVALID) {
1203 vassert(restart_point == 0);
1204 storeLE( mkexpr(taddr), mkexpr(tres) );
1206 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1207 /* .. and hence 'texpVal' has the same type as 'tres'. */
1208 casLE( mkexpr(taddr),
1209 mkexpr(texpVal), mkexpr(tres), restart_point );
/* Flags thunk: DEP2 = ta2 XOR oldcn folds the old borrow into the
   second dependency; NDEP preserves the raw old carry value. */
1213 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
1214 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
1215 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
1216 mkexpr(oldcn)) )) );
1217 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1221 /* -------------- Helpers for disassembly printing. -------------- */
1223 static HChar* nameGrp1 ( Int opc_aux )
1225 static HChar* grp1_names[8]
1226 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1227 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
1228 return grp1_names[opc_aux];
1231 static HChar* nameGrp2 ( Int opc_aux )
1233 static HChar* grp2_names[8]
1234 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1235 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
1236 return grp2_names[opc_aux];
1239 static HChar* nameGrp4 ( Int opc_aux )
1241 static HChar* grp4_names[8]
1242 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1243 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
1244 return grp4_names[opc_aux];
1247 static HChar* nameGrp5 ( Int opc_aux )
1249 static HChar* grp5_names[8]
1250 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1251 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
1252 return grp5_names[opc_aux];
1255 static HChar* nameGrp8 ( Int opc_aux )
1257 static HChar* grp8_names[8]
1258 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
1259 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
1260 return grp8_names[opc_aux];
1263 static HChar* nameIReg ( Int size, Int reg )
1265 static HChar* ireg32_names[8]
1266 = { "%eax", "%ecx", "%edx", "%ebx",
1267 "%esp", "%ebp", "%esi", "%edi" };
1268 static HChar* ireg16_names[8]
1269 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
1270 static HChar* ireg8_names[8]
1271 = { "%al", "%cl", "%dl", "%bl",
1272 "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
1273 if (reg < 0 || reg > 7) goto bad;
1275 case 4: return ireg32_names[reg];
1276 case 2: return ireg16_names[reg];
1277 case 1: return ireg8_names[reg];
1280 vpanic("nameIReg(X86)");
1281 return NULL; /*notreached*/
1284 static HChar* nameSReg ( UInt sreg )
1287 case R_ES: return "%es";
1288 case R_CS: return "%cs";
1289 case R_SS: return "%ss";
1290 case R_DS: return "%ds";
1291 case R_FS: return "%fs";
1292 case R_GS: return "%gs";
1293 default: vpanic("nameSReg(x86)");
1297 static HChar* nameMMXReg ( Int mmxreg )
1299 static HChar* mmx_names[8]
1300 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
1301 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
1302 return mmx_names[mmxreg];
1305 static HChar* nameXMMReg ( Int xmmreg )
1307 static HChar* xmm_names[8]
1308 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
1309 "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
1310 if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
1311 return xmm_names[xmmreg];
/* Return the granularity suffix for an MMX operation — presumably
   "b"/"w"/"d"/"q" for gran 0..3, but the switch cases are elided in
   this listing; confirm against the full source. */
1314 static HChar* nameMMXGran ( Int gran )
1321 default: vpanic("nameMMXGran(x86,guest)");
/* Return the single-character AT&T size suffix for an operand size —
   presumably 'b'/'w'/'l' for 1/2/4 bytes, but the switch cases are
   elided in this listing; confirm against the full source. */
1325 static HChar nameISize ( Int size )
1331 default: vpanic("nameISize(x86)");
1336 /*------------------------------------------------------------*/
1337 /*--- JMP helpers ---*/
1338 /*------------------------------------------------------------*/
1340 static void jmp_lit( IRJumpKind kind, Addr32 d32 )
1342 irsb->next = mkU32(d32);
1343 irsb->jumpkind = kind;
1346 static void jmp_treg( IRJumpKind kind, IRTemp t )
1348 irsb->next = mkexpr(t);
1349 irsb->jumpkind = kind;
1353 void jcc_01( X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
1356 X86Condcode condPos;
1357 condPos = positiveIse_X86Condcode ( cond, &invert );
1359 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1361 IRConst_U32(d32_false) ) );
1362 irsb->next = mkU32(d32_true);
1363 irsb->jumpkind = Ijk_Boring;
1365 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1367 IRConst_U32(d32_true) ) );
1368 irsb->next = mkU32(d32_false);
1369 irsb->jumpkind = Ijk_Boring;
1374 /*------------------------------------------------------------*/
1375 /*--- Disassembling addressing modes ---*/
1376 /*------------------------------------------------------------*/
1379 HChar* sorbTxt ( UChar sorb )
1382 case 0: return ""; /* no override */
1383 case 0x3E: return "%ds";
1384 case 0x26: return "%es:";
1385 case 0x64: return "%fs:";
1386 case 0x65: return "%gs:";
1387 default: vpanic("sorbTxt(x86,guest)");
/* Convert a virtual address to a linear address by applying any
   segment override: the selected segment register's selector is fed,
   together with the guest LDT/GDT base pointers, to the C helper
   x86g_use_seg_selector; a failed translation (non-zero high half of
   the 64-bit result) raises a side exit at the current instruction.
   NOTE(review): this listing is gappy — the early-return for sorb==0,
   the switch header, and the IRExpr_CCall/IRStmt_Exit scaffolding
   lines are elided here. */
1392 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
1393 linear address by adding any required segment override as indicated
1396 IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual )
1400 IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
1403 /* the common case - no override */
1407 case 0x3E: sreg = R_DS; break;
1408 case 0x26: sreg = R_ES; break;
1409 case 0x64: sreg = R_FS; break;
1410 case 0x65: sreg = R_GS; break;
1411 default: vpanic("handleSegOverride(x86,guest)");
1414 hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
1416 seg_selector = newTemp(Ity_I32);
1417 ldt_ptr = newTemp(hWordTy);
1418 gdt_ptr = newTemp(hWordTy);
1419 r64 = newTemp(Ity_I64);
1421 assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
1422 assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
1423 assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
1426 Call this to do the translation and limit checks:
1427 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
1428 UInt seg_selector, UInt virtual_addr )
1435 "x86g_use_seg_selector",
1436 &x86g_use_seg_selector,
1437 mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
1438 mkexpr(seg_selector), virtual)
1442 /* If the high 32 of the result are non-zero, there was a
1443 failure in address translation. In which case, make a
1448 binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
1450 IRConst_U32( guest_EIP_curr_instr )
1454 /* otherwise, here's the translated result. */
1455 return unop(Iop_64to32, mkexpr(r64));
1459 /* Generate IR to calculate an address indicated by a ModRM and
1460 following SIB bytes. The expression, and the number of bytes in
1461 the address mode, are returned. Note that this fn should not be
1462 called if the R/M part of the address denotes a register instead of
1463 memory. If print_codegen is true, text of the addressing mode is
1466 The computed address is stored in a new tempreg, and the
1467 identity of the tempreg is returned. */
1469 static IRTemp disAMode_copy2tmp ( IRExpr* addr32 )
1471 IRTemp tmp = newTemp(Ity_I32);
1472 assign( tmp, addr32 );
/* Decode the ModRM (+ optional SIB) addressing mode starting at
   'delta': emits IR that computes the effective, segment-adjusted
   address into a new temp and returns that temp; writes the mode's
   insn-stream byte count to *len and its printable form to 'buf'.
   NOTE(review): this listing is gappy — various brace, *len store,
   and return lines are elided throughout. */
1477 IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf )
1479 UChar mod_reg_rm = getIByte(delta);
1484 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1485 jump table seems a bit excessive.
1487 mod_reg_rm &= 0xC7; /* is now XX000YYY */
1488 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
1489 /* is now XX0XXYYY */
1490 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
1491 switch (mod_reg_rm) {
1493 /* (%eax) .. (%edi), not including (%esp) or (%ebp).
1496 case 0x00: case 0x01: case 0x02: case 0x03:
1497 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1498 { UChar rm = mod_reg_rm;
1499 DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
1501 return disAMode_copy2tmp(
1502 handleSegOverride(sorb, getIReg(4,rm)));
1505 /* d8(%eax) ... d8(%edi), not including d8(%esp)
1506 --> GET %reg, t ; ADDL d8, t
1508 case 0x08: case 0x09: case 0x0A: case 0x0B:
1509 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1510 { UChar rm = toUChar(mod_reg_rm & 7);
1511 UInt d = getSDisp8(delta);
1512 DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
1514 return disAMode_copy2tmp(
1515 handleSegOverride(sorb,
1516 binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
1519 /* d32(%eax) ... d32(%edi), not including d32(%esp)
1520 --> GET %reg, t ; ADDL d8, t
1522 case 0x10: case 0x11: case 0x12: case 0x13:
1523 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1524 { UChar rm = toUChar(mod_reg_rm & 7);
1525 UInt d = getUDisp32(delta);
1526 DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
1528 return disAMode_copy2tmp(
1529 handleSegOverride(sorb,
1530 binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
1533 /* a register, %eax .. %edi. This shouldn't happen. */
1534 case 0x18: case 0x19: case 0x1A: case 0x1B:
1535 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1536 vpanic("disAMode(x86): not an addr!");
1538 /* a 32-bit literal address
1542 { UInt d = getUDisp32(delta);
1544 DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
1545 return disAMode_copy2tmp(
1546 handleSegOverride(sorb, mkU32(d)));
1550 /* SIB, with no displacement. Special cases:
1551 -- %esp cannot act as an index value.
1552 If index_r indicates %esp, zero is used for the index.
1553 -- when mod is zero and base indicates EBP, base is instead
1555 It's all madness, I tell you. Extract %index, %base and
1556 scale from the SIB byte. The value denoted is then:
1557 | %index == %ESP && %base == %EBP
1558 = d32 following SIB byte
1559 | %index == %ESP && %base != %EBP
1561 | %index != %ESP && %base == %EBP
1562 = d32 following SIB byte + (%index << scale)
1563 | %index != %ESP && %base != %EBP
1564 = %base + (%index << scale)
1566 What happens to the souls of CPU architects who dream up such
1567 horrendous schemes, do you suppose?
1569 UChar sib = getIByte(delta);
1570 UChar scale = toUChar((sib >> 6) & 3);
1571 UChar index_r = toUChar((sib >> 3) & 7);
1572 UChar base_r = toUChar(sib & 7);
1575 if (index_r != R_ESP && base_r != R_EBP) {
1576 DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
1577 nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
1581 handleSegOverride(sorb,
1584 binop(Iop_Shl32, getIReg(4,index_r),
1588 if (index_r != R_ESP && base_r == R_EBP) {
1589 UInt d = getUDisp32(delta);
1590 DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
1591 nameIReg(4,index_r), 1<<scale);
1595 handleSegOverride(sorb,
1597 binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
1601 if (index_r == R_ESP && base_r != R_EBP) {
1602 DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
1604 return disAMode_copy2tmp(
1605 handleSegOverride(sorb, getIReg(4,base_r)));
1608 if (index_r == R_ESP && base_r == R_EBP) {
1609 UInt d = getUDisp32(delta);
1610 DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
1612 return disAMode_copy2tmp(
1613 handleSegOverride(sorb, mkU32(d)));
1619 /* SIB, with 8-bit displacement. Special cases:
1620 -- %esp cannot act as an index value.
1621 If index_r indicates %esp, zero is used for the index.
1626 = d8 + %base + (%index << scale)
1629 UChar sib = getIByte(delta);
1630 UChar scale = toUChar((sib >> 6) & 3);
1631 UChar index_r = toUChar((sib >> 3) & 7);
1632 UChar base_r = toUChar(sib & 7);
1633 UInt d = getSDisp8(delta+1);
1635 if (index_r == R_ESP) {
1636 DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
1637 (Int)d, nameIReg(4,base_r));
1639 return disAMode_copy2tmp(
1640 handleSegOverride(sorb,
1641 binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
1643 DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
1644 nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
1648 handleSegOverride(sorb,
1653 getIReg(4,index_r), mkU8(scale))),
1660 /* SIB, with 32-bit displacement. Special cases:
1661 -- %esp cannot act as an index value.
1662 If index_r indicates %esp, zero is used for the index.
1667 = d32 + %base + (%index << scale)
1670 UChar sib = getIByte(delta);
1671 UChar scale = toUChar((sib >> 6) & 3);
1672 UChar index_r = toUChar((sib >> 3) & 7);
1673 UChar base_r = toUChar(sib & 7);
1674 UInt d = getUDisp32(delta+1);
1676 if (index_r == R_ESP) {
1677 DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
1678 (Int)d, nameIReg(4,base_r));
1680 return disAMode_copy2tmp(
1681 handleSegOverride(sorb,
1682 binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
1684 DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
1685 nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
1689 handleSegOverride(sorb,
1694 getIReg(4,index_r), mkU8(scale))),
1702 vpanic("disAMode(x86)");
1703 return 0; /*notreached*/
1708 /* Figure out the number of (insn-stream) bytes constituting the amode
1709 beginning at delta. Is useful for getting hold of literals beyond
1710 the end of the amode before it has been disassembled. */
/* Return the number of insn-stream bytes occupied by the addressing
   mode at 'delta', without emitting any IR — used to reach literals
   that follow the amode before it is disassembled.
   NOTE(review): this listing is gappy — the 'return 1/2/5' lines for
   the first four case groups and several braces are elided here. */
1712 static UInt lengthAMode ( Int delta )
1714 UChar mod_reg_rm = getIByte(delta); delta++;
1716 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1717 jump table seems a bit excessive.
1719 mod_reg_rm &= 0xC7; /* is now XX000YYY */
1720 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
1721 /* is now XX0XXYYY */
1722 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
1723 switch (mod_reg_rm) {
1725 /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
1726 case 0x00: case 0x01: case 0x02: case 0x03:
1727 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1730 /* d8(%eax) ... d8(%edi), not including d8(%esp). */
1731 case 0x08: case 0x09: case 0x0A: case 0x0B:
1732 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1735 /* d32(%eax) ... d32(%edi), not including d32(%esp). */
1736 case 0x10: case 0x11: case 0x12: case 0x13:
1737 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1740 /* a register, %eax .. %edi. (Not an addr, but still handled.) */
1741 case 0x18: case 0x19: case 0x1A: case 0x1B:
1742 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1745 /* a 32-bit literal address. */
1746 case 0x05: return 5;
1748 /* SIB, no displacement. */
1750 UChar sib = getIByte(delta);
1751 UChar base_r = toUChar(sib & 7);
/* mod==0 with base==EBP means a d32 follows the SIB byte. */
1752 if (base_r == R_EBP) return 6; else return 2;
1754 /* SIB, with 8-bit displacement. */
1755 case 0x0C: return 3;
1757 /* SIB, with 32-bit displacement. */
1758 case 0x14: return 6;
1761 vpanic("lengthAMode");
1762 return 0; /*notreached*/
1766 /*------------------------------------------------------------*/
1767 /*--- Disassembling common idioms ---*/
1768 /*------------------------------------------------------------*/
1770 /* Handle binary integer instructions of the form
1773 Is passed a ptr to the modRM byte, the actual operation, and the
1774 data size. Returns the address advanced completely over this
1777 E(src) is reg-or-mem
1780 If E is reg, --> GET %G, tmp
1784 If E is mem and OP is not reversible,
1785 --> (getAddr E) -> tmpa
1791 If E is mem and OP is reversible
1792 --> (getAddr E) -> tmpa
/* Disassemble "OP E,G" (reg-or-mem source into register destination):
   fetches src from E, dst0 from G, computes dst1 and writes it back
   to G (except for CMP-style ops), and sets the flags thunk.  ADC/SBB
   are routed through helper_ADC/helper_SBB.  Returns delta advanced
   over the whole instruction.
   NOTE(review): listing is gappy — else-branch keywords, the
   'keep' guard around the writeback, and return statements are
   elided here. */
1798 UInt dis_op2_E_G ( UChar sorb,
1808 IRType ty = szToITy(size);
1809 IRTemp dst1 = newTemp(ty);
1810 IRTemp src = newTemp(ty);
1811 IRTemp dst0 = newTemp(ty);
1812 UChar rm = getUChar(delta0);
1813 IRTemp addr = IRTemp_INVALID;
1815 /* addSubCarry == True indicates the intended operation is
1816 add-with-carry or subtract-with-borrow. */
1818 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
1822 if (epartIsReg(rm)) {
1823 /* Specially handle XOR reg,reg, because that doesn't really
1824 depend on reg, and doing the obvious thing potentially
1825 generates a spurious value check failure due to the bogus
1826 dependency. Ditto SBB reg,reg. */
1827 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
1828 && gregOfRM(rm) == eregOfRM(rm)) {
1829 putIReg(size, gregOfRM(rm), mkU(ty,0));
1831 assign( dst0, getIReg(size,gregOfRM(rm)) );
1832 assign( src, getIReg(size,eregOfRM(rm)) );
1834 if (addSubCarry && op8 == Iop_Add8) {
1835 helper_ADC( size, dst1, dst0, src,
1836 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1837 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1839 if (addSubCarry && op8 == Iop_Sub8) {
1840 helper_SBB( size, dst1, dst0, src,
1841 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1842 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1844 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
1846 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1848 setFlags_DEP1(op8, dst1, ty);
1850 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1853 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1854 nameIReg(size,eregOfRM(rm)),
1855 nameIReg(size,gregOfRM(rm)));
1858 /* E refers to memory */
1859 addr = disAMode ( &len, sorb, delta0, dis_buf);
1860 assign( dst0, getIReg(size,gregOfRM(rm)) );
1861 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
1863 if (addSubCarry && op8 == Iop_Add8) {
1864 helper_ADC( size, dst1, dst0, src,
1865 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1866 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1868 if (addSubCarry && op8 == Iop_Sub8) {
1869 helper_SBB( size, dst1, dst0, src,
1870 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1871 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1873 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
1875 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1877 setFlags_DEP1(op8, dst1, ty);
1879 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1882 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1883 dis_buf,nameIReg(size,gregOfRM(rm)));
1890 /* Handle binary integer instructions of the form
1893 Is passed a ptr to the modRM byte, the actual operation, and the
1894 data size. Returns the address advanced completely over this
1898 E(dst) is reg-or-mem
1900 If E is reg, --> GET %E, tmp
1904 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble "OP G,E" (register source into reg-or-mem destination).
   For a memory destination the writeback is either a plain store or,
   under LOCK, a cas-style store (casLE / helper_ADC-SBB with expVal)
   that restarts the instruction on failure.  Returns delta advanced
   over the whole instruction.
   NOTE(review): listing is gappy — else-branch keywords, the 'locked'
   condition lines, the 'keep' guard, and return statements are elided
   here. */
1910 UInt dis_op2_G_E ( UChar sorb,
1921 IRType ty = szToITy(size);
1922 IRTemp dst1 = newTemp(ty);
1923 IRTemp src = newTemp(ty);
1924 IRTemp dst0 = newTemp(ty);
1925 UChar rm = getIByte(delta0);
1926 IRTemp addr = IRTemp_INVALID;
1928 /* addSubCarry == True indicates the intended operation is
1929 add-with-carry or subtract-with-borrow. */
1931 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
1935 if (epartIsReg(rm)) {
1936 /* Specially handle XOR reg,reg, because that doesn't really
1937 depend on reg, and doing the obvious thing potentially
1938 generates a spurious value check failure due to the bogus
1939 dependency. Ditto SBB reg,reg.*/
1940 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
1941 && gregOfRM(rm) == eregOfRM(rm)) {
1942 putIReg(size, eregOfRM(rm), mkU(ty,0));
1944 assign(dst0, getIReg(size,eregOfRM(rm)));
1945 assign(src, getIReg(size,gregOfRM(rm)));
1947 if (addSubCarry && op8 == Iop_Add8) {
1948 helper_ADC( size, dst1, dst0, src,
1949 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1950 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1952 if (addSubCarry && op8 == Iop_Sub8) {
1953 helper_SBB( size, dst1, dst0, src,
1954 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1955 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1957 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
1959 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1961 setFlags_DEP1(op8, dst1, ty);
1963 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1966 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1967 nameIReg(size,gregOfRM(rm)),
1968 nameIReg(size,eregOfRM(rm)));
1972 /* E refers to memory */
1974 addr = disAMode ( &len, sorb, delta0, dis_buf);
1975 assign(dst0, loadLE(ty,mkexpr(addr)));
1976 assign(src, getIReg(size,gregOfRM(rm)));
1978 if (addSubCarry && op8 == Iop_Add8) {
1980 /* cas-style store */
1981 helper_ADC( size, dst1, dst0, src,
1982 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
1985 helper_ADC( size, dst1, dst0, src,
1986 /*store*/addr, IRTemp_INVALID, 0 );
1989 if (addSubCarry && op8 == Iop_Sub8) {
1991 /* cas-style store */
1992 helper_SBB( size, dst1, dst0, src,
1993 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
1996 helper_SBB( size, dst1, dst0, src,
1997 /*store*/addr, IRTemp_INVALID, 0 );
2000 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2003 if (0) vex_printf("locked case\n" );
2004 casLE( mkexpr(addr),
2005 mkexpr(dst0)/*expval*/,
2006 mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
2008 if (0) vex_printf("nonlocked case\n");
2009 storeLE(mkexpr(addr), mkexpr(dst1));
2013 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2015 setFlags_DEP1(op8, dst1, ty);
2018 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
2019 nameIReg(size,gregOfRM(rm)), dis_buf);
2025 /* Handle move instructions of the form
2028 Is passed a ptr to the modRM byte, and the data size. Returns
2029 the address advanced completely over this instruction.
2031 E(src) is reg-or-mem
2034 If E is reg, --> GET %E, tmpv
2037 If E is mem --> (getAddr E) -> tmpa
/* Disassemble "MOV E,G": copy reg-or-mem source E into register G.
   No flags are touched.  Returns delta advanced over the instruction.
   NOTE(review): listing is gappy — the remaining parameters, local
   declarations, and return statements are elided here. */
2042 UInt dis_mov_E_G ( UChar sorb,
2047 UChar rm = getIByte(delta0);
2050 if (epartIsReg(rm)) {
2051 putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
2052 DIP("mov%c %s,%s\n", nameISize(size),
2053 nameIReg(size,eregOfRM(rm)),
2054 nameIReg(size,gregOfRM(rm)));
2058 /* E refers to memory */
2060 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
2061 putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
2062 DIP("mov%c %s,%s\n", nameISize(size),
2063 dis_buf,nameIReg(size,gregOfRM(rm)));
2069 /* Handle move instructions of the form
2072 Is passed a ptr to the modRM byte, and the data size. Returns
2073 the address advanced completely over this instruction.
2076 E(dst) is reg-or-mem
2078 If E is reg, --> GET %G, tmp
2081 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble "MOV G,E": copy register G into reg-or-mem destination
   E.  No flags are touched.  Returns delta advanced over the
   instruction.  NOTE(review): listing is gappy — the remaining
   parameters, local declarations, and return statements are elided
   here. */
2086 UInt dis_mov_G_E ( UChar sorb,
2091 UChar rm = getIByte(delta0);
2094 if (epartIsReg(rm)) {
2095 putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
2096 DIP("mov%c %s,%s\n", nameISize(size),
2097 nameIReg(size,gregOfRM(rm)),
2098 nameIReg(size,eregOfRM(rm)));
2102 /* E refers to memory */
2104 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
2105 storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
2106 DIP("mov%c %s,%s\n", nameISize(size),
2107 nameIReg(size,gregOfRM(rm)), dis_buf);
2113 /* op $immediate, AL/AX/EAX. */
/* Disassemble "OP $imm, AL/AX/EAX": apply op8 (size-adjusted) with an
   immediate source to the accumulator, set the flags thunk, and write
   the result back (except for CMP-style ops — NOTE(review): the
   'keep' guard is elided in this listing).  ADC/SBB go through
   helper_ADC/helper_SBB.  Returns delta advanced over the
   instruction. */
2115 UInt dis_op_imm_A ( Int size,
2122 IRType ty = szToITy(size);
2123 IRTemp dst0 = newTemp(ty);
2124 IRTemp src = newTemp(ty);
2125 IRTemp dst1 = newTemp(ty);
2126 UInt lit = getUDisp(size,delta);
2127 assign(dst0, getIReg(size,R_EAX));
2128 assign(src, mkU(ty,lit));
2130 if (isAddSub(op8) && !carrying) {
2131 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2132 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2137 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2138 setFlags_DEP1(op8, dst1, ty);
2141 if (op8 == Iop_Add8 && carrying) {
2142 helper_ADC( size, dst1, dst0, src,
2143 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2146 if (op8 == Iop_Sub8 && carrying) {
2147 helper_SBB( size, dst1, dst0, src,
2148 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2151 vpanic("dis_op_imm_A(x86,guest)");
2154 putIReg(size, R_EAX, mkexpr(dst1));
2156 DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size),
2157 lit, nameIReg(size,R_EAX));
2162 /* Sign- and Zero-extending moves. */
/* Disassemble MOVSX/MOVZX: widen a szs-byte reg-or-mem source to a
   szd-byte register destination, sign- or zero-extending per
   'sign_extend'.  Returns delta advanced over the instruction.
   NOTE(review): listing is gappy — return statements are elided. */
2164 UInt dis_movx_E_G ( UChar sorb,
2165 Int delta, Int szs, Int szd, Bool sign_extend )
2167 UChar rm = getIByte(delta);
2168 if (epartIsReg(rm)) {
2169 putIReg(szd, gregOfRM(rm),
2170 unop(mkWidenOp(szs,szd,sign_extend),
2171 getIReg(szs,eregOfRM(rm))));
2172 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2173 nameISize(szs), nameISize(szd),
2174 nameIReg(szs,eregOfRM(rm)),
2175 nameIReg(szd,gregOfRM(rm)));
2179 /* E refers to memory */
2183 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf );
2185 putIReg(szd, gregOfRM(rm),
2186 unop(mkWidenOp(szs,szd,sign_extend),
2187 loadLE(szToITy(szs),mkexpr(addr))));
2188 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2189 nameISize(szs), nameISize(szd),
2190 dis_buf, nameIReg(szd,gregOfRM(rm)));
2196 /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
2197 16 / 8 bit quantity in the given IRTemp. */
/* Emit IR for DIV/IDIV: divide EDX:EAX (sz==4), DX:AX (sz==2) or AX
   (sz==1) by the sz-byte value in temp 't', using the 64/32 divmod IR
   op; quotient and remainder are written back to the appropriate
   narrow registers.  NOTE(review): listing is gappy — the switch on
   'sz' and case/brace lines are elided here. */
2199 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
2201 IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32;
2202 IRTemp src64 = newTemp(Ity_I64);
2203 IRTemp dst64 = newTemp(Ity_I64);
/* sz==4: EDX:EAX / t32 -> EAX=quot, EDX=rem */
2206 assign( src64, binop(Iop_32HLto64,
2207 getIReg(4,R_EDX), getIReg(4,R_EAX)) );
2208 assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) );
2209 putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) );
2210 putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) );
/* sz==2: DX:AX / t16, widened through 32 and 64 bits */
2213 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
2214 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
2215 assign( src64, unop(widen3264,
2217 getIReg(2,R_EDX), getIReg(2,R_EAX))) );
2218 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
2219 putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
2220 putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
/* sz==1: AX / t8 -> AL=quot, AH=rem */
2224 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
2225 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
2226 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
2227 assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) );
2229 binop(op, mkexpr(src64),
2230 unop(widen1632, unop(widen816, mkexpr(t)))) );
2231 putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16,
2232 unop(Iop_64to32,mkexpr(dst64)))) );
2233 putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16,
2234 unop(Iop_64HIto32,mkexpr(dst64)))) );
2237 default: vpanic("codegen_div(x86)");
/* Disassemble Group-1 "OP $imm, E" (immediate ALU with reg-or-mem
   destination).  /reg selects the operation; /7 is CMP, so its result
   is not written back (the 'gregOfRM(modrm) < 7' guards).  ADC/SBB go
   through helper_ADC/helper_SBB, and under LOCK the memory writeback
   is cas-style with a restart at the current instruction.
   Returns delta advanced over the instruction.
   NOTE(review): listing is gappy — else-branch keywords, the 'locked'
   condition lines, flag-setting guards, and return statements are
   elided here. */
2243 UInt dis_Grp1 ( UChar sorb, Bool locked,
2244 Int delta, UChar modrm,
2245 Int am_sz, Int d_sz, Int sz, UInt d32 )
2249 IRType ty = szToITy(sz);
2250 IRTemp dst1 = newTemp(ty);
2251 IRTemp src = newTemp(ty);
2252 IRTemp dst0 = newTemp(ty);
2253 IRTemp addr = IRTemp_INVALID;
2254 IROp op8 = Iop_INVALID;
2255 UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF);
2257 switch (gregOfRM(modrm)) {
2258 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
2259 case 2: break; // ADC
2260 case 3: break; // SBB
2261 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
2262 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
2264 default: vpanic("dis_Grp1: unhandled case");
2267 if (epartIsReg(modrm)) {
2268 vassert(am_sz == 1);
2270 assign(dst0, getIReg(sz,eregOfRM(modrm)));
2271 assign(src, mkU(ty,d32 & mask));
2273 if (gregOfRM(modrm) == 2 /* ADC */) {
2274 helper_ADC( sz, dst1, dst0, src,
2275 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2277 if (gregOfRM(modrm) == 3 /* SBB */) {
2278 helper_SBB( sz, dst1, dst0, src,
2279 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2281 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2283 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2285 setFlags_DEP1(op8, dst1, ty);
2288 if (gregOfRM(modrm) < 7)
2289 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2291 delta += (am_sz + d_sz);
2292 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32,
2293 nameIReg(sz,eregOfRM(modrm)));
2295 addr = disAMode ( &len, sorb, delta, dis_buf);
2297 assign(dst0, loadLE(ty,mkexpr(addr)));
2298 assign(src, mkU(ty,d32 & mask));
2300 if (gregOfRM(modrm) == 2 /* ADC */) {
2302 /* cas-style store */
2303 helper_ADC( sz, dst1, dst0, src,
2304 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2307 helper_ADC( sz, dst1, dst0, src,
2308 /*store*/addr, IRTemp_INVALID, 0 );
2311 if (gregOfRM(modrm) == 3 /* SBB */) {
2313 /* cas-style store */
2314 helper_SBB( sz, dst1, dst0, src,
2315 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2318 helper_SBB( sz, dst1, dst0, src,
2319 /*store*/addr, IRTemp_INVALID, 0 );
2322 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2323 if (gregOfRM(modrm) < 7) {
2325 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
2326 mkexpr(dst1)/*newVal*/,
2327 guest_EIP_curr_instr );
2329 storeLE(mkexpr(addr), mkexpr(dst1));
2333 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2335 setFlags_DEP1(op8, dst1, ty);
2338 delta += (len+d_sz);
2339 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
2346 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
2350 UInt dis_Grp2 ( UChar sorb,
2351 Int delta, UChar modrm,
2352 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
2353 HChar* shift_expr_txt, Bool* decode_OK )
2355 /* delta on entry points at the modrm byte. */
2358 Bool isShift, isRotate, isRotateC;
2359 IRType ty = szToITy(sz);
2360 IRTemp dst0 = newTemp(ty);
2361 IRTemp dst1 = newTemp(ty);
2362 IRTemp addr = IRTemp_INVALID;
2366 vassert(sz == 1 || sz == 2 || sz == 4);
2368 /* Put value to shift/rotate in dst0. */
2369 if (epartIsReg(modrm)) {
2370 assign(dst0, getIReg(sz, eregOfRM(modrm)));
2371 delta += (am_sz + d_sz);
2373 addr = disAMode ( &len, sorb, delta, dis_buf);
2374 assign(dst0, loadLE(ty,mkexpr(addr)));
2375 delta += len + d_sz;
2379 switch (gregOfRM(modrm)) { case 4: case 5: case 7: isShift = True; }
2382 switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; }
2385 switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; }
2387 if (gregOfRM(modrm) == 6) {
2392 if (!isShift && !isRotate && !isRotateC) {
2394 vpanic("dis_Grp2(Reg): unhandled case(x86)");
2398 /* call a helper; these insns are so ridiculous they do not
2400 Bool left = toBool(gregOfRM(modrm) == 2);
2401 IRTemp r64 = newTemp(Ity_I64);
2403 = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */
2404 widenUto32(shift_expr), /* rotate amount */
2405 widenUto32(mk_x86g_calculate_eflags_all()),
2407 assign( r64, mkIRExprCCall(
2410 left ? "x86g_calculate_RCL" : "x86g_calculate_RCR",
2411 left ? &x86g_calculate_RCL : &x86g_calculate_RCR,
2415 /* new eflags in hi half r64; new value in lo half r64 */
2416 assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) );
2417 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
2418 stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) ));
2419 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
2420 /* Set NDEP even though it isn't used. This makes redundant-PUT
2421 elimination of previous stores to this field work better. */
2422 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
2427 IRTemp pre32 = newTemp(Ity_I32);
2428 IRTemp res32 = newTemp(Ity_I32);
2429 IRTemp res32ss = newTemp(Ity_I32);
2430 IRTemp shift_amt = newTemp(Ity_I8);
2433 switch (gregOfRM(modrm)) {
2434 case 4: op32 = Iop_Shl32; break;
2435 case 5: op32 = Iop_Shr32; break;
2436 case 7: op32 = Iop_Sar32; break;
2438 default: vpanic("dis_Grp2:shift"); break;
2441 /* Widen the value to be shifted to 32 bits, do the shift, and
2442 narrow back down. This seems surprisingly long-winded, but
2443 unfortunately the Intel semantics requires that 8/16-bit
2444 shifts give defined results for shift values all the way up
2445 to 31, and this seems the simplest way to do it. It has the
2446 advantage that the only IR level shifts generated are of 32
2447 bit values, and the shift amount is guaranteed to be in the
2448 range 0 .. 31, thereby observing the IR semantics requiring
2449 all shift values to be in the range 0 .. 2^word_size-1. */
2451 /* shift_amt = shift_expr & 31, regardless of operation size */
2452 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) );
2454 /* suitably widen the value to be shifted to 32 bits. */
2455 assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0))
2456 : widenUto32(mkexpr(dst0)) );
2458 /* res32 = pre32 `shift` shift_amt */
2459 assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) );
2461 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
2467 mkexpr(shift_amt), mkU8(1)),
2470 /* Build the flags thunk. */
2471 setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt);
2473 /* Narrow the result back down. */
2474 assign( dst1, narrowTo(ty, mkexpr(res32)) );
2476 } /* if (isShift) */
2480 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
2481 Bool left = toBool(gregOfRM(modrm) == 0);
2482 IRTemp rot_amt = newTemp(Ity_I8);
2483 IRTemp rot_amt32 = newTemp(Ity_I8);
2484 IRTemp oldFlags = newTemp(Ity_I32);
2486 /* rot_amt = shift_expr & mask */
2487 /* By masking the rotate amount thusly, the IR-level Shl/Shr
2488 expressions never shift beyond the word size and thus remain
2490 assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31)));
2493 assign(rot_amt, mkexpr(rot_amt32));
2495 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1)));
2499 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
2501 binop( mkSizedOp(ty,Iop_Or8),
2502 binop( mkSizedOp(ty,Iop_Shl8),
2506 binop( mkSizedOp(ty,Iop_Shr8),
2508 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
2512 ccOp += X86G_CC_OP_ROLB;
2514 } else { /* right */
2516 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
2518 binop( mkSizedOp(ty,Iop_Or8),
2519 binop( mkSizedOp(ty,Iop_Shr8),
2523 binop( mkSizedOp(ty,Iop_Shl8),
2525 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
2529 ccOp += X86G_CC_OP_RORB;
2533 /* dst1 now holds the rotated value. Build flag thunk. We
2534 need the resulting value for this, and the previous flags.
2535 Except don't set it if the rotate count is zero. */
2537 assign(oldFlags, mk_x86g_calculate_eflags_all());
2539 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
2540 stmt( IRStmt_Put( OFFB_CC_OP,
2541 IRExpr_Mux0X( mkexpr(rot_amt32),
2542 IRExpr_Get(OFFB_CC_OP,Ity_I32),
2544 stmt( IRStmt_Put( OFFB_CC_DEP1,
2545 IRExpr_Mux0X( mkexpr(rot_amt32),
2546 IRExpr_Get(OFFB_CC_DEP1,Ity_I32),
2547 widenUto32(mkexpr(dst1)))) );
2548 stmt( IRStmt_Put( OFFB_CC_DEP2,
2549 IRExpr_Mux0X( mkexpr(rot_amt32),
2550 IRExpr_Get(OFFB_CC_DEP2,Ity_I32),
2552 stmt( IRStmt_Put( OFFB_CC_NDEP,
2553 IRExpr_Mux0X( mkexpr(rot_amt32),
2554 IRExpr_Get(OFFB_CC_NDEP,Ity_I32),
2555 mkexpr(oldFlags))) );
2556 } /* if (isRotate) */
2558 /* Save result, and finish up. */
2559 if (epartIsReg(modrm)) {
2560 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2561 if (vex_traceflags & VEX_TRACE_FE) {
2563 nameGrp2(gregOfRM(modrm)), nameISize(sz) );
2565 vex_printf("%s", shift_expr_txt);
2567 ppIRExpr(shift_expr);
2568 vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm)));
2571 storeLE(mkexpr(addr), mkexpr(dst1));
2572 if (vex_traceflags & VEX_TRACE_FE) {
2574 nameGrp2(gregOfRM(modrm)), nameISize(sz) );
2576 vex_printf("%s", shift_expr_txt);
2578 ppIRExpr(shift_expr);
2579 vex_printf(", %s\n", dis_buf);
/* Decode Grp8 insns with an immediate bit number: BT/BTS/BTR/BTC
   (sub-opcodes 4..7).  src_val is the d8 bit offset, masked below to
   the operand width.  Works on a 32-bit widened copy (t2) regardless
   of sz; t2m is the modified value for the non-BT variants.
   NOTE(review): listing is elided; some lines are missing. */
2586 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
2588 UInt dis_Grp8_Imm ( UChar sorb,
2590 Int delta, UChar modrm,
2591 Int am_sz, Int sz, UInt src_val,
2594 /* src_val denotes a d8.
2595 And delta on entry points at the modrm byte. */
2597 IRType ty = szToITy(sz);
2598 IRTemp t2 = newTemp(Ity_I32);
2599 IRTemp t2m = newTemp(Ity_I32);
2600 IRTemp t_addr = IRTemp_INVALID;
2604 /* we're optimists :-) */
2607 /* Limit src_val -- the bit offset -- to something within a word.
2608 The Intel docs say that literal offsets larger than a word are
2609 masked in this way. */
2611 case 2: src_val &= 15; break;
2612 case 4: src_val &= 31; break;
2613 default: *decode_OK = False; return delta;
2616 /* Invent a mask suitable for the operation. */
2617 switch (gregOfRM(modrm)) {
/* mask is OR-ed (BTS), AND-ed (BTR) or XOR-ed (BTC) into t2 below;
   BT itself never writes, so its mask value (0) is unused. */
2618 case 4: /* BT */ mask = 0; break;
2619 case 5: /* BTS */ mask = 1 << src_val; break;
2620 case 6: /* BTR */ mask = ~(1 << src_val); break;
2621 case 7: /* BTC */ mask = 1 << src_val; break;
2622 /* If this needs to be extended, probably simplest to make a
2623 new function to handle the other cases (0 .. 3). The
2624 Intel docs do however not indicate any use for 0 .. 3, so
2625 we don't expect this to happen. */
2626 default: *decode_OK = False; return delta;
2629 /* Fetch the value to be tested and modified into t2, which is
2630 32-bits wide regardless of sz. */
2631 if (epartIsReg(modrm)) {
2632 vassert(am_sz == 1);
2633 assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) );
2634 delta += (am_sz + 1);
2635 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
2636 src_val, nameIReg(sz,eregOfRM(modrm)));
2639 t_addr = disAMode ( &len, sorb, delta, dis_buf);
2641 assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) );
2642 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
2646 /* Compute the new value into t2m, if non-BT. */
2647 switch (gregOfRM(modrm)) {
2651 assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) );
2654 assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) );
2657 assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) );
2660 /*NOTREACHED*/ /*the previous switch guards this*/
2664 /* Write the result back, if non-BT. If the CAS fails then we
2665 side-exit from the trace at this point, and so the flag state is
2666 not affected. This is of course as required. */
2667 if (gregOfRM(modrm) != 4 /* BT */) {
2668 if (epartIsReg(modrm)) {
2669 putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
/* Memory destination: a LOCK-prefixed form uses casLE, the plain form
   a simple store; the selecting branch is elided in this listing. */
2672 casLE( mkexpr(t_addr),
2673 narrowTo(ty, mkexpr(t2))/*expd*/,
2674 narrowTo(ty, mkexpr(t2m))/*new*/,
2675 guest_EIP_curr_instr );
2677 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
2682 /* Copy relevant bit from t2 into the carry flag. */
2683 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
2684 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
2685 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
2689 binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
2692 /* Set NDEP even though it isn't used. This makes redundant-PUT
2693 elimination of previous stores to this field work better. */
2694 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
/* MUL/IMUL one-operand form.  Multiplies EAX/AX/AL (read into t1) by
   tmp and writes the widened product back: EDX:EAX (sz==4), DX:AX
   (sz==2) or AX (sz==1).  Flags are set via setFlags_MUL with the
   SMULB/UMULB base op selected by 'syned'. */
2700 /* Signed/unsigned widening multiply. Generate IR to multiply the
2701 value in EAX/AX/AL by the given IRTemp, and park the result in
2704 static void codegen_mulL_A_D ( Int sz, Bool syned,
2705 IRTemp tmp, HChar* tmp_txt )
2707 IRType ty = szToITy(sz);
2708 IRTemp t1 = newTemp(ty);
2710 assign( t1, getIReg(sz, R_EAX) );
/* 32-bit case: 64-bit product split into EDX (hi) and EAX (lo). */
2714 IRTemp res64 = newTemp(Ity_I64);
2715 IRTemp resHi = newTemp(Ity_I32);
2716 IRTemp resLo = newTemp(Ity_I32);
2717 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
2718 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2719 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
2720 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2721 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
2722 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
2723 putIReg(4, R_EDX, mkexpr(resHi));
2724 putIReg(4, R_EAX, mkexpr(resLo));
/* 16-bit case: 32-bit product split into DX (hi) and AX (lo). */
2728 IRTemp res32 = newTemp(Ity_I32);
2729 IRTemp resHi = newTemp(Ity_I16);
2730 IRTemp resLo = newTemp(Ity_I16);
2731 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
2732 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2733 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
2734 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2735 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
2736 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
2737 putIReg(2, R_EDX, mkexpr(resHi));
2738 putIReg(2, R_EAX, mkexpr(resLo));
/* 8-bit case: whole 16-bit product goes into AX.  resHi/resLo are
   computed but not used in the lines visible here. */
2742 IRTemp res16 = newTemp(Ity_I16);
2743 IRTemp resHi = newTemp(Ity_I8);
2744 IRTemp resLo = newTemp(Ity_I8);
2745 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
2746 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2747 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
2748 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2749 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
2750 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
2751 putIReg(2, R_EAX, mkexpr(res16));
2755 vpanic("codegen_mulL_A_D(x86)");
2757 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
/* Decode a Grp3 insn: TEST-imm (0), NOT (2), NEG (3), MUL (4),
   IMUL (5), DIV (6), IDIV (7).  Sub-opcode 1 is undefined and rejected.
   A LOCK prefix is only accepted for NOT/NEG.  Register and memory
   E-operand forms are handled in the two halves below.
   NOTE(review): listing is elided; braces/else-arms are missing. */
2761 /* Group 3 extended opcodes. */
2763 UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
2770 IRType ty = szToITy(sz);
2771 IRTemp t1 = newTemp(ty);
2772 IRTemp dst1, src, dst0;
2774 *decode_OK = True; /* may change this later */
2776 modrm = getIByte(delta);
2778 if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
2779 /* LOCK prefix only allowed with not and neg subopcodes */
/* ---- E operand is a register ---- */
2784 if (epartIsReg(modrm)) {
2785 switch (gregOfRM(modrm)) {
2786 case 0: { /* TEST */
2787 delta++; d32 = getUDisp(sz, delta); delta += sz;
2789 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
2790 getIReg(sz,eregOfRM(modrm)),
2792 setFlags_DEP1( Iop_And8, dst1, ty );
2793 DIP("test%c $0x%x, %s\n", nameISize(sz), d32,
2794 nameIReg(sz, eregOfRM(modrm)));
2797 case 1: /* UNDEFINED */
2798 /* The Intel docs imply this insn is undefined and binutils
2799 agrees. Unfortunately Core 2 will run it (with who
2800 knows what result?) sandpile.org reckons it's an alias
2801 for case 0. We play safe. */
2806 putIReg(sz, eregOfRM(modrm),
2807 unop(mkSizedOp(ty,Iop_Not8),
2808 getIReg(sz, eregOfRM(modrm))));
2809 DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
/* NEG: computed as 0 - src so the standard SUB flags thunk applies. */
2816 assign(dst0, mkU(ty,0));
2817 assign(src, getIReg(sz,eregOfRM(modrm)));
2818 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
2819 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
2820 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2821 DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2823 case 4: /* MUL (unsigned widening) */
2826 assign(src, getIReg(sz,eregOfRM(modrm)));
2827 codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) );
2829 case 5: /* IMUL (signed widening) */
2832 assign(src, getIReg(sz,eregOfRM(modrm)));
2833 codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) );
2837 assign( t1, getIReg(sz, eregOfRM(modrm)) );
2838 codegen_div ( sz, t1, False );
2839 DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2843 assign( t1, getIReg(sz, eregOfRM(modrm)) );
2844 codegen_div ( sz, t1, True );
2845 DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2848 /* This can't happen - gregOfRM should return 0 .. 7 only */
2849 vpanic("Grp3(x86)");
/* ---- E operand is memory ---- */
2852 addr = disAMode ( &len, sorb, delta, dis_buf );
2855 assign(t1, loadLE(ty,mkexpr(addr)));
2856 switch (gregOfRM(modrm)) {
2857 case 0: { /* TEST */
2858 d32 = getUDisp(sz, delta); delta += sz;
2860 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
2861 mkexpr(t1), mkU(ty,d32)));
2862 setFlags_DEP1( Iop_And8, dst1, ty );
2863 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
2866 case 1: /* UNDEFINED */
2867 /* See comment above on R case */
/* NOT-on-memory: locked form uses casLE, plain form a store; the
   branch selecting between them is elided in this listing. */
2872 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
2874 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
2875 guest_EIP_curr_instr );
2877 storeLE( mkexpr(addr), mkexpr(dst1) );
2879 DIP("not%c %s\n", nameISize(sz), dis_buf);
2885 assign(dst0, mkU(ty,0));
2886 assign(src, mkexpr(t1));
2887 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
2888 mkexpr(dst0), mkexpr(src)));
2890 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
2891 guest_EIP_curr_instr );
2893 storeLE( mkexpr(addr), mkexpr(dst1) );
2895 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
2896 DIP("neg%c %s\n", nameISize(sz), dis_buf);
2899 codegen_mulL_A_D ( sz, False, t1, dis_buf );
2902 codegen_mulL_A_D ( sz, True, t1, dis_buf );
2905 codegen_div ( sz, t1, False );
2906 DIP("div%c %s\n", nameISize(sz), dis_buf);
2909 codegen_div ( sz, t1, True );
2910 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
2913 /* This can't happen - gregOfRM should return 0 .. 7 only */
2914 vpanic("Grp3(x86)");
/* Decode a Grp4 insn: INC (0) / DEC (1) on a byte-sized E operand.
   LOCK is only accepted with those two sub-opcodes.  Memory forms use
   casLE when locked (branch lines elided in this listing). */
2921 /* Group 4 extended opcodes. */
2923 UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
2929 IRTemp t1 = newTemp(ty);
2930 IRTemp t2 = newTemp(ty);
2934 modrm = getIByte(delta);
2936 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
2937 /* LOCK prefix only allowed with inc and dec subopcodes */
/* Register destination. */
2942 if (epartIsReg(modrm)) {
2943 assign(t1, getIReg(1, eregOfRM(modrm)));
2944 switch (gregOfRM(modrm)) {
2946 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
2947 putIReg(1, eregOfRM(modrm), mkexpr(t2));
2948 setFlags_INC_DEC( True, t2, ty );
2951 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
2952 putIReg(1, eregOfRM(modrm), mkexpr(t2));
2953 setFlags_INC_DEC( False, t2, ty );
2960 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)),
2961 nameIReg(1, eregOfRM(modrm)));
/* Memory destination. */
2963 IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf );
2964 assign( t1, loadLE(ty, mkexpr(addr)) );
2965 switch (gregOfRM(modrm)) {
2967 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
2969 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
2970 guest_EIP_curr_instr );
2972 storeLE( mkexpr(addr), mkexpr(t2) );
2974 setFlags_INC_DEC( True, t2, ty );
2977 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
2979 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
2980 guest_EIP_curr_instr );
2982 storeLE( mkexpr(addr), mkexpr(t2) );
2984 setFlags_INC_DEC( False, t2, ty );
2991 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf);
/* Decode a Grp5 insn: INC (0), DEC (1), CALL Ev (2), JMP Ev (4),
   PUSH Ev (6).  CALL/JMP end the block (dres->whatNext = Dis_StopHere).
   Sub-opcodes 3/5/7 (far call/jmp etc.) are not visible in this
   listing.  LOCK is only accepted for INC/DEC. */
2997 /* Group 5 extended opcodes. */
2999 UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
3000 DisResult* dres, Bool* decode_OK )
3005 IRTemp addr = IRTemp_INVALID;
3006 IRType ty = szToITy(sz);
3007 IRTemp t1 = newTemp(ty);
3008 IRTemp t2 = IRTemp_INVALID;
3012 modrm = getIByte(delta);
3014 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
3015 /* LOCK prefix only allowed with inc and dec subopcodes */
/* Register E operand. */
3020 if (epartIsReg(modrm)) {
3021 assign(t1, getIReg(sz,eregOfRM(modrm)));
3022 switch (gregOfRM(modrm)) {
3024 vassert(sz == 2 || sz == 4);
3026 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3027 mkexpr(t1), mkU(ty,1)));
3028 setFlags_INC_DEC( True, t2, ty );
3029 putIReg(sz,eregOfRM(modrm),mkexpr(t2));
3032 vassert(sz == 2 || sz == 4);
3034 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3035 mkexpr(t1), mkU(ty,1)));
3036 setFlags_INC_DEC( False, t2, ty );
3037 putIReg(sz,eregOfRM(modrm),mkexpr(t2));
/* CALL: push return address (next insn), jump to t1. */
3039 case 2: /* call Ev */
3041 t2 = newTemp(Ity_I32);
3042 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
3043 putIReg(4, R_ESP, mkexpr(t2));
3044 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
3045 jmp_treg(Ijk_Call,t1);
3046 dres->whatNext = Dis_StopHere;
3048 case 4: /* jmp Ev */
3050 jmp_treg(Ijk_Boring,t1);
3051 dres->whatNext = Dis_StopHere;
3053 case 6: /* PUSH Ev */
3054 vassert(sz == 4 || sz == 2);
3055 t2 = newTemp(Ity_I32);
3056 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
3057 putIReg(4, R_ESP, mkexpr(t2) );
3058 storeLE( mkexpr(t2), mkexpr(t1) );
3065 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
3066 nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
/* Memory E operand — same sub-opcodes, value loaded through addr.
   Locked INC/DEC use casLE; the selecting branch is elided here. */
3068 addr = disAMode ( &len, sorb, delta, dis_buf );
3069 assign(t1, loadLE(ty,mkexpr(addr)));
3070 switch (gregOfRM(modrm)) {
3073 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3074 mkexpr(t1), mkU(ty,1)));
3076 casLE( mkexpr(addr),
3077 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
3079 storeLE(mkexpr(addr),mkexpr(t2));
3081 setFlags_INC_DEC( True, t2, ty );
3085 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3086 mkexpr(t1), mkU(ty,1)));
3088 casLE( mkexpr(addr),
3089 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
3091 storeLE(mkexpr(addr),mkexpr(t2));
3093 setFlags_INC_DEC( False, t2, ty );
3095 case 2: /* call Ev */
3097 t2 = newTemp(Ity_I32);
3098 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
3099 putIReg(4, R_ESP, mkexpr(t2));
3100 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
3101 jmp_treg(Ijk_Call,t1);
3102 dres->whatNext = Dis_StopHere;
3104 case 4: /* JMP Ev */
3106 jmp_treg(Ijk_Boring,t1);
3107 dres->whatNext = Dis_StopHere;
3109 case 6: /* PUSH Ev */
3110 vassert(sz == 4 || sz == 2);
3111 t2 = newTemp(Ity_I32);
3112 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
3113 putIReg(4, R_ESP, mkexpr(t2) );
3114 storeLE( mkexpr(t2), mkexpr(t1) );
3121 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
3122 nameISize(sz), dis_buf);
3128 /*------------------------------------------------------------*/
3129 /*--- Disassembling string ops (including REP prefixes) ---*/
3130 /*------------------------------------------------------------*/
/* Compute the per-iteration pointer step for a string op into t_inc:
   derived from the guest DFLAG (direction flag) state, scaled for
   2/4-byte ops via Iop_Shl32.  NOTE(review): the assigns are partially
   elided in this listing; exact scaling/else-arm not visible. */
3132 /* Code shared by all the string ops */
3134 void dis_string_op_increment(Int sz, Int t_inc)
3136 if (sz == 4 || sz == 2) {
3138 binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
3142 IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
/* Emit IR for a single (non-REP) string insn: compute the direction
   increment, then delegate to the specific op (dis_MOVS etc.). */
3147 void dis_string_op( void (*dis_OP)( Int, IRTemp ),
3148 Int sz, HChar* name, UChar sorb )
3150 IRTemp t_inc = newTemp(Ity_I32);
3151 vassert(sorb == 0); /* hmm. so what was the point of passing it in? */
3152 dis_string_op_increment(sz, t_inc);
3153 dis_OP( sz, t_inc );
3154 DIP("%s%c\n", name, nameISize(sz));
/* MOVS: copy sz bytes from [ESI] to [EDI], then advance both pointers
   by t_inc (signed step reflecting the direction flag). */
3158 void dis_MOVS ( Int sz, IRTemp t_inc )
3160 IRType ty = szToITy(sz);
3161 IRTemp td = newTemp(Ity_I32); /* EDI */
3162 IRTemp ts = newTemp(Ity_I32); /* ESI */
3164 assign( td, getIReg(4, R_EDI) );
3165 assign( ts, getIReg(4, R_ESI) );
3167 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
3169 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3170 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
/* LODS: load sz bytes from [ESI] into EAX/AX/AL, then advance ESI. */
3174 void dis_LODS ( Int sz, IRTemp t_inc )
3176 IRType ty = szToITy(sz);
3177 IRTemp ts = newTemp(Ity_I32); /* ESI */
3179 assign( ts, getIReg(4, R_ESI) );
3181 putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );
3183 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
/* STOS: store EAX/AX/AL to [EDI], then advance EDI. */
3187 void dis_STOS ( Int sz, IRTemp t_inc )
3189 IRType ty = szToITy(sz);
3190 IRTemp ta = newTemp(ty); /* EAX */
3191 IRTemp td = newTemp(Ity_I32); /* EDI */
3193 assign( ta, getIReg(sz, R_EAX) );
3194 assign( td, getIReg(4, R_EDI) );
3196 storeLE( mkexpr(td), mkexpr(ta) );
3198 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
/* CMPS: compare [ESI] with [EDI] (flags as for SUB of the two loaded
   values; nothing is stored), then advance both pointers. */
3202 void dis_CMPS ( Int sz, IRTemp t_inc )
3204 IRType ty = szToITy(sz);
3205 IRTemp tdv = newTemp(ty); /* (EDI) */
3206 IRTemp tsv = newTemp(ty); /* (ESI) */
3207 IRTemp td = newTemp(Ity_I32); /* EDI */
3208 IRTemp ts = newTemp(Ity_I32); /* ESI */
3210 assign( td, getIReg(4, R_EDI) );
3211 assign( ts, getIReg(4, R_ESI) );
3213 assign( tdv, loadLE(ty,mkexpr(td)) );
3214 assign( tsv, loadLE(ty,mkexpr(ts)) );
3216 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
3218 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3219 putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
/* SCAS: compare EAX/AX/AL with [EDI] (SUB-style flags, no store),
   then advance EDI. */
3223 void dis_SCAS ( Int sz, IRTemp t_inc )
3225 IRType ty = szToITy(sz);
3226 IRTemp ta = newTemp(ty); /* EAX */
3227 IRTemp td = newTemp(Ity_I32); /* EDI */
3228 IRTemp tdv = newTemp(ty); /* (EDI) */
3230 assign( ta, getIReg(sz, R_EAX) );
3231 assign( td, getIReg(4, R_EDI) );
3233 assign( tdv, loadLE(ty,mkexpr(td)) );
3234 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
3236 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
/* Emit one unrolled iteration of a REP/REPE/REPNE-prefixed string op:
   side-exit to eip_next if ECX==0; decrement ECX; do the op once; then
   either loop back unconditionally (REP, cond==X86CondAlways) or test
   'cond' and loop/fall out accordingly.  Each guest iteration is one
   trip through this translated block. */
3240 /* Wrap the appropriate string op inside a REP/REPE/REPNE.
3241 We assume the insn is the last one in the basic block, and so emit a jump
3242 to the next insn, rather than just falling through. */
3244 void dis_REP_op ( X86Condcode cond,
3245 void (*dis_OP)(Int, IRTemp),
3246 Int sz, Addr32 eip, Addr32 eip_next, HChar* name )
3248 IRTemp t_inc = newTemp(Ity_I32);
3249 IRTemp tc = newTemp(Ity_I32); /* ECX */
3251 assign( tc, getIReg(4,R_ECX) );
3253 stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
3255 IRConst_U32(eip_next) ) );
3257 putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
3259 dis_string_op_increment(sz, t_inc);
/* The dis_OP(sz, t_inc) call itself is elided in this listing. */
3262 if (cond == X86CondAlways) {
3263 jmp_lit(Ijk_Boring,eip);
3265 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
3267 IRConst_U32(eip) ) );
3268 jmp_lit(Ijk_Boring,eip_next);
3270 DIP("%s%c\n", name, nameISize(sz));
3274 /*------------------------------------------------------------*/
3275 /*--- Arithmetic, etc. ---*/
3276 /*------------------------------------------------------------*/
/* Two-operand IMUL: G := G * E (low half only), flags built with the
   signed-multiply base op.  E may be a register or memory. */
3278 /* IMUL E, G. Supplied eip points to the modR/M byte. */
3280 UInt dis_mul_E_G ( UChar sorb,
3286 UChar rm = getIByte(delta0);
3287 IRType ty = szToITy(size);
3288 IRTemp te = newTemp(ty);
3289 IRTemp tg = newTemp(ty);
3290 IRTemp resLo = newTemp(ty);
3292 assign( tg, getIReg(size, gregOfRM(rm)) );
3293 if (epartIsReg(rm)) {
3294 assign( te, getIReg(size, eregOfRM(rm)) );
3296 IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf );
3297 assign( te, loadLE(ty,mkexpr(addr)) );
3300 setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB );
3302 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
3304 putIReg(size, gregOfRM(rm), mkexpr(resLo) );
3306 if (epartIsReg(rm)) {
3307 DIP("imul%c %s, %s\n", nameISize(size),
3308 nameIReg(size,eregOfRM(rm)),
3309 nameIReg(size,gregOfRM(rm)));
3312 DIP("imul%c %s, %s\n", nameISize(size),
3313 dis_buf, nameIReg(size,gregOfRM(rm)));
/* Three-operand IMUL: G := imm * E (low half only).  The immediate is
   sign-extended from 'litsize' bytes, then truncated to the operand
   size before being multiplied in. */
3319 /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */
3321 UInt dis_imul_I_E_G ( UChar sorb,
3328 UChar rm = getIByte(delta);
3329 IRType ty = szToITy(size);
3330 IRTemp te = newTemp(ty);
3331 IRTemp tl = newTemp(ty);
3332 IRTemp resLo = newTemp(ty);
3334 vassert(size == 1 || size == 2 || size == 4);
3336 if (epartIsReg(rm)) {
3337 assign(te, getIReg(size, eregOfRM(rm)));
3340 IRTemp addr = disAMode( &alen, sorb, delta, dis_buf );
3341 assign(te, loadLE(ty, mkexpr(addr)));
3344 d32 = getSDisp(litsize,delta);
/* Truncate the sign-extended immediate to the operand width so that
   mkU(ty,d32) below is fed an in-range value. */
3347 if (size == 1) d32 &= 0xFF;
3348 if (size == 2) d32 &= 0xFFFF;
3350 assign(tl, mkU(ty,d32));
3352 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
3354 setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB );
3356 putIReg(size, gregOfRM(rm), mkexpr(resLo));
3358 DIP("imul %d, %s, %s\n", d32,
3359 ( epartIsReg(rm) ? nameIReg(size,eregOfRM(rm)) : dis_buf ),
3360 nameIReg(size,gregOfRM(rm)) );
3365 /*------------------------------------------------------------*/
3367 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
3369 /*------------------------------------------------------------*/
3371 /* --- Helper functions for dealing with the register stack. --- */
/* Store a 32-bit emulation-warning code into the guest EMWARN field. */
3373 /* --- Set the emulation-warning pseudo-register. --- */
3375 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
3377 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
3378 stmt( IRStmt_Put( OFFB_EMWARN, e ) );
/* Constant double-precision quiet NaN (0x7FF8_0000_0000_0000), used
   as the value of "empty" FP stack slots. */
3381 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
3383 static IRExpr* mkQNaN64 ( void )
3385 /* QNaN is 0 2047 1 0(51times)
3386 == 0b 11111111111b 1 0(51times)
3387 == 0x7FF8 0000 0000 0000
3389 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
/* Read the FP top-of-stack pointer (FTOP) as an Ity_I32 expression. */
3392 /* --------- Get/put the top-of-stack pointer. --------- */
3394 static IRExpr* get_ftop ( void )
3396 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
/* Write a 32-bit value to the FP top-of-stack pointer (FTOP). */
3399 static void put_ftop ( IRExpr* e )
3401 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
3402 stmt( IRStmt_Put( OFFB_FTOP, e ) );
/* Read the FPU C3..C0 condition bits (FC3210) as Ity_I32. */
3405 /* --------- Get/put the C3210 bits. --------- */
3407 static IRExpr* get_C3210 ( void )
3409 return IRExpr_Get( OFFB_FC3210, Ity_I32 );
/* Write the FPU C3..C0 condition bits (FC3210). */
3412 static void put_C3210 ( IRExpr* e )
3414 stmt( IRStmt_Put( OFFB_FC3210, e ) );
/* Read the guest FPU rounding-mode field (FPROUND) as Ity_I32. */
3417 /* --------- Get/put the FPU rounding mode. --------- */
3418 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
3420 return IRExpr_Get( OFFB_FPROUND, Ity_I32 );
/* Write the guest FPU rounding-mode field (FPROUND). */
3423 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
3425 stmt( IRStmt_Put( OFFB_FPROUND, e ) );
/* The real rounding mode: guest FPROUND masked to its low 2 bits,
   yielding a valid IRRoundingMode-encoded value. */
3429 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
3430 /* Produces a value in 0 .. 3, which is encoded as per the type
3431 IRRoundingMode. Since the guest_FPROUND value is also encoded as
3432 per IRRoundingMode, we merely need to get it and mask it for
3435 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
3437 return binop( Iop_And32, get_fpround(), mkU32(3) );
/* A constant round-to-nearest mode, used where the front end does not
   honour the guest rounding mode (see XXXROUNDINGFIXME call sites). */
3440 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
3442 return mkU32(Irrm_NEAREST);
/* Write an 8-bit tag for FP stack slot i, indexed rotationally via
   FTOP through the FPTAGS register array. */
3446 /* --------- Get/set FP register tag bytes. --------- */
3448 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
3450 static void put_ST_TAG ( Int i, IRExpr* value )
3453 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
3454 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3455 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
/* Read the tag of FP stack slot i (FTOP-relative GetI). */
3458 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
3459 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
3461 static IRExpr* get_ST_TAG ( Int i )
3463 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3464 return IRExpr_GetI( descr, get_ftop(), i );
/* Write an F64 into FP stack slot i (FTOP-relative) and mark the slot
   full, without first checking its previous tag. */
3468 /* --------- Get/set FP registers. --------- */
3470 /* Given i, and some expression e, emit 'ST(i) = e' and set the
3471 register's tag to indicate the register is full. The previous
3472 state of the register is not checked. */
3474 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
3477 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
3478 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3479 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
3480 /* Mark the register as in-use. */
3481 put_ST_TAG(i, mkU8(1));
/* Checked store to ST(i): if the slot's tag says it is already full,
   a NaN is stored instead of 'value' (stack-overflow behaviour).
   NOTE(review): the Mux0X argument lines are elided in this listing. */
3484 /* Given i, and some expression e, emit
3485 ST(i) = is_full(i) ? NaN : e
3486 and set the tag accordingly.
3489 static void put_ST ( Int i, IRExpr* value )
3491 put_ST_UNCHECKED( i,
3492 IRExpr_Mux0X( get_ST_TAG(i),
3495 /* non-0 means full */
/* Read ST(i) as F64 (FTOP-relative GetI), ignoring the tag. */
3502 /* Given i, generate an expression yielding 'ST(i)'. */
3504 static IRExpr* get_ST_UNCHECKED ( Int i )
3506 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3507 return IRExpr_GetI( descr, get_ftop(), i );
/* Checked read of ST(i): yields the register if its tag says "full",
   else a NaN (stack-underflow behaviour).  NOTE(review): the Mux0X's
   empty-slot (NaN) arm is elided in this listing. */
3511 /* Given i, generate an expression yielding
3512 is_full(i) ? ST(i) : NaN
3515 static IRExpr* get_ST ( Int i )
3518 IRExpr_Mux0X( get_ST_TAG(i),
3521 /* non-0 means full */
3522 get_ST_UNCHECKED(i));
/* Push: move FTOP down one slot (the caller then fills ST(0)). */
3526 /* Adjust FTOP downwards by one register. */
3528 static void fp_push ( void )
3530 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
/* Pop: mark ST(0) empty (tag 0), then move FTOP up one slot. */
3533 /* Adjust FTOP upwards by one register, and mark the vacated register
3536 static void fp_pop ( void )
3538 put_ST_TAG(0, mkU8(0));
3539 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
/* Clear C2 in FC3210, claiming the argument of sin/cos/tan/sincos is
   in range (see file-head note on C2 handling). */
3542 /* Clear the C2 bit of the FPU status register, for
3543 sin/cos/tan/sincos. */
3545 static void clear_C2 ( void )
3547 put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) );
/* Synthesise an FPU status-word value from FTOP (bits 13:11) and the
   C3210 bits (mask 0x4700).  NOTE(review): the combining Iop (shift/
   or) lines are elided in this listing. */
3550 /* Invent a plausible-looking FPU status word value:
3551 ((ftop & 7) << 11) | (c3210 & 0x4700)
3553 static IRExpr* get_FPU_sw ( void )
3559 binop(Iop_And32, get_ftop(), mkU32(7)),
3561 binop(Iop_And32, get_C3210(), mkU32(0x4700))
/* ST(0) = ST(0) `op` mem, for F64 (dbl) or F32 (widened to F64)
   memory operands.  Rounding is forced to nearest via
   get_FAKE_roundingmode (XXXROUNDINGFIXME).  NOTE(review): the put_ST
   wrappers around these operands are elided in this listing. */
3566 /* ------------------------------------------------------- */
3567 /* Given all that stack-mangling junk, we can now go ahead
3568 and describe FP instructions.
3571 /* ST(0) = ST(0) `op` mem64/32(addr)
3572 Need to check ST(0)'s tag on read, but not on write.
3575 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
3578 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3582 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3584 loadLE(Ity_F64,mkexpr(addr))
3589 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3591 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
/* Reversed-operand variant: ST(0) = mem `op` ST(0).  Same structure
   and fake-rounding caveat as fp_do_op_mem_ST_0 above. */
3597 /* ST(0) = mem64/32(addr) `op` ST(0)
3598 Need to check ST(0)'s tag on read, but not on write.
3601 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
3604 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3608 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3609 loadLE(Ity_F64,mkexpr(addr)),
3615 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3616 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
/* ST(st_dst) = ST(st_dst) `op` ST(st_src); optionally pops after.
   NOTE(review): the put_ST/triop and pop lines are elided here. */
3623 /* ST(dst) = ST(dst) `op` ST(src).
3624 Check dst and src tags when reading but not on write.
3627 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
3630 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"",
3631 (Int)st_src, (Int)st_dst );
3635 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
/* Reversed-operand variant: ST(st_dst) = ST(st_src) `op` ST(st_dst);
   optionally pops after.  Elided lines as for fp_do_op_ST_ST. */
3643 /* ST(dst) = ST(src) `op` ST(dst).
3644 Check dst and src tags when reading but not on write.
3647 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
3650 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"",
3651 (Int)st_src, (Int)st_dst );
3655 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
/* FUCOMI(P)/FCOMI(P): compare ST(0) with ST(i) and write Z,P,C (and
   O) into the eflags thunk via CC_OP_COPY.  A and S are forced to
   zero -- see the caveat comments below and at the file head. */
3663 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
3664 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
3666 DIP("fucomi%s %%st(0),%%st(%d)\n", pop_after ? "p" : "", (Int)i );
3667 /* This is a bit of a hack (and isn't really right). It sets
3668 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
3669 documentation implies A and S are unchanged.
3671 /* It's also fishy in that it is used both for COMIP and
3672 UCOMIP, and they aren't the same (although similar). */
3673 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
3674 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
3675 stmt( IRStmt_Put( OFFB_CC_DEP1,
3677 binop(Iop_CmpF64, get_ST(0), get_ST(i)),
3680 /* Set NDEP even though it isn't used. This makes redundant-PUT
3681 elimination of previous stores to this field work better. */
3682 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
/* Disassemble one x87 FPU instruction, whose first (escape) byte is
   0xD8..0xDF.  On entry, delta points at the modRM byte; the escape
   byte is re-read from delta-1.  Returns the updated delta (i.e. the
   offset just past the decoded insn).  NOTE(review): *decode_ok is
   presumably cleared on undecodable encodings in elided lines --
   confirm against the full source.
   Review fixes applied here are confined to DIP() disassembly-trace
   strings, which do not affect the generated IR: "fcompp" (was
   "fuompp"), "fidivw" (was copy-pasted "fisubw"), "fists" (was
   popping-form "fistp"), "fisttpll" (was "fistppll"), and the
   newline placement in the "fstpt" trace. */
3689 UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta )
3696 /* On entry, delta points at the second byte of the insn (the modrm
3698 UChar first_opcode = getIByte(delta-1);
3699 UChar modrm = getIByte(delta+0);
3701 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
3703 if (first_opcode == 0xD8) {
3706 /* bits 5,4,3 are an opcode extension, and the modRM also
3707 specifies an address. */
3708 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
3711 switch (gregOfRM(modrm)) {
3713 case 0: /* FADD single-real */
3714 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
3717 case 1: /* FMUL single-real */
3718 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
3721 case 2: /* FCOM single-real */
3722 DIP("fcoms %s\n", dis_buf);
3723 /* This forces C1 to zero, which isn't right. */
3730 loadLE(Ity_F32,mkexpr(addr)))),
3736 case 3: /* FCOMP single-real */
3737 DIP("fcomps %s\n", dis_buf);
3738 /* This forces C1 to zero, which isn't right. */
3745 loadLE(Ity_F32,mkexpr(addr)))),
3752 case 4: /* FSUB single-real */
3753 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
3756 case 5: /* FSUBR single-real */
3757 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
3760 case 6: /* FDIV single-real */
3761 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
3764 case 7: /* FDIVR single-real */
3765 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
3769 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
3770 vex_printf("first_opcode == 0xD8\n");
3777 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
3778 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
3781 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
3782 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
3785 /* Dunno if this is right */
3786 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
3787 r_dst = (UInt)modrm - 0xD0;
3788 DIP("fcom %%st(0),%%st(%d)\n", (Int)r_dst);
3789 /* This forces C1 to zero, which isn't right. */
3793 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
3799 /* Dunno if this is right */
3800 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
3801 r_dst = (UInt)modrm - 0xD8;
3802 DIP("fcomp %%st(0),%%st(%d)\n", (Int)r_dst);
3803 /* This forces C1 to zero, which isn't right. */
3807 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
3814 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
3815 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
3818 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
3819 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
3822 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
3823 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
3826 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
3827 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
3836 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
3838 if (first_opcode == 0xD9) {
3841 /* bits 5,4,3 are an opcode extension, and the modRM also
3842 specifies an address. */
3843 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
3846 switch (gregOfRM(modrm)) {
3848 case 0: /* FLD single-real */
3849 DIP("flds %s\n", dis_buf);
3851 put_ST(0, unop(Iop_F32toF64,
3852 loadLE(Ity_F32, mkexpr(addr))));
3855 case 2: /* FST single-real */
3856 DIP("fsts %s\n", dis_buf);
3857 storeLE(mkexpr(addr),
3858 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
3861 case 3: /* FSTP single-real */
3862 DIP("fstps %s\n", dis_buf);
3863 storeLE(mkexpr(addr),
3864 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
3868 case 4: { /* FLDENV m28 */
3869 /* Uses dirty helper:
3870 VexEmWarn x86g_do_FLDENV ( VexGuestX86State*, HWord ) */
3871 IRTemp ew = newTemp(Ity_I32);
3872 IRDirty* d = unsafeIRDirty_0_N (
3874 "x86g_dirtyhelper_FLDENV",
3875 &x86g_dirtyhelper_FLDENV,
3876 mkIRExprVec_1( mkexpr(addr) )
3880 /* declare we're reading memory */
3882 d->mAddr = mkexpr(addr);
3885 /* declare we're writing guest state */
3888 d->fxState[0].fx = Ifx_Write;
3889 d->fxState[0].offset = OFFB_FTOP;
3890 d->fxState[0].size = sizeof(UInt);
3892 d->fxState[1].fx = Ifx_Write;
3893 d->fxState[1].offset = OFFB_FPTAGS;
3894 d->fxState[1].size = 8 * sizeof(UChar);
3896 d->fxState[2].fx = Ifx_Write;
3897 d->fxState[2].offset = OFFB_FPROUND;
3898 d->fxState[2].size = sizeof(UInt);
3900 d->fxState[3].fx = Ifx_Write;
3901 d->fxState[3].offset = OFFB_FC3210;
3902 d->fxState[3].size = sizeof(UInt);
3904 stmt( IRStmt_Dirty(d) );
3906 /* ew contains any emulation warning we may need to
3907 issue. If needed, side-exit to the next insn,
3908 reporting the warning, so that Valgrind's dispatcher
3909 sees the warning. */
3910 put_emwarn( mkexpr(ew) );
3913 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
3915 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
3919 DIP("fldenv %s\n", dis_buf);
3923 case 5: {/* FLDCW */
3924 /* The only thing we observe in the control word is the
3925 rounding mode. Therefore, pass the 16-bit value
3926 (x87 native-format control word) to a clean helper,
3927 getting back a 64-bit value, the lower half of which
3928 is the FPROUND value to store, and the upper half of
3929 which is the emulation-warning token which may be
3932 /* ULong x86h_check_fldcw ( UInt ); */
3933 IRTemp t64 = newTemp(Ity_I64);
3934 IRTemp ew = newTemp(Ity_I32);
3935 DIP("fldcw %s\n", dis_buf);
3936 assign( t64, mkIRExprCCall(
3937 Ity_I64, 0/*regparms*/,
3942 loadLE(Ity_I16, mkexpr(addr)))
3947 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
3948 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
3949 put_emwarn( mkexpr(ew) );
3950 /* Finally, if an emulation warning was reported,
3951 side-exit to the next insn, reporting the warning,
3952 so that Valgrind's dispatcher sees the warning. */
3955 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
3957 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
3963 case 6: { /* FNSTENV m28 */
3964 /* Uses dirty helper:
3965 void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */
3966 IRDirty* d = unsafeIRDirty_0_N (
3968 "x86g_dirtyhelper_FSTENV",
3969 &x86g_dirtyhelper_FSTENV,
3970 mkIRExprVec_1( mkexpr(addr) )
3973 /* declare we're writing memory */
3975 d->mAddr = mkexpr(addr);
3978 /* declare we're reading guest state */
3981 d->fxState[0].fx = Ifx_Read;
3982 d->fxState[0].offset = OFFB_FTOP;
3983 d->fxState[0].size = sizeof(UInt);
3985 d->fxState[1].fx = Ifx_Read;
3986 d->fxState[1].offset = OFFB_FPTAGS;
3987 d->fxState[1].size = 8 * sizeof(UChar);
3989 d->fxState[2].fx = Ifx_Read;
3990 d->fxState[2].offset = OFFB_FPROUND;
3991 d->fxState[2].size = sizeof(UInt);
3993 d->fxState[3].fx = Ifx_Read;
3994 d->fxState[3].offset = OFFB_FC3210;
3995 d->fxState[3].size = sizeof(UInt);
3997 stmt( IRStmt_Dirty(d) );
3999 DIP("fnstenv %s\n", dis_buf);
4003 case 7: /* FNSTCW */
4004 /* Fake up a native x87 FPU control word. The only
4005 thing it depends on is FPROUND[1:0], so call a clean
4006 helper to cook it up. */
4007 /* UInt x86h_create_fpucw ( UInt fpround ) */
4008 DIP("fnstcw %s\n", dis_buf);
4014 "x86g_create_fpucw", &x86g_create_fpucw,
4015 mkIRExprVec_1( get_fpround() )
4022 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4023 vex_printf("first_opcode == 0xD9\n");
4031 case 0xC0 ... 0xC7: /* FLD %st(?) */
4032 r_src = (UInt)modrm - 0xC0;
4033 DIP("fld %%st(%d)\n", (Int)r_src);
4034 t1 = newTemp(Ity_F64);
4035 assign(t1, get_ST(r_src));
4037 put_ST(0, mkexpr(t1));
4040 case 0xC8 ... 0xCF: /* FXCH %st(?) */
4041 r_src = (UInt)modrm - 0xC8;
4042 DIP("fxch %%st(%d)\n", (Int)r_src);
4043 t1 = newTemp(Ity_F64);
4044 t2 = newTemp(Ity_F64);
4045 assign(t1, get_ST(0));
4046 assign(t2, get_ST(r_src));
4047 put_ST_UNCHECKED(0, mkexpr(t2));
4048 put_ST_UNCHECKED(r_src, mkexpr(t1));
4051 case 0xE0: /* FCHS */
4053 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
4056 case 0xE1: /* FABS */
4058 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
4061 case 0xE4: /* FTST */
4063 /* This forces C1 to zero, which isn't right. */
4064 /* Well, in fact the Intel docs say (bizarrely): "C1 is
4065 set to 0 if stack underflow occurred; otherwise, set
4066 to 0" which is pretty nonsensical. I guess it's a
4073 IRExpr_Const(IRConst_F64i(0x0ULL))),
4079 case 0xE5: { /* FXAM */
4080 /* This is an interesting one. It examines %st(0),
4081 regardless of whether the tag says it's empty or not.
4082 Here, just pass both the tag (in our format) and the
4083 value (as a double, actually a ULong) to a helper
4086 = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)),
4087 unop(Iop_ReinterpF64asI64,
4088 get_ST_UNCHECKED(0)) );
4089 put_C3210(mkIRExprCCall(
4092 "x86g_calculate_FXAM", &x86g_calculate_FXAM,
4099 case 0xE8: /* FLD1 */
4102 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
4103 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
4106 case 0xE9: /* FLDL2T */
4109 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
4110 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
4113 case 0xEA: /* FLDL2E */
4116 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
4117 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
4120 case 0xEB: /* FLDPI */
4123 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
4124 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
4127 case 0xEC: /* FLDLG2 */
4130 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
4131 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
4134 case 0xED: /* FLDLN2 */
4137 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
4138 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
4141 case 0xEE: /* FLDZ */
4144 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
4145 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
4148 case 0xF0: /* F2XM1 */
4152 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4156 case 0xF1: /* FYL2X */
4160 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4166 case 0xF2: /* FPTAN */
4170 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4173 put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
4174 clear_C2(); /* HACK */
4177 case 0xF3: /* FPATAN */
4181 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4187 case 0xF4: { /* FXTRACT */
4188 IRTemp argF = newTemp(Ity_F64);
4189 IRTemp sigF = newTemp(Ity_F64);
4190 IRTemp expF = newTemp(Ity_F64);
4191 IRTemp argI = newTemp(Ity_I64);
4192 IRTemp sigI = newTemp(Ity_I64);
4193 IRTemp expI = newTemp(Ity_I64);
4195 assign( argF, get_ST(0) );
4196 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
4199 Ity_I64, 0/*regparms*/,
4200 "x86amd64g_calculate_FXTRACT",
4201 &x86amd64g_calculate_FXTRACT,
4202 mkIRExprVec_2( mkexpr(argI),
4203 mkIRExpr_HWord(0)/*sig*/ ))
4207 Ity_I64, 0/*regparms*/,
4208 "x86amd64g_calculate_FXTRACT",
4209 &x86amd64g_calculate_FXTRACT,
4210 mkIRExprVec_2( mkexpr(argI),
4211 mkIRExpr_HWord(1)/*exp*/ ))
4213 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
4214 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
4216 put_ST_UNCHECKED(0, mkexpr(expF) );
4219 put_ST(0, mkexpr(sigF) );
4223 case 0xF5: { /* FPREM1 -- IEEE compliant */
4224 IRTemp a1 = newTemp(Ity_F64);
4225 IRTemp a2 = newTemp(Ity_F64);
4227 /* Do FPREM1 twice, once to get the remainder, and once
4228 to get the C3210 flag values. */
4229 assign( a1, get_ST(0) );
4230 assign( a2, get_ST(1) );
4233 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4237 triop(Iop_PRem1C3210F64,
4238 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4244 case 0xF7: /* FINCSTP */
4246 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4249 case 0xF8: { /* FPREM -- not IEEE compliant */
4250 IRTemp a1 = newTemp(Ity_F64);
4251 IRTemp a2 = newTemp(Ity_F64);
4253 /* Do FPREM twice, once to get the remainder, and once
4254 to get the C3210 flag values. */
4255 assign( a1, get_ST(0) );
4256 assign( a2, get_ST(1) );
4259 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4263 triop(Iop_PRemC3210F64,
4264 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4270 case 0xF9: /* FYL2XP1 */
4273 triop(Iop_Yl2xp1F64,
4274 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4280 case 0xFA: /* FSQRT */
4284 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4288 case 0xFB: { /* FSINCOS */
4289 IRTemp a1 = newTemp(Ity_F64);
4290 assign( a1, get_ST(0) );
4294 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4299 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4301 clear_C2(); /* HACK */
4305 case 0xFC: /* FRNDINT */
4308 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
4311 case 0xFD: /* FSCALE */
4315 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4320 case 0xFE: /* FSIN */
4324 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4326 clear_C2(); /* HACK */
4329 case 0xFF: /* FCOS */
4333 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4335 clear_C2(); /* HACK */
4344 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
4346 if (first_opcode == 0xDA) {
4350 /* bits 5,4,3 are an opcode extension, and the modRM also
4351 specifies an address. */
4353 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4355 switch (gregOfRM(modrm)) {
4357 case 0: /* FIADD m32int */ /* ST(0) += m32int */
4358 DIP("fiaddl %s\n", dis_buf);
4362 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
4363 DIP("fimull %s\n", dis_buf);
4367 case 2: /* FICOM m32int */
4368 DIP("ficoml %s\n", dis_buf);
4369 /* This forces C1 to zero, which isn't right. */
4376 loadLE(Ity_I32,mkexpr(addr)))),
4382 case 3: /* FICOMP m32int */
4383 DIP("ficompl %s\n", dis_buf);
4384 /* This forces C1 to zero, which isn't right. */
4391 loadLE(Ity_I32,mkexpr(addr)))),
4398 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
4399 DIP("fisubl %s\n", dis_buf);
4403 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
4404 DIP("fisubrl %s\n", dis_buf);
4408 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
4409 DIP("fidivl %s\n", dis_buf);
4413 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
4414 DIP("fidivrl %s\n", dis_buf);
4421 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4424 loadLE(Ity_I32, mkexpr(addr)))));
4430 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4432 loadLE(Ity_I32, mkexpr(addr))),
4437 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4438 vex_printf("first_opcode == 0xDA\n");
4447 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
4448 r_src = (UInt)modrm - 0xC0;
4449 DIP("fcmovb %%st(%d), %%st(0)\n", (Int)r_src);
4453 mk_x86g_calculate_condition(X86CondB)),
4454 get_ST(0), get_ST(r_src)) );
4457 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
4458 r_src = (UInt)modrm - 0xC8;
4459 DIP("fcmovz %%st(%d), %%st(0)\n", (Int)r_src);
4463 mk_x86g_calculate_condition(X86CondZ)),
4464 get_ST(0), get_ST(r_src)) );
4467 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
4468 r_src = (UInt)modrm - 0xD0;
4469 DIP("fcmovbe %%st(%d), %%st(0)\n", (Int)r_src);
4473 mk_x86g_calculate_condition(X86CondBE)),
4474 get_ST(0), get_ST(r_src)) );
4477 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
4478 r_src = (UInt)modrm - 0xD8;
4479 DIP("fcmovu %%st(%d), %%st(0)\n", (Int)r_src);
4483 mk_x86g_calculate_condition(X86CondP)),
4484 get_ST(0), get_ST(r_src)) );
4487 case 0xE9: /* FUCOMPP %st(0),%st(1) */
4488 DIP("fucompp %%st(0),%%st(1)\n");
4489 /* This forces C1 to zero, which isn't right. */
4493 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
4508 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
4510 if (first_opcode == 0xDB) {
4513 /* bits 5,4,3 are an opcode extension, and the modRM also
4514 specifies an address. */
4515 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4518 switch (gregOfRM(modrm)) {
4520 case 0: /* FILD m32int */
4521 DIP("fildl %s\n", dis_buf);
4523 put_ST(0, unop(Iop_I32StoF64,
4524 loadLE(Ity_I32, mkexpr(addr))));
4527 case 1: /* FISTTPL m32 (SSE3) */
4528 DIP("fisttpl %s\n", dis_buf);
4529 storeLE( mkexpr(addr),
4530 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
4534 case 2: /* FIST m32 */
4535 DIP("fistl %s\n", dis_buf);
4536 storeLE( mkexpr(addr),
4537 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4540 case 3: /* FISTP m32 */
4541 DIP("fistpl %s\n", dis_buf);
4542 storeLE( mkexpr(addr),
4543 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4547 case 5: { /* FLD extended-real */
4548 /* Uses dirty helper:
4549 ULong x86g_loadF80le ( UInt )
4550 addr holds the address. First, do a dirty call to
4551 get hold of the data. */
4552 IRTemp val = newTemp(Ity_I64);
4553 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
4555 IRDirty* d = unsafeIRDirty_1_N (
4558 "x86g_dirtyhelper_loadF80le",
4559 &x86g_dirtyhelper_loadF80le,
4562 /* declare that we're reading memory */
4564 d->mAddr = mkexpr(addr);
4567 /* execute the dirty call, dumping the result in val. */
4568 stmt( IRStmt_Dirty(d) );
4570 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
4572 DIP("fldt %s\n", dis_buf);
4576 case 7: { /* FSTP extended-real */
4577 /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */
4579 = mkIRExprVec_2( mkexpr(addr),
4580 unop(Iop_ReinterpF64asI64, get_ST(0)) );
4582 IRDirty* d = unsafeIRDirty_0_N (
4584 "x86g_dirtyhelper_storeF80le",
4585 &x86g_dirtyhelper_storeF80le,
4588 /* declare we're writing memory */
4590 d->mAddr = mkexpr(addr);
4593 /* execute the dirty call. */
4594 stmt( IRStmt_Dirty(d) );
4597 DIP("fstpt %s\n", dis_buf);
4602 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4603 vex_printf("first_opcode == 0xDB\n");
4612 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
4613 r_src = (UInt)modrm - 0xC0;
4614 DIP("fcmovnb %%st(%d), %%st(0)\n", (Int)r_src);
4618 mk_x86g_calculate_condition(X86CondNB)),
4619 get_ST(0), get_ST(r_src)) );
4622 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
4623 r_src = (UInt)modrm - 0xC8;
4624 DIP("fcmovnz %%st(%d), %%st(0)\n", (Int)r_src);
4628 mk_x86g_calculate_condition(X86CondNZ)),
4629 get_ST(0), get_ST(r_src)) );
4632 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
4633 r_src = (UInt)modrm - 0xD0;
4634 DIP("fcmovnbe %%st(%d), %%st(0)\n", (Int)r_src);
4638 mk_x86g_calculate_condition(X86CondNBE)),
4639 get_ST(0), get_ST(r_src)) );
4642 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
4643 r_src = (UInt)modrm - 0xD8;
4644 DIP("fcmovnu %%st(%d), %%st(0)\n", (Int)r_src);
4648 mk_x86g_calculate_condition(X86CondNP)),
4649 get_ST(0), get_ST(r_src)) );
4657 /* Uses dirty helper:
4658 void x86g_do_FINIT ( VexGuestX86State* ) */
4659 IRDirty* d = unsafeIRDirty_0_N (
4661 "x86g_dirtyhelper_FINIT",
4662 &x86g_dirtyhelper_FINIT,
4667 /* declare we're writing guest state */
4670 d->fxState[0].fx = Ifx_Write;
4671 d->fxState[0].offset = OFFB_FTOP;
4672 d->fxState[0].size = sizeof(UInt);
4674 d->fxState[1].fx = Ifx_Write;
4675 d->fxState[1].offset = OFFB_FPREGS;
4676 d->fxState[1].size = 8 * sizeof(ULong);
4678 d->fxState[2].fx = Ifx_Write;
4679 d->fxState[2].offset = OFFB_FPTAGS;
4680 d->fxState[2].size = 8 * sizeof(UChar);
4682 d->fxState[3].fx = Ifx_Write;
4683 d->fxState[3].offset = OFFB_FPROUND;
4684 d->fxState[3].size = sizeof(UInt);
4686 d->fxState[4].fx = Ifx_Write;
4687 d->fxState[4].offset = OFFB_FC3210;
4688 d->fxState[4].size = sizeof(UInt);
4690 stmt( IRStmt_Dirty(d) );
4696 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
4697 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
4700 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
4701 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
4710 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
4712 if (first_opcode == 0xDC) {
4715 /* bits 5,4,3 are an opcode extension, and the modRM also
4716 specifies an address. */
4717 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4720 switch (gregOfRM(modrm)) {
4722 case 0: /* FADD double-real */
4723 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
4726 case 1: /* FMUL double-real */
4727 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
4730 case 2: /* FCOM double-real */
4731 DIP("fcoml %s\n", dis_buf);
4732 /* This forces C1 to zero, which isn't right. */
4738 loadLE(Ity_F64,mkexpr(addr))),
4744 case 3: /* FCOMP double-real */
4745 DIP("fcompl %s\n", dis_buf);
4746 /* This forces C1 to zero, which isn't right. */
4752 loadLE(Ity_F64,mkexpr(addr))),
4759 case 4: /* FSUB double-real */
4760 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
4763 case 5: /* FSUBR double-real */
4764 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
4767 case 6: /* FDIV double-real */
4768 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
4771 case 7: /* FDIVR double-real */
4772 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
4776 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4777 vex_printf("first_opcode == 0xDC\n");
4786 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
4787 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
4790 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
4791 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
4794 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
4795 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
4798 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
4799 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
4802 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
4803 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
4806 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
4807 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
4817 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
4819 if (first_opcode == 0xDD) {
4823 /* bits 5,4,3 are an opcode extension, and the modRM also
4824 specifies an address. */
4825 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4828 switch (gregOfRM(modrm)) {
4830 case 0: /* FLD double-real */
4831 DIP("fldl %s\n", dis_buf);
4833 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
4836 case 1: /* FISTTPQ m64 (SSE3) */
4837 DIP("fisttpll %s\n", dis_buf);
4838 storeLE( mkexpr(addr),
4839 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
4843 case 2: /* FST double-real */
4844 DIP("fstl %s\n", dis_buf);
4845 storeLE(mkexpr(addr), get_ST(0));
4848 case 3: /* FSTP double-real */
4849 DIP("fstpl %s\n", dis_buf);
4850 storeLE(mkexpr(addr), get_ST(0));
4854 case 4: { /* FRSTOR m108 */
4855 /* Uses dirty helper:
4856 VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
4857 IRTemp ew = newTemp(Ity_I32);
4858 IRDirty* d = unsafeIRDirty_0_N (
4860 "x86g_dirtyhelper_FRSTOR",
4861 &x86g_dirtyhelper_FRSTOR,
4862 mkIRExprVec_1( mkexpr(addr) )
4866 /* declare we're reading memory */
4868 d->mAddr = mkexpr(addr);
4871 /* declare we're writing guest state */
4874 d->fxState[0].fx = Ifx_Write;
4875 d->fxState[0].offset = OFFB_FTOP;
4876 d->fxState[0].size = sizeof(UInt);
4878 d->fxState[1].fx = Ifx_Write;
4879 d->fxState[1].offset = OFFB_FPREGS;
4880 d->fxState[1].size = 8 * sizeof(ULong);
4882 d->fxState[2].fx = Ifx_Write;
4883 d->fxState[2].offset = OFFB_FPTAGS;
4884 d->fxState[2].size = 8 * sizeof(UChar);
4886 d->fxState[3].fx = Ifx_Write;
4887 d->fxState[3].offset = OFFB_FPROUND;
4888 d->fxState[3].size = sizeof(UInt);
4890 d->fxState[4].fx = Ifx_Write;
4891 d->fxState[4].offset = OFFB_FC3210;
4892 d->fxState[4].size = sizeof(UInt);
4894 stmt( IRStmt_Dirty(d) );
4896 /* ew contains any emulation warning we may need to
4897 issue. If needed, side-exit to the next insn,
4898 reporting the warning, so that Valgrind's dispatcher
4899 sees the warning. */
4900 put_emwarn( mkexpr(ew) );
4903 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4905 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
4909 DIP("frstor %s\n", dis_buf);
4913 case 6: { /* FNSAVE m108 */
4914 /* Uses dirty helper:
4915 void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
4916 IRDirty* d = unsafeIRDirty_0_N (
4918 "x86g_dirtyhelper_FSAVE",
4919 &x86g_dirtyhelper_FSAVE,
4920 mkIRExprVec_1( mkexpr(addr) )
4923 /* declare we're writing memory */
4925 d->mAddr = mkexpr(addr);
4928 /* declare we're reading guest state */
4931 d->fxState[0].fx = Ifx_Read;
4932 d->fxState[0].offset = OFFB_FTOP;
4933 d->fxState[0].size = sizeof(UInt);
4935 d->fxState[1].fx = Ifx_Read;
4936 d->fxState[1].offset = OFFB_FPREGS;
4937 d->fxState[1].size = 8 * sizeof(ULong);
4939 d->fxState[2].fx = Ifx_Read;
4940 d->fxState[2].offset = OFFB_FPTAGS;
4941 d->fxState[2].size = 8 * sizeof(UChar);
4943 d->fxState[3].fx = Ifx_Read;
4944 d->fxState[3].offset = OFFB_FPROUND;
4945 d->fxState[3].size = sizeof(UInt);
4947 d->fxState[4].fx = Ifx_Read;
4948 d->fxState[4].offset = OFFB_FC3210;
4949 d->fxState[4].size = sizeof(UInt);
4951 stmt( IRStmt_Dirty(d) );
4953 DIP("fnsave %s\n", dis_buf);
4957 case 7: { /* FNSTSW m16 */
4958 IRExpr* sw = get_FPU_sw();
4959 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
4960 storeLE( mkexpr(addr), sw );
4961 DIP("fnstsw %s\n", dis_buf);
4966 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4967 vex_printf("first_opcode == 0xDD\n");
4974 case 0xC0 ... 0xC7: /* FFREE %st(?) */
4975 r_dst = (UInt)modrm - 0xC0;
4976 DIP("ffree %%st(%d)\n", (Int)r_dst);
4977 put_ST_TAG ( r_dst, mkU8(0) );
4980 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
4981 r_dst = (UInt)modrm - 0xD0;
4982 DIP("fst %%st(0),%%st(%d)\n", (Int)r_dst);
4983 /* P4 manual says: "If the destination operand is a
4984 non-empty register, the invalid-operation exception
4985 is not generated. Hence put_ST_UNCHECKED. */
4986 put_ST_UNCHECKED(r_dst, get_ST(0));
4989 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
4990 r_dst = (UInt)modrm - 0xD8;
4991 DIP("fstp %%st(0),%%st(%d)\n", (Int)r_dst);
4992 /* P4 manual says: "If the destination operand is a
4993 non-empty register, the invalid-operation exception
4994 is not generated. Hence put_ST_UNCHECKED. */
4995 put_ST_UNCHECKED(r_dst, get_ST(0));
4999 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
5000 r_dst = (UInt)modrm - 0xE0;
5001 DIP("fucom %%st(0),%%st(%d)\n", (Int)r_dst);
5002 /* This forces C1 to zero, which isn't right. */
5006 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5012 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
5013 r_dst = (UInt)modrm - 0xE8;
5014 DIP("fucomp %%st(0),%%st(%d)\n", (Int)r_dst);
5015 /* This forces C1 to zero, which isn't right. */
5019 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5032 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
5034 if (first_opcode == 0xDE) {
5038 /* bits 5,4,3 are an opcode extension, and the modRM also
5039 specifies an address. */
5041 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5044 switch (gregOfRM(modrm)) {
5046 case 0: /* FIADD m16int */ /* ST(0) += m16int */
5047 DIP("fiaddw %s\n", dis_buf);
5051 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
5052 DIP("fimulw %s\n", dis_buf);
5056 case 2: /* FICOM m16int */
5057 DIP("ficomw %s\n", dis_buf);
5058 /* This forces C1 to zero, which isn't right. */
5066 loadLE(Ity_I16,mkexpr(addr))))),
5072 case 3: /* FICOMP m16int */
5073 DIP("ficompw %s\n", dis_buf);
5074 /* This forces C1 to zero, which isn't right. */
5082 loadLE(Ity_I16,mkexpr(addr))))),
5089 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
5090 DIP("fisubw %s\n", dis_buf);
5094 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
5095 DIP("fisubrw %s\n", dis_buf);
5099 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
5100 DIP("fidivw %s\n", dis_buf);
5104 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
5105 DIP("fidivrw %s\n", dis_buf);
5112 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5116 loadLE(Ity_I16, mkexpr(addr))))));
5122 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5125 loadLE(Ity_I16, mkexpr(addr)))),
5130 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5131 vex_printf("first_opcode == 0xDE\n");
5140 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
5141 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
5144 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
5145 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
5148 case 0xD9: /* FCOMPP %st(0),%st(1) */
5149 DIP("fcompp %%st(0),%%st(1)\n");
5150 /* This forces C1 to zero, which isn't right. */
5154 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5162 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
5163 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
5166 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
5167 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
5170 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
5171 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
5174 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
5175 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
5185 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
5187 if (first_opcode == 0xDF) {
5191 /* bits 5,4,3 are an opcode extension, and the modRM also
5192 specifies an address. */
5193 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5196 switch (gregOfRM(modrm)) {
5198 case 0: /* FILD m16int */
5199 DIP("fildw %s\n", dis_buf);
5201 put_ST(0, unop(Iop_I32StoF64,
5203 loadLE(Ity_I16, mkexpr(addr)))));
5206 case 1: /* FISTTPS m16 (SSE3) */
5207 DIP("fisttps %s\n", dis_buf);
5208 storeLE( mkexpr(addr),
5209 binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
5213 case 2: /* FIST m16 */
5214 DIP("fists %s\n", dis_buf);
5215 storeLE( mkexpr(addr),
5216 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5219 case 3: /* FISTP m16 */
5220 DIP("fistps %s\n", dis_buf);
5221 storeLE( mkexpr(addr),
5222 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5226 case 5: /* FILD m64 */
5227 DIP("fildll %s\n", dis_buf);
5229 put_ST(0, binop(Iop_I64StoF64,
5231 loadLE(Ity_I64, mkexpr(addr))));
5234 case 7: /* FISTP m64 */
5235 DIP("fistpll %s\n", dis_buf);
5236 storeLE( mkexpr(addr),
5237 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
5242 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5243 vex_printf("first_opcode == 0xDF\n");
5252 case 0xC0: /* FFREEP %st(0) */
5253 DIP("ffreep %%st(%d)\n", 0);
5254 put_ST_TAG ( 0, mkU8(0) );
5258 case 0xE0: /* FNSTSW %ax */
5259 DIP("fnstsw %%ax\n");
5260 /* Get the FPU status word value and dump it in %AX. */
5262 /* The obvious thing to do is simply dump the 16-bit
5263 status word value in %AX. However, due to a
5264 limitation in Memcheck's origin tracking
5265 machinery, this causes Memcheck not to track the
5266 origin of any undefinedness into %AH (only into
5267 %AL/%AX/%EAX), which means origins are lost in
5268 the sequence "fnstsw %ax; test $M,%ah; jcond .." */
5269 putIReg(2, R_EAX, get_FPU_sw());
5271 /* So a somewhat lame kludge is to make it very
5272 clear to Memcheck that the value is written to
5273 both %AH and %AL. This generates marginally
5274 worse code, but I don't think it matters much. */
5275 IRTemp t16 = newTemp(Ity_I16);
5276 assign(t16, get_FPU_sw());
5277 putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) );
5278 putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
5282 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
5283 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
5286 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
5287 /* not really right since COMIP != UCOMIP */
5288 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
5299 vpanic("dis_FPU(x86): invalid primary opcode");
5310 /*------------------------------------------------------------*/
5312 /*--- MMX INSTRUCTIONS ---*/
5314 /*------------------------------------------------------------*/
5316 /* Effect of MMX insns on x87 FPU state (table 11-2 of
5317 IA32 arch manual, volume 3):
5319 Read from, or write to MMX register (viz, any insn except EMMS):
5320 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
5321 * FP stack pointer set to zero

   EMMS:
5324 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
5325 * FP stack pointer set to zero
/* Emitted before any MMX insn other than EMMS: the visible loop marks
   all 8 x87 tag bytes as 1 (Valid/non-empty), per the aliasing rules
   of IA32 vol.3 table 11-2.  NOTE(review): the table also requires
   FTOP := 0; that store is presumably in an elided line -- confirm. */
5328 static void do_MMX_preamble ( void )
5331 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5332 IRExpr* zero = mkU32(0);
5333 IRExpr* tag1 = mkU8(1);
5335 for (i = 0; i < 8; i++)
5336 stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
/* Emitted for EMMS: the visible loop marks all 8 x87 tag bytes as 0
   (Invalid/empty), returning the FP stack to the empty state.
   NOTE(review): table 11-2 also requires FTOP := 0; that store is
   presumably in an elided line -- confirm. */
5339 static void do_EMMS_preamble ( void )
5342 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5343 IRExpr* zero = mkU32(0);
5344 IRExpr* tag0 = mkU8(0);
5346 for (i = 0; i < 8; i++)
5347 stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
/* Read MMX register archreg (0..7) as an Ity_I64 expression.  Each
   MMX register aliases the low 64 bits of the corresponding x87
   register slot, hence the direct Get at OFFB_FPREGS + 8*archreg. */
5351 static IRExpr* getMMXReg ( UInt archreg )
5353 vassert(archreg < 8);
5354 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
/* Write the Ity_I64 expression e into MMX register archreg (0..7),
   i.e. the low 64 bits of x87 register slot archreg.  The vassert on
   typeOfIRExpr enforces the 64-bit-ness of the stored value. */
5358 static void putMMXReg ( UInt archreg, IRExpr* e )
5360 vassert(archreg < 8);
5361 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
5362 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
5366 /* Helper for non-shift MMX insns. Note this is incomplete in the
5367 sense that it does not first call do_MMX_preamble() -- that is the
5368 responsibility of its caller. */
/* Common decoder for the non-shift MMX "G = G op E" instructions.
   'sorb' is the segment-override prefix and 'show_granularity'
   appends a lane-size suffix (from the low 2 opcode bits) to the
   printed mnemonic.  Returns the updated code offset.
   Note: does NOT call do_MMX_preamble() -- caller's job. */
UInt dis_MMXop_regmem_to_reg ( UChar sorb,
                               Bool show_granularity )
   UChar   modrm = getIByte(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);
   IROp    op    = Iop_INVALID;
   HChar*  hName = NULL;
   /* XXX routes an opcode through a clean helper function (hAddr/
      hName) instead of a native IROp. */
#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
      /* Map opcode byte to an IROp, or to a helper via XXX. */
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;
      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;
      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;
      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;
      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;
      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;
      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      /* PMADDWD needs a helper -- no single IROp for it. */
      case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break;
      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;
      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;
      /* Pack/interleave ops take their operands swapped (eLeft). */
      case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
      case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
      case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
      case 0xDB: op = Iop_And64; break;
      /* PANDN: G is complemented before the AND (invG). */
      case 0xDF: op = Iop_And64; invG = True; break;
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
         op = Iop_Xor64; break;
      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8;    break;
      case 0xE3: op = Iop_Avg16Ux4;   break;
      case 0xEE: op = Iop_Max16Sx4;   break;
      case 0xDE: op = Iop_Max8Ux8;    break;
      case 0xEA: op = Iop_Min16Sx4;   break;
      case 0xDA: op = Iop_Min8Ux8;    break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;
      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;
      /* Unrecognised opcode: hard failure. */
         vex_printf("\n0x%x\n", (Int)opc);
         vpanic("dis_MMXop_regmem_to_reg");
   /* Fetch G, complementing it for the PANDN case. */
   argG = getMMXReg(gregOfRM(modrm));
      argG = unop(Iop_Not64, argG);
   /* Fetch E: either an MMX register or a 64-bit load. */
      argE = getMMXReg(eregOfRM(modrm));
      IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
      argE = loadLE(Ity_I64, mkexpr(addr));
   /* Compute the result, either natively or via the helper. */
   if (op != Iop_INVALID) {
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
      vassert(hName != NULL);
      vassert(hAddr != NULL);
                        0/*regparms*/, hName, hAddr,
                        mkIRExprVec_2( argL, argR )
   putMMXReg( gregOfRM(modrm), mkexpr(res) );
   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
       nameMMXReg(gregOfRM(modrm)) );
5517 /* Vector by scalar shift of G by the amount specified at the bottom
5518 of E. This is a straight copy of dis_SSE_shiftG_byE. */
5520 static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta,
5521 HChar* opname, IROp op )
5527 UChar rm = getIByte(delta);
5528 IRTemp g0 = newTemp(Ity_I64);
5529 IRTemp g1 = newTemp(Ity_I64);
5530 IRTemp amt = newTemp(Ity_I32);
5531 IRTemp amt8 = newTemp(Ity_I8);
5533 if (epartIsReg(rm)) {
5534 assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
5535 DIP("%s %s,%s\n", opname,
5536 nameMMXReg(eregOfRM(rm)),
5537 nameMMXReg(gregOfRM(rm)) );
5540 addr = disAMode ( &alen, sorb, delta, dis_buf );
5541 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
5542 DIP("%s %s,%s\n", opname,
5544 nameMMXReg(gregOfRM(rm)) );
5547 assign( g0, getMMXReg(gregOfRM(rm)) );
5548 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
5550 shl = shr = sar = False;
5553 case Iop_ShlN16x4: shl = True; size = 32; break;
5554 case Iop_ShlN32x2: shl = True; size = 32; break;
5555 case Iop_Shl64: shl = True; size = 64; break;
5556 case Iop_ShrN16x4: shr = True; size = 16; break;
5557 case Iop_ShrN32x2: shr = True; size = 32; break;
5558 case Iop_Shr64: shr = True; size = 64; break;
5559 case Iop_SarN16x4: sar = True; size = 16; break;
5560 case Iop_SarN32x2: sar = True; size = 32; break;
5561 default: vassert(0);
5568 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
5570 binop(op, mkexpr(g0), mkexpr(amt8))
5578 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
5579 binop(op, mkexpr(g0), mkU8(size-1)),
5580 binop(op, mkexpr(g0), mkexpr(amt8))
5588 putMMXReg( gregOfRM(rm), mkexpr(g1) );
5593 /* Vector by scalar shift of E by an immediate byte. This is a
5594 straight copy of dis_SSE_shiftE_imm. */
/* Vector shift of E by an immediate byte (the "/2 /4 /6" forms of
   0F 71/72/73).  E must be a register; the sub-opcode (greg field)
   selects SRL(2)/SRA(4)/SLL(6).  The immediate is known at decode
   time, so out-of-range amounts are folded statically rather than
   guarded with a Mux0X.  Returns the updated delta. */
UInt dis_MMX_shiftE_imm ( Int delta, HChar* opname, IROp op )
   UChar   rm   = getIByte(delta);
   IRTemp  e0   = newTemp(Ity_I64);
   IRTemp  e1   = newTemp(Ity_I64);
   vassert(epartIsReg(rm));
   /* greg field is the sub-opcode: 2=SRL, 4=SRA, 6=SLL. */
   vassert(gregOfRM(rm) == 2
           || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
   amt = getIByte(delta+1);
   DIP("%s $%d,%s\n", opname,
                      nameMMXReg(eregOfRM(rm)) );
   assign( e0, getMMXReg(eregOfRM(rm)) );
   /* Classify the op and note its lane width. */
   shl = shr = sar = False;
      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      default: vassert(0);
   /* Logical shift: immediate >= lane width yields zero. */
     assign( e1, amt >= size
                    : binop(op, mkexpr(e0), mkU8(amt))
   /* Arithmetic shift: immediate >= lane width acts as size-1. */
     assign( e1, amt >= size
                    ? binop(op, mkexpr(e0), mkU8(size-1))
                    : binop(op, mkexpr(e0), mkU8(amt))
   putMMXReg( eregOfRM(rm), mkexpr(e1) );
5650 /* Completely handle all MMX instructions except emms. */
/* Top-level MMX decoder: handles every MMX insn except EMMS.
   On exit *decode_ok says whether the opcode was recognised;
   returns the updated code offset.  'sorb' is the segment
   override, 'sz' the operand-size implied by prefixes. */
UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta )
   UChar opc = getIByte(delta);
   /* dis_MMX handles all insns except emms. */
      /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
         goto mmx_decode_failure;
      modrm = getIByte(delta);
      if (epartIsReg(modrm)) {
            /* Zero-extend the 32-bit source into the 64-bit MMX reg. */
            binop( Iop_32HLto64,
                   getIReg(4, eregOfRM(modrm)) ) );
         DIP("movd %s, %s\n",
             nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            binop( Iop_32HLto64,
                   loadLE(Ity_I32, mkexpr(addr)) ) );
         DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm)));
   case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
         goto mmx_decode_failure;
      modrm = getIByte(delta);
      if (epartIsReg(modrm)) {
         /* Only the low 32 bits of the MMX reg are moved out. */
         putIReg( 4, eregOfRM(modrm),
                  unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
         DIP("movd %s, %s\n",
             nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         storeLE( mkexpr(addr),
                  unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
         DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf);
      /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      modrm = getIByte(delta);
      if (epartIsReg(modrm)) {
         putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) );
         DIP("movq %s, %s\n",
             nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movq %s, %s\n",
             dis_buf, nameMMXReg(gregOfRM(modrm)));
      /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
         goto mmx_decode_failure;
      modrm = getIByte(delta);
      if (epartIsReg(modrm)) {
         putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) );
         DIP("movq %s, %s\n",
             nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm)));
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
         /* The non-temporal hint of MOVNTQ is ignored. */
         DIP("mov(nt)q %s, %s\n",
             nameMMXReg(gregOfRM(modrm)), dis_buf);
   /* The arithmetic/logic group: all go via
      dis_MMXop_regmem_to_reg, which selects the IROp from opc. */
   case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True );
   case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True );
   case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True );
   case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True );
   case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True );
   case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True );
   case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False );
   case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False );
   case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False );
   case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True );
   case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True );
   case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False );
   case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False );
   case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False );
   case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True );
   case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True );
   case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False );
   case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False );
   case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False );
   case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
         goto mmx_decode_failure;
      delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False );
   /* Shifts where the amount comes from a register or memory. */
#  define SHIFT_BY_REG(_name,_op)                 \
      delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \
   /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
   case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
   case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
   /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
   case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
   case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
   /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
   case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
#  undef SHIFT_BY_REG
      /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
      UChar byte2, subopc;
         goto mmx_decode_failure;
      byte2  = getIByte(delta);           /* amode / sub-opcode */
      /* The greg field of the modrm byte selects the shift kind. */
      subopc = toUChar( (byte2 >> 3) & 7 );
#  define SHIFT_BY_IMM(_name,_op)                  \
      do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
      /* subopc 2 = SRL, 4 = SRA, 6 = SLL; opc picks the lane size. */
      if (subopc == 2 /*SRL*/ && opc == 0x71)
         SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
      else if (subopc == 2 /*SRL*/ && opc == 0x72)
         SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
      else if (subopc == 2 /*SRL*/ && opc == 0x73)
         SHIFT_BY_IMM("psrlq", Iop_Shr64);
      else if (subopc == 4 /*SAR*/ && opc == 0x71)
         SHIFT_BY_IMM("psraw", Iop_SarN16x4);
      else if (subopc == 4 /*SAR*/ && opc == 0x72)
         SHIFT_BY_IMM("psrad", Iop_SarN32x2);
      else if (subopc == 6 /*SHL*/ && opc == 0x71)
         SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
      else if (subopc == 6 /*SHL*/ && opc == 0x72)
         SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
      else if (subopc == 6 /*SHL*/ && opc == 0x73)
         SHIFT_BY_IMM("psllq", Iop_Shl64);
      else goto mmx_decode_failure;
#  undef SHIFT_BY_IMM
      /* MASKMOVQ: byte-granular masked store of G to [EDI],
         the mask being the top bit of each byte of E. */
      IRTemp addr    = newTemp(Ity_I32);
      IRTemp regD    = newTemp(Ity_I64);
      IRTemp regM    = newTemp(Ity_I64);
      IRTemp mask    = newTemp(Ity_I64);
      IRTemp olddata = newTemp(Ity_I64);
      IRTemp newdata = newTemp(Ity_I64);
      modrm = getIByte(delta);
      if (sz != 4 || (!epartIsReg(modrm)))
         goto mmx_decode_failure;
      assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
      assign( regM, getMMXReg( eregOfRM(modrm) ));
      assign( regD, getMMXReg( gregOfRM(modrm) ));
      /* Replicate each byte's sign bit across the byte. */
      assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
      /* Read-modify-write: merge D into the old memory contents
         under the mask, then store the result back. */
      assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
                            unop(Iop_Not64, mkexpr(mask)))) );
      storeLE( mkexpr(addr), mkexpr(newdata) );
      DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
                              nameMMXReg( gregOfRM(modrm) ) );
   /* --- MMX decode failure --- */
      return delta; /* ignored */
5988 /*------------------------------------------------------------*/
5989 /*--- More misc arithmetic and other obscure insns. ---*/
5990 /*------------------------------------------------------------*/
5992 /* Double length left and right shifts. Apparently only required in
5993 v-size (no b- variant). */
/* SHLD/SHRD (double-length shifts), v-size only.  E is the
   destination; G supplies the bits shifted in.  Implemented by
   forming a double-length word from E and G and shifting that.
   Returns the updated code offset. */
UInt dis_SHLRD_Gv_Ev ( UChar sorb,
                       Int delta, UChar modrm,
                       Bool amt_is_literal,
                       HChar* shift_amt_txt,
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.  And eip on entry points at the modrm byte. */
   IRType ty       = szToITy(sz);
   IRTemp gsrc     = newTemp(ty);
   IRTemp esrc     = newTemp(ty);
   IRTemp addr     = IRTemp_INVALID;
   IRTemp tmpSH    = newTemp(Ity_I8);
   IRTemp tmpL     = IRTemp_INVALID;
   IRTemp tmpRes   = IRTemp_INVALID;
   IRTemp tmpSubSh = IRTemp_INVALID;
   IRExpr* mask    = NULL;
   vassert(sz == 2 || sz == 4);
   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not
      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in
      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
   /* Fetch the operands. */
   assign( gsrc, getIReg(sz, gregOfRM(modrm)) );
   if (epartIsReg(modrm)) {
      assign( esrc, getIReg(sz, eregOfRM(modrm)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm)));
      addr = disAMode ( &len, sorb, delta, dis_buf );
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          nameIReg(sz, gregOfRM(modrm)), dis_buf);
   /* Round up the relevant primops. */
      /* 32-bit operands: work in a 64-bit double word. */
      tmpL     = newTemp(Ity_I64);
      tmpRes   = newTemp(Ity_I32);
      tmpSubSh = newTemp(Ity_I32);
      mkpair   = Iop_32HLto64;
      getres   = left_shift ? Iop_64HIto32 : Iop_64to32;
      shift    = left_shift ? Iop_Shl64 : Iop_Shr64;
      /* 16-bit operands: work in a 32-bit double word. */
      tmpL     = newTemp(Ity_I32);
      tmpRes   = newTemp(Ity_I16);
      tmpSubSh = newTemp(Ity_I16);
      mkpair   = Iop_16HLto32;
      getres   = left_shift ? Iop_32HIto16 : Iop_32to16;
      shift    = left_shift ? Iop_Shl32 : Iop_Shr32;
   /* Do the shift, calculate the subshift value, and set
   /* Mask the shift amount down to the architectural range. */
   assign( tmpSH, binop(Iop_And8, shift_amt, mask) );
      /* Left shift: E on top, G below. */
      assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) );
      /* Right shift: G on top, E below. */
      assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) );
   assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) );
   /* tmpSubSh is the result of shifting one place less, used by
      the flag thunk to recover the carried-out bit. */
                     binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32,
                              tmpRes, tmpSubSh, ty, tmpSH );
   /* Put result back. */
   if (epartIsReg(modrm)) {
      putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes));
      storeLE( mkexpr(addr), mkexpr(tmpRes) );
   /* A literal amount adds one byte to the insn length. */
   if (amt_is_literal) delta++;
6112 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
6115 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
6117 static HChar* nameBtOp ( BtOp op )
6120 case BtOpNone: return "";
6121 case BtOpSet: return "s";
6122 case BtOpReset: return "r";
6123 case BtOpComp: return "c";
6124 default: vpanic("nameBtOp(x86)");
/* BT/BTS/BTR/BTC Gv, Ev.  The register form is handled by spilling
   the register onto the client's stack so that both forms can use
   one byte-addressed memory sequence; the value is reloaded at the
   end.  Carry gets the selected bit; O,S,Z,A,P are forced to zero.
   Returns the updated code offset. */
UInt dis_bt_G_E ( UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_esp, t_mask, t_new;
   vassert(sz == 2 || sz == 4);
   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_esp
             = t_mask = t_new = IRTemp_INVALID;
   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I32);
   t_bitno1  = newTemp(Ity_I32);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I32);
   modrm     = getIByte(delta);
   /* Bit index comes from G, sign-widened to 32 bits. */
   assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) );
   if (epartIsReg(modrm)) {
      /* Get it onto the client's stack. */
      t_esp = newTemp(Ity_I32);
      t_addr0 = newTemp(Ity_I32);
      assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
      putIReg(4, R_ESP, mkexpr(t_esp));
      storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) );
      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_esp) );
      /* Mask out upper bits of the shift amount, since we're doing a
      assign( t_bitno1, binop(Iop_And32,
                              mkU32(sz == 4 ? 31 : 15)) );
      t_addr0 = disAMode ( &len, sorb, delta, dis_buf );
      /* Memory form: the bit index is NOT masked; it may address
         any byte relative to the operand. */
      assign( t_bitno1, mkexpr(t_bitno0) );
   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
   /* Now the main sequence. */
   /* Byte address = base + (bitno >> 3), arithmetic shift. */
                          binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) );
   /* t_addr1 now holds effective address */
                          binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) );
   /* t_bitno2 contains offset of bit within byte */
   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
   /* t_mask is now a suitable byte mask */
   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
   if (op != BtOpNone) {
         /* BTS: set the bit. */
            binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
         /* BTC: complement the bit. */
            binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
         /* BTR: clear the bit. */
            binop(Iop_And8, mkexpr(t_fetched),
                            unop(Iop_Not8, mkexpr(t_mask))) );
         vpanic("dis_bt_G_E(x86)");
      /* Only a genuine memory destination honours LOCK: use CAS. */
      if (locked && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                                 mkexpr(t_new)/*new*/,
                                 guest_EIP_curr_instr );
         storeLE( mkexpr(t_addr1), mkexpr(t_new) );
   /* Side effect done; now get selected bit into Carry flag */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
            unop(Iop_8Uto32, mkexpr(t_fetched)),
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_esp still points at it. */
      putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) );
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(sz)) );
   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)),
       ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) );
6267 /* Handle BSF/BSR. Only v-size seems necessary. */
/* BSF (fwds=True) / BSR (fwds=False), v-size only.  Z is set iff
   the source is zero; other flags are forced to zero.  When the
   source is zero the destination is left unchanged (mimics VIA
   Nehemiah -- see comment below).  Returns the updated offset. */
UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds )
   IRType ty  = szToITy(sz);
   IRTemp src = newTemp(ty);
   IRTemp dst = newTemp(ty);
   IRTemp src32 = newTemp(Ity_I32);
   IRTemp dst32 = newTemp(Ity_I32);
   IRTemp src8  = newTemp(Ity_I8);
   vassert(sz == 4 || sz == 2);
   modrm = getIByte(delta);
   isReg = epartIsReg(modrm);
      assign( src, getIReg(sz, eregOfRM(modrm)) );
      IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
      assign( src, loadLE(ty, mkexpr(addr)) );
   DIP("bs%c%c %s, %s\n",
       fwds ? 'f' : 'r', nameISize(sz),
       ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
       nameIReg(sz, gregOfRM(modrm)));
   /* Generate an 8-bit expression which is zero iff the
      original is zero, and nonzero otherwise */
           unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_CmpNE8),
                                 mkexpr(src), mkU(ty,0))) );
   /* Flags: Z is 1 iff source value is zero.  All others
      are undefined -- we force them to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
         IRExpr_Mux0X( mkexpr(src8),
                       mkU32(X86G_CC_MASK_Z),
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   /* Result: iff source value is zero, we can't use
      Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
      But anyway, Intel x86 semantics say the result is undefined in
      such situations.  Hence handle the zero case specially. */
   /* Bleh.  What we compute:
          bsf32:  if src == 0 then 0 else  Ctz32(src)
          bsr32:  if src == 0 then 0 else  31 - Clz32(src)
          bsf16:  if src == 0 then 0 else  Ctz32(16Uto32(src))
          bsr16:  if src == 0 then 0 else  31 - Clz32(16Uto32(src))
      First, widen src to 32 bits if it is not already.
      Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
      dst register unchanged when src == 0.  Hence change accordingly.
      assign( src32, unop(Iop_16Uto32, mkexpr(src)) );
      assign( src32, mkexpr(src) );
   /* The main computation, guarding against zero. */
            /* src == 0 -- leave dst unchanged */
            widenUto32( getIReg( sz, gregOfRM(modrm) ) ),
            fwds ? unop(Iop_Ctz32, mkexpr(src32))
                         unop(Iop_Clz32, mkexpr(src32)))
      /* Narrow back to 16 bits if needed. */
      assign( dst, unop(Iop_32to16, mkexpr(dst32)) );
      assign( dst, mkexpr(dst32) );
   /* dump result back */
   putIReg( sz, gregOfRM(modrm), mkexpr(dst) );
6376 void codegen_xchg_eAX_Reg ( Int sz, Int reg )
6378 IRType ty = szToITy(sz);
6379 IRTemp t1 = newTemp(ty);
6380 IRTemp t2 = newTemp(ty);
6381 vassert(sz == 2 || sz == 4);
6382 assign( t1, getIReg(sz, R_EAX) );
6383 assign( t2, getIReg(sz, reg) );
6384 putIReg( sz, R_EAX, mkexpr(t2) );
6385 putIReg( sz, reg, mkexpr(t1) );
6386 DIP("xchg%c %s, %s\n",
6387 nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg));
/* SAHF: load S,Z,A,P,C from %AH into the flag thunk, keeping the
   existing O flag. */
void codegen_SAHF ( void )
   /* Set the flags to:
      (x86g_calculate_flags_all() & X86G_CC_MASK_O)  -- retain the old O flag
      | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                |X86G_CC_MASK_P|X86G_CC_MASK_C)
   UInt  mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                      |X86G_CC_MASK_C|X86G_CC_MASK_P;
   IRTemp oldflags = newTemp(Ity_I32);
   assign( oldflags, mk_x86g_calculate_eflags_all() );
   /* Switch the thunk to COPY mode so DEP1 holds the flags directly. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   /* DEP1 = (old flags & O) | (AH & SZACP mask).  AH is EAX >> 8. */
   stmt( IRStmt_Put( OFFB_CC_DEP1,
         binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)),
            binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)),
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
/* LAHF: copy S,Z,A,P,C (plus the always-set bit 1) into %AH,
   leaving the rest of %EAX intact. */
void codegen_LAHF ( void  )
   /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
   IRExpr* eax_with_hole;
   UInt  mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                      |X86G_CC_MASK_C|X86G_CC_MASK_P;
   IRTemp  flags = newTemp(Ity_I32);
   assign( flags, mk_x86g_calculate_eflags_all() );
   /* Clear out the AH byte of EAX, keeping everything else. */
      = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF));
   /* Build the new AH byte from the SZACP flag bits. */
      = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)),
   /* Shift the new byte into the AH position and merge. */
      = binop(Iop_Or32, eax_with_hole,
              binop(Iop_Shl32, new_byte, mkU8(8)));
   putIReg(4, R_EAX, new_eax);
/* CMPXCHG G,E: compare the accumulator (%eAX) with E; if equal,
   E := G, else %eAX := E.  ZF (via the Sub thunk) reports the
   outcome.  Returns the updated code offset. */
UInt dis_cmpxchg_G_E ( UChar sorb,
   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dest  = newTemp(ty);
   IRTemp dest2 = newTemp(ty);
   IRTemp acc2  = newTemp(ty);
   IRTemp cond8 = newTemp(Ity_I8);
   IRTemp addr  = IRTemp_INVALID;
   UChar  rm    = getUChar(delta0);
   /* There are 3 cases to consider:
      reg-reg: ignore any lock prefix, generate sequence based
      reg-mem, not locked: ignore any lock prefix, generate sequence
      reg-mem, locked: use IRCAS
   if (epartIsReg(rm)) {
      /* Case 1: register destination. */
      assign( dest, getIReg(size, eregOfRM(rm)) );
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      /* Flags from acc - dest, then steer both results on ZF. */
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
      assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
      assign( acc2,  IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      putIReg(size, eregOfRM(rm), mkexpr(dest2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)),
                               nameIReg(size,eregOfRM(rm)) );
   else if (!epartIsReg(rm) && !locked) {
      /* Case 2: memory destination, no LOCK: plain load/store. */
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( dest, loadLE(ty, mkexpr(addr)) );
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
      assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
      assign( acc2,  IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      storeLE( mkexpr(addr), mkexpr(dest2) );
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
   else if (!epartIsReg(rm) && locked) {
      /* Case 3: memory destination with LOCK: atomic via IRCAS. */
      /* src is new value.  acc is expected value.  dest is old value.
         Compute success from the output of the IRCAS, and steer the
         new value for EAX accordingly: in case of success, EAX is
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
         mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
                  NULL, mkexpr(acc), NULL, mkexpr(src) )
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
      assign( acc2,  IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
6533 /* Handle conditional move instructions of the form
6534 cmovcc E(reg-or-mem), G(reg)
6536 E(src) is reg-or-mem
6539 If E is reg, --> GET %E, tmps
6544 If E is mem --> (getAddr E) -> tmpa
/* CMOVcc E,G: conditionally move E (reg or mem) into G, steered by
   'cond' through a Mux0X.  Note that for the memory form the load
   happens unconditionally.  Returns the updated code offset. */
UInt dis_cmov_E_G ( UChar sorb,
   UChar rm  = getIByte(delta0);
   IRType ty   = szToITy(sz);
   IRTemp tmps = newTemp(ty);
   IRTemp tmpd = newTemp(ty);
   if (epartIsReg(rm)) {
      assign( tmps, getIReg(sz, eregOfRM(rm)) );
      assign( tmpd, getIReg(sz, gregOfRM(rm)) );
      /* G = cond ? E : G. */
      putIReg(sz, gregOfRM(rm),
                  IRExpr_Mux0X( unop(Iop_1Uto8,
                                     mk_x86g_calculate_condition(cond)),
      DIP("cmov%c%s %s,%s\n", nameISize(sz),
                              name_X86Condcode(cond),
                              nameIReg(sz,eregOfRM(rm)),
                              nameIReg(sz,gregOfRM(rm)));
   /* E refers to memory */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmps, loadLE(ty, mkexpr(addr)) );
      assign( tmpd, getIReg(sz, gregOfRM(rm)) );
      putIReg(sz, gregOfRM(rm),
                  IRExpr_Mux0X( unop(Iop_1Uto8,
                                     mk_x86g_calculate_condition(cond)),
      DIP("cmov%c%s %s,%s\n", nameISize(sz),
                              name_X86Condcode(cond),
                              nameIReg(sz,gregOfRM(rm)));
/* XADD G,E (memory destination only): E := E + G, G := old E, with
   ADD-style flags.  The locked form goes through casLE.  The
   register-destination form is not handled here. */
UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
   UChar rm = getIByte(delta0);
   IRType ty    = szToITy(sz);
   IRTemp tmpd  = newTemp(ty);
   IRTemp tmpt0 = newTemp(ty);
   IRTemp tmpt1 = newTemp(ty);
   /* There are 3 cases to consider:
      reg-reg: currently unhandled
      reg-mem, not locked: ignore any lock prefix, generate 'naive'
                           (non-atomic) sequence
      reg-mem, locked: use IRCAS
   if (epartIsReg(rm)) {
      /* Currently we don't handle xadd_G_E with register operand. */
   else if (!epartIsReg(rm) && !locked) {
      /* Non-atomic: load, add, store, set flags, write back old. */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      storeLE( mkexpr(addr), mkexpr(tmpt1) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
   else if (!epartIsReg(rm) && locked) {
      /* Atomic: same computation, but the store goes via CAS. */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
             mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
6667 /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
/* MOV Ew,Sw: move 16 bits from Ew (ireg or mem) into a segment
   register.  Returns the updated code offset. */
UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 )
   UChar rm  = getIByte(delta0);
   if (epartIsReg(rm)) {
      putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
      DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
      DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
6689 /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
6690 dst is ireg and sz==4, zero out top half of it. */
/* MOV Sw,Ew: move a segment register (16 bits) to Ew.  If the
   destination is a register and sz==4, the top half is zeroed.
   Stores to memory always write just 16 bits. */
UInt dis_mov_Sw_Ew ( UChar sorb,
   UChar rm  = getIByte(delta0);
   vassert(sz == 2 || sz == 4);
   if (epartIsReg(rm)) {
      /* 32-bit form: zero-extend the selector into the full reg. */
      putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
      putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
      DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
      DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
6722 void dis_push_segreg ( UInt sreg, Int sz )
6724 IRTemp t1 = newTemp(Ity_I16);
6725 IRTemp ta = newTemp(Ity_I32);
6726 vassert(sz == 2 || sz == 4);
6728 assign( t1, getSReg(sreg) );
6729 assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
6730 putIReg(4, R_ESP, mkexpr(ta));
6731 storeLE( mkexpr(ta), mkexpr(t1) );
6733 DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
6737 void dis_pop_segreg ( UInt sreg, Int sz )
6739 IRTemp t1 = newTemp(Ity_I16);
6740 IRTemp ta = newTemp(Ity_I32);
6741 vassert(sz == 2 || sz == 4);
6743 assign( ta, getIReg(4, R_ESP) );
6744 assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
6746 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
6747 putSReg( sreg, mkexpr(t1) );
6748 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
6752 void dis_ret ( UInt d32 )
6754 IRTemp t1 = newTemp(Ity_I32), t2 = newTemp(Ity_I32);
6755 assign(t1, getIReg(4,R_ESP));
6756 assign(t2, loadLE(Ity_I32,mkexpr(t1)));
6757 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
6758 jmp_treg(Ijk_Ret,t2);
6761 /*------------------------------------------------------------*/
6762 /*--- SSE/SSE2/SSE3 helpers ---*/
6763 /*------------------------------------------------------------*/
6765 /* Worker function; do not call directly.
6766 Handles full width G = G `op` E and G = (not G) `op` E.
6769 static UInt dis_SSE_E_to_G_all_wrk (
6770 UChar sorb, Int delta,
6771 HChar* opname, IROp op,
6778 UChar rm = getIByte(delta);
6780 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm)))
6781 : getXMMReg(gregOfRM(rm));
6782 if (epartIsReg(rm)) {
6783 putXMMReg( gregOfRM(rm),
6785 getXMMReg(eregOfRM(rm))) );
6786 DIP("%s %s,%s\n", opname,
6787 nameXMMReg(eregOfRM(rm)),
6788 nameXMMReg(gregOfRM(rm)) );
6791 addr = disAMode ( &alen, sorb, delta, dis_buf );
6792 putXMMReg( gregOfRM(rm),
6794 loadLE(Ity_V128, mkexpr(addr))) );
6795 DIP("%s %s,%s\n", opname,
6797 nameXMMReg(gregOfRM(rm)) );
6803 /* All lanes SSE binary operation, G = G `op` E. */
6806 UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, HChar* opname, IROp op )
6808 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False );
6811 /* All lanes SSE binary operation, G = (not G) `op` E. */
6814 UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta,
6815 HChar* opname, IROp op )
6817 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True );
6821 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
6823 static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta,
6824 HChar* opname, IROp op )
6829 UChar rm = getIByte(delta);
6830 IRExpr* gpart = getXMMReg(gregOfRM(rm));
6831 if (epartIsReg(rm)) {
6832 putXMMReg( gregOfRM(rm),
6834 getXMMReg(eregOfRM(rm))) );
6835 DIP("%s %s,%s\n", opname,
6836 nameXMMReg(eregOfRM(rm)),
6837 nameXMMReg(gregOfRM(rm)) );
6840 /* We can only do a 32-bit memory read, so the upper 3/4 of the
6841 E operand needs to be made simply of zeroes. */
6842 IRTemp epart = newTemp(Ity_V128);
6843 addr = disAMode ( &alen, sorb, delta, dis_buf );
6844 assign( epart, unop( Iop_32UtoV128,
6845 loadLE(Ity_I32, mkexpr(addr))) );
6846 putXMMReg( gregOfRM(rm),
6847 binop(op, gpart, mkexpr(epart)) );
6848 DIP("%s %s,%s\n", opname,
6850 nameXMMReg(gregOfRM(rm)) );
6856 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
6858 static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta,
6859 HChar* opname, IROp op )
6864 UChar rm = getIByte(delta);
6865 IRExpr* gpart = getXMMReg(gregOfRM(rm));
6866 if (epartIsReg(rm)) {
6867 putXMMReg( gregOfRM(rm),
6869 getXMMReg(eregOfRM(rm))) );
6870 DIP("%s %s,%s\n", opname,
6871 nameXMMReg(eregOfRM(rm)),
6872 nameXMMReg(gregOfRM(rm)) );
6875 /* We can only do a 64-bit memory read, so the upper half of the
6876 E operand needs to be made simply of zeroes. */
6877 IRTemp epart = newTemp(Ity_V128);
6878 addr = disAMode ( &alen, sorb, delta, dis_buf );
6879 assign( epart, unop( Iop_64UtoV128,
6880 loadLE(Ity_I64, mkexpr(addr))) );
6881 putXMMReg( gregOfRM(rm),
6882 binop(op, gpart, mkexpr(epart)) );
6883 DIP("%s %s,%s\n", opname,
6885 nameXMMReg(gregOfRM(rm)) );
6891 /* All lanes unary SSE operation, G = op(E). */
6893 static UInt dis_SSE_E_to_G_unary_all (
6894 UChar sorb, Int delta,
6895 HChar* opname, IROp op
6901 UChar rm = getIByte(delta);
6902 if (epartIsReg(rm)) {
6903 putXMMReg( gregOfRM(rm),
6904 unop(op, getXMMReg(eregOfRM(rm))) );
6905 DIP("%s %s,%s\n", opname,
6906 nameXMMReg(eregOfRM(rm)),
6907 nameXMMReg(gregOfRM(rm)) );
6910 addr = disAMode ( &alen, sorb, delta, dis_buf );
6911 putXMMReg( gregOfRM(rm),
6912 unop(op, loadLE(Ity_V128, mkexpr(addr))) );
6913 DIP("%s %s,%s\n", opname,
6915 nameXMMReg(gregOfRM(rm)) );
6921 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
6923 static UInt dis_SSE_E_to_G_unary_lo32 (
6924 UChar sorb, Int delta,
6925 HChar* opname, IROp op
6928 /* First we need to get the old G value and patch the low 32 bits
6929 of the E operand into it. Then apply op and write back to G. */
6933 UChar rm = getIByte(delta);
6934 IRTemp oldG0 = newTemp(Ity_V128);
6935 IRTemp oldG1 = newTemp(Ity_V128);
6937 assign( oldG0, getXMMReg(gregOfRM(rm)) );
6939 if (epartIsReg(rm)) {
6941 binop( Iop_SetV128lo32,
6943 getXMMRegLane32(eregOfRM(rm), 0)) );
6944 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
6945 DIP("%s %s,%s\n", opname,
6946 nameXMMReg(eregOfRM(rm)),
6947 nameXMMReg(gregOfRM(rm)) );
6950 addr = disAMode ( &alen, sorb, delta, dis_buf );
6952 binop( Iop_SetV128lo32,
6954 loadLE(Ity_I32, mkexpr(addr)) ));
6955 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
6956 DIP("%s %s,%s\n", opname,
6958 nameXMMReg(gregOfRM(rm)) );
6964 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
6966 static UInt dis_SSE_E_to_G_unary_lo64 (
6967 UChar sorb, Int delta,
6968 HChar* opname, IROp op
6971 /* First we need to get the old G value and patch the low 64 bits
6972 of the E operand into it. Then apply op and write back to G. */
6976 UChar rm = getIByte(delta);
6977 IRTemp oldG0 = newTemp(Ity_V128);
6978 IRTemp oldG1 = newTemp(Ity_V128);
6980 assign( oldG0, getXMMReg(gregOfRM(rm)) );
6982 if (epartIsReg(rm)) {
6984 binop( Iop_SetV128lo64,
6986 getXMMRegLane64(eregOfRM(rm), 0)) );
6987 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
6988 DIP("%s %s,%s\n", opname,
6989 nameXMMReg(eregOfRM(rm)),
6990 nameXMMReg(gregOfRM(rm)) );
6993 addr = disAMode ( &alen, sorb, delta, dis_buf );
6995 binop( Iop_SetV128lo64,
6997 loadLE(Ity_I64, mkexpr(addr)) ));
6998 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
6999 DIP("%s %s,%s\n", opname,
7001 nameXMMReg(gregOfRM(rm)) );
7007 /* SSE integer binary operation:
7008 G = G `op` E (eLeft == False)
7009 G = E `op` G (eLeft == True)
7011 static UInt dis_SSEint_E_to_G(
7012 UChar sorb, Int delta,
7013 HChar* opname, IROp op,
7020 UChar rm = getIByte(delta);
7021 IRExpr* gpart = getXMMReg(gregOfRM(rm));
7022 IRExpr* epart = NULL;
7023 if (epartIsReg(rm)) {
7024 epart = getXMMReg(eregOfRM(rm));
7025 DIP("%s %s,%s\n", opname,
7026 nameXMMReg(eregOfRM(rm)),
7027 nameXMMReg(gregOfRM(rm)) );
7030 addr = disAMode ( &alen, sorb, delta, dis_buf );
7031 epart = loadLE(Ity_V128, mkexpr(addr));
7032 DIP("%s %s,%s\n", opname,
7034 nameXMMReg(gregOfRM(rm)) );
7037 putXMMReg( gregOfRM(rm),
7038 eLeft ? binop(op, epart, gpart)
7039 : binop(op, gpart, epart) );
7044 /* Helper for doing SSE FP comparisons. */
7046 static void findSSECmpOp ( Bool* needNot, IROp* op,
7047 Int imm8, Bool all_lanes, Int sz )
7057 if (sz == 4 && all_lanes) {
7059 case 0: *op = Iop_CmpEQ32Fx4; return;
7060 case 1: *op = Iop_CmpLT32Fx4; return;
7061 case 2: *op = Iop_CmpLE32Fx4; return;
7062 case 3: *op = Iop_CmpUN32Fx4; return;
7066 if (sz == 4 && !all_lanes) {
7068 case 0: *op = Iop_CmpEQ32F0x4; return;
7069 case 1: *op = Iop_CmpLT32F0x4; return;
7070 case 2: *op = Iop_CmpLE32F0x4; return;
7071 case 3: *op = Iop_CmpUN32F0x4; return;
7075 if (sz == 8 && all_lanes) {
7077 case 0: *op = Iop_CmpEQ64Fx2; return;
7078 case 1: *op = Iop_CmpLT64Fx2; return;
7079 case 2: *op = Iop_CmpLE64Fx2; return;
7080 case 3: *op = Iop_CmpUN64Fx2; return;
7084 if (sz == 8 && !all_lanes) {
7086 case 0: *op = Iop_CmpEQ64F0x2; return;
7087 case 1: *op = Iop_CmpLT64F0x2; return;
7088 case 2: *op = Iop_CmpLE64F0x2; return;
7089 case 3: *op = Iop_CmpUN64F0x2; return;
7093 vpanic("findSSECmpOp(x86,guest)");
7096 /* Handles SSE 32F/64F comparisons. */
7098 static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta,
7099 HChar* opname, Bool all_lanes, Int sz )
7104 Bool needNot = False;
7105 IROp op = Iop_INVALID;
7106 IRTemp plain = newTemp(Ity_V128);
7107 UChar rm = getIByte(delta);
7109 vassert(sz == 4 || sz == 8);
7110 if (epartIsReg(rm)) {
7111 imm8 = getIByte(delta+1);
7112 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
7113 assign( plain, binop(op, getXMMReg(gregOfRM(rm)),
7114 getXMMReg(eregOfRM(rm))) );
7116 DIP("%s $%d,%s,%s\n", opname,
7118 nameXMMReg(eregOfRM(rm)),
7119 nameXMMReg(gregOfRM(rm)) );
7121 addr = disAMode ( &alen, sorb, delta, dis_buf );
7122 imm8 = getIByte(delta+alen);
7123 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
7127 getXMMReg(gregOfRM(rm)),
7128 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
7129 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
7130 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
7134 DIP("%s $%d,%s,%s\n", opname,
7137 nameXMMReg(gregOfRM(rm)) );
7140 if (needNot && all_lanes) {
7141 putXMMReg( gregOfRM(rm),
7142 unop(Iop_NotV128, mkexpr(plain)) );
7145 if (needNot && !all_lanes) {
7146 mask = toUShort( sz==4 ? 0x000F : 0x00FF );
7147 putXMMReg( gregOfRM(rm),
7148 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
7151 putXMMReg( gregOfRM(rm), mkexpr(plain) );
7158 /* Vector by scalar shift of G by the amount specified at the bottom
/* Vector-by-scalar shift of G by the amount in the low 32 bits of E
   (xmm register or memory).  If the amount is >= the lane width, the
   hardware result is all zeroes for shl/shr, and a shift by
   (lanewidth-1) -- i.e. replication of the sign bit -- for sar; the
   Mux guarded by CmpLT32U below implements exactly that, since the
   IR ShlN/ShrN/SarN ops require amounts < lane width. */
7161 static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta,
7162 HChar* opname, IROp op )
7168 UChar rm = getIByte(delta);
7169 IRTemp g0 = newTemp(Ity_V128);
7170 IRTemp g1 = newTemp(Ity_V128);
7171 IRTemp amt = newTemp(Ity_I32);
7172 IRTemp amt8 = newTemp(Ity_I8);
7173 if (epartIsReg(rm)) {
7174 assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
7175 DIP("%s %s,%s\n", opname,
7176 nameXMMReg(eregOfRM(rm)),
7177 nameXMMReg(gregOfRM(rm)) );
7180 addr = disAMode ( &alen, sorb, delta, dis_buf );
7181 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
7182 DIP("%s %s,%s\n", opname,
7184 nameXMMReg(gregOfRM(rm)) );
7187 assign( g0, getXMMReg(gregOfRM(rm)) );
7188 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
7190 shl = shr = sar = False;
/* FIXED(review): was "size = 32" for Iop_ShlN16x8, which let shift
   amounts 16..31 reach a 16-bit-lane IR shift; use 16, matching the
   SarN16x8/ShrN16x8 cases here and dis_SSE_shiftE_imm. */
7193 case Iop_ShlN16x8: shl = True; size = 16; break;
7194 case Iop_ShlN32x4: shl = True; size = 32; break;
7195 case Iop_ShlN64x2: shl = True; size = 64; break;
7196 case Iop_SarN16x8: sar = True; size = 16; break;
7197 case Iop_SarN32x4: sar = True; size = 32; break;
7198 case Iop_ShrN16x8: shr = True; size = 16; break;
7199 case Iop_ShrN32x4: shr = True; size = 32; break;
7200 case Iop_ShrN64x2: shr = True; size = 64; break;
7201 default: vassert(0);
/* shl/shr: result is zero when amt >= size (Mux alternative elided
   from this view). */
7208 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
7210 binop(op, mkexpr(g0), mkexpr(amt8))
/* sar: saturate the amount to size-1 when amt >= size. */
7218 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
7219 binop(op, mkexpr(g0), mkU8(size-1)),
7220 binop(op, mkexpr(g0), mkexpr(amt8))
7228 putXMMReg( gregOfRM(rm), mkexpr(g1) );
7233 /* Vector by scalar shift of E by an immediate byte. */
7236 UInt dis_SSE_shiftE_imm ( Int delta, HChar* opname, IROp op )
7239 UChar rm = getIByte(delta);
7240 IRTemp e0 = newTemp(Ity_V128);
7241 IRTemp e1 = newTemp(Ity_V128);
7243 vassert(epartIsReg(rm));
7244 vassert(gregOfRM(rm) == 2
7245 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
7246 amt = getIByte(delta+1);
7248 DIP("%s $%d,%s\n", opname,
7250 nameXMMReg(eregOfRM(rm)) );
7251 assign( e0, getXMMReg(eregOfRM(rm)) );
7253 shl = shr = sar = False;
7256 case Iop_ShlN16x8: shl = True; size = 16; break;
7257 case Iop_ShlN32x4: shl = True; size = 32; break;
7258 case Iop_ShlN64x2: shl = True; size = 64; break;
7259 case Iop_SarN16x8: sar = True; size = 16; break;
7260 case Iop_SarN32x4: sar = True; size = 32; break;
7261 case Iop_ShrN16x8: shr = True; size = 16; break;
7262 case Iop_ShrN32x4: shr = True; size = 32; break;
7263 case Iop_ShrN64x2: shr = True; size = 64; break;
7264 default: vassert(0);
7268 assign( e1, amt >= size
7270 : binop(op, mkexpr(e0), mkU8(amt))
7274 assign( e1, amt >= size
7275 ? binop(op, mkexpr(e0), mkU8(size-1))
7276 : binop(op, mkexpr(e0), mkU8(amt))
7283 putXMMReg( eregOfRM(rm), mkexpr(e1) );
7288 /* Get the current SSE rounding mode. */
7290 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
7292 return binop( Iop_And32,
7293 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ),
7297 static void put_sse_roundingmode ( IRExpr* sseround )
7299 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
7300 stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) );
7303 /* Break a 128-bit value up into four 32-bit ints. */
7305 static void breakup128to32s ( IRTemp t128,
7307 IRTemp* t3, IRTemp* t2,
7308 IRTemp* t1, IRTemp* t0 )
7310 IRTemp hi64 = newTemp(Ity_I64);
7311 IRTemp lo64 = newTemp(Ity_I64);
7312 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
7313 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
7315 vassert(t0 && *t0 == IRTemp_INVALID);
7316 vassert(t1 && *t1 == IRTemp_INVALID);
7317 vassert(t2 && *t2 == IRTemp_INVALID);
7318 vassert(t3 && *t3 == IRTemp_INVALID);
7320 *t0 = newTemp(Ity_I32);
7321 *t1 = newTemp(Ity_I32);
7322 *t2 = newTemp(Ity_I32);
7323 *t3 = newTemp(Ity_I32);
7324 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
7325 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
7326 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
7327 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
7330 /* Construct a 128-bit value from four 32-bit ints. */
7332 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
7333 IRTemp t1, IRTemp t0 )
7336 binop( Iop_64HLtoV128,
7337 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
7338 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
7342 /* Break a 64-bit value up into four 16-bit ints. */
7344 static void breakup64to16s ( IRTemp t64,
7346 IRTemp* t3, IRTemp* t2,
7347 IRTemp* t1, IRTemp* t0 )
7349 IRTemp hi32 = newTemp(Ity_I32);
7350 IRTemp lo32 = newTemp(Ity_I32);
7351 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
7352 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
7354 vassert(t0 && *t0 == IRTemp_INVALID);
7355 vassert(t1 && *t1 == IRTemp_INVALID);
7356 vassert(t2 && *t2 == IRTemp_INVALID);
7357 vassert(t3 && *t3 == IRTemp_INVALID);
7359 *t0 = newTemp(Ity_I16);
7360 *t1 = newTemp(Ity_I16);
7361 *t2 = newTemp(Ity_I16);
7362 *t3 = newTemp(Ity_I16);
7363 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
7364 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
7365 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
7366 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
7369 /* Construct a 64-bit value from four 16-bit ints. */
7371 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
7372 IRTemp t1, IRTemp t0 )
7375 binop( Iop_32HLto64,
7376 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
7377 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
7381 /* Generate IR to set the guest %EFLAGS from the pushfl-format image
7382 in the given 32-bit temporary. The flags that are set are: O S Z A
7385 In all cases, code to set AC is generated. However, VEX actually
7386 ignores the AC value and so can optionally emit an emulation
7387 warning when it is enabled. In this routine, an emulation warning
7388 is only emitted if emit_AC_emwarn is True, in which case
7389 next_insn_EIP must be correct (this allows for correct code
7390 generation for popfl/popfw). If emit_AC_emwarn is False,
7391 next_insn_EIP is unimportant (this allows for easy if kludgey code
7392 generation for IRET.) */
7395 void set_EFLAGS_from_value ( IRTemp t1,
7396 Bool emit_AC_emwarn,
7397 Addr32 next_insn_EIP )
7399 vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);
7401 /* t1 is the flag word. Mask out everything except OSZACP and set
7402 the flags thunk to X86G_CC_OP_COPY. */
7403 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
7404 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
7405 stmt( IRStmt_Put( OFFB_CC_DEP1,
7408 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
7409 | X86G_CC_MASK_A | X86G_CC_MASK_Z
7410 | X86G_CC_MASK_S| X86G_CC_MASK_O )
7414 /* Set NDEP even though it isn't used. This makes redundant-PUT
7415 elimination of previous stores to this field work better. */
7416 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
7418 /* Also need to set the D flag, which is held in bit 10 of t1.
7419 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
7425 binop(Iop_Shr32, mkexpr(t1), mkU8(10)),
7431 /* Set the ID flag */
7437 binop(Iop_Shr32, mkexpr(t1), mkU8(21)),
7443 /* And set the AC flag. If setting it 1 to, possibly emit an
7444 emulation warning. */
7450 binop(Iop_Shr32, mkexpr(t1), mkU8(18)),
7456 if (emit_AC_emwarn) {
7457 put_emwarn( mkU32(EmWarn_X86_acFlag) );
7461 binop(Iop_And32, mkexpr(t1), mkU32(1<<18)),
7464 IRConst_U32( next_insn_EIP )
7471 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
7472 values (aa,bb), computes, for each of the 4 16-bit lanes:
7474 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
7476 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
7478 IRTemp aa = newTemp(Ity_I64);
7479 IRTemp bb = newTemp(Ity_I64);
7480 IRTemp aahi32s = newTemp(Ity_I64);
7481 IRTemp aalo32s = newTemp(Ity_I64);
7482 IRTemp bbhi32s = newTemp(Ity_I64);
7483 IRTemp bblo32s = newTemp(Ity_I64);
7484 IRTemp rHi = newTemp(Ity_I64);
7485 IRTemp rLo = newTemp(Ity_I64);
7486 IRTemp one32x2 = newTemp(Ity_I64);
7491 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
7495 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
7499 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
7503 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
7505 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
7514 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
7530 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
7539 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
7542 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
7543 values (aa,bb), computes, for each lane:
7545 if aa_lane < 0 then - bb_lane
7546 else if aa_lane > 0 then bb_lane
7549 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
7551 IRTemp aa = newTemp(Ity_I64);
7552 IRTemp bb = newTemp(Ity_I64);
7553 IRTemp zero = newTemp(Ity_I64);
7554 IRTemp bbNeg = newTemp(Ity_I64);
7555 IRTemp negMask = newTemp(Ity_I64);
7556 IRTemp posMask = newTemp(Ity_I64);
7557 IROp opSub = Iop_INVALID;
7558 IROp opCmpGTS = Iop_INVALID;
7561 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
7562 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
7563 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
7564 default: vassert(0);
7569 assign( zero, mkU64(0) );
7570 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
7571 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
7572 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
7576 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
7577 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
7581 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
7582 value aa, computes, for each lane
7584 if aa < 0 then -aa else aa
7586 Note that the result is interpreted as unsigned, so that the
7587 absolute value of the most negative signed input can be
7590 static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
7592 IRTemp aa = newTemp(Ity_I64);
7593 IRTemp zero = newTemp(Ity_I64);
7594 IRTemp aaNeg = newTemp(Ity_I64);
7595 IRTemp negMask = newTemp(Ity_I64);
7596 IRTemp posMask = newTemp(Ity_I64);
7597 IROp opSub = Iop_INVALID;
7598 IROp opSarN = Iop_INVALID;
7601 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
7602 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
7603 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
7604 default: vassert(0);
7608 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
7609 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
7610 assign( zero, mkU64(0) );
7611 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
7614 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
7615 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
7618 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
7619 IRTemp lo64, Int byteShift )
7621 vassert(byteShift >= 1 && byteShift <= 7);
7624 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
7625 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
7629 /* Generate a SIGSEGV followed by a restart of the current instruction
7630 if effective_addr is not 16-aligned. This is required behaviour
7631 for some SSE3 instructions and all 128-bit SSSE3 instructions.
7632 This assumes that guest_RIP_curr_instr is set correctly! */
7633 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
7638 binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
7641 IRConst_U32(guest_EIP_curr_instr)
7647 /* Helper for deciding whether a given insn (starting at the opcode
7648 byte) may validly be used with a LOCK prefix. The following insns
7649 may be used with LOCK when their destination operand is in memory.
7650 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
7652 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
7653 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
7654 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
7655 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
7656 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
7657 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
7658 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
7672 CMPXCHG 0F B0, 0F B1
7677 ------------------------------
7679 80 /0 = addb $imm8, rm8
7680 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
7681 82 /0 = addb $imm8, rm8
7682 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
7685 01 = addl r32, rm32 and addw r16, rm16
7687 Same for ADD OR ADC SBB AND SUB XOR
7690 FF /1 = dec rm32 and dec rm16
7693 FF /0 = inc rm32 and inc rm16
7696 F7 /3 = neg rm32 and neg rm16
7699 F7 /2 = not rm32 and not rm16
7701 0F BB = btcw r16, rm16 and btcl r32, rm32
7702 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
7706 static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
7709 case 0x00: case 0x01: case 0x08: case 0x09:
7710 case 0x10: case 0x11: case 0x18: case 0x19:
7711 case 0x20: case 0x21: case 0x28: case 0x29:
7712 case 0x30: case 0x31:
7713 if (!epartIsReg(opc[1]))
7717 case 0x80: case 0x81: case 0x82: case 0x83:
7718 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
7719 && !epartIsReg(opc[1]))
7723 case 0xFE: case 0xFF:
7724 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
7725 && !epartIsReg(opc[1]))
7729 case 0xF6: case 0xF7:
7730 if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
7731 && !epartIsReg(opc[1]))
7735 case 0x86: case 0x87:
7736 if (!epartIsReg(opc[1]))
7742 case 0xBB: case 0xB3: case 0xAB:
7743 if (!epartIsReg(opc[2]))
7747 if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
7748 && !epartIsReg(opc[2]))
7751 case 0xB0: case 0xB1:
7752 if (!epartIsReg(opc[2]))
7756 if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
7759 case 0xC0: case 0xC1:
7760 if (!epartIsReg(opc[2]))
7765 } /* switch (opc[1]) */
7771 } /* switch (opc[0]) */
7777 /*------------------------------------------------------------*/
7778 /*--- Disassemble a single instruction ---*/
7779 /*------------------------------------------------------------*/
7781 /* Disassemble a single instruction into IR. The instruction is
7782 located in host memory at &guest_code[delta]. *expect_CAS is set
7783 to True if the resulting IR is expected to contain an IRCAS
7784 statement, and False if it's not expected to. This makes it
7785 possible for the caller of disInstr_X86_WRK to check that
7786 LOCK-prefixed instructions are at least plausibly translated, in
7787 that it becomes possible to check that a (validly) LOCK-prefixed
7788 instruction generates a translation containing an IRCAS, and
7789 instructions without LOCK prefixes don't generate translations
7790 containing an IRCAS.
7793 DisResult disInstr_X86_WRK (
7794 /*OUT*/Bool* expect_CAS,
7796 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
7798 void* callback_opaque,
7800 VexArchInfo* archinfo
7804 IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
7806 UChar opc, modrm, abyte, pre;
7809 Int am_sz, d_sz, n_prefixes;
7811 UChar* insn; /* used in SSE decoders */
7813 /* The running delta */
7814 Int delta = (Int)delta64;
7816 /* Holds eip at the start of the insn, so that we can print
7817 consistent error messages for unimplemented insns. */
7818 Int delta_start = delta;
7820 /* sz denotes the nominal data-op size of the insn; we change it to
7821 2 if an 0x66 prefix is seen */
7824 /* sorb holds the segment-override-prefix byte, if any. Zero if no
7825 prefix has been seen, else one of {0x26, 0x3E, 0x64, 0x65}
7826 indicating the prefix. */
7829 /* Gets set to True if a LOCK prefix is seen. */
7830 Bool pfx_lock = False;
7832 /* Set result defaults. */
7833 dres.whatNext = Dis_Continue;
7835 dres.continueAt = 0;
7837 *expect_CAS = False;
7839 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
7841 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
7842 DIP("\t0x%x: ", guest_EIP_bbstart+delta);
7844 /* We may be asked to update the guest EIP before going further. */
7846 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr)) );
7848 /* Spot "Special" instructions (see comment at top of file). */
7850 UChar* code = (UChar*)(guest_code + delta);
7851 /* Spot the 12-byte preamble:
7852 C1C703 roll $3, %edi
7853 C1C70D roll $13, %edi
7854 C1C71D roll $29, %edi
7855 C1C713 roll $19, %edi
7857 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 &&
7858 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D &&
7859 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D &&
7860 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) {
7861 /* Got a "Special" instruction preamble. Which one is it? */
7862 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) {
7863 /* %EDX = client_request ( %EAX ) */
7864 DIP("%%edx = client_request ( %%eax )\n");
7866 jmp_lit(Ijk_ClientReq, guest_EIP_bbstart+delta);
7867 dres.whatNext = Dis_StopHere;
7868 goto decode_success;
7871 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) {
7872 /* %EAX = guest_NRADDR */
7873 DIP("%%eax = guest_NRADDR\n");
7875 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
7876 goto decode_success;
7879 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) {
7880 /* call-noredir *%EAX */
7881 DIP("call-noredir *%%eax\n");
7883 t1 = newTemp(Ity_I32);
7884 assign(t1, getIReg(4,R_EAX));
7885 t2 = newTemp(Ity_I32);
7886 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
7887 putIReg(4, R_ESP, mkexpr(t2));
7888 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
7889 jmp_treg(Ijk_NoRedir,t1);
7890 dres.whatNext = Dis_StopHere;
7891 goto decode_success;
7893 /* We don't know what it is. */
7894 goto decode_failure;
7898 /* To make a syscall in L4Re the guest must access its UTCB.
7899 * The address of the UTCB is stored in %gs, so we look for
7900 * the following instructions:
7901 * 65 a1 00 00 00 00 mov %gs:0x0,%eax
7902 * 65 8b 0d 00 00 00 00 mov %gs:0x0,%ecx
7903 * 65 8b 15 00 00 00 00 mov %gs:0x0,%edx
7904 * 65 8b 3d 00 00 00 00 mov %gs:0x0,%edi
7905 * 65 8b 35 00 00 00 00 mov %gs:0x0,%esi
7907 if (code[ 0] == 0x65 && code[ 1] == 0xa1 && code[ 2] == 0x0 &&
7908 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0) {
7909 // printf("%x\n", code);
7910 // printf("0x%x\n", guest_EIP_bbstart+delta);
7911 // DIP("%%edx = client_request ( %%eax )\n");
7912 // putIReg(4, R_EAX, mkexpr(0xdeadbeef));
7913 // vg_enter_kdebug();
7915 jmp_lit(Ijk_l4_utcb_eax, guest_EIP_bbstart+delta);
7916 dres.whatNext = Dis_StopHere;
7917 goto decode_success;
7920 // TODO implement me
7922 if (code[ 0] == 0x65 && code[ 1] == 0x8b && code[ 2] == 0x0d &&
7923 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0 && code[ 6] == 0x0) {
7925 jmp_lit(Ijk_l4_utcb_ecx, guest_EIP_bbstart+delta);
7926 dres.whatNext = Dis_StopHere;
7927 goto decode_success;
7930 if (code[ 0] == 0x65 && code[ 1] == 0x8b && code[ 2] == 0x15 &&
7931 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0 && code[ 6] == 0x0) {
7933 jmp_lit(Ijk_l4_utcb_edx, guest_EIP_bbstart+delta);
7934 dres.whatNext = Dis_StopHere;
7935 goto decode_success;
7938 if (code[ 0] == 0x65 && code[ 1] == 0x8b && code[ 2] == 0x3d &&
7939 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0 && code[ 6] == 0x0) {
7941 jmp_lit(Ijk_l4_utcb_edi, guest_EIP_bbstart+delta);
7942 dres.whatNext = Dis_StopHere;
7943 goto decode_success;
7947 if (code[ 0] == 0x65 && code[ 1] == 0x8b && code[ 2] == 0x35 &&
7948 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0 && code[ 6] == 0x0) {
7950 jmp_lit(Ijk_l4_utcb_esi, guest_EIP_bbstart+delta);
7951 dres.whatNext = Dis_StopHere;
7952 goto decode_success;
7956 if (code[ 0] == 0x0F && code[ 1] == 0x0B) {
7958 jmp_lit(Ijk_l4_ud2, guest_EIP_bbstart+delta);
7959 dres.whatNext = Dis_StopHere;
7960 goto decode_success;
7964 * The L4Re artificial trap instruction:
7967 * 0xc1 0xc0 0x42 rol eax, 0x42
7968 * 0xc1 0xc8 0x42 ror eax, 0x42
7971 if (code[ 0] == 0x50 && code[ 1] == 0xc1 &&
7972 code[ 2] == 0xc0 && code[ 3] == 0x42 &&
7973 code[ 4] == 0xc1 && code[ 5] == 0xc8 &&
7974 code[ 6] == 0x42 && code[ 7] == 0x58) {
7976 jmp_lit(Ijk_l4_artificial, guest_EIP_bbstart + delta);
7977 dres.whatNext = Dis_StopHere;
7978 goto decode_success;
7983 /* Handle a couple of weird-ass NOPs that have been observed in the
7986 UChar* code = (UChar*)(guest_code + delta);
7987 /* Sun's JVM 1.5.0 uses the following as a NOP:
7988 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
7989 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
7990 && code[3] == 0x65 && code[4] == 0x90) {
7991 DIP("%%es:%%cs:%%fs:%%gs:nop\n");
7993 goto decode_success;
7995 /* Don't barf on recent binutils padding,
7996 all variants of which are: nopw %cs:0x0(%eax,%eax,1)
7997 66 2e 0f 1f 84 00 00 00 00 00
7998 66 66 2e 0f 1f 84 00 00 00 00 00
7999 66 66 66 2e 0f 1f 84 00 00 00 00 00
8000 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8001 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8002 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8004 if (code[0] == 0x66) {
8006 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++)
8007 if (code[data16_cnt] != 0x66)
8009 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F
8010 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84
8011 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00
8012 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00
8013 && code[data16_cnt + 8] == 0x00 ) {
8014 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
8015 delta += 9 + data16_cnt;
8016 goto decode_success;
8021 /* Normal instruction handling starts here. */
8023 /* Deal with some but not all prefixes:
8026 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
8027 Not dealt with (left in place):
8032 if (n_prefixes > 7) goto decode_failure;
8033 pre = getUChar(delta);
8042 case 0x3E: /* %DS: */
8043 case 0x26: /* %ES: */
8044 case 0x64: /* %FS: */
8045 case 0x65: /* %GS: */
8047 goto decode_failure; /* only one seg override allowed */
8050 case 0x2E: { /* %CS: */
8051 /* 2E prefix on a conditional branch instruction is a
8052 branch-prediction hint, which can safely be ignored. */
8053 UChar op1 = getIByte(delta+1);
8054 UChar op2 = getIByte(delta+2);
8055 if ((op1 >= 0x70 && op1 <= 0x7F)
8057 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
8058 if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
8060 /* All other CS override cases are not handled */
8061 goto decode_failure;
8065 case 0x36: /* %SS: */
8066 /* SS override cases are not handled */
8067 goto decode_failure;
8077 /* Now we should be looking at the primary opcode byte or the
8078 leading F2 or F3. Check that any LOCK prefix is actually
8082 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
8085 *expect_CAS = False;
8086 goto decode_failure;
8091 /* ---------------------------------------------------- */
8092 /* --- The SSE decoder. --- */
8093 /* ---------------------------------------------------- */
8095 /* What did I do to deserve SSE ? Perhaps I was really bad in a
8098 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a
8099 later section, further on. */
8101 insn = (UChar*)&guest_code[delta];
8103 /* Treat fxsave specially. It should be doable even on an SSE0
8104 (Pentium-II class) CPU. Hence be prepared to handle it on
8105 any subarchitecture variant.
8108 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
8109 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8110 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) {
8112 modrm = getIByte(delta+2);
8114 vassert(!epartIsReg(modrm));
8116 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8119 DIP("fxsave %s\n", dis_buf);
8121 /* Uses dirty helper:
8122 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State*, UInt ) */
8123 d = unsafeIRDirty_0_N (
8125 "x86g_dirtyhelper_FXSAVE",
8126 &x86g_dirtyhelper_FXSAVE,
8127 mkIRExprVec_1( mkexpr(addr) )
8131 /* declare we're writing memory */
8133 d->mAddr = mkexpr(addr);
8136 /* declare we're reading guest state */
8139 d->fxState[0].fx = Ifx_Read;
8140 d->fxState[0].offset = OFFB_FTOP;
8141 d->fxState[0].size = sizeof(UInt);
8143 d->fxState[1].fx = Ifx_Read;
8144 d->fxState[1].offset = OFFB_FPREGS;
8145 d->fxState[1].size = 8 * sizeof(ULong);
8147 d->fxState[2].fx = Ifx_Read;
8148 d->fxState[2].offset = OFFB_FPTAGS;
8149 d->fxState[2].size = 8 * sizeof(UChar);
8151 d->fxState[3].fx = Ifx_Read;
8152 d->fxState[3].offset = OFFB_FPROUND;
8153 d->fxState[3].size = sizeof(UInt);
8155 d->fxState[4].fx = Ifx_Read;
8156 d->fxState[4].offset = OFFB_FC3210;
8157 d->fxState[4].size = sizeof(UInt);
8159 d->fxState[5].fx = Ifx_Read;
8160 d->fxState[5].offset = OFFB_XMM0;
8161 d->fxState[5].size = 8 * sizeof(U128);
8163 d->fxState[6].fx = Ifx_Read;
8164 d->fxState[6].offset = OFFB_SSEROUND;
8165 d->fxState[6].size = sizeof(UInt);
8167 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8168 images are packed back-to-back. If not, the value of
8169 d->fxState[5].size is wrong. */
8170 vassert(16 == sizeof(U128));
8171 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8173 stmt( IRStmt_Dirty(d) );
8175 goto decode_success;
8178 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
8179 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8180 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) {
8182 modrm = getIByte(delta+2);
8184 vassert(!epartIsReg(modrm));
8186 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8189 DIP("fxrstor %s\n", dis_buf);
8191 /* Uses dirty helper:
8192 void x86g_dirtyhelper_FXRSTOR ( VexGuestX86State*, UInt ) */
8193 d = unsafeIRDirty_0_N (
8195 "x86g_dirtyhelper_FXRSTOR",
8196 &x86g_dirtyhelper_FXRSTOR,
8197 mkIRExprVec_1( mkexpr(addr) )
8201 /* declare we're reading memory */
8203 d->mAddr = mkexpr(addr);
8206 /* declare we're writing guest state */
8209 d->fxState[0].fx = Ifx_Write;
8210 d->fxState[0].offset = OFFB_FTOP;
8211 d->fxState[0].size = sizeof(UInt);
8213 d->fxState[1].fx = Ifx_Write;
8214 d->fxState[1].offset = OFFB_FPREGS;
8215 d->fxState[1].size = 8 * sizeof(ULong);
8217 d->fxState[2].fx = Ifx_Write;
8218 d->fxState[2].offset = OFFB_FPTAGS;
8219 d->fxState[2].size = 8 * sizeof(UChar);
8221 d->fxState[3].fx = Ifx_Write;
8222 d->fxState[3].offset = OFFB_FPROUND;
8223 d->fxState[3].size = sizeof(UInt);
8225 d->fxState[4].fx = Ifx_Write;
8226 d->fxState[4].offset = OFFB_FC3210;
8227 d->fxState[4].size = sizeof(UInt);
8229 d->fxState[5].fx = Ifx_Write;
8230 d->fxState[5].offset = OFFB_XMM0;
8231 d->fxState[5].size = 8 * sizeof(U128);
8233 d->fxState[6].fx = Ifx_Write;
8234 d->fxState[6].offset = OFFB_SSEROUND;
8235 d->fxState[6].size = sizeof(UInt);
8237 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8238 images are packed back-to-back. If not, the value of
8239 d->fxState[5].size is wrong. */
8240 vassert(16 == sizeof(U128));
8241 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8243 stmt( IRStmt_Dirty(d) );
8245 goto decode_success;
8248 /* ------ SSE decoder main ------ */
8250 /* Skip parts of the decoder which don't apply given the stated
8251 guest subarchitecture. */
8252 if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
8253 goto after_sse_decoders;
8255 /* Otherwise we must be doing sse1 or sse2, so we can at least try
8258 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
8259 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) {
8260 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 );
8261 goto decode_success;
8264 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
8265 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) {
8267 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 );
8268 goto decode_success;
8271 /* 0F 55 = ANDNPS -- G = (not G) and E */
8272 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) {
8273 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 );
8274 goto decode_success;
8277 /* 0F 54 = ANDPS -- G = G and E */
8278 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) {
8279 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 );
8280 goto decode_success;
8283 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
8284 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) {
8285 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 );
8286 goto decode_success;
8289 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
8290 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) {
8292 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 );
8293 goto decode_success;
8296 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
8297 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
8298 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
8299 IRTemp argL = newTemp(Ity_F32);
8300 IRTemp argR = newTemp(Ity_F32);
8301 modrm = getIByte(delta+2);
8302 if (epartIsReg(modrm)) {
8303 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) );
8305 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8306 nameXMMReg(gregOfRM(modrm)) );
8308 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8309 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
8311 DIP("[u]comiss %s,%s\n", dis_buf,
8312 nameXMMReg(gregOfRM(modrm)) );
8314 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) );
8316 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
8317 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
8322 unop(Iop_F32toF64,mkexpr(argL)),
8323 unop(Iop_F32toF64,mkexpr(argR))),
8326 /* Set NDEP even though it isn't used. This makes redundant-PUT
8327 elimination of previous stores to this field work better. */
8328 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
8329 goto decode_success;
8332 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
8334 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) {
8335 IRTemp arg64 = newTemp(Ity_I64);
8336 IRTemp rmode = newTemp(Ity_I32);
8339 modrm = getIByte(delta+2);
8341 if (epartIsReg(modrm)) {
8342 assign( arg64, getMMXReg(eregOfRM(modrm)) );
8344 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)),
8345 nameXMMReg(gregOfRM(modrm)));
8347 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8348 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
8350 DIP("cvtpi2ps %s,%s\n", dis_buf,
8351 nameXMMReg(gregOfRM(modrm)) );
8354 assign( rmode, get_sse_roundingmode() );
8361 unop(Iop_64to32, mkexpr(arg64)) )) );
8368 unop(Iop_64HIto32, mkexpr(arg64)) )) );
8370 goto decode_success;
8373 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
8375 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) {
8376 IRTemp arg32 = newTemp(Ity_I32);
8377 IRTemp rmode = newTemp(Ity_I32);
8380 modrm = getIByte(delta+3);
8381 if (epartIsReg(modrm)) {
8382 assign( arg32, getIReg(4, eregOfRM(modrm)) );
8384 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)),
8385 nameXMMReg(gregOfRM(modrm)));
8387 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8388 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
8390 DIP("cvtsi2ss %s,%s\n", dis_buf,
8391 nameXMMReg(gregOfRM(modrm)) );
8394 assign( rmode, get_sse_roundingmode() );
8400 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
8402 goto decode_success;
8405 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8406 I32 in mmx, according to prevailing SSE rounding mode */
8407 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8408 I32 in mmx, rounding towards zero */
8409 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
8410 IRTemp dst64 = newTemp(Ity_I64);
8411 IRTemp rmode = newTemp(Ity_I32);
8412 IRTemp f32lo = newTemp(Ity_F32);
8413 IRTemp f32hi = newTemp(Ity_F32);
8414 Bool r2zero = toBool(insn[1] == 0x2C);
8417 modrm = getIByte(delta+2);
8419 if (epartIsReg(modrm)) {
8421 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8422 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1));
8423 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8424 nameXMMReg(eregOfRM(modrm)),
8425 nameMMXReg(gregOfRM(modrm)));
8427 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8428 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8429 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32,
8433 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8435 nameMMXReg(gregOfRM(modrm)));
8439 assign(rmode, mkU32((UInt)Irrm_ZERO) );
8441 assign( rmode, get_sse_roundingmode() );
8446 binop( Iop_32HLto64,
8447 binop( Iop_F64toI32S,
8449 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
8450 binop( Iop_F64toI32S,
8452 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8456 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
8457 goto decode_success;
8460 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
8461 I32 in ireg, according to prevailing SSE rounding mode */
8462 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
8463 I32 in ireg, rounding towards zero */
8464 if (insn[0] == 0xF3 && insn[1] == 0x0F
8465 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
8466 IRTemp rmode = newTemp(Ity_I32);
8467 IRTemp f32lo = newTemp(Ity_F32);
8468 Bool r2zero = toBool(insn[2] == 0x2C);
8471 modrm = getIByte(delta+3);
8472 if (epartIsReg(modrm)) {
8474 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8475 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8476 nameXMMReg(eregOfRM(modrm)),
8477 nameIReg(4, gregOfRM(modrm)));
8479 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8480 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8482 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8484 nameIReg(4, gregOfRM(modrm)));
8488 assign( rmode, mkU32((UInt)Irrm_ZERO) );
8490 assign( rmode, get_sse_roundingmode() );
8493 putIReg(4, gregOfRM(modrm),
8494 binop( Iop_F64toI32S,
8496 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8499 goto decode_success;
8502 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
8503 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) {
8504 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 );
8505 goto decode_success;
8508 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
8509 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) {
8511 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 );
8512 goto decode_success;
8515 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
8516 if (insn[0] == 0x0F && insn[1] == 0xAE
8517 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) {
8519 IRTemp t64 = newTemp(Ity_I64);
8520 IRTemp ew = newTemp(Ity_I32);
8522 modrm = getIByte(delta+2);
8523 vassert(!epartIsReg(modrm));
8526 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8528 DIP("ldmxcsr %s\n", dis_buf);
8530 /* The only thing we observe in %mxcsr is the rounding mode.
8531 Therefore, pass the 32-bit value (SSE native-format control
8532 word) to a clean helper, getting back a 64-bit value, the
8533 lower half of which is the SSEROUND value to store, and the
8534 upper half of which is the emulation-warning token which may
8537 /* ULong x86g_check_ldmxcsr ( UInt ); */
8538 assign( t64, mkIRExprCCall(
8539 Ity_I64, 0/*regparms*/,
8540 "x86g_check_ldmxcsr",
8541 &x86g_check_ldmxcsr,
8542 mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) )
8546 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
8547 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
8548 put_emwarn( mkexpr(ew) );
8549 /* Finally, if an emulation warning was reported, side-exit to
8550 the next insn, reporting the warning, so that Valgrind's
8551 dispatcher sees the warning. */
8554 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
8556 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
8559 goto decode_success;
8562 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8563 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
8564 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
8566 delta = dis_MMX( &ok, sorb, sz, delta+1 );
8568 goto decode_failure;
8569 goto decode_success;
8572 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
8573 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
8574 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
8575 goto decode_success;
8578 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
8579 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
8581 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
8582 goto decode_success;
8585 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
8586 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
8587 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
8588 goto decode_success;
8591 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
8592 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
8594 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
8595 goto decode_success;
8598 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
8599 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
8600 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
8601 modrm = getIByte(delta+2);
8602 if (epartIsReg(modrm)) {
8603 putXMMReg( gregOfRM(modrm),
8604 getXMMReg( eregOfRM(modrm) ));
8605 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8606 nameXMMReg(gregOfRM(modrm)));
8609 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8610 putXMMReg( gregOfRM(modrm),
8611 loadLE(Ity_V128, mkexpr(addr)) );
8612 DIP("mov[ua]ps %s,%s\n", dis_buf,
8613 nameXMMReg(gregOfRM(modrm)));
8616 goto decode_success;
8619 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
8620 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
8621 if (sz == 4 && insn[0] == 0x0F
8622 && (insn[1] == 0x29 || insn[1] == 0x11)) {
8623 modrm = getIByte(delta+2);
8624 if (epartIsReg(modrm)) {
8625 /* fall through; awaiting test case */
8627 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8628 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
8629 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
8632 goto decode_success;
8636 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
8637 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
8638 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
8639 modrm = getIByte(delta+2);
8640 if (epartIsReg(modrm)) {
8642 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
8643 getXMMRegLane64( eregOfRM(modrm), 0 ) );
8644 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8645 nameXMMReg(gregOfRM(modrm)));
8647 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8649 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
8650 loadLE(Ity_I64, mkexpr(addr)) );
8651 DIP("movhps %s,%s\n", dis_buf,
8652 nameXMMReg( gregOfRM(modrm) ));
8654 goto decode_success;
8657 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
8658 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
8659 if (!epartIsReg(insn[2])) {
8661 addr = disAMode ( &alen, sorb, delta, dis_buf );
8663 storeLE( mkexpr(addr),
8664 getXMMRegLane64( gregOfRM(insn[2]),
8665 1/*upper lane*/ ) );
8666 DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
8668 goto decode_success;
8670 /* else fall through */
8673 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
8674 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
8675 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
8676 modrm = getIByte(delta+2);
8677 if (epartIsReg(modrm)) {
8679 putXMMRegLane64( gregOfRM(modrm),
8681 getXMMRegLane64( eregOfRM(modrm), 1 ));
8682 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
8683 nameXMMReg(gregOfRM(modrm)));
8685 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8687 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
8688 loadLE(Ity_I64, mkexpr(addr)) );
8689 DIP("movlps %s, %s\n",
8690 dis_buf, nameXMMReg( gregOfRM(modrm) ));
8692 goto decode_success;
8695 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
8696 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
8697 if (!epartIsReg(insn[2])) {
8699 addr = disAMode ( &alen, sorb, delta, dis_buf );
8701 storeLE( mkexpr(addr),
8702 getXMMRegLane64( gregOfRM(insn[2]),
8703 0/*lower lane*/ ) );
8704 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
8706 goto decode_success;
8708 /* else fall through */
8711 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
8712 to 4 lowest bits of ireg(G) */
8713 if (insn[0] == 0x0F && insn[1] == 0x50) {
8714 modrm = getIByte(delta+2);
8715 if (sz == 4 && epartIsReg(modrm)) {
8717 t0 = newTemp(Ity_I32);
8718 t1 = newTemp(Ity_I32);
8719 t2 = newTemp(Ity_I32);
8720 t3 = newTemp(Ity_I32);
8722 src = eregOfRM(modrm);
8723 assign( t0, binop( Iop_And32,
8724 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
8726 assign( t1, binop( Iop_And32,
8727 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
8729 assign( t2, binop( Iop_And32,
8730 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
8732 assign( t3, binop( Iop_And32,
8733 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
8735 putIReg(4, gregOfRM(modrm),
8737 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
8738 binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
8741 DIP("movmskps %s,%s\n", nameXMMReg(src),
8742 nameIReg(4, gregOfRM(modrm)));
8743 goto decode_success;
8745 /* else fall through */
8748 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
8749 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
8750 if (insn[0] == 0x0F && insn[1] == 0x2B) {
8751 modrm = getIByte(delta+2);
8752 if (!epartIsReg(modrm)) {
8753 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8754 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
8755 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
8757 nameXMMReg(gregOfRM(modrm)));
8759 goto decode_success;
8761 /* else fall through */
8764 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8765 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
8766 Intel manual does not say anything about the usual business of
8767 the FP reg tags getting trashed whenever an MMX insn happens.
8768 So we just leave them alone.
8770 if (insn[0] == 0x0F && insn[1] == 0xE7) {
8771 modrm = getIByte(delta+2);
8772 if (sz == 4 && !epartIsReg(modrm)) {
8773 /* do_MMX_preamble(); Intel docs don't specify this */
8774 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8775 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
8776 DIP("movntq %s,%s\n", dis_buf,
8777 nameMMXReg(gregOfRM(modrm)));
8779 goto decode_success;
8781 /* else fall through */
8784 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
8785 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
8786 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
8788 modrm = getIByte(delta+3);
8789 if (epartIsReg(modrm)) {
8790 putXMMRegLane32( gregOfRM(modrm), 0,
8791 getXMMRegLane32( eregOfRM(modrm), 0 ));
8792 DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8793 nameXMMReg(gregOfRM(modrm)));
8796 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8797 /* zero bits 127:64 */
8798 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
8799 /* zero bits 63:32 */
8800 putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
8801 /* write bits 31:0 */
8802 putXMMRegLane32( gregOfRM(modrm), 0,
8803 loadLE(Ity_I32, mkexpr(addr)) );
8804 DIP("movss %s,%s\n", dis_buf,
8805 nameXMMReg(gregOfRM(modrm)));
8808 goto decode_success;
8811 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
8813 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
8815 modrm = getIByte(delta+3);
8816 if (epartIsReg(modrm)) {
8817 /* fall through, we don't yet have a test case */
8819 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8820 storeLE( mkexpr(addr),
8821 getXMMRegLane32(gregOfRM(modrm), 0) );
8822 DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
8825 goto decode_success;
8829 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
8830 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
8831 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
8832 goto decode_success;
8835 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
8836 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
8838 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
8839 goto decode_success;
8842 /* 0F 56 = ORPS -- G = G or E */
8843 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
8844 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
8845 goto decode_success;
8848 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8849 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
8850 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
8852 delta = dis_MMXop_regmem_to_reg (
8853 sorb, delta+2, insn[1], "pavgb", False );
8854 goto decode_success;
8857 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8858 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
8859 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
8861 delta = dis_MMXop_regmem_to_reg (
8862 sorb, delta+2, insn[1], "pavgw", False );
8863 goto decode_success;
8866 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8867 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
8868 zero-extend of it in ireg(G). */
8869 if (insn[0] == 0x0F && insn[1] == 0xC5) {
8871 if (sz == 4 && epartIsReg(modrm)) {
8872 IRTemp sV = newTemp(Ity_I64);
8873 t5 = newTemp(Ity_I16);
8875 assign(sV, getMMXReg(eregOfRM(modrm)));
8876 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
8877 switch (insn[3] & 3) {
8878 case 0: assign(t5, mkexpr(t0)); break;
8879 case 1: assign(t5, mkexpr(t1)); break;
8880 case 2: assign(t5, mkexpr(t2)); break;
8881 case 3: assign(t5, mkexpr(t3)); break;
8882 default: vassert(0); /*NOTREACHED*/
8884 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
8885 DIP("pextrw $%d,%s,%s\n",
8886 (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
8887 nameIReg(4,gregOfRM(modrm)));
8889 goto decode_success;
8891 /* else fall through */
8894 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8895 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
8896 put it into the specified lane of mmx(G). */
8897 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) {
8898 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
8899 mmx reg. t4 is the new lane value. t5 is the original
8900 mmx value. t6 is the new mmx value. */
8902 t4 = newTemp(Ity_I16);
8903 t5 = newTemp(Ity_I64);
8904 t6 = newTemp(Ity_I64);
8908 assign(t5, getMMXReg(gregOfRM(modrm)));
8909 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
8911 if (epartIsReg(modrm)) {
8912 assign(t4, getIReg(2, eregOfRM(modrm)));
8915 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
8916 nameIReg(2,eregOfRM(modrm)),
8917 nameMMXReg(gregOfRM(modrm)));
8919 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8921 lane = insn[3+alen-1];
8922 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
8923 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
8925 nameMMXReg(gregOfRM(modrm)));
8929 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
8930 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
8931 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
8932 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
8933 default: vassert(0); /*NOTREACHED*/
8935 putMMXReg(gregOfRM(modrm), mkexpr(t6));
8936 goto decode_success;
8939 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8940 /* 0F EE = PMAXSW -- 16x4 signed max */
8941 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) {
8943 delta = dis_MMXop_regmem_to_reg (
8944 sorb, delta+2, insn[1], "pmaxsw", False );
8945 goto decode_success;
8948 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8949 /* 0F DE = PMAXUB -- 8x8 unsigned max */
8950 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) {
8952 delta = dis_MMXop_regmem_to_reg (
8953 sorb, delta+2, insn[1], "pmaxub", False );
8954 goto decode_success;
8957 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8958 /* 0F EA = PMINSW -- 16x4 signed min */
8959 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) {
8961 delta = dis_MMXop_regmem_to_reg (
8962 sorb, delta+2, insn[1], "pminsw", False );
8963 goto decode_success;
8966 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8967 /* 0F DA = PMINUB -- 8x8 unsigned min */
8968 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) {
8970 delta = dis_MMXop_regmem_to_reg (
8971 sorb, delta+2, insn[1], "pminub", False );
8972 goto decode_success;
8975 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8976 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
8977 mmx(G), turn them into a byte, and put zero-extend of it in
8979 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
8981 if (epartIsReg(modrm)) {
8983 t0 = newTemp(Ity_I64);
8984 t1 = newTemp(Ity_I32);
8985 assign(t0, getMMXReg(eregOfRM(modrm)));
8986 assign(t1, mkIRExprCCall(
8987 Ity_I32, 0/*regparms*/,
8988 "x86g_calculate_mmx_pmovmskb",
8989 &x86g_calculate_mmx_pmovmskb,
8990 mkIRExprVec_1(mkexpr(t0))));
8991 putIReg(4, gregOfRM(modrm), mkexpr(t1));
8992 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
8993 nameIReg(4,gregOfRM(modrm)));
8995 goto decode_success;
8997 /* else fall through */
9000 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9001 /* 0F E4 = PMULHUW ("pmuluh" here) -- 16x4 hi-half of unsigned widening multiply */
9002 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
9004 delta = dis_MMXop_regmem_to_reg (
9005 sorb, delta+2, insn[1], "pmuluh", False );
9006 goto decode_success;
9009 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
9010 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
9011 /* 0F 18 /2 = PREFETCHT1 */
9012 /* 0F 18 /3 = PREFETCHT2 */
9013 if (insn[0] == 0x0F && insn[1] == 0x18
9014 && !epartIsReg(insn[2])
9015 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
9016 HChar* hintstr = "??";
9018 modrm = getIByte(delta+2);
9019 vassert(!epartIsReg(modrm));
9021 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9024 switch (gregOfRM(modrm)) {
9025 case 0: hintstr = "nta"; break;
9026 case 1: hintstr = "t0"; break;
9027 case 2: hintstr = "t1"; break;
9028 case 3: hintstr = "t2"; break;
9029 default: vassert(0); /*NOTREACHED*/
9032 DIP("prefetch%s %s\n", hintstr, dis_buf);
9033 goto decode_success;
9036 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
9037 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
9038 if (insn[0] == 0x0F && insn[1] == 0x0D
9039 && !epartIsReg(insn[2])
9040 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
9041 HChar* hintstr = "??";
9043 modrm = getIByte(delta+2);
9044 vassert(!epartIsReg(modrm));
9046 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9049 switch (gregOfRM(modrm)) {
9050 case 0: hintstr = ""; break;
9051 case 1: hintstr = "w"; break;
9052 default: vassert(0); /*NOTREACHED*/
9055 DIP("prefetch%s %s\n", hintstr, dis_buf);
9056 goto decode_success;
9059 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9060 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
9061 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) {
9063 delta = dis_MMXop_regmem_to_reg (
9064 sorb, delta+2, insn[1], "psadbw", False );
9065 goto decode_success;
9068 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9069 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
9070 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) {
9072 IRTemp sV, dV, s3, s2, s1, s0;
9073 s3 = s2 = s1 = s0 = IRTemp_INVALID;
9074 sV = newTemp(Ity_I64);
9075 dV = newTemp(Ity_I64);
9078 if (epartIsReg(modrm)) {
9079 assign( sV, getMMXReg(eregOfRM(modrm)) );
9080 order = (Int)insn[3];
9082 DIP("pshufw $%d,%s,%s\n", order,
9083 nameMMXReg(eregOfRM(modrm)),
9084 nameMMXReg(gregOfRM(modrm)));
9086 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9087 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
9088 order = (Int)insn[2+alen];
9090 DIP("pshufw $%d,%s,%s\n", order,
9092 nameMMXReg(gregOfRM(modrm)));
9094 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
9097 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9099 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
9100 SEL((order>>2)&3), SEL((order>>0)&3) )
9102 putMMXReg(gregOfRM(modrm), mkexpr(dV));
9104 goto decode_success;
9107 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
9108 if (insn[0] == 0x0F && insn[1] == 0x53) {
9110 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9111 "rcpps", Iop_Recip32Fx4 );
9112 goto decode_success;
9115 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
9116 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
9118 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9119 "rcpss", Iop_Recip32F0x4 );
9120 goto decode_success;
9123 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
9124 if (insn[0] == 0x0F && insn[1] == 0x52) {
9126 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9127 "rsqrtps", Iop_RSqrt32Fx4 );
9128 goto decode_success;
9131 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
9132 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) {
9134 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9135 "rsqrtss", Iop_RSqrt32F0x4 );
9136 goto decode_success;
9139 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
9140 if (insn[0] == 0x0F && insn[1] == 0xAE
9141 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
9144 /* Insert a memory fence. It's sometimes important that these
9145 are carried through to the generated code. */
9146 stmt( IRStmt_MBE(Imbe_Fence) );
9148 goto decode_success;
9151 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
9152 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
9155 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9156 sV = newTemp(Ity_V128);
9157 dV = newTemp(Ity_V128);
9158 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9160 assign( dV, getXMMReg(gregOfRM(modrm)) );
9162 if (epartIsReg(modrm)) {
9163 assign( sV, getXMMReg(eregOfRM(modrm)) );
9164 select = (Int)insn[3];
9166 DIP("shufps $%d,%s,%s\n", select,
9167 nameXMMReg(eregOfRM(modrm)),
9168 nameXMMReg(gregOfRM(modrm)));
9170 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9171 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9172 select = (Int)insn[2+alen];
9174 DIP("shufps $%d,%s,%s\n", select,
9176 nameXMMReg(gregOfRM(modrm)));
9179 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9180 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9182 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
9183 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9187 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
9188 SELD((select>>2)&3), SELD((select>>0)&3) )
9194 goto decode_success;
9197 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
9198 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) {
9199 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9200 "sqrtps", Iop_Sqrt32Fx4 );
9201 goto decode_success;
9204 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
9205 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) {
9207 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9208 "sqrtss", Iop_Sqrt32F0x4 );
9209 goto decode_success;
9212 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
9213 if (insn[0] == 0x0F && insn[1] == 0xAE
9214 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) {
9215 modrm = getIByte(delta+2);
9217 vassert(!epartIsReg(modrm));
9219 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9222 /* Fake up a native SSE mxcsr word. The only thing it depends
9223 on is SSEROUND[1:0], so call a clean helper to cook it up.
9225 /* UInt x86g_create_mxcsr ( UInt sseround ) */
9226 DIP("stmxcsr %s\n", dis_buf);
9227 storeLE( mkexpr(addr),
9230 "x86g_create_mxcsr", &x86g_create_mxcsr,
9231 mkIRExprVec_1( get_sse_roundingmode() )
9234 goto decode_success;
9237 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
9238 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) {
9239 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 );
9240 goto decode_success;
9243 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
9244 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) {
9246 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 );
9247 goto decode_success;
9250 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
9251 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
9252 /* These just appear to be special cases of SHUFPS */
9253 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
9255 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9256 Bool hi = toBool(insn[1] == 0x15);
9257 sV = newTemp(Ity_V128);
9258 dV = newTemp(Ity_V128);
9259 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9261 assign( dV, getXMMReg(gregOfRM(modrm)) );
9263 if (epartIsReg(modrm)) {
9264 assign( sV, getXMMReg(eregOfRM(modrm)) );
9266 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9267 nameXMMReg(eregOfRM(modrm)),
9268 nameXMMReg(gregOfRM(modrm)));
9270 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9271 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9273 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9275 nameXMMReg(gregOfRM(modrm)));
9278 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9279 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
/* high form interleaves the top lane pairs, low form the bottom ones */
9282 putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) );
9284 putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) );
9287 goto decode_success;
9290 /* 0F 57 = XORPS -- G = G xor E */
9291 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) {
9292 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 );
9293 goto decode_success;
9296 /* ---------------------------------------------------- */
9297 /* --- end of the SSE decoder. --- */
9298 /* ---------------------------------------------------- */
9300 /* ---------------------------------------------------- */
9301 /* --- start of the SSE2 decoder. --- */
9302 /* ---------------------------------------------------- */
9304 /* Skip parts of the decoder which don't apply given the stated
9305 guest subarchitecture. */
9306 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
9307 goto after_sse_decoders; /* no SSE2 capabilities */
/* re-fetch the raw insn bytes; delta may have moved above */
9309 insn = (UChar*)&guest_code[delta];
9311 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
9312 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) {
9313 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
9314 goto decode_success;
9317 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
9318 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
9320 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 );
9321 goto decode_success;
9324 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
9325 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) {
9326 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 );
9327 goto decode_success;
9330 /* 66 0F 54 = ANDPD -- G = G and E */
9331 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) {
9332 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 );
9333 goto decode_success;
9336 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
9337 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) {
9338 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
9339 goto decode_success;
9342 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
9343 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
9345 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 );
9346 goto decode_success;
9349 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
9350 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
9351 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
9352 IRTemp argL = newTemp(Ity_F64);
9353 IRTemp argR = newTemp(Ity_F64);
9354 modrm = getIByte(delta+2);
9355 if (epartIsReg(modrm)) {
9356 assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) );
9358 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9359 nameXMMReg(gregOfRM(modrm)) );
9361 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9362 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
9364 DIP("[u]comisd %s,%s\n", dis_buf,
9365 nameXMMReg(gregOfRM(modrm)) );
9367 assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) );
/* materialise the comparison result straight into the eflags thunk */
9369 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
9370 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
9374 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)),
9377 /* Set NDEP even though it isn't used. This makes redundant-PUT
9378 elimination of previous stores to this field work better. */
9379 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
9380 goto decode_success;
9383 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
9385 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
9386 IRTemp arg64 = newTemp(Ity_I64);
9389 modrm = getIByte(delta+3);
9390 if (epartIsReg(modrm)) {
9391 assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) );
9393 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9394 nameXMMReg(gregOfRM(modrm)));
9396 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9397 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9399 DIP("cvtdq2pd %s,%s\n", dis_buf,
9400 nameXMMReg(gregOfRM(modrm)) );
/* I32->F64 widening is exact, so no rounding mode is consulted */
9405 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
9410 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
9413 goto decode_success;
9416 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
9418 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) {
9419 IRTemp argV = newTemp(Ity_V128);
9420 IRTemp rmode = newTemp(Ity_I32);
9422 modrm = getIByte(delta+2);
9423 if (epartIsReg(modrm)) {
9424 assign( argV, getXMMReg(eregOfRM(modrm)) );
9426 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9427 nameXMMReg(gregOfRM(modrm)));
9429 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9430 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9432 DIP("cvtdq2ps %s,%s\n", dis_buf,
9433 nameXMMReg(gregOfRM(modrm)) );
9436 assign( rmode, get_sse_roundingmode() );
9437 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
/* go via F64 and round back down to F32 per lane */
9439 # define CVT(_t) binop( Iop_F64toF32, \
9441 unop(Iop_I32StoF64,mkexpr(_t)))
9443 putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
9444 putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
9445 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9446 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9450 goto decode_success;
9453 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
9454 lo half xmm(G), and zero upper half */
9455 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
9456 IRTemp argV = newTemp(Ity_V128);
9457 IRTemp rmode = newTemp(Ity_I32);
9460 modrm = getIByte(delta+3);
9461 if (epartIsReg(modrm)) {
9462 assign( argV, getXMMReg(eregOfRM(modrm)) );
9464 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9465 nameXMMReg(gregOfRM(modrm)));
9467 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9468 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9470 DIP("cvtpd2dq %s,%s\n", dis_buf,
9471 nameXMMReg(gregOfRM(modrm)) );
/* rounding follows the guest's current SSE rounding mode */
9474 assign( rmode, get_sse_roundingmode() );
9475 t0 = newTemp(Ity_F64);
9476 t1 = newTemp(Ity_F64);
9477 assign( t0, unop(Iop_ReinterpI64asF64,
9478 unop(Iop_V128to64, mkexpr(argV))) );
9479 assign( t1, unop(Iop_ReinterpI64asF64,
9480 unop(Iop_V128HIto64, mkexpr(argV))) );
9482 # define CVT(_t) binop( Iop_F64toI32S, \
9486 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9487 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9488 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9489 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9493 goto decode_success;
9496 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9497 I32 in mmx, according to prevailing SSE rounding mode */
9498 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9499 I32 in mmx, rounding towards zero */
9500 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9501 IRTemp dst64 = newTemp(Ity_I64);
9502 IRTemp rmode = newTemp(Ity_I32);
9503 IRTemp f64lo = newTemp(Ity_F64);
9504 IRTemp f64hi = newTemp(Ity_F64);
9505 Bool r2zero = toBool(insn[1] == 0x2C);
9508 modrm = getIByte(delta+2);
9510 if (epartIsReg(modrm)) {
9512 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9513 assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
9514 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
9515 nameXMMReg(eregOfRM(modrm)),
9516 nameMMXReg(gregOfRM(modrm)));
9518 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9519 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9520 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,
/* FIX: the memory-operand debug string previously printed the mnemonic
   as "cvt%spf2pi"; corrected to "cvt%spd2pi" to match the reg-reg form
   above and the actual instruction name.  Disassembly text only -- no
   change to the generated IR. */
9524 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
9526 nameMMXReg(gregOfRM(modrm)));
/* CVTTPD2PI always truncates; CVTPD2PI honours the guest MXCSR mode */
9530 assign(rmode, mkU32((UInt)Irrm_ZERO) );
9532 assign( rmode, get_sse_roundingmode() );
9537 binop( Iop_32HLto64,
9538 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
9539 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
9543 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
9544 goto decode_success;
9547 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
9548 lo half xmm(G), and zero upper half */
9549 /* Note, this is practically identical to CVTPD2DQ. It would have
9550 been nicer to merge them together, but the insn[] offsets differ
9552 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) {
9553 IRTemp argV = newTemp(Ity_V128);
9554 IRTemp rmode = newTemp(Ity_I32);
9556 modrm = getIByte(delta+2);
9557 if (epartIsReg(modrm)) {
9558 assign( argV, getXMMReg(eregOfRM(modrm)) );
9560 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9561 nameXMMReg(gregOfRM(modrm)));
9563 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9564 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9566 DIP("cvtpd2ps %s,%s\n", dis_buf,
9567 nameXMMReg(gregOfRM(modrm)) );
9570 assign( rmode, get_sse_roundingmode() );
9571 t0 = newTemp(Ity_F64);
9572 t1 = newTemp(Ity_F64);
9573 assign( t0, unop(Iop_ReinterpI64asF64,
9574 unop(Iop_V128to64, mkexpr(argV))) );
9575 assign( t1, unop(Iop_ReinterpI64asF64,
9576 unop(Iop_V128HIto64, mkexpr(argV))) );
/* narrow each F64 to F32 under the guest rounding mode; zero top half */
9578 # define CVT(_t) binop( Iop_F64toF32, \
9582 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9583 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9584 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9585 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9589 goto decode_success;
9592 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
9594 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) {
9595 IRTemp arg64 = newTemp(Ity_I64);
9597 modrm = getIByte(delta+2);
9598 if (epartIsReg(modrm)) {
9599 /* Only switch to MMX mode if the source is a MMX register.
9600 This is inconsistent with all other instructions which
9601 convert between XMM and (M64 or MMX), which always switch
9602 to MMX mode even if 64-bit operand is M64 and not MMX. At
9603 least, that's what the Intel docs seem to me to say.
9606 assign( arg64, getMMXReg(eregOfRM(modrm)) );
9608 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9609 nameXMMReg(gregOfRM(modrm)));
9611 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9612 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9614 DIP("cvtpi2pd %s,%s\n", dis_buf,
9615 nameXMMReg(gregOfRM(modrm)) );
/* I32->F64 widening is exact; no rounding mode needed */
9620 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
9625 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
9628 goto decode_success;
9631 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
9633 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) {
9634 IRTemp argV = newTemp(Ity_V128);
9635 IRTemp rmode = newTemp(Ity_I32);
9637 modrm = getIByte(delta+2);
9638 if (epartIsReg(modrm)) {
9639 assign( argV, getXMMReg(eregOfRM(modrm)) );
9641 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9642 nameXMMReg(gregOfRM(modrm)));
9644 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9645 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9647 DIP("cvtps2dq %s,%s\n", dis_buf,
9648 nameXMMReg(gregOfRM(modrm)) );
9651 assign( rmode, get_sse_roundingmode() );
9652 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9654 /* This is less than ideal. If it turns out to be a performance
9655 bottleneck it can be improved. */
9657 binop( Iop_F64toI32S, \
9659 unop( Iop_F32toF64, \
9660 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9662 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
9663 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
9664 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9665 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9669 goto decode_success;
9672 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
9674 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) {
9675 IRTemp f32lo = newTemp(Ity_F32);
9676 IRTemp f32hi = newTemp(Ity_F32);
9678 modrm = getIByte(delta+2);
9679 if (epartIsReg(modrm)) {
9680 assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
9681 assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
9683 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9684 nameXMMReg(gregOfRM(modrm)));
9686 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9687 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
/* second F32 sits 4 bytes above the first in memory */
9688 assign( f32hi, loadLE(Ity_F32,
9689 binop(Iop_Add32,mkexpr(addr),mkU32(4))) );
9691 DIP("cvtps2pd %s,%s\n", dis_buf,
9692 nameXMMReg(gregOfRM(modrm)) );
/* F32->F64 widening is exact */
9695 putXMMRegLane64F( gregOfRM(modrm), 1,
9696 unop(Iop_F32toF64, mkexpr(f32hi)) );
9697 putXMMRegLane64F( gregOfRM(modrm), 0,
9698 unop(Iop_F32toF64, mkexpr(f32lo)) );
9700 goto decode_success;
9703 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
9704 I32 in ireg, according to prevailing SSE rounding mode */
9705 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
9706 I32 in ireg, rounding towards zero */
9707 if (insn[0] == 0xF2 && insn[1] == 0x0F
9708 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
9709 IRTemp rmode = newTemp(Ity_I32);
9710 IRTemp f64lo = newTemp(Ity_F64);
9711 Bool r2zero = toBool(insn[2] == 0x2C);
9714 modrm = getIByte(delta+3);
9715 if (epartIsReg(modrm)) {
9717 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9718 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
9719 nameXMMReg(eregOfRM(modrm)),
9720 nameIReg(4, gregOfRM(modrm)));
9722 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9723 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9725 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
9727 nameIReg(4, gregOfRM(modrm)));
/* truncating variant forces round-to-zero */
9731 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9733 assign( rmode, get_sse_roundingmode() );
9736 putIReg(4, gregOfRM(modrm),
9737 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
9739 goto decode_success;
9742 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
9743 low 1/4 xmm(G), according to prevailing SSE rounding mode */
9744 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) {
9745 IRTemp rmode = newTemp(Ity_I32);
9746 IRTemp f64lo = newTemp(Ity_F64);
9749 modrm = getIByte(delta+3);
9750 if (epartIsReg(modrm)) {
9752 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9753 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9754 nameXMMReg(gregOfRM(modrm)));
9756 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9757 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9759 DIP("cvtsd2ss %s,%s\n", dis_buf,
9760 nameXMMReg(gregOfRM(modrm)));
/* narrowing F64->F32 can lose precision, so rounding mode matters */
9763 assign( rmode, get_sse_roundingmode() );
9766 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
9769 goto decode_success;
9772 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
9774 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) {
9775 IRTemp arg32 = newTemp(Ity_I32);
9778 modrm = getIByte(delta+3);
9779 if (epartIsReg(modrm)) {
9780 assign( arg32, getIReg(4, eregOfRM(modrm)) );
9782 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)),
9783 nameXMMReg(gregOfRM(modrm)));
9785 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9786 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
9788 DIP("cvtsi2sd %s,%s\n", dis_buf,
9789 nameXMMReg(gregOfRM(modrm)) );
/* I32->F64 is exact -- no rounding mode needed */
9794 unop(Iop_I32StoF64, mkexpr(arg32)) );
9796 goto decode_success;
9799 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
9801 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
9802 IRTemp f32lo = newTemp(Ity_F32);
9805 modrm = getIByte(delta+3);
9806 if (epartIsReg(modrm)) {
9808 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
9809 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9810 nameXMMReg(gregOfRM(modrm)));
9812 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9813 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
9815 DIP("cvtss2sd %s,%s\n", dis_buf,
9816 nameXMMReg(gregOfRM(modrm)));
9819 putXMMRegLane64F( gregOfRM(modrm), 0,
9820 unop( Iop_F32toF64, mkexpr(f32lo) ) );
9822 goto decode_success;
9825 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
9826 lo half xmm(G), and zero upper half, rounding towards zero */
9827 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) {
9828 IRTemp argV = newTemp(Ity_V128);
9829 IRTemp rmode = newTemp(Ity_I32);
9831 modrm = getIByte(delta+2);
9832 if (epartIsReg(modrm)) {
9833 assign( argV, getXMMReg(eregOfRM(modrm)) );
9835 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9836 nameXMMReg(gregOfRM(modrm)));
9838 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9839 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9841 DIP("cvttpd2dq %s,%s\n", dis_buf,
9842 nameXMMReg(gregOfRM(modrm)) );
/* truncating form: rounding mode is hardwired to round-to-zero */
9845 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9847 t0 = newTemp(Ity_F64);
9848 t1 = newTemp(Ity_F64);
9849 assign( t0, unop(Iop_ReinterpI64asF64,
9850 unop(Iop_V128to64, mkexpr(argV))) );
9851 assign( t1, unop(Iop_ReinterpI64asF64,
9852 unop(Iop_V128HIto64, mkexpr(argV))) );
9854 # define CVT(_t) binop( Iop_F64toI32S, \
9858 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9859 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9860 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9861 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9865 goto decode_success;
9868 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
9869 xmm(G), rounding towards zero */
9870 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
9871 IRTemp argV = newTemp(Ity_V128);
9872 IRTemp rmode = newTemp(Ity_I32);
9875 modrm = getIByte(delta+3);
9876 if (epartIsReg(modrm)) {
9877 assign( argV, getXMMReg(eregOfRM(modrm)) );
9879 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9880 nameXMMReg(gregOfRM(modrm)));
9882 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9883 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9885 DIP("cvttps2dq %s,%s\n", dis_buf,
9886 nameXMMReg(gregOfRM(modrm)) );
9889 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9890 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9892 /* This is less than ideal. If it turns out to be a performance
9893 bottleneck it can be improved. */
9895 binop( Iop_F64toI32S, \
9897 unop( Iop_F32toF64, \
9898 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9900 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
9901 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
9902 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9903 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9907 goto decode_success;
9910 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
9911 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) {
9912 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 );
9913 goto decode_success;
9916 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
9917 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) {
9919 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 );
9920 goto decode_success;
9923 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
9924 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
9925 if (insn[0] == 0x0F && insn[1] == 0xAE
9926 && epartIsReg(insn[2])
9927 && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) {
9930 /* Insert a memory fence. It's sometimes important that these
9931 are carried through to the generated code. */
9932 stmt( IRStmt_MBE(Imbe_Fence) );
9933 DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
9934 goto decode_success;
9937 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
9938 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) {
9939 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 );
9940 goto decode_success;
9943 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
9944 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) {
9946 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 );
9947 goto decode_success;
9950 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
9951 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) {
9952 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 );
9953 goto decode_success;
9956 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
9957 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) {
9959 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 );
9960 goto decode_success;
9963 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
9964 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
9965 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
/* All three are treated identically: alignment checking is not modelled. */
9966 if (sz == 2 && insn[0] == 0x0F
9967 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
9968 HChar* wot = insn[1]==0x28 ? "apd" :
9969 insn[1]==0x10 ? "upd" : "dqa";
9970 modrm = getIByte(delta+2);
9971 if (epartIsReg(modrm)) {
9972 putXMMReg( gregOfRM(modrm),
9973 getXMMReg( eregOfRM(modrm) ));
9974 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)),
9975 nameXMMReg(gregOfRM(modrm)));
9978 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9979 putXMMReg( gregOfRM(modrm),
9980 loadLE(Ity_V128, mkexpr(addr)) );
9981 DIP("mov%s %s,%s\n", wot, dis_buf,
9982 nameXMMReg(gregOfRM(modrm)));
9985 goto decode_success;
9988 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
9989 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
9990 if (sz == 2 && insn[0] == 0x0F
9991 && (insn[1] == 0x29 || insn[1] == 0x11)) {
9992 HChar* wot = insn[1]==0x29 ? "apd" : "upd";
9993 modrm = getIByte(delta+2);
9994 if (epartIsReg(modrm)) {
9995 /* fall through; awaiting test case */
9997 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9998 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
9999 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)),
10002 goto decode_success;
10006 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
10007 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) {
10008 modrm = getIByte(delta+2);
10009 if (epartIsReg(modrm)) {
/* Iop_32UtoV128 zero-extends the 32-bit value into all 128 bits */
10013 unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) )
10015 DIP("movd %s, %s\n",
10016 nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm)));
10018 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10022 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
10024 DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm)));
10026 goto decode_success;
10029 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
10030 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) {
10031 modrm = getIByte(delta+2);
10032 if (epartIsReg(modrm)) {
10034 putIReg( 4, eregOfRM(modrm),
10035 getXMMRegLane32(gregOfRM(modrm), 0) );
10036 DIP("movd %s, %s\n",
10037 nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
10039 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10041 storeLE( mkexpr(addr),
10042 getXMMRegLane32(gregOfRM(modrm), 0) );
10043 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10045 goto decode_success;
10048 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
10049 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) {
10050 modrm = getIByte(delta+2);
10051 if (epartIsReg(modrm)) {
10053 putXMMReg( eregOfRM(modrm),
10054 getXMMReg(gregOfRM(modrm)) );
10055 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10056 nameXMMReg(eregOfRM(modrm)));
10058 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10060 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10061 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10063 goto decode_success;
10066 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
10067 /* Unfortunately can't simply use the MOVDQA case since the
10068 prefix lengths are different (66 vs F3) */
10069 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) {
10071 modrm = getIByte(delta+3);
10072 if (epartIsReg(modrm)) {
10073 putXMMReg( gregOfRM(modrm),
10074 getXMMReg( eregOfRM(modrm) ));
10075 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10076 nameXMMReg(gregOfRM(modrm)));
10079 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10080 putXMMReg( gregOfRM(modrm),
10081 loadLE(Ity_V128, mkexpr(addr)) );
10082 DIP("movdqu %s,%s\n", dis_buf,
10083 nameXMMReg(gregOfRM(modrm)));
10086 goto decode_success;
10089 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
10090 /* Unfortunately can't simply use the MOVDQA case since the
10091 prefix lengths are different (66 vs F3) */
10092 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) {
10094 modrm = getIByte(delta+3);
10095 if (epartIsReg(modrm)) {
10097 putXMMReg( eregOfRM(modrm),
10098 getXMMReg(gregOfRM(modrm)) );
10099 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10100 nameXMMReg(eregOfRM(modrm)));
10102 addr = disAMode( &alen, sorb, delta+3, dis_buf );
10104 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10105 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10107 goto decode_success;
10110 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
10111 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) {
10113 modrm = getIByte(delta+3);
10114 if (epartIsReg(modrm)) {
10116 putMMXReg( gregOfRM(modrm),
10117 getXMMRegLane64( eregOfRM(modrm), 0 ));
10118 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10119 nameMMXReg(gregOfRM(modrm)));
10121 goto decode_success;
10123 /* fall through, apparently no mem case for this insn */
10127 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
10128 /* These seems identical to MOVHPS. This instruction encoding is
10129 completely crazy. */
10130 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
10131 modrm = getIByte(delta+2);
10132 if (epartIsReg(modrm)) {
10133 /* fall through; apparently reg-reg is not possible */
10135 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10137 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
10138 loadLE(Ity_I64, mkexpr(addr)) );
10139 DIP("movhpd %s,%s\n", dis_buf,
10140 nameXMMReg( gregOfRM(modrm) ));
10141 goto decode_success;
10145 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
10146 /* Again, this seems identical to MOVHPS. */
10147 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
10148 if (!epartIsReg(insn[2])) {
/* NOTE(review): disAMode is called with 'delta', not 'delta+2' --
   presumably delta was advanced on an elided line; confirm. */
10150 addr = disAMode ( &alen, sorb, delta, dis_buf );
10152 storeLE( mkexpr(addr),
10153 getXMMRegLane64( gregOfRM(insn[2]),
10154 1/*upper lane*/ ) );
10155 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
10157 goto decode_success;
10159 /* else fall through */
10162 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
10163 /* Identical to MOVLPS ? */
10164 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) {
10165 modrm = getIByte(delta+2);
10166 if (epartIsReg(modrm)) {
10167 /* fall through; apparently reg-reg is not possible */
10169 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10171 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
10172 loadLE(Ity_I64, mkexpr(addr)) );
10173 DIP("movlpd %s, %s\n",
10174 dis_buf, nameXMMReg( gregOfRM(modrm) ));
10175 goto decode_success;
10179 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
10180 /* Identical to MOVLPS ? */
10181 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) {
10182 if (!epartIsReg(insn[2])) {
10184 addr = disAMode ( &alen, sorb, delta, dis_buf );
10186 storeLE( mkexpr(addr),
10187 getXMMRegLane64( gregOfRM(insn[2]),
10188 0/*lower lane*/ ) );
10189 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
10191 goto decode_success;
10193 /* else fall through */
10196 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
10197 2 lowest bits of ireg(G) */
10198 if (insn[0] == 0x0F && insn[1] == 0x50) {
10199 modrm = getIByte(delta+2);
10200 if (sz == 2 && epartIsReg(modrm)) {
10202 t0 = newTemp(Ity_I32);
10203 t1 = newTemp(Ity_I32);
10205 src = eregOfRM(modrm);
/* sign of lane 0: top bit of its upper 32-bit half (xmm lane 1) */
10206 assign( t0, binop( Iop_And32,
10207 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
/* sign of lane 1: shifted so it lands in result bit 1 */
10209 assign( t1, binop( Iop_And32,
10210 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
10212 putIReg(4, gregOfRM(modrm),
10213 binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
10215 DIP("movmskpd %s,%s\n", nameXMMReg(src),
10216 nameIReg(4, gregOfRM(modrm)));
10217 goto decode_success;
10219 /* else fall through */
10222 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
10223 if (insn[0] == 0x0F && insn[1] == 0xF7) {
10224 modrm = getIByte(delta+2);
10225 if (sz == 2 && epartIsReg(modrm)) {
10226 IRTemp regD = newTemp(Ity_V128);
10227 IRTemp mask = newTemp(Ity_V128);
10228 IRTemp olddata = newTemp(Ity_V128);
10229 IRTemp newdata = newTemp(Ity_V128);
10230 addr = newTemp(Ity_I32);
/* implicit destination is [EDI], subject to any segment override */
10232 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
10233 assign( regD, getXMMReg( gregOfRM(modrm) ));
10235 /* Unfortunately can't do the obvious thing with SarN8x16
10236 here since that can't be re-emitted as SSE2 code - no such
10240 binop(Iop_64HLtoV128,
10242 getXMMRegLane64( eregOfRM(modrm), 1 ),
10245 getXMMRegLane64( eregOfRM(modrm), 0 ),
/* read-modify-write: merge masked bytes into the existing memory */
10247 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
10255 unop(Iop_NotV128, mkexpr(mask)))) );
10256 storeLE( mkexpr(addr), mkexpr(newdata) );
10259 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
10260 nameXMMReg( gregOfRM(modrm) ) );
10261 goto decode_success;
10263 /* else fall through */
10266 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
/* non-temporal hint is ignored; modelled as an ordinary store */
10267 if (insn[0] == 0x0F && insn[1] == 0xE7) {
10268 modrm = getIByte(delta+2);
10269 if (sz == 2 && !epartIsReg(modrm)) {
10270 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10271 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10272 DIP("movntdq %s,%s\n", dis_buf,
10273 nameXMMReg(gregOfRM(modrm)));
10275 goto decode_success;
10277 /* else fall through */
10280 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
10281 if (insn[0] == 0x0F && insn[1] == 0xC3) {
10283 modrm = getIByte(delta+2);
10284 if (!epartIsReg(modrm)) {
10285 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10286 storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) );
10287 DIP("movnti %s,%s\n", dis_buf,
10288 nameIReg(4, gregOfRM(modrm)));
10290 goto decode_success;
10292 /* else fall through */
10295 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
10296 or lo half xmm). */
10297 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) {
10298 modrm = getIByte(delta+2);
10299 if (epartIsReg(modrm)) {
10300 /* fall through, awaiting test case */
10301 /* dst: lo half copied, hi half zeroed */
10303 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10304 storeLE( mkexpr(addr),
10305 getXMMRegLane64( gregOfRM(modrm), 0 ));
10306 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf );
10308 goto decode_success;
10312 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
10314 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) {
10316 modrm = getIByte(delta+3);
10317 if (epartIsReg(modrm)) {
/* Iop_64UtoV128 zero-extends the 64-bit MMX value into the xmm reg */
10319 putXMMReg( gregOfRM(modrm),
10320 unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) );
10321 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
10322 nameXMMReg(gregOfRM(modrm)));
10324 goto decode_success;
10326 /* fall through, apparently no mem case for this insn */
10330 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
10331 G (lo half xmm). Upper half of G is zeroed out. */
10332 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
10333 G (lo half xmm). If E is mem, upper half of G is zeroed out.
10334 If E is reg, upper half of G is unchanged. */
10335 if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10)
10336 || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) {
10338 modrm = getIByte(delta+3);
10339 if (epartIsReg(modrm)) {
10340 putXMMRegLane64( gregOfRM(modrm), 0,
10341 getXMMRegLane64( eregOfRM(modrm), 0 ));
/* only MOVQ (F3 prefix) clears the top half in the reg-reg form */
10342 if (insn[0] == 0xF3/*MOVQ*/) {
10343 /* zero bits 127:64 */
10344 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10346 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10347 nameXMMReg(gregOfRM(modrm)));
10350 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10351 /* zero bits 127:64 */
10352 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10353 /* write bits 63:0 */
10354 putXMMRegLane64( gregOfRM(modrm), 0,
10355 loadLE(Ity_I64, mkexpr(addr)) );
10356 DIP("movsd %s,%s\n", dis_buf,
10357 nameXMMReg(gregOfRM(modrm)));
10360 goto decode_success;
10363 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
10364 or lo half xmm). */
10365 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
10367 modrm = getIByte(delta+3);
10368 if (epartIsReg(modrm)) {
10369 putXMMRegLane64( eregOfRM(modrm), 0,
10370 getXMMRegLane64( gregOfRM(modrm), 0 ));
10371 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10372 nameXMMReg(eregOfRM(modrm)));
10375 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10376 storeLE( mkexpr(addr),
10377 getXMMRegLane64(gregOfRM(modrm), 0) );
10378 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10382 goto decode_success;
10385 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
10386 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
10387 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
10388 goto decode_success;
10391 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
10392 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
10394 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
10395 goto decode_success;
10398    /* 66 0F 56 = ORPD -- G = G or E */
10399 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) {
10400 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
10401 goto decode_success;
10404 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
10405 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
10407 IRTemp sV = newTemp(Ity_V128);
10408 IRTemp dV = newTemp(Ity_V128);
10409 IRTemp s1 = newTemp(Ity_I64);
10410 IRTemp s0 = newTemp(Ity_I64);
10411 IRTemp d1 = newTemp(Ity_I64);
10412 IRTemp d0 = newTemp(Ity_I64);
10415 assign( dV, getXMMReg(gregOfRM(modrm)) );
10417 if (epartIsReg(modrm)) {
10418 assign( sV, getXMMReg(eregOfRM(modrm)) );
10419 select = (Int)insn[3];
10421 DIP("shufpd $%d,%s,%s\n", select,
10422 nameXMMReg(eregOfRM(modrm)),
10423 nameXMMReg(gregOfRM(modrm)));
10425 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10426 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10427 select = (Int)insn[2+alen];
10429 DIP("shufpd $%d,%s,%s\n", select,
10431 nameXMMReg(gregOfRM(modrm)));
10434 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10435 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10436 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10437 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10439 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
10440 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
10444 binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
10450 goto decode_success;
10453 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
10454 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) {
10455 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
10456 "sqrtpd", Iop_Sqrt64Fx2 );
10457 goto decode_success;
10460 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
10461 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
10463 delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3,
10464 "sqrtsd", Iop_Sqrt64F0x2 );
10465 goto decode_success;
10468 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
10469 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
10470 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
10471 goto decode_success;
10474 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
10475 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
10477 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
10478 goto decode_success;
10481 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
10482 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
10483    /* These just appear to be special cases of SHUFPD */
10484 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
10485 IRTemp s1 = newTemp(Ity_I64);
10486 IRTemp s0 = newTemp(Ity_I64);
10487 IRTemp d1 = newTemp(Ity_I64);
10488 IRTemp d0 = newTemp(Ity_I64);
10489 IRTemp sV = newTemp(Ity_V128);
10490 IRTemp dV = newTemp(Ity_V128);
10491 Bool hi = toBool(insn[1] == 0x15);
10494 assign( dV, getXMMReg(gregOfRM(modrm)) );
10496 if (epartIsReg(modrm)) {
10497 assign( sV, getXMMReg(eregOfRM(modrm)) );
10499 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10500 nameXMMReg(eregOfRM(modrm)),
10501 nameXMMReg(gregOfRM(modrm)));
10503 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10504 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10506 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10508 nameXMMReg(gregOfRM(modrm)));
10511 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10512 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10513 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10514 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10517 putXMMReg( gregOfRM(modrm),
10518 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
10520 putXMMReg( gregOfRM(modrm),
10521 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
10524 goto decode_success;
10527    /* 66 0F 57 = XORPD -- G = G xor E */
10528 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) {
10529 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 );
10530 goto decode_success;
10533 /* 66 0F 6B = PACKSSDW */
10534 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
10535 delta = dis_SSEint_E_to_G( sorb, delta+2,
10536 "packssdw", Iop_QNarrow32Sx4, True );
10537 goto decode_success;
10540 /* 66 0F 63 = PACKSSWB */
10541 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
10542 delta = dis_SSEint_E_to_G( sorb, delta+2,
10543 "packsswb", Iop_QNarrow16Sx8, True );
10544 goto decode_success;
10547 /* 66 0F 67 = PACKUSWB */
10548 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
10549 delta = dis_SSEint_E_to_G( sorb, delta+2,
10550 "packuswb", Iop_QNarrow16Ux8, True );
10551 goto decode_success;
10554 /* 66 0F FC = PADDB */
10555 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) {
10556 delta = dis_SSEint_E_to_G( sorb, delta+2,
10557 "paddb", Iop_Add8x16, False );
10558 goto decode_success;
10561 /* 66 0F FE = PADDD */
10562 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) {
10563 delta = dis_SSEint_E_to_G( sorb, delta+2,
10564 "paddd", Iop_Add32x4, False );
10565 goto decode_success;
10568 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
10569 /* 0F D4 = PADDQ -- add 64x1 */
10570 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) {
10572 delta = dis_MMXop_regmem_to_reg (
10573 sorb, delta+2, insn[1], "paddq", False );
10574 goto decode_success;
10577 /* 66 0F D4 = PADDQ */
10578 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) {
10579 delta = dis_SSEint_E_to_G( sorb, delta+2,
10580 "paddq", Iop_Add64x2, False );
10581 goto decode_success;
10584 /* 66 0F FD = PADDW */
10585 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) {
10586 delta = dis_SSEint_E_to_G( sorb, delta+2,
10587 "paddw", Iop_Add16x8, False );
10588 goto decode_success;
10591 /* 66 0F EC = PADDSB */
10592 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) {
10593 delta = dis_SSEint_E_to_G( sorb, delta+2,
10594 "paddsb", Iop_QAdd8Sx16, False );
10595 goto decode_success;
10598 /* 66 0F ED = PADDSW */
10599 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) {
10600 delta = dis_SSEint_E_to_G( sorb, delta+2,
10601 "paddsw", Iop_QAdd16Sx8, False );
10602 goto decode_success;
10605 /* 66 0F DC = PADDUSB */
10606 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) {
10607 delta = dis_SSEint_E_to_G( sorb, delta+2,
10608 "paddusb", Iop_QAdd8Ux16, False );
10609 goto decode_success;
10612 /* 66 0F DD = PADDUSW */
10613 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) {
10614 delta = dis_SSEint_E_to_G( sorb, delta+2,
10615 "paddusw", Iop_QAdd16Ux8, False );
10616 goto decode_success;
10619 /* 66 0F DB = PAND */
10620 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) {
10621 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 );
10622 goto decode_success;
10625 /* 66 0F DF = PANDN */
10626 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) {
10627 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 );
10628 goto decode_success;
10631 /* 66 0F E0 = PAVGB */
10632 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) {
10633 delta = dis_SSEint_E_to_G( sorb, delta+2,
10634 "pavgb", Iop_Avg8Ux16, False );
10635 goto decode_success;
10638 /* 66 0F E3 = PAVGW */
10639 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) {
10640 delta = dis_SSEint_E_to_G( sorb, delta+2,
10641 "pavgw", Iop_Avg16Ux8, False );
10642 goto decode_success;
10645 /* 66 0F 74 = PCMPEQB */
10646 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) {
10647 delta = dis_SSEint_E_to_G( sorb, delta+2,
10648 "pcmpeqb", Iop_CmpEQ8x16, False );
10649 goto decode_success;
10652 /* 66 0F 76 = PCMPEQD */
10653 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) {
10654 delta = dis_SSEint_E_to_G( sorb, delta+2,
10655 "pcmpeqd", Iop_CmpEQ32x4, False );
10656 goto decode_success;
10659 /* 66 0F 75 = PCMPEQW */
10660 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) {
10661 delta = dis_SSEint_E_to_G( sorb, delta+2,
10662 "pcmpeqw", Iop_CmpEQ16x8, False );
10663 goto decode_success;
10666 /* 66 0F 64 = PCMPGTB */
10667 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) {
10668 delta = dis_SSEint_E_to_G( sorb, delta+2,
10669 "pcmpgtb", Iop_CmpGT8Sx16, False );
10670 goto decode_success;
10673 /* 66 0F 66 = PCMPGTD */
10674 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) {
10675 delta = dis_SSEint_E_to_G( sorb, delta+2,
10676 "pcmpgtd", Iop_CmpGT32Sx4, False );
10677 goto decode_success;
10680 /* 66 0F 65 = PCMPGTW */
10681 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) {
10682 delta = dis_SSEint_E_to_G( sorb, delta+2,
10683 "pcmpgtw", Iop_CmpGT16Sx8, False );
10684 goto decode_success;
10687 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
10688 zero-extend of it in ireg(G). */
10689 if (insn[0] == 0x0F && insn[1] == 0xC5) {
10691 if (sz == 2 && epartIsReg(modrm)) {
10692 t5 = newTemp(Ity_V128);
10693 t4 = newTemp(Ity_I16);
10694 assign(t5, getXMMReg(eregOfRM(modrm)));
10695 breakup128to32s( t5, &t3, &t2, &t1, &t0 );
10696 switch (insn[3] & 7) {
10697 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
10698 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
10699 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
10700 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
10701 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
10702 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
10703 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
10704 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
10705 default: vassert(0); /*NOTREACHED*/
10707 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)));
10708 DIP("pextrw $%d,%s,%s\n",
10709 (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
10710 nameIReg(4,gregOfRM(modrm)));
10712 goto decode_success;
10714 /* else fall through */
10717 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
10718 put it into the specified lane of xmm(G). */
10719 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) {
10721 t4 = newTemp(Ity_I16);
10724 if (epartIsReg(modrm)) {
10725 assign(t4, getIReg(2, eregOfRM(modrm)));
10727 lane = insn[3+1-1];
10728 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
10729 nameIReg(2,eregOfRM(modrm)),
10730 nameXMMReg(gregOfRM(modrm)));
10732 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10734 lane = insn[3+alen-1];
10735 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
10736 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
10738 nameXMMReg(gregOfRM(modrm)));
10741 putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
10742 goto decode_success;
10745 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
10746 E(xmm or mem) to G(xmm) */
10747 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) {
10748 IRTemp s1V = newTemp(Ity_V128);
10749 IRTemp s2V = newTemp(Ity_V128);
10750 IRTemp dV = newTemp(Ity_V128);
10751 IRTemp s1Hi = newTemp(Ity_I64);
10752 IRTemp s1Lo = newTemp(Ity_I64);
10753 IRTemp s2Hi = newTemp(Ity_I64);
10754 IRTemp s2Lo = newTemp(Ity_I64);
10755 IRTemp dHi = newTemp(Ity_I64);
10756 IRTemp dLo = newTemp(Ity_I64);
10758 if (epartIsReg(modrm)) {
10759 assign( s1V, getXMMReg(eregOfRM(modrm)) );
10761 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10762 nameXMMReg(gregOfRM(modrm)));
10764 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10765 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
10767 DIP("pmaddwd %s,%s\n", dis_buf,
10768 nameXMMReg(gregOfRM(modrm)));
10770 assign( s2V, getXMMReg(gregOfRM(modrm)) );
10771 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
10772 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
10773 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
10774 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
10775 assign( dHi, mkIRExprCCall(
10776 Ity_I64, 0/*regparms*/,
10777 "x86g_calculate_mmx_pmaddwd",
10778 &x86g_calculate_mmx_pmaddwd,
10779 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
10781 assign( dLo, mkIRExprCCall(
10782 Ity_I64, 0/*regparms*/,
10783 "x86g_calculate_mmx_pmaddwd",
10784 &x86g_calculate_mmx_pmaddwd,
10785 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
10787 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
10788 putXMMReg(gregOfRM(modrm), mkexpr(dV));
10789 goto decode_success;
10792 /* 66 0F EE = PMAXSW -- 16x8 signed max */
10793 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) {
10794 delta = dis_SSEint_E_to_G( sorb, delta+2,
10795 "pmaxsw", Iop_Max16Sx8, False );
10796 goto decode_success;
10799 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
10800 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) {
10801 delta = dis_SSEint_E_to_G( sorb, delta+2,
10802 "pmaxub", Iop_Max8Ux16, False );
10803 goto decode_success;
10806 /* 66 0F EA = PMINSW -- 16x8 signed min */
10807 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) {
10808 delta = dis_SSEint_E_to_G( sorb, delta+2,
10809 "pminsw", Iop_Min16Sx8, False );
10810 goto decode_success;
10813 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
10814 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) {
10815 delta = dis_SSEint_E_to_G( sorb, delta+2,
10816 "pminub", Iop_Min8Ux16, False );
10817 goto decode_success;
10820    /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in
10821       xmm(E), turn them into a byte, and put zero-extend of it in
10822 ireg(G). Doing this directly is just too cumbersome; give up
10823 therefore and call a helper. */
10824 /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
10825 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
10827 if (epartIsReg(modrm)) {
10828 t0 = newTemp(Ity_I64);
10829 t1 = newTemp(Ity_I64);
10830 assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
10831 assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
10832 t5 = newTemp(Ity_I32);
10833 assign(t5, mkIRExprCCall(
10834 Ity_I32, 0/*regparms*/,
10835 "x86g_calculate_sse_pmovmskb",
10836 &x86g_calculate_sse_pmovmskb,
10837 mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
10838 putIReg(4, gregOfRM(modrm), mkexpr(t5));
10839 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10840 nameIReg(4,gregOfRM(modrm)));
10842 goto decode_success;
10844 /* else fall through */
10847 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
10848 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) {
10849 delta = dis_SSEint_E_to_G( sorb, delta+2,
10850 "pmulhuw", Iop_MulHi16Ux8, False );
10851 goto decode_success;
10854 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
10855 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) {
10856 delta = dis_SSEint_E_to_G( sorb, delta+2,
10857 "pmulhw", Iop_MulHi16Sx8, False );
10858 goto decode_success;
10861    /* 66 0F D5 = PMULLW -- 16x8 multiply */
10862 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) {
10863 delta = dis_SSEint_E_to_G( sorb, delta+2,
10864 "pmullw", Iop_Mul16x8, False );
10865 goto decode_success;
10868 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
10869 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
10870 0 to form 64-bit result */
10871 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) {
10872 IRTemp sV = newTemp(Ity_I64);
10873 IRTemp dV = newTemp(Ity_I64);
10874 t1 = newTemp(Ity_I32);
10875 t0 = newTemp(Ity_I32);
10879 assign( dV, getMMXReg(gregOfRM(modrm)) );
10881 if (epartIsReg(modrm)) {
10882 assign( sV, getMMXReg(eregOfRM(modrm)) );
10884 DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
10885 nameMMXReg(gregOfRM(modrm)));
10887 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10888 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
10890 DIP("pmuludq %s,%s\n", dis_buf,
10891 nameMMXReg(gregOfRM(modrm)));
10894 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
10895 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
10896 putMMXReg( gregOfRM(modrm),
10897 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
10898 goto decode_success;
10901 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
10902 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
10904 /* This is a really poor translation -- could be improved if
10905 performance critical */
10906 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) {
10908 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10909 sV = newTemp(Ity_V128);
10910 dV = newTemp(Ity_V128);
10911 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10912 t1 = newTemp(Ity_I64);
10913 t0 = newTemp(Ity_I64);
10915 assign( dV, getXMMReg(gregOfRM(modrm)) );
10917 if (epartIsReg(modrm)) {
10918 assign( sV, getXMMReg(eregOfRM(modrm)) );
10920 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10921 nameXMMReg(gregOfRM(modrm)));
10923 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10924 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10926 DIP("pmuludq %s,%s\n", dis_buf,
10927 nameXMMReg(gregOfRM(modrm)));
10930 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
10931 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
10933 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
10934 putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
10935 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
10936 putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
10937 goto decode_success;
10940 /* 66 0F EB = POR */
10941 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) {
10942 delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 );
10943 goto decode_success;
10946 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
10947 from E(xmm or mem) to G(xmm) */
10948 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) {
10949 IRTemp s1V = newTemp(Ity_V128);
10950 IRTemp s2V = newTemp(Ity_V128);
10951 IRTemp dV = newTemp(Ity_V128);
10952 IRTemp s1Hi = newTemp(Ity_I64);
10953 IRTemp s1Lo = newTemp(Ity_I64);
10954 IRTemp s2Hi = newTemp(Ity_I64);
10955 IRTemp s2Lo = newTemp(Ity_I64);
10956 IRTemp dHi = newTemp(Ity_I64);
10957 IRTemp dLo = newTemp(Ity_I64);
10959 if (epartIsReg(modrm)) {
10960 assign( s1V, getXMMReg(eregOfRM(modrm)) );
10962 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10963 nameXMMReg(gregOfRM(modrm)));
10965 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10966 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
10968 DIP("psadbw %s,%s\n", dis_buf,
10969 nameXMMReg(gregOfRM(modrm)));
10971 assign( s2V, getXMMReg(gregOfRM(modrm)) );
10972 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
10973 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
10974 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
10975 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
10976 assign( dHi, mkIRExprCCall(
10977 Ity_I64, 0/*regparms*/,
10978 "x86g_calculate_mmx_psadbw",
10979 &x86g_calculate_mmx_psadbw,
10980 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
10982 assign( dLo, mkIRExprCCall(
10983 Ity_I64, 0/*regparms*/,
10984 "x86g_calculate_mmx_psadbw",
10985 &x86g_calculate_mmx_psadbw,
10986 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
10988 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
10989 putXMMReg(gregOfRM(modrm), mkexpr(dV));
10990 goto decode_success;
10993 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
10994 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) {
10996 IRTemp sV, dV, s3, s2, s1, s0;
10997 s3 = s2 = s1 = s0 = IRTemp_INVALID;
10998 sV = newTemp(Ity_V128);
10999 dV = newTemp(Ity_V128);
11001 if (epartIsReg(modrm)) {
11002 assign( sV, getXMMReg(eregOfRM(modrm)) );
11003 order = (Int)insn[3];
11005 DIP("pshufd $%d,%s,%s\n", order,
11006 nameXMMReg(eregOfRM(modrm)),
11007 nameXMMReg(gregOfRM(modrm)));
11009 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11010 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11011 order = (Int)insn[2+alen];
11013 DIP("pshufd $%d,%s,%s\n", order,
11015 nameXMMReg(gregOfRM(modrm)));
11017 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11020 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11022 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
11023 SEL((order>>2)&3), SEL((order>>0)&3) )
11025 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11027 goto decode_success;
11030 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
11031 mem) to G(xmm), and copy lower half */
11032 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
11034 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
11035 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11036 sV = newTemp(Ity_V128);
11037 dV = newTemp(Ity_V128);
11038 sVhi = newTemp(Ity_I64);
11039 dVhi = newTemp(Ity_I64);
11041 if (epartIsReg(modrm)) {
11042 assign( sV, getXMMReg(eregOfRM(modrm)) );
11043 order = (Int)insn[4];
11045 DIP("pshufhw $%d,%s,%s\n", order,
11046 nameXMMReg(eregOfRM(modrm)),
11047 nameXMMReg(gregOfRM(modrm)));
11049 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11050 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11051 order = (Int)insn[3+alen];
11053 DIP("pshufhw $%d,%s,%s\n", order,
11055 nameXMMReg(gregOfRM(modrm)));
11057 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
11058 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
11061 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11063 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
11064 SEL((order>>2)&3), SEL((order>>0)&3) )
11066 assign(dV, binop( Iop_64HLtoV128,
11068 unop(Iop_V128to64, mkexpr(sV))) );
11069 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11071 goto decode_success;
11074 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
11075 mem) to G(xmm), and copy upper half */
11076 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) {
11078 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
11079 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11080 sV = newTemp(Ity_V128);
11081 dV = newTemp(Ity_V128);
11082 sVlo = newTemp(Ity_I64);
11083 dVlo = newTemp(Ity_I64);
11085 if (epartIsReg(modrm)) {
11086 assign( sV, getXMMReg(eregOfRM(modrm)) );
11087 order = (Int)insn[4];
11089 DIP("pshuflw $%d,%s,%s\n", order,
11090 nameXMMReg(eregOfRM(modrm)),
11091 nameXMMReg(gregOfRM(modrm)));
11093 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11094 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11095 order = (Int)insn[3+alen];
11097 DIP("pshuflw $%d,%s,%s\n", order,
11099 nameXMMReg(gregOfRM(modrm)));
11101 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
11102 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
11105 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11107 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
11108 SEL((order>>2)&3), SEL((order>>0)&3) )
11110 assign(dV, binop( Iop_64HLtoV128,
11111 unop(Iop_V128HIto64, mkexpr(sV)),
11113 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11115 goto decode_success;
11118 /* 66 0F 72 /6 ib = PSLLD by immediate */
11119 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11120 && epartIsReg(insn[2])
11121 && gregOfRM(insn[2]) == 6) {
11122 delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 );
11123 goto decode_success;
11126 /* 66 0F F2 = PSLLD by E */
11127 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) {
11128 delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 );
11129 goto decode_success;
11132 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
11133 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11134 && epartIsReg(insn[2])
11135 && gregOfRM(insn[2]) == 7) {
11136 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
11137 Int imm = (Int)insn[3];
11138 Int reg = eregOfRM(insn[2]);
11139 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
11140 vassert(imm >= 0 && imm <= 255);
11143 sV = newTemp(Ity_V128);
11144 dV = newTemp(Ity_V128);
11145 hi64 = newTemp(Ity_I64);
11146 lo64 = newTemp(Ity_I64);
11147 hi64r = newTemp(Ity_I64);
11148 lo64r = newTemp(Ity_I64);
11151 putXMMReg(reg, mkV128(0x0000));
11152 goto decode_success;
11155 assign( sV, getXMMReg(reg) );
11156 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
11157 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
11160 assign( lo64r, mkexpr(lo64) );
11161 assign( hi64r, mkexpr(hi64) );
11165 assign( lo64r, mkU64(0) );
11166 assign( hi64r, mkexpr(lo64) );
11170 assign( lo64r, mkU64(0) );
11171 assign( hi64r, binop( Iop_Shl64,
11173 mkU8( 8*(imm-8) ) ));
11175 assign( lo64r, binop( Iop_Shl64,
11180 binop(Iop_Shl64, mkexpr(hi64),
11182 binop(Iop_Shr64, mkexpr(lo64),
11183 mkU8(8 * (8 - imm)) )
11187 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
11188 putXMMReg(reg, mkexpr(dV));
11189 goto decode_success;
11192 /* 66 0F 73 /6 ib = PSLLQ by immediate */
11193 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11194 && epartIsReg(insn[2])
11195 && gregOfRM(insn[2]) == 6) {
11196 delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 );
11197 goto decode_success;
11200 /* 66 0F F3 = PSLLQ by E */
11201 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) {
11202 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 );
11203 goto decode_success;
11206 /* 66 0F 71 /6 ib = PSLLW by immediate */
11207 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11208 && epartIsReg(insn[2])
11209 && gregOfRM(insn[2]) == 6) {
11210 delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 );
11211 goto decode_success;
11214 /* 66 0F F1 = PSLLW by E */
11215 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) {
11216 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 );
11217 goto decode_success;
11220 /* 66 0F 72 /4 ib = PSRAD by immediate */
11221 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11222 && epartIsReg(insn[2])
11223 && gregOfRM(insn[2]) == 4) {
11224 delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 );
11225 goto decode_success;
11228 /* 66 0F E2 = PSRAD by E */
11229 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) {
11230 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 );
11231 goto decode_success;
11234 /* 66 0F 71 /4 ib = PSRAW by immediate */
11235 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11236 && epartIsReg(insn[2])
11237 && gregOfRM(insn[2]) == 4) {
11238 delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 );
11239 goto decode_success;
11242 /* 66 0F E1 = PSRAW by E */
11243 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) {
11244 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 );
11245 goto decode_success;
11248 /* 66 0F 72 /2 ib = PSRLD by immediate */
11249 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11250 && epartIsReg(insn[2])
11251 && gregOfRM(insn[2]) == 2) {
11252 delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 );
11253 goto decode_success;
11256 /* 66 0F D2 = PSRLD by E */
11257 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) {
11258 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 );
11259 goto decode_success;
11262 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
11263 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11264 && epartIsReg(insn[2])
11265 && gregOfRM(insn[2]) == 3) {
11266 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
11267 Int imm = (Int)insn[3];
11268 Int reg = eregOfRM(insn[2]);
11269 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
11270 vassert(imm >= 0 && imm <= 255);
11273 sV = newTemp(Ity_V128);
11274 dV = newTemp(Ity_V128);
11275 hi64 = newTemp(Ity_I64);
11276 lo64 = newTemp(Ity_I64);
11277 hi64r = newTemp(Ity_I64);
11278 lo64r = newTemp(Ity_I64);
11281 putXMMReg(reg, mkV128(0x0000));
11282 goto decode_success;
11285 assign( sV, getXMMReg(reg) );
11286 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
11287 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
11290 assign( lo64r, mkexpr(lo64) );
11291 assign( hi64r, mkexpr(hi64) );
11295 assign( hi64r, mkU64(0) );
11296 assign( lo64r, mkexpr(hi64) );
11300 assign( hi64r, mkU64(0) );
11301 assign( lo64r, binop( Iop_Shr64,
11303 mkU8( 8*(imm-8) ) ));
11305 assign( hi64r, binop( Iop_Shr64,
11310 binop(Iop_Shr64, mkexpr(lo64),
11312 binop(Iop_Shl64, mkexpr(hi64),
11313 mkU8(8 * (8 - imm)) )
11318 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
11319 putXMMReg(reg, mkexpr(dV));
11320 goto decode_success;
11323 /* 66 0F 73 /2 ib = PSRLQ by immediate */
11324 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11325 && epartIsReg(insn[2])
11326 && gregOfRM(insn[2]) == 2) {
11327 delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 );
11328 goto decode_success;
11331 /* 66 0F D3 = PSRLQ by E */
11332 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) {
11333 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 );
11334 goto decode_success;
11337 /* 66 0F 71 /2 ib = PSRLW by immediate */
11338 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11339 && epartIsReg(insn[2])
11340 && gregOfRM(insn[2]) == 2) {
11341 delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
11342 goto decode_success;
11345 /* 66 0F D1 = PSRLW by E */
11346 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) {
11347 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
11348 goto decode_success;
11351 /* 66 0F F8 = PSUBB */
11352 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) {
11353 delta = dis_SSEint_E_to_G( sorb, delta+2,
11354 "psubb", Iop_Sub8x16, False );
11355 goto decode_success;
11358 /* 66 0F FA = PSUBD */
11359 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) {
11360 delta = dis_SSEint_E_to_G( sorb, delta+2,
11361 "psubd", Iop_Sub32x4, False );
11362 goto decode_success;
11365 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11366 /* 0F FB = PSUBQ -- sub 64x1 */
11367 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) {
11369 delta = dis_MMXop_regmem_to_reg (
11370 sorb, delta+2, insn[1], "psubq", False );
11371 goto decode_success;
11374 /* 66 0F FB = PSUBQ */
11375 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) {
11376 delta = dis_SSEint_E_to_G( sorb, delta+2,
11377 "psubq", Iop_Sub64x2, False );
11378 goto decode_success;
11381 /* 66 0F F9 = PSUBW */
11382 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) {
11383 delta = dis_SSEint_E_to_G( sorb, delta+2,
11384 "psubw", Iop_Sub16x8, False );
11385 goto decode_success;
11388 /* 66 0F E8 = PSUBSB */
11389 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) {
11390 delta = dis_SSEint_E_to_G( sorb, delta+2,
11391 "psubsb", Iop_QSub8Sx16, False );
11392 goto decode_success;
11395 /* 66 0F E9 = PSUBSW */
11396 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) {
11397 delta = dis_SSEint_E_to_G( sorb, delta+2,
11398 "psubsw", Iop_QSub16Sx8, False );
11399 goto decode_success;
11402 /* 66 0F D8 = PSUBUSB */
11403 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) {
11404 delta = dis_SSEint_E_to_G( sorb, delta+2,
11405 "psubusb", Iop_QSub8Ux16, False );
11406 goto decode_success;
11409 /* 66 0F D9 = PSUBUSW */
11410 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) {
11411 delta = dis_SSEint_E_to_G( sorb, delta+2,
11412 "psubusw", Iop_QSub16Ux8, False );
11413 goto decode_success;
11416 /* 66 0F 68 = PUNPCKHBW */
11417 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) {
11418 delta = dis_SSEint_E_to_G( sorb, delta+2,
11420 Iop_InterleaveHI8x16, True );
11421 goto decode_success;
11424 /* 66 0F 6A = PUNPCKHDQ */
11425 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) {
11426 delta = dis_SSEint_E_to_G( sorb, delta+2,
11428 Iop_InterleaveHI32x4, True );
11429 goto decode_success;
11432 /* 66 0F 6D = PUNPCKHQDQ */
11433 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) {
11434 delta = dis_SSEint_E_to_G( sorb, delta+2,
11436 Iop_InterleaveHI64x2, True );
11437 goto decode_success;
11440 /* 66 0F 69 = PUNPCKHWD */
11441 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) {
11442 delta = dis_SSEint_E_to_G( sorb, delta+2,
11444 Iop_InterleaveHI16x8, True );
11445 goto decode_success;
11448 /* 66 0F 60 = PUNPCKLBW */
11449 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
11450 delta = dis_SSEint_E_to_G( sorb, delta+2,
11452 Iop_InterleaveLO8x16, True );
11453 goto decode_success;
11456 /* 66 0F 62 = PUNPCKLDQ */
11457 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) {
11458 delta = dis_SSEint_E_to_G( sorb, delta+2,
11460 Iop_InterleaveLO32x4, True );
11461 goto decode_success;
11464 /* 66 0F 6C = PUNPCKLQDQ */
11465 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) {
11466 delta = dis_SSEint_E_to_G( sorb, delta+2,
11468 Iop_InterleaveLO64x2, True );
11469 goto decode_success;
11472 /* 66 0F 61 = PUNPCKLWD */
11473 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) {
11474 delta = dis_SSEint_E_to_G( sorb, delta+2,
11476 Iop_InterleaveLO16x8, True );
11477 goto decode_success;
11480 /* 66 0F EF = PXOR */
11481 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) {
11482 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 );
11483 goto decode_success;
11486 //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
11487 //-- if (insn[0] == 0x0F && insn[1] == 0xAE
11488 //-- && (!epartIsReg(insn[2]))
11489 //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
11490 //-- Bool store = gregOfRM(insn[2]) == 0;
11491 //-- vg_assert(sz == 4);
11492 //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
11493 //-- t1 = LOW24(pair);
11494 //-- eip += 2+HI8(pair);
11495 //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
11496 //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
11497 //-- Lit16, (UShort)insn[2],
11498 //-- TempReg, t1 );
11499 //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
11500 //-- goto decode_success;
11503 /* 0F AE /7 = CLFLUSH -- flush cache line */
11504 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
11505 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
11507 /* This is something of a hack. We need to know the size of the
11508 cache line containing addr. Since we don't (easily), assume
11509 256 on the basis that no real cache would have a line that
11510 big. It's safe to invalidate more stuff than we need, just
11512 UInt lineszB = 256;
11514 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11517 /* Round addr down to the start of the containing block. */
11522 mkU32( ~(lineszB-1) ))) );
11524 stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) );
11526 irsb->jumpkind = Ijk_TInval;
11527 irsb->next = mkU32(guest_EIP_bbstart+delta);
11528 dres.whatNext = Dis_StopHere;
11530 DIP("clflush %s\n", dis_buf);
11531 goto decode_success;
11534 /* ---------------------------------------------------- */
11535 /* --- end of the SSE2 decoder. --- */
11536 /* ---------------------------------------------------- */
11538 /* ---------------------------------------------------- */
11539 /* --- start of the SSE3 decoder. --- */
11540 /* ---------------------------------------------------- */
11542 /* Skip parts of the decoder which don't apply given the stated
11543 guest subarchitecture. */
11544 /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */
11545 /* In fact this is highly bogus; we accept SSE3 insns even on a
11546 SSE2-only guest since they turn into IR which can be re-emitted
11547 successfully on an SSE2 host. */
11548 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
11549 goto after_sse_decoders; /* no SSE3 capabilities */
11551 insn = (UChar*)&guest_code[delta];
11553 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
11554 duplicating some lanes (2:2:0:0). */
11555 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
11556 duplicating some lanes (3:3:1:1). */
11557 if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F
11558 && (insn[2] == 0x12 || insn[2] == 0x16)) {
11559 IRTemp s3, s2, s1, s0;
11560 IRTemp sV = newTemp(Ity_V128);
11561 Bool isH = insn[2] == 0x16;
11562 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11565 if (epartIsReg(modrm)) {
11566 assign( sV, getXMMReg( eregOfRM(modrm)) );
11567 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11568 nameXMMReg(eregOfRM(modrm)),
11569 nameXMMReg(gregOfRM(modrm)));
11572 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11573 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11574 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11576 nameXMMReg(gregOfRM(modrm)));
11580 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11581 putXMMReg( gregOfRM(modrm),
11582 isH ? mk128from32s( s3, s3, s1, s1 )
11583 : mk128from32s( s2, s2, s0, s0 ) );
11584 goto decode_success;
11587 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
11588 duplicating some lanes (0:1:0:1). */
11589 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) {
11590 IRTemp sV = newTemp(Ity_V128);
11591 IRTemp d0 = newTemp(Ity_I64);
11594 if (epartIsReg(modrm)) {
11595 assign( sV, getXMMReg( eregOfRM(modrm)) );
11596 DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11597 nameXMMReg(gregOfRM(modrm)));
11599 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
11601 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11602 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
11603 DIP("movddup %s,%s\n", dis_buf,
11604 nameXMMReg(gregOfRM(modrm)));
11608 putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
11609 goto decode_success;
11612 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
11613 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
11614 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11615 IRTemp eV = newTemp(Ity_V128);
11616 IRTemp gV = newTemp(Ity_V128);
11617 IRTemp addV = newTemp(Ity_V128);
11618 IRTemp subV = newTemp(Ity_V128);
11619 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11622 if (epartIsReg(modrm)) {
11623 assign( eV, getXMMReg( eregOfRM(modrm)) );
11624 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11625 nameXMMReg(gregOfRM(modrm)));
11628 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11629 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11630 DIP("addsubps %s,%s\n", dis_buf,
11631 nameXMMReg(gregOfRM(modrm)));
11635 assign( gV, getXMMReg(gregOfRM(modrm)) );
11637 assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
11638 assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
11640 breakup128to32s( addV, &a3, &a2, &a1, &a0 );
11641 breakup128to32s( subV, &s3, &s2, &s1, &s0 );
11643 putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
11644 goto decode_success;
11647 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
11648 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) {
11649 IRTemp eV = newTemp(Ity_V128);
11650 IRTemp gV = newTemp(Ity_V128);
11651 IRTemp addV = newTemp(Ity_V128);
11652 IRTemp subV = newTemp(Ity_V128);
11653 IRTemp a1 = newTemp(Ity_I64);
11654 IRTemp s0 = newTemp(Ity_I64);
11657 if (epartIsReg(modrm)) {
11658 assign( eV, getXMMReg( eregOfRM(modrm)) );
11659 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11660 nameXMMReg(gregOfRM(modrm)));
11663 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11664 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11665 DIP("addsubpd %s,%s\n", dis_buf,
11666 nameXMMReg(gregOfRM(modrm)));
11670 assign( gV, getXMMReg(gregOfRM(modrm)) );
11672 assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
11673 assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
11675 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11676 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11678 putXMMReg( gregOfRM(modrm),
11679 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11680 goto decode_success;
11683 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
11684 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
11685 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F
11686 && (insn[2] == 0x7C || insn[2] == 0x7D)) {
11687 IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
11688 IRTemp eV = newTemp(Ity_V128);
11689 IRTemp gV = newTemp(Ity_V128);
11690 IRTemp leftV = newTemp(Ity_V128);
11691 IRTemp rightV = newTemp(Ity_V128);
11692 Bool isAdd = insn[2] == 0x7C;
11693 HChar* str = isAdd ? "add" : "sub";
11694 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
11697 if (epartIsReg(modrm)) {
11698 assign( eV, getXMMReg( eregOfRM(modrm)) );
11699 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
11700 nameXMMReg(gregOfRM(modrm)));
11703 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11704 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11705 DIP("h%sps %s,%s\n", str, dis_buf,
11706 nameXMMReg(gregOfRM(modrm)));
11710 assign( gV, getXMMReg(gregOfRM(modrm)) );
11712 breakup128to32s( eV, &e3, &e2, &e1, &e0 );
11713 breakup128to32s( gV, &g3, &g2, &g1, &g0 );
11715 assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
11716 assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
11718 putXMMReg( gregOfRM(modrm),
11719 binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
11720 mkexpr(leftV), mkexpr(rightV) ) );
11721 goto decode_success;
11724 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
11725 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
11726 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
11727 IRTemp e1 = newTemp(Ity_I64);
11728 IRTemp e0 = newTemp(Ity_I64);
11729 IRTemp g1 = newTemp(Ity_I64);
11730 IRTemp g0 = newTemp(Ity_I64);
11731 IRTemp eV = newTemp(Ity_V128);
11732 IRTemp gV = newTemp(Ity_V128);
11733 IRTemp leftV = newTemp(Ity_V128);
11734 IRTemp rightV = newTemp(Ity_V128);
11735 Bool isAdd = insn[1] == 0x7C;
11736 HChar* str = isAdd ? "add" : "sub";
11739 if (epartIsReg(modrm)) {
11740 assign( eV, getXMMReg( eregOfRM(modrm)) );
11741 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
11742 nameXMMReg(gregOfRM(modrm)));
11745 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11746 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11747 DIP("h%spd %s,%s\n", str, dis_buf,
11748 nameXMMReg(gregOfRM(modrm)));
11752 assign( gV, getXMMReg(gregOfRM(modrm)) );
11754 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
11755 assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
11756 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
11757 assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
11759 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
11760 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
11762 putXMMReg( gregOfRM(modrm),
11763 binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
11764 mkexpr(leftV), mkexpr(rightV) ) );
11765 goto decode_success;
11768 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
11769 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) {
11770 modrm = getIByte(delta+3);
11771 if (epartIsReg(modrm)) {
11772 goto decode_failure;
11774 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11775 putXMMReg( gregOfRM(modrm),
11776 loadLE(Ity_V128, mkexpr(addr)) );
11777 DIP("lddqu %s,%s\n", dis_buf,
11778 nameXMMReg(gregOfRM(modrm)));
11781 goto decode_success;
11784 /* ---------------------------------------------------- */
11785 /* --- end of the SSE3 decoder. --- */
11786 /* ---------------------------------------------------- */
11788 /* ---------------------------------------------------- */
11789 /* --- start of the SSSE3 decoder. --- */
11790 /* ---------------------------------------------------- */
11792 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
11793 Unsigned Bytes (MMX) */
11795 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
11796 IRTemp sV = newTemp(Ity_I64);
11797 IRTemp dV = newTemp(Ity_I64);
11798 IRTemp sVoddsSX = newTemp(Ity_I64);
11799 IRTemp sVevensSX = newTemp(Ity_I64);
11800 IRTemp dVoddsZX = newTemp(Ity_I64);
11801 IRTemp dVevensZX = newTemp(Ity_I64);
11805 assign( dV, getMMXReg(gregOfRM(modrm)) );
11807 if (epartIsReg(modrm)) {
11808 assign( sV, getMMXReg(eregOfRM(modrm)) );
11810 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
11811 nameMMXReg(gregOfRM(modrm)));
11813 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11814 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
11816 DIP("pmaddubsw %s,%s\n", dis_buf,
11817 nameMMXReg(gregOfRM(modrm)));
11820 /* compute dV unsigned x sV signed */
11822 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
11824 binop(Iop_SarN16x4,
11825 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
11828 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
11830 binop(Iop_ShrN16x4,
11831 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
11836 binop(Iop_QAdd16Sx4,
11837 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
11838 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
11841 goto decode_success;
11844 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
11845 Unsigned Bytes (XMM) */
11847 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
11848 IRTemp sV = newTemp(Ity_V128);
11849 IRTemp dV = newTemp(Ity_V128);
11850 IRTemp sVoddsSX = newTemp(Ity_V128);
11851 IRTemp sVevensSX = newTemp(Ity_V128);
11852 IRTemp dVoddsZX = newTemp(Ity_V128);
11853 IRTemp dVevensZX = newTemp(Ity_V128);
11856 assign( dV, getXMMReg(gregOfRM(modrm)) );
11858 if (epartIsReg(modrm)) {
11859 assign( sV, getXMMReg(eregOfRM(modrm)) );
11861 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11862 nameXMMReg(gregOfRM(modrm)));
11864 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11865 gen_SEGV_if_not_16_aligned( addr );
11866 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11868 DIP("pmaddubsw %s,%s\n", dis_buf,
11869 nameXMMReg(gregOfRM(modrm)));
11872 /* compute dV unsigned x sV signed */
11874 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
11876 binop(Iop_SarN16x8,
11877 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
11880 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
11882 binop(Iop_ShrN16x8,
11883 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
11888 binop(Iop_QAdd16Sx8,
11889 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
11890 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
11893 goto decode_success;
11896 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
11897 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
11898 mmx) and G to G (mmx). */
11899 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
11900 mmx) and G to G (mmx). */
11901 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
11903 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
11905 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
11907 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
11911 && insn[0] == 0x0F && insn[1] == 0x38
11912 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
11913 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
11914 HChar* str = "???";
11915 IROp opV64 = Iop_INVALID;
11916 IROp opCatO = Iop_CatOddLanes16x4;
11917 IROp opCatE = Iop_CatEvenLanes16x4;
11918 IRTemp sV = newTemp(Ity_I64);
11919 IRTemp dV = newTemp(Ity_I64);
11924 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
11925 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
11926 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
11927 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
11928 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
11929 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
11930 default: vassert(0);
11932 if (insn[2] == 0x02 || insn[2] == 0x06) {
11933 opCatO = Iop_InterleaveHI32x2;
11934 opCatE = Iop_InterleaveLO32x2;
11938 assign( dV, getMMXReg(gregOfRM(modrm)) );
11940 if (epartIsReg(modrm)) {
11941 assign( sV, getMMXReg(eregOfRM(modrm)) );
11943 DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
11944 nameMMXReg(gregOfRM(modrm)));
11946 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11947 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
11949 DIP("ph%s %s,%s\n", str, dis_buf,
11950 nameMMXReg(gregOfRM(modrm)));
11956 binop(opCatE,mkexpr(sV),mkexpr(dV)),
11957 binop(opCatO,mkexpr(sV),mkexpr(dV))
11960 goto decode_success;
11963 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
11964 xmm) and G to G (xmm). */
11965 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
11966 xmm) and G to G (xmm). */
11967 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
11969 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
11971 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
11973 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
11977 && insn[0] == 0x0F && insn[1] == 0x38
11978 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
11979 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
11980 HChar* str = "???";
11981 IROp opV64 = Iop_INVALID;
11982 IROp opCatO = Iop_CatOddLanes16x4;
11983 IROp opCatE = Iop_CatEvenLanes16x4;
11984 IRTemp sV = newTemp(Ity_V128);
11985 IRTemp dV = newTemp(Ity_V128);
11986 IRTemp sHi = newTemp(Ity_I64);
11987 IRTemp sLo = newTemp(Ity_I64);
11988 IRTemp dHi = newTemp(Ity_I64);
11989 IRTemp dLo = newTemp(Ity_I64);
11994 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
11995 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
11996 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
11997 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
11998 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
11999 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
12000 default: vassert(0);
12002 if (insn[2] == 0x02 || insn[2] == 0x06) {
12003 opCatO = Iop_InterleaveHI32x2;
12004 opCatE = Iop_InterleaveLO32x2;
12007 assign( dV, getXMMReg(gregOfRM(modrm)) );
12009 if (epartIsReg(modrm)) {
12010 assign( sV, getXMMReg( eregOfRM(modrm)) );
12011 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12012 nameXMMReg(gregOfRM(modrm)));
12015 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12016 gen_SEGV_if_not_16_aligned( addr );
12017 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12018 DIP("ph%s %s,%s\n", str, dis_buf,
12019 nameXMMReg(gregOfRM(modrm)));
12023 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12024 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12025 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12026 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12028 /* This isn't a particularly efficient way to compute the
12029 result, but at least it avoids a proliferation of IROps,
12030 hence avoids complication all the backends. */
12033 binop(Iop_64HLtoV128,
12035 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
12036 binop(opCatO,mkexpr(sHi),mkexpr(sLo))
12039 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
12040 binop(opCatO,mkexpr(dHi),mkexpr(dLo))
12044 goto decode_success;
12047 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
12050 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12051 IRTemp sV = newTemp(Ity_I64);
12052 IRTemp dV = newTemp(Ity_I64);
12056 assign( dV, getMMXReg(gregOfRM(modrm)) );
12058 if (epartIsReg(modrm)) {
12059 assign( sV, getMMXReg(eregOfRM(modrm)) );
12061 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12062 nameMMXReg(gregOfRM(modrm)));
12064 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12065 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12067 DIP("pmulhrsw %s,%s\n", dis_buf,
12068 nameMMXReg(gregOfRM(modrm)));
12073 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
12075 goto decode_success;
12078 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
12081 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12082 IRTemp sV = newTemp(Ity_V128);
12083 IRTemp dV = newTemp(Ity_V128);
12084 IRTemp sHi = newTemp(Ity_I64);
12085 IRTemp sLo = newTemp(Ity_I64);
12086 IRTemp dHi = newTemp(Ity_I64);
12087 IRTemp dLo = newTemp(Ity_I64);
12090 assign( dV, getXMMReg(gregOfRM(modrm)) );
12092 if (epartIsReg(modrm)) {
12093 assign( sV, getXMMReg(eregOfRM(modrm)) );
12095 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12096 nameXMMReg(gregOfRM(modrm)));
12098 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12099 gen_SEGV_if_not_16_aligned( addr );
12100 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12102 DIP("pmulhrsw %s,%s\n", dis_buf,
12103 nameXMMReg(gregOfRM(modrm)));
12106 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12107 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12108 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12109 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12113 binop(Iop_64HLtoV128,
12114 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
12115 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
12118 goto decode_success;
12121 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
12122 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
12123 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
12125 && insn[0] == 0x0F && insn[1] == 0x38
12126 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12127 IRTemp sV = newTemp(Ity_I64);
12128 IRTemp dV = newTemp(Ity_I64);
12129 HChar* str = "???";
12133 case 0x08: laneszB = 1; str = "b"; break;
12134 case 0x09: laneszB = 2; str = "w"; break;
12135 case 0x0A: laneszB = 4; str = "d"; break;
12136 default: vassert(0);
12141 assign( dV, getMMXReg(gregOfRM(modrm)) );
12143 if (epartIsReg(modrm)) {
12144 assign( sV, getMMXReg(eregOfRM(modrm)) );
12146 DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12147 nameMMXReg(gregOfRM(modrm)));
12149 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12150 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12152 DIP("psign%s %s,%s\n", str, dis_buf,
12153 nameMMXReg(gregOfRM(modrm)));
12158 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
12160 goto decode_success;
12163 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
12164 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
12165 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
12167 && insn[0] == 0x0F && insn[1] == 0x38
12168 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12169 IRTemp sV = newTemp(Ity_V128);
12170 IRTemp dV = newTemp(Ity_V128);
12171 IRTemp sHi = newTemp(Ity_I64);
12172 IRTemp sLo = newTemp(Ity_I64);
12173 IRTemp dHi = newTemp(Ity_I64);
12174 IRTemp dLo = newTemp(Ity_I64);
12175 HChar* str = "???";
12179 case 0x08: laneszB = 1; str = "b"; break;
12180 case 0x09: laneszB = 2; str = "w"; break;
12181 case 0x0A: laneszB = 4; str = "d"; break;
12182 default: vassert(0);
12186 assign( dV, getXMMReg(gregOfRM(modrm)) );
12188 if (epartIsReg(modrm)) {
12189 assign( sV, getXMMReg(eregOfRM(modrm)) );
12191 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12192 nameXMMReg(gregOfRM(modrm)));
12194 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12195 gen_SEGV_if_not_16_aligned( addr );
12196 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12198 DIP("psign%s %s,%s\n", str, dis_buf,
12199 nameXMMReg(gregOfRM(modrm)));
12202 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12203 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12204 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12205 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12209 binop(Iop_64HLtoV128,
12210 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
12211 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
12214 goto decode_success;
12217 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
12218 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
12219 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
12221 && insn[0] == 0x0F && insn[1] == 0x38
12222 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12223 IRTemp sV = newTemp(Ity_I64);
12224 HChar* str = "???";
12228 case 0x1C: laneszB = 1; str = "b"; break;
12229 case 0x1D: laneszB = 2; str = "w"; break;
12230 case 0x1E: laneszB = 4; str = "d"; break;
12231 default: vassert(0);
12237 if (epartIsReg(modrm)) {
12238 assign( sV, getMMXReg(eregOfRM(modrm)) );
12240 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12241 nameMMXReg(gregOfRM(modrm)));
12243 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12244 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12246 DIP("pabs%s %s,%s\n", str, dis_buf,
12247 nameMMXReg(gregOfRM(modrm)));
12252 dis_PABS_helper( mkexpr(sV), laneszB )
12254 goto decode_success;
12257 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
12258 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
12259 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
12261 && insn[0] == 0x0F && insn[1] == 0x38
12262 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12263 IRTemp sV = newTemp(Ity_V128);
12264 IRTemp sHi = newTemp(Ity_I64);
12265 IRTemp sLo = newTemp(Ity_I64);
12266 HChar* str = "???";
12270 case 0x1C: laneszB = 1; str = "b"; break;
12271 case 0x1D: laneszB = 2; str = "w"; break;
12272 case 0x1E: laneszB = 4; str = "d"; break;
12273 default: vassert(0);
12278 if (epartIsReg(modrm)) {
12279 assign( sV, getXMMReg(eregOfRM(modrm)) );
12281 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12282 nameXMMReg(gregOfRM(modrm)));
12284 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12285 gen_SEGV_if_not_16_aligned( addr );
12286 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12288 DIP("pabs%s %s,%s\n", str, dis_buf,
12289 nameXMMReg(gregOfRM(modrm)));
12292 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12293 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12297 binop(Iop_64HLtoV128,
12298 dis_PABS_helper( mkexpr(sHi), laneszB ),
12299 dis_PABS_helper( mkexpr(sLo), laneszB )
12302 goto decode_success;
12305 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
12307 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
12308 IRTemp sV = newTemp(Ity_I64);
12309 IRTemp dV = newTemp(Ity_I64);
12310 IRTemp res = newTemp(Ity_I64);
12314 assign( dV, getMMXReg(gregOfRM(modrm)) );
12316 if (epartIsReg(modrm)) {
12317 assign( sV, getMMXReg(eregOfRM(modrm)) );
12318 d32 = (UInt)insn[3+1];
12320 DIP("palignr $%d,%s,%s\n", (Int)d32,
12321 nameMMXReg(eregOfRM(modrm)),
12322 nameMMXReg(gregOfRM(modrm)));
12324 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12325 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12326 d32 = (UInt)insn[3+alen];
12328 DIP("palignr $%d%s,%s\n", (Int)d32,
12330 nameMMXReg(gregOfRM(modrm)));
12334 assign( res, mkexpr(sV) );
12336 else if (d32 >= 1 && d32 <= 7) {
12339 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
12340 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
12343 else if (d32 == 8) {
12344 assign( res, mkexpr(dV) );
12346 else if (d32 >= 9 && d32 <= 15) {
12347 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
12349 else if (d32 >= 16 && d32 <= 255) {
12350 assign( res, mkU64(0) );
12355 putMMXReg( gregOfRM(modrm), mkexpr(res) );
12356 goto decode_success;
12359 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
12361 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
12362 IRTemp sV = newTemp(Ity_V128);
12363 IRTemp dV = newTemp(Ity_V128);
12364 IRTemp sHi = newTemp(Ity_I64);
12365 IRTemp sLo = newTemp(Ity_I64);
12366 IRTemp dHi = newTemp(Ity_I64);
12367 IRTemp dLo = newTemp(Ity_I64);
12368 IRTemp rHi = newTemp(Ity_I64);
12369 IRTemp rLo = newTemp(Ity_I64);
12372 assign( dV, getXMMReg(gregOfRM(modrm)) );
12374 if (epartIsReg(modrm)) {
12375 assign( sV, getXMMReg(eregOfRM(modrm)) );
12376 d32 = (UInt)insn[3+1];
12378 DIP("palignr $%d,%s,%s\n", (Int)d32,
12379 nameXMMReg(eregOfRM(modrm)),
12380 nameXMMReg(gregOfRM(modrm)));
12382 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12383 gen_SEGV_if_not_16_aligned( addr );
12384 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12385 d32 = (UInt)insn[3+alen];
12387 DIP("palignr $%d,%s,%s\n", (Int)d32,
12389 nameXMMReg(gregOfRM(modrm)));
12392 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12393 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12394 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12395 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12398 assign( rHi, mkexpr(sHi) );
12399 assign( rLo, mkexpr(sLo) );
12401 else if (d32 >= 1 && d32 <= 7) {
12402 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
12403 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
12405 else if (d32 == 8) {
12406 assign( rHi, mkexpr(dLo) );
12407 assign( rLo, mkexpr(sHi) );
12409 else if (d32 >= 9 && d32 <= 15) {
12410 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
12411 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
12413 else if (d32 == 16) {
12414 assign( rHi, mkexpr(dHi) );
12415 assign( rLo, mkexpr(dLo) );
12417 else if (d32 >= 17 && d32 <= 23) {
12418 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
12419 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
12421 else if (d32 == 24) {
12422 assign( rHi, mkU64(0) );
12423 assign( rLo, mkexpr(dHi) );
12425 else if (d32 >= 25 && d32 <= 31) {
12426 assign( rHi, mkU64(0) );
12427 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
12429 else if (d32 >= 32 && d32 <= 255) {
12430 assign( rHi, mkU64(0) );
12431 assign( rLo, mkU64(0) );
12438 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
12440 goto decode_success;
12443 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
12445 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
12446 IRTemp sV = newTemp(Ity_I64);
12447 IRTemp dV = newTemp(Ity_I64);
12451 assign( dV, getMMXReg(gregOfRM(modrm)) );
12453 if (epartIsReg(modrm)) {
12454 assign( sV, getMMXReg(eregOfRM(modrm)) );
12456 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12457 nameMMXReg(gregOfRM(modrm)));
12459 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12460 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12462 DIP("pshufb %s,%s\n", dis_buf,
12463 nameMMXReg(gregOfRM(modrm)));
12470 /* permute the lanes */
12474 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
12476 /* mask off lanes which have (index & 0x80) == 0x80 */
12477 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
12480 goto decode_success;
12483 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
12485 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
12486 IRTemp sV = newTemp(Ity_V128);
12487 IRTemp dV = newTemp(Ity_V128);
12488 IRTemp sHi = newTemp(Ity_I64);
12489 IRTemp sLo = newTemp(Ity_I64);
12490 IRTemp dHi = newTemp(Ity_I64);
12491 IRTemp dLo = newTemp(Ity_I64);
12492 IRTemp rHi = newTemp(Ity_I64);
12493 IRTemp rLo = newTemp(Ity_I64);
12494 IRTemp sevens = newTemp(Ity_I64);
12495 IRTemp mask0x80hi = newTemp(Ity_I64);
12496 IRTemp mask0x80lo = newTemp(Ity_I64);
12497 IRTemp maskBit3hi = newTemp(Ity_I64);
12498 IRTemp maskBit3lo = newTemp(Ity_I64);
12499 IRTemp sAnd7hi = newTemp(Ity_I64);
12500 IRTemp sAnd7lo = newTemp(Ity_I64);
12501 IRTemp permdHi = newTemp(Ity_I64);
12502 IRTemp permdLo = newTemp(Ity_I64);
12505 assign( dV, getXMMReg(gregOfRM(modrm)) );
12507 if (epartIsReg(modrm)) {
12508 assign( sV, getXMMReg(eregOfRM(modrm)) );
12510 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12511 nameXMMReg(gregOfRM(modrm)));
12513 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12514 gen_SEGV_if_not_16_aligned( addr );
12515 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12517 DIP("pshufb %s,%s\n", dis_buf,
12518 nameXMMReg(gregOfRM(modrm)));
12521 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12522 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12523 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12524 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12526 assign( sevens, mkU64(0x0707070707070707ULL) );
12529 mask0x80hi = Not(SarN8x8(sHi,7))
12530 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
12531 sAnd7hi = And(sHi,sevens)
12532 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
12533 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
12534 rHi = And(permdHi,mask0x80hi)
12538 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
12543 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
12546 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
12553 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
12554 mkexpr(maskBit3hi)),
12556 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
12557 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
12559 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
12561 /* And the same for the lower half of the result. What fun. */
12565 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
12570 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
12573 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
12580 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
12581 mkexpr(maskBit3lo)),
12583 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
12584 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
12586 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
12590 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
12592 goto decode_success;
12595 /* ---------------------------------------------------- */
12596 /* --- end of the SSSE3 decoder. --- */
12597 /* ---------------------------------------------------- */
12599 after_sse_decoders:
12601 /* ---------------------------------------------------- */
12602 /* --- deal with misc 0x67 pfxs (addr size override) -- */
12603 /* ---------------------------------------------------- */
12605 /* 67 E3 = JCXZ (for JECXZ see below) */
12606 if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
12608 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
12611 binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
12615 DIP("jcxz 0x%x\n", d32);
12616 goto decode_success;
12619 /* ---------------------------------------------------- */
12620 /* --- start of the baseline insn decoder -- */
12621 /* ---------------------------------------------------- */
12623 /* Get the primary opcode. */
12624 opc = getIByte(delta); delta++;
12626 /* We get here if the current insn isn't SSE, or this CPU doesn't
12631 /* ------------------------ Control flow --------------- */
/* ------ RET: near return ------ */
12633 case 0xC2: /* RET imm16 */
/* imm16 = extra stack bytes the callee discards (stdcall-style).
   The pop of the return address is on lines elided from this extract. */
12634 d32 = getUDisp16(delta);
12637 dres.whatNext = Dis_StopHere;
12638 DIP("ret %d\n", (Int)d32);
12640 case 0xC3: /* RET */
/* plain near return, no immediate */
12642 dres.whatNext = Dis_StopHere;
/* ------ IRET (deliberately limited: pops EIP, CS, EFLAGS only) ------ */
12646 case 0xCF: /* IRET */
12647 /* Note, this is an extremely kludgey and limited implementation
12648 of iret. All it really does is:
12649 popl %EIP; popl %CS; popl %EFLAGS.
12650 %CS is set but ignored (as it is in (eg) popw %cs). */
12651 t1 = newTemp(Ity_I32); /* ESP */
12652 t2 = newTemp(Ity_I32); /* new EIP */
12653 t3 = newTemp(Ity_I32); /* new CS */
12654 t4 = newTemp(Ity_I32); /* new EFLAGS */
12655 assign(t1, getIReg(4,R_ESP));
/* stack layout: [ESP+0]=EIP, [ESP+4]=CS, [ESP+8]=EFLAGS */
12656 assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) )));
12657 assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) )));
12658 assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) )));
12659 /* Get stuff off stack */
/* three 32-bit pops -> ESP advances by 12 */
12660 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12)));
12661 /* set %CS (which is ignored anyway) */
12662 putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) );
12664 set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
12665 /* goto new EIP value */
12666 jmp_treg(Ijk_Ret,t2);
12667 dres.whatNext = Dis_StopHere;
12668 DIP("iret (very kludgey)\n");
12671 case 0xE8: /* CALL J4 */
12672 d32 = getUDisp32(delta); delta += 4;
12673 d32 += (guest_EIP_bbstart+delta);
12674 /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
12675 if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58
12676 && getIByte(delta) <= 0x5F) {
12677 /* Specially treat the position-independent-code idiom
12682 since this generates better code, but for no other reason. */
12683 Int archReg = getIByte(delta) - 0x58;
12684 /* vex_printf("-- fPIC thingy\n"); */
12685 putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta));
12686 delta++; /* Step over the POP */
12687 DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg));
12689 /* The normal sequence for a call. */
12690 t1 = newTemp(Ity_I32);
12691 assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
12692 putIReg(4, R_ESP, mkexpr(t1));
12693 storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta));
12694 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32 )) {
12695 /* follow into the call target. */
12696 dres.whatNext = Dis_ResteerU;
12697 dres.continueAt = (Addr64)(Addr32)d32;
12699 jmp_lit(Ijk_Call,d32);
12700 dres.whatNext = Dis_StopHere;
12702 DIP("call 0x%x\n",d32);
12706 //-- case 0xC8: /* ENTER */
12707 //-- d32 = getUDisp16(eip); eip += 2;
12708 //-- abyte = getIByte(delta); delta++;
12710 //-- vg_assert(sz == 4);
12711 //-- vg_assert(abyte == 0);
12713 //-- t1 = newTemp(cb); t2 = newTemp(cb);
12714 //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
12715 //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
12716 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
12717 //-- uLiteral(cb, sz);
12718 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
12719 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
12720 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
12722 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
12723 //-- uLiteral(cb, d32);
12724 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
12726 //-- DIP("enter 0x%x, 0x%x", d32, abyte);
/* ------ LEAVE: equivalent to movl %ebp,%esp ; popl %ebp ------ */
12729 case 0xC9: /* LEAVE */
12731 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
12732 assign(t1, getIReg(4,R_EBP));
12733 /* First PUT ESP looks redundant, but need it because ESP must
12734 always be up-to-date for Memcheck to work... */
12735 putIReg(4, R_ESP, mkexpr(t1));
/* pop the saved frame pointer from [EBP] back into EBP ... */
12736 assign(t2, loadLE(Ity_I32,mkexpr(t1)));
12737 putIReg(4, R_EBP, mkexpr(t2));
/* ... and step ESP over the popped word */
12738 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) );
12742 /* ---------------- Misc weird-ass insns --------------- */
12744 case 0x27: /* DAA */
12745 case 0x2F: /* DAS */
12746 case 0x37: /* AAA */
12747 case 0x3F: /* AAS */
12748 /* An ugly implementation for some ugly instructions. Oh
12750 if (sz != 4) goto decode_failure;
12751 t1 = newTemp(Ity_I32);
12752 t2 = newTemp(Ity_I32);
12753 /* Make up a 32-bit value (t1), with the old value of AX in the
12754 bottom 16 bits, and the old OSZACP bitmask in the upper 16
12757 binop(Iop_16HLto32,
12759 mk_x86g_calculate_eflags_all()),
12762 /* Call the helper fn, to get a new AX and OSZACP value, and
12763 poke both back into the guest state. Also pass the helper
12764 the actual opcode so it knows which of the 4 instructions it
12765 is doing the computation for. */
12766 vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F);
12769 Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
12770 &x86g_calculate_daa_das_aaa_aas,
12771 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
12773 putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));
12775 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
12776 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
12777 stmt( IRStmt_Put( OFFB_CC_DEP1,
12779 binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
12780 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
12781 | X86G_CC_MASK_A | X86G_CC_MASK_Z
12782 | X86G_CC_MASK_S| X86G_CC_MASK_O )
12786 /* Set NDEP even though it isn't used. This makes redundant-PUT
12787 elimination of previous stores to this field work better. */
12788 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
12790 case 0x27: DIP("daa\n"); break;
12791 case 0x2F: DIP("das\n"); break;
12792 case 0x37: DIP("aaa\n"); break;
12793 case 0x3F: DIP("aas\n"); break;
12794 default: vassert(0);
12798 //-- case 0xD4: /* AAM */
12799 //-- case 0xD5: /* AAD */
12800 //-- d32 = getIByte(delta); delta++;
12801 //-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !");
12802 //-- t1 = newTemp(cb);
12803 //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1);
12804 //-- /* Widen %AX to 32 bits, so it's all defined when we push it. */
12805 //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
12806 //-- uWiden(cb, 2, False);
12807 //-- uInstr0(cb, CALLM_S, 0);
12808 //-- uInstr1(cb, PUSH, 4, TempReg, t1);
12809 //-- uInstr1(cb, CALLM, 0, Lit16,
12810 //-- opc == 0xD4 ? VGOFF_(helper_AAM) : VGOFF_(helper_AAD) );
12811 //-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty);
12812 //-- uInstr1(cb, POP, 4, TempReg, t1);
12813 //-- uInstr0(cb, CALLM_E, 0);
12814 //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
12815 //-- DIP(opc == 0xD4 ? "aam\n" : "aad\n");
12818 /* ------------------------ CWD/CDQ -------------------- */
/* ------ 0x98 CBW/CWDE and 0x99 CWD/CDQ: sign extensions ------ */
12820 case 0x98: /* CBW */
/* CWDE: sign-extend %AX into %EAX (the sz gate is on elided lines) */
12822 putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
/* CBW: sign-extend %AL into %AX */
12826 putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
12831 case 0x99: /* CWD/CDQ */
/* arithmetic right shift by width-1 broadcasts the sign bit of
   %AX/%EAX, producing the high-half value (destination Put elided) */
12834 binop(mkSizedOp(ty,Iop_Sar8),
12835 getIReg(sz, R_EAX),
12836 mkU8(sz == 2 ? 15 : 31)) );
/* NOTE(review): mnemonics look misspelt -- presumably meant
   "cwd\n"/"cdq\n". Debug-print only; not guest-visible behavior. */
12837 DIP(sz == 2 ? "cwdq\n" : "cdqq\n");
12840 /* ------------------------ FPU ops -------------------- */
12842 case 0x9E: /* SAHF */
12847 case 0x9F: /* LAHF */
12852 case 0x9B: /* FWAIT */
12865 Int delta0 = delta;
12866 Bool decode_OK = False;
12867 delta = dis_FPU ( &decode_OK, sorb, delta );
12870 goto decode_failure;
12875 /* ------------------------ INC & DEC ------------------ */
/* ------ INC reg (0x40..0x47): low 3 opcode bits select the register ------ */
12877 case 0x40: /* INC eAX */
12878 case 0x41: /* INC eCX */
12879 case 0x42: /* INC eDX */
12880 case 0x43: /* INC eBX */
12881 case 0x44: /* INC eSP */
12882 case 0x45: /* INC eBP */
12883 case 0x46: /* INC eSI */
12884 case 0x47: /* INC eDI */
12885 vassert(sz == 2 || sz == 4);
/* t1 = reg + 1 (the constant-1 operand is on an elided line) */
12888 assign( t1, binop(mkSizedOp(ty,Iop_Add8),
12889 getIReg(sz, (UInt)(opc - 0x40)),
/* dedicated flag helper -- presumably because x86 INC/DEC leave CF
   unchanged, unlike ADD/SUB; TODO confirm against the helper */
12891 setFlags_INC_DEC( True, t1, ty );
12892 putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
12893 DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
/* ------ DEC reg (0x48..0x4F): same pattern, subtracting 1 ------ */
12896 case 0x48: /* DEC eAX */
12897 case 0x49: /* DEC eCX */
12898 case 0x4A: /* DEC eDX */
12899 case 0x4B: /* DEC eBX */
12900 case 0x4C: /* DEC eSP */
12901 case 0x4D: /* DEC eBP */
12902 case 0x4E: /* DEC eSI */
12903 case 0x4F: /* DEC eDI */
12904 vassert(sz == 2 || sz == 4);
12907 assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
12908 getIReg(sz, (UInt)(opc - 0x48)),
12910 setFlags_INC_DEC( False, t1, ty );
12911 putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
12912 DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
12915 /* ------------------------ INT ------------------------ */
12917 case 0xCC: /* INT 3 */
12918 jmp_lit(Ijk_SigTRAP,((Addr32)guest_EIP_bbstart)+delta);
12919 dres.whatNext = Dis_StopHere;
12923 case 0xCD: /* INT imm8 */
12924 d32 = getIByte(delta); delta++;
12926 /* For any of the cases where we emit a jump (that is, for all
12927 currently handled cases), it's important that all ArchRegs
12928 carry their up-to-date value at this point. So we declare an
12929 end-of-block here, which forces any TempRegs caching ArchRegs
12932 /* Handle int $0x30 (l4re syscall) */
12934 jmp_lit(Ijk_Sys_int48,((Addr32)guest_EIP_bbstart)+delta);
12935 dres.whatNext = Dis_StopHere;
12936 DIP("int $0x30\n");
12940 /* Handle int $0x32 (l4re syscall) */
12942 jmp_lit(Ijk_Sys_int50,((Addr32)guest_EIP_bbstart)+delta);
12943 dres.whatNext = Dis_StopHere;
12944 DIP("int $0x32\n");
12948 /* Handle int $0x40 .. $0x43 by synthesising a segfault and a
12949 restart of this instruction (hence the "-2" two lines below,
12950 to get the restart EIP to be this instruction. This is
12951 probably Linux-specific and it would be more correct to only
12952 do this if the VexAbiInfo says that is what we should do. */
12953 if (d32 >= 0x40 && d32 <= 0x43) {
12954 jmp_lit(Ijk_SigSEGV,((Addr32)guest_EIP_bbstart)+delta-2);
12955 dres.whatNext = Dis_StopHere;
12956 DIP("int $0x%x\n", (Int)d32);
12960 /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
12961 (darwin syscalls). As part of this, note where we are, so we
12962 can back up the guest to this point if the syscall needs to
12965 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
12966 mkU32(guest_EIP_curr_instr) ) );
12967 jmp_lit(Ijk_Sys_int128,((Addr32)guest_EIP_bbstart)+delta);
12968 dres.whatNext = Dis_StopHere;
12969 DIP("int $0x80\n");
12973 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
12974 mkU32(guest_EIP_curr_instr) ) );
12975 jmp_lit(Ijk_Sys_int129,((Addr32)guest_EIP_bbstart)+delta);
12976 dres.whatNext = Dis_StopHere;
12977 DIP("int $0x81\n");
12981 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
12982 mkU32(guest_EIP_curr_instr) ) );
12983 jmp_lit(Ijk_Sys_int130,((Addr32)guest_EIP_bbstart)+delta);
12984 dres.whatNext = Dis_StopHere;
12985 DIP("int $0x82\n");
12989 /* none of the above */
12990 goto decode_failure;
12992 /* ------------------------ Jcond, byte offset --------- */
12994 case 0xEB: /* Jb (jump, byte offset) */
12995 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
12997 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
12998 dres.whatNext = Dis_ResteerU;
12999 dres.continueAt = (Addr64)(Addr32)d32;
13001 jmp_lit(Ijk_Boring,d32);
13002 dres.whatNext = Dis_StopHere;
13004 DIP("jmp-8 0x%x\n", d32);
13007 case 0xE9: /* Jv (jump, 16/32 offset) */
13008 vassert(sz == 4); /* JRS added 2004 July 11 */
13009 d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
13011 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
13012 dres.whatNext = Dis_ResteerU;
13013 dres.continueAt = (Addr64)(Addr32)d32;
13015 jmp_lit(Ijk_Boring,d32);
13016 dres.whatNext = Dis_StopHere;
13018 DIP("jmp 0x%x\n", d32);
/* ------ Jcc rel8: the condition code is simply (opc - 0x70),
   matching the X86Condcode enumeration ------ */
13023 case 0x72: /* JBb/JNAEb (jump below) */
13024 case 0x73: /* JNBb/JAEb (jump not below) */
13025 case 0x74: /* JZb/JEb (jump zero) */
13026 case 0x75: /* JNZb/JNEb (jump not zero) */
13027 case 0x76: /* JBEb/JNAb (jump below or equal) */
13028 case 0x77: /* JNBEb/JAb (jump not below or equal) */
13029 case 0x78: /* JSb (jump negative) */
13030 case 0x79: /* JNSb (jump not negative) */
13031 case 0x7A: /* JP (jump parity even) */
13032 case 0x7B: /* JNP/JPO (jump parity odd) */
13033 case 0x7C: /* JLb/JNGEb (jump less) */
13034 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
13035 case 0x7E: /* JLEb/JNGb (jump less or equal) */
13036 case 0x7F: /* JGb/JNLEb (jump greater) */
13038 HChar* comment = "";
13039 jmpDelta = (Int)getSDisp8(delta);
13040 vassert(-128 <= jmpDelta && jmpDelta < 128);
/* target = address of next insn (+1 skips the disp8 byte) + disp */
13041 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
13044 && vex_control.guest_chase_cond
13045 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
13047 && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
13048 /* Speculation: assume this backward branch is taken. So we
13049 need to emit a side-exit to the insn following this one,
13050 on the negation of the condition, and continue at the
13051 branch target address (d32). If we wind up back at the
13052 first instruction of the trace, just stop; it's better to
13053 let the IR loop unroller handle that case. */
13055 mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
13057 IRConst_U32(guest_EIP_bbstart+delta) ) );
13058 dres.whatNext = Dis_ResteerC;
13059 dres.continueAt = (Addr64)(Addr32)d32;
13060 comment = "(assumed taken)";
13064 && vex_control.guest_chase_cond
13065 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
13067 && resteerOkFn( callback_opaque,
13068 (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
13069 /* Speculation: assume this forward branch is not taken. So
13070 we need to emit a side-exit to d32 (the dest) and continue
13071 disassembling at the insn immediately following this
13074 mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
13076 IRConst_U32(d32) ) );
13077 dres.whatNext = Dis_ResteerC;
13078 dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
13079 comment = "(assumed not taken)";
13082 /* Conservative default translation - end the block at this
13084 jcc_01( (X86Condcode)(opc - 0x70),
13085 (Addr32)(guest_EIP_bbstart+delta), d32);
13086 dres.whatNext = Dis_StopHere;
13088 DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
13092 case 0xE3: /* JECXZ (for JCXZ see above) */
13093 if (sz != 4) goto decode_failure;
13094 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13097 binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
13101 DIP("jecxz 0x%x\n", d32);
13104 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
13105 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
13106 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
13107 { /* Again, the docs say this uses ECX/CX as a count depending on
13108 the address size override, not the operand one. Since we
13109 don't handle address size overrides, I guess that means
13111 IRExpr* zbit = NULL;
13112 IRExpr* count = NULL;
13113 IRExpr* cond = NULL;
13114 HChar* xtra = NULL;
13116 if (sz != 4) goto decode_failure;
13117 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13119 putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));
13121 count = getIReg(4,R_ECX);
13122 cond = binop(Iop_CmpNE32, count, mkU32(0));
13129 zbit = mk_x86g_calculate_condition( X86CondZ );
13130 cond = mkAnd1(cond, zbit);
13134 zbit = mk_x86g_calculate_condition( X86CondNZ );
13135 cond = mkAnd1(cond, zbit);
13140 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32)) );
13142 DIP("loop%s 0x%x\n", xtra, d32);
13146 /* ------------------------ IMUL ----------------------- */
13148 case 0x69: /* IMUL Iv, Ev, Gv */
13149 delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
13151 case 0x6B: /* IMUL Ib, Ev, Gv */
13152 delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
13155 /* ------------------------ MOV ------------------------ */
13157 case 0x88: /* MOV Gb,Eb */
13158 delta = dis_mov_G_E(sorb, 1, delta);
13161 case 0x89: /* MOV Gv,Ev */
13162 delta = dis_mov_G_E(sorb, sz, delta);
13165 case 0x8A: /* MOV Eb,Gb */
13166 delta = dis_mov_E_G(sorb, 1, delta);
13169 case 0x8B: /* MOV Ev,Gv */
13170 delta = dis_mov_E_G(sorb, sz, delta);
13173 case 0x8D: /* LEA M,Gv */
13175 goto decode_failure;
13176 modrm = getIByte(delta);
13177 if (epartIsReg(modrm))
13178 goto decode_failure;
13179 /* NOTE! this is the one place where a segment override prefix
13180 has no effect on the address calculation. Therefore we pass
13181 zero instead of sorb here. */
13182 addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
13184 putIReg(sz, gregOfRM(modrm), mkexpr(addr));
13185 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
13186 nameIReg(sz,gregOfRM(modrm)));
13189 case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
13190 delta = dis_mov_Sw_Ew(sorb, sz, delta);
13193 case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
13194 delta = dis_mov_Ew_Sw(sorb, delta);
13197 case 0xA0: /* MOV Ob,AL */
13199 /* Fall through ... */
13200 case 0xA1: /* MOV Ov,eAX */
13201 d32 = getUDisp32(delta); delta += 4;
13203 addr = newTemp(Ity_I32);
13204 assign( addr, handleSegOverride(sorb, mkU32(d32)) );
13205 putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
13206 DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
13207 d32, nameIReg(sz,R_EAX));
13210 case 0xA2: /* MOV Ob,AL */
13212 /* Fall through ... */
13213 case 0xA3: /* MOV eAX,Ov */
13214 d32 = getUDisp32(delta); delta += 4;
13216 addr = newTemp(Ity_I32);
13217 assign( addr, handleSegOverride(sorb, mkU32(d32)) );
13218 storeLE( mkexpr(addr), getIReg(sz,R_EAX) );
13219 DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX),
13220 sorbTxt(sorb), d32);
13223 case 0xB0: /* MOV imm,AL */
13224 case 0xB1: /* MOV imm,CL */
13225 case 0xB2: /* MOV imm,DL */
13226 case 0xB3: /* MOV imm,BL */
13227 case 0xB4: /* MOV imm,AH */
13228 case 0xB5: /* MOV imm,CH */
13229 case 0xB6: /* MOV imm,DH */
13230 case 0xB7: /* MOV imm,BH */
13231 d32 = getIByte(delta); delta += 1;
13232 putIReg(1, opc-0xB0, mkU8(d32));
13233 DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0));
13236 case 0xB8: /* MOV imm,eAX */
13237 case 0xB9: /* MOV imm,eCX */
13238 case 0xBA: /* MOV imm,eDX */
13239 case 0xBB: /* MOV imm,eBX */
13240 case 0xBC: /* MOV imm,eSP */
13241 case 0xBD: /* MOV imm,eBP */
13242 case 0xBE: /* MOV imm,eSI */
13243 case 0xBF: /* MOV imm,eDI */
13244 d32 = getUDisp(sz,delta); delta += sz;
13245 putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32));
13246 DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8));
13249 case 0xC6: /* MOV Ib,Eb */
13252 case 0xC7: /* MOV Iv,Ev */
13256 modrm = getIByte(delta);
13257 if (epartIsReg(modrm)) {
13258 delta++; /* mod/rm byte */
13259 d32 = getUDisp(sz,delta); delta += sz;
13260 putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32));
13261 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32,
13262 nameIReg(sz,eregOfRM(modrm)));
13264 addr = disAMode ( &alen, sorb, delta, dis_buf );
13266 d32 = getUDisp(sz,delta); delta += sz;
13267 storeLE(mkexpr(addr), mkU(szToITy(sz), d32));
13268 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
13272 /* ------------------------ opl imm, A ----------------- */
13274 case 0x04: /* ADD Ib, AL */
13275 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
13277 case 0x05: /* ADD Iv, eAX */
13278 delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" );
13281 case 0x0C: /* OR Ib, AL */
13282 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
13284 case 0x0D: /* OR Iv, eAX */
13285 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
13288 case 0x14: /* ADC Ib, AL */
13289 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
13291 case 0x15: /* ADC Iv, eAX */
13292 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
13295 case 0x1C: /* SBB Ib, AL */
13296 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
13298 case 0x1D: /* SBB Iv, eAX */
13299 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
13302 case 0x24: /* AND Ib, AL */
13303 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
13305 case 0x25: /* AND Iv, eAX */
13306 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
13309 case 0x2C: /* SUB Ib, AL */
13310 delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" );
13312 case 0x2D: /* SUB Iv, eAX */
13313 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
13316 case 0x34: /* XOR Ib, AL */
13317 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
13319 case 0x35: /* XOR Iv, eAX */
13320 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
13323 case 0x3C: /* CMP Ib, AL */
13324 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
13326 case 0x3D: /* CMP Iv, eAX */
13327 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
13330 case 0xA8: /* TEST Ib, AL */
13331 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
13333 case 0xA9: /* TEST Iv, eAX */
13334 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
13337 /* ------------------------ opl Ev, Gv ----------------- */
13339 case 0x02: /* ADD Eb,Gb */
13340 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" );
13342 case 0x03: /* ADD Ev,Gv */
13343 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" );
13346 case 0x0A: /* OR Eb,Gb */
13347 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" );
13349 case 0x0B: /* OR Ev,Gv */
13350 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" );
13353 case 0x12: /* ADC Eb,Gb */
13354 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
13356 case 0x13: /* ADC Ev,Gv */
13357 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
13360 case 0x1A: /* SBB Eb,Gb */
13361 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
13363 case 0x1B: /* SBB Ev,Gv */
13364 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
13367 case 0x22: /* AND Eb,Gb */
13368 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" );
13370 case 0x23: /* AND Ev,Gv */
13371 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" );
13374 case 0x2A: /* SUB Eb,Gb */
13375 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
13377 case 0x2B: /* SUB Ev,Gv */
13378 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
13381 case 0x32: /* XOR Eb,Gb */
13382 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
13384 case 0x33: /* XOR Ev,Gv */
13385 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
13388 case 0x3A: /* CMP Eb,Gb */
13389 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
13391 case 0x3B: /* CMP Ev,Gv */
13392 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
13395 case 0x84: /* TEST Eb,Gb */
13396 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" );
13398 case 0x85: /* TEST Ev,Gv */
13399 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" );
13402 /* ------------------------ opl Gv, Ev ----------------- */
13404 case 0x00: /* ADD Gb,Eb */
13405 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13406 Iop_Add8, True, 1, delta, "add" );
13408 case 0x01: /* ADD Gv,Ev */
13409 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13410 Iop_Add8, True, sz, delta, "add" );
13413 case 0x08: /* OR Gb,Eb */
13414 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13415 Iop_Or8, True, 1, delta, "or" );
13417 case 0x09: /* OR Gv,Ev */
13418 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13419 Iop_Or8, True, sz, delta, "or" );
13422 case 0x10: /* ADC Gb,Eb */
13423 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13424 Iop_Add8, True, 1, delta, "adc" );
13426 case 0x11: /* ADC Gv,Ev */
13427 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13428 Iop_Add8, True, sz, delta, "adc" );
13431 case 0x18: /* SBB Gb,Eb */
13432 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13433 Iop_Sub8, True, 1, delta, "sbb" );
13435 case 0x19: /* SBB Gv,Ev */
13436 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13437 Iop_Sub8, True, sz, delta, "sbb" );
13440 case 0x20: /* AND Gb,Eb */
13441 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13442 Iop_And8, True, 1, delta, "and" );
13444 case 0x21: /* AND Gv,Ev */
13445 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13446 Iop_And8, True, sz, delta, "and" );
13449 case 0x28: /* SUB Gb,Eb */
13450 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13451 Iop_Sub8, True, 1, delta, "sub" );
13453 case 0x29: /* SUB Gv,Ev */
13454 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13455 Iop_Sub8, True, sz, delta, "sub" );
13458 case 0x30: /* XOR Gb,Eb */
13459 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13460 Iop_Xor8, True, 1, delta, "xor" );
13462 case 0x31: /* XOR Gv,Ev */
13463 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13464 Iop_Xor8, True, sz, delta, "xor" );
13467 case 0x38: /* CMP Gb,Eb */
13468 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13469 Iop_Sub8, False, 1, delta, "cmp" );
13471 case 0x39: /* CMP Gv,Ev */
13472 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13473 Iop_Sub8, False, sz, delta, "cmp" );
13476 /* ------------------------ POP ------------------------ */
/* ------ POP reg (0x58..0x5F): load from [ESP], then bump ESP ------ */
13478 case 0x58: /* POP eAX */
13479 case 0x59: /* POP eCX */
13480 case 0x5A: /* POP eDX */
13481 case 0x5B: /* POP eBX */
13482 case 0x5D: /* POP eBP */
13483 case 0x5E: /* POP eSI */
13484 case 0x5F: /* POP eDI */
13485 case 0x5C: /* POP eSP */
13486 vassert(sz == 2 || sz == 4);
13487 t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32);
13488 assign(t2, getIReg(4, R_ESP));
13489 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
/* ESP is advanced before the destination write, so "pop %esp"
   leaves the loaded value in ESP, as it writes last */
13490 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
13491 putIReg(sz, opc-0x58, mkexpr(t1));
13492 DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58));
13495 case 0x9D: /* POPF */
13496 vassert(sz == 2 || sz == 4);
13497 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
13498 assign(t2, getIReg(4, R_ESP));
13499 assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2))));
13500 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
13502 /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
13504 set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/,
13505 ((Addr32)guest_EIP_bbstart)+delta );
13507 DIP("popf%c\n", nameISize(sz));
13510 case 0x61: /* POPA */
13511 /* This is almost certainly wrong for sz==2. So ... */
13512 if (sz != 4) goto decode_failure;
13514 /* t5 is the old %ESP value. */
13515 t5 = newTemp(Ity_I32);
13516 assign( t5, getIReg(4, R_ESP) );
13518 /* Reload all the registers, except %esp. */
13519 putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
13520 putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
13521 putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
13522 putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
13523 /* ignore saved %ESP */
13524 putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
13525 putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
13526 putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));
13528 /* and move %ESP back up */
13529 putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );
13531 DIP("popa%c\n", nameISize(sz));
13534 case 0x8F: /* POPL/POPW m32 */
13536 UChar rm = getIByte(delta);
13538 /* make sure this instruction is correct POP */
13539 if (epartIsReg(rm) || gregOfRM(rm) != 0)
13540 goto decode_failure;
13541 /* and has correct size */
13542 if (sz != 4 && sz != 2)
13543 goto decode_failure;
13546 t1 = newTemp(Ity_I32); /* stack address */
13547 t3 = newTemp(ty); /* data */
13548 /* set t1 to ESP: t1 = ESP */
13549 assign( t1, getIReg(4, R_ESP) );
13550 /* load M[ESP] to virtual register t3: t3 = M[t1] */
13551 assign( t3, loadLE(ty, mkexpr(t1)) );
13553 /* increase ESP; must be done before the STORE. Intel manual says:
13554 If the ESP register is used as a base register for addressing
13555 a destination operand in memory, the POP instruction computes
13556 the effective address of the operand after it increments the
13559 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) );
13561 /* resolve MODR/M */
13562 addr = disAMode ( &len, sorb, delta, dis_buf);
13563 storeLE( mkexpr(addr), mkexpr(t3) );
13565 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf);
13571 case 0x1F: /* POP %DS */
13572 dis_pop_segreg( R_DS, sz ); break;
13573 case 0x07: /* POP %ES */
13574 dis_pop_segreg( R_ES, sz ); break;
13575 case 0x17: /* POP %SS */
13576 dis_pop_segreg( R_SS, sz ); break;
13578 /* ------------------------ PUSH ----------------------- */
/* ------ PUSH reg (0x50..0x57) ------ */
13580 case 0x50: /* PUSH eAX */
13581 case 0x51: /* PUSH eCX */
13582 case 0x52: /* PUSH eDX */
13583 case 0x53: /* PUSH eBX */
13584 case 0x55: /* PUSH eBP */
13585 case 0x56: /* PUSH eSI */
13586 case 0x57: /* PUSH eDI */
13587 case 0x54: /* PUSH eSP */
13588 /* This is the Right Way, in that the value to be pushed is
13589 established before %esp is changed, so that pushl %esp
13590 correctly pushes the old value. */
13591 vassert(sz == 2 || sz == 4);
13592 ty = sz==2 ? Ity_I16 : Ity_I32;
13593 t1 = newTemp(ty); t2 = newTemp(Ity_I32);
/* read the register first (see comment above), then lower ESP */
13594 assign(t1, getIReg(sz, opc-0x50));
13595 assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)));
13596 putIReg(4, R_ESP, mkexpr(t2) );
13597 storeLE(mkexpr(t2),mkexpr(t1));
13598 DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
13602 case 0x68: /* PUSH Iv */
13603 d32 = getUDisp(sz,delta); delta += sz;
13605 case 0x6A: /* PUSH Ib, sign-extended to sz */
13606 d32 = getSDisp8(delta); delta += 1;
13610 t1 = newTemp(Ity_I32); t2 = newTemp(ty);
13611 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
13612 putIReg(4, R_ESP, mkexpr(t1) );
13613 /* stop mkU16 asserting if d32 is a negative 16-bit number
13617 storeLE( mkexpr(t1), mkU(ty,d32) );
13618 DIP("push%c $0x%x\n", nameISize(sz), d32);
13621 case 0x9C: /* PUSHF */ {
13622 vassert(sz == 2 || sz == 4);
13624 t1 = newTemp(Ity_I32);
13625 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
13626 putIReg(4, R_ESP, mkexpr(t1) );
13628 /* Calculate OSZACP, and patch in fixed fields as per
13630 - bit 1 is always 1
13631 - bit 9 is Interrupt Enable (should always be 1 in user mode?)
13633 t2 = newTemp(Ity_I32);
13634 assign( t2, binop(Iop_Or32,
13635 mk_x86g_calculate_eflags_all(),
13636 mkU32( (1<<1)|(1<<9) ) ));
13638 /* Patch in the D flag. This can simply be a copy of bit 10 of
13639 baseBlock[OFFB_DFLAG]. */
13640 t3 = newTemp(Ity_I32);
13641 assign( t3, binop(Iop_Or32,
13644 IRExpr_Get(OFFB_DFLAG,Ity_I32),
13648 /* And patch in the ID flag. */
13649 t4 = newTemp(Ity_I32);
13650 assign( t4, binop(Iop_Or32,
13653 binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32),
13658 /* And patch in the AC flag. */
13659 t5 = newTemp(Ity_I32);
13660 assign( t5, binop(Iop_Or32,
13663 binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32),
13668 /* if sz==2, the stored value needs to be narrowed. */
13670 storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) );
13672 storeLE( mkexpr(t1), mkexpr(t5) );
13674 DIP("pushf%c\n", nameISize(sz));
13678 case 0x60: /* PUSHA */
13679 /* This is almost certainly wrong for sz==2. So ... */
13680 if (sz != 4) goto decode_failure;
13682 /* This is the Right Way, in that the value to be pushed is
13683 established before %esp is changed, so that pusha
13684 correctly pushes the old %esp value. New value of %esp is
13685 pushed at start. */
13686 /* t0 is the %ESP value we're going to push. */
13687 t0 = newTemp(Ity_I32);
13688 assign( t0, getIReg(4, R_ESP) );
13690 /* t5 will be the new %ESP value. */
13691 t5 = newTemp(Ity_I32);
13692 assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );
13694 /* Update guest state before prodding memory. */
13695 putIReg(4, R_ESP, mkexpr(t5));
13697 /* Dump all the registers. */
13698 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
13699 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
13700 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
13701 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
13702 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
13703 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
13704 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
13705 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );
13707 DIP("pusha%c\n", nameISize(sz));
13710 case 0x0E: /* PUSH %CS */
13711 dis_push_segreg( R_CS, sz ); break;
13712 case 0x1E: /* PUSH %DS */
13713 dis_push_segreg( R_DS, sz ); break;
13714 case 0x06: /* PUSH %ES */
13715 dis_push_segreg( R_ES, sz ); break;
13716 case 0x16: /* PUSH %SS */
13717 dis_push_segreg( R_SS, sz ); break;
13719 /* ------------------------ SCAS et al ----------------- */
13721 case 0xA4: /* MOVS, no REP prefix */
13724 goto decode_failure; /* else dis_string_op asserts */
13725 dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
13728 case 0xA6: /* CMPSb, no REP prefix */
13731 goto decode_failure; /* else dis_string_op asserts */
13732 dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
13735 case 0xAA: /* STOS, no REP prefix */
13738 goto decode_failure; /* else dis_string_op asserts */
13739 dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
13742 case 0xAC: /* LODS, no REP prefix */
13745 goto decode_failure; /* else dis_string_op asserts */
13746 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
13749 case 0xAE: /* SCAS, no REP prefix */
13752 goto decode_failure; /* else dis_string_op asserts */
13753 dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
13757 case 0xFC: /* CLD */
13758 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) );
13762 case 0xFD: /* STD */
13763 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) );
13767 case 0xF8: /* CLC */
13768 case 0xF9: /* STC */
13769 case 0xF5: /* CMC */
13770 t0 = newTemp(Ity_I32);
13771 t1 = newTemp(Ity_I32);
13772 assign( t0, mk_x86g_calculate_eflags_all() );
13775 assign( t1, binop(Iop_And32, mkexpr(t0),
13776 mkU32(~X86G_CC_MASK_C)));
13780 assign( t1, binop(Iop_Or32, mkexpr(t0),
13781 mkU32(X86G_CC_MASK_C)));
13785 assign( t1, binop(Iop_Xor32, mkexpr(t0),
13786 mkU32(X86G_CC_MASK_C)));
13790 vpanic("disInstr(x86)(clc/stc/cmc)");
13792 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
13793 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
13794 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
13795 /* Set NDEP even though it isn't used. This makes redundant-PUT
13796 elimination of previous stores to this field work better. */
13797 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13800 case 0xD6: /* SALC */
13801 t0 = newTemp(Ity_I32);
13802 t1 = newTemp(Ity_I32);
13803 assign( t0, binop(Iop_And32,
13804 mk_x86g_calculate_eflags_c(),
13806 assign( t1, binop(Iop_Sar32,
13807 binop(Iop_Shl32, mkexpr(t0), mkU8(31)),
13809 putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) );
13813 /* REPNE prefix insn */
13815 Addr32 eip_orig = guest_EIP_bbstart + delta_start;
13816 if (sorb != 0) goto decode_failure;
13817 abyte = getIByte(delta); delta++;
13819 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
13820 dres.whatNext = Dis_StopHere;
13823 /* According to the Intel manual, "repne movs" should never occur, but
13824 * in practice it has happened, so allow for it here... */
13825 case 0xA4: sz = 1; /* REPNE MOVS<sz> */
13827 dis_REP_op ( X86CondNZ, dis_MOVS, sz, eip_orig,
13828 guest_EIP_bbstart+delta, "repne movs" );
13831 case 0xA6: sz = 1; /* REPNE CMP<sz> */
13833 dis_REP_op ( X86CondNZ, dis_CMPS, sz, eip_orig,
13834 guest_EIP_bbstart+delta, "repne cmps" );
13837 case 0xAA: sz = 1; /* REPNE STOS<sz> */
13839 dis_REP_op ( X86CondNZ, dis_STOS, sz, eip_orig,
13840 guest_EIP_bbstart+delta, "repne stos" );
13843 case 0xAE: sz = 1; /* REPNE SCAS<sz> */
13845 dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig,
13846 guest_EIP_bbstart+delta, "repne scas" );
13850 goto decode_failure;
13855 /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
13856 for the rest, it means REP) */
13858 Addr32 eip_orig = guest_EIP_bbstart + delta_start;
13859 if (sorb != 0) goto decode_failure;
13860 abyte = getIByte(delta); delta++;
13862 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
13863 dres.whatNext = Dis_StopHere;
13866 case 0xA4: sz = 1; /* REP MOVS<sz> */
13868 dis_REP_op ( X86CondAlways, dis_MOVS, sz, eip_orig,
13869 guest_EIP_bbstart+delta, "rep movs" );
13872 case 0xA6: sz = 1; /* REPE CMP<sz> */
13874 dis_REP_op ( X86CondZ, dis_CMPS, sz, eip_orig,
13875 guest_EIP_bbstart+delta, "repe cmps" );
13878 case 0xAA: sz = 1; /* REP STOS<sz> */
13880 dis_REP_op ( X86CondAlways, dis_STOS, sz, eip_orig,
13881 guest_EIP_bbstart+delta, "rep stos" );
13884 case 0xAC: sz = 1; /* REP LODS<sz> */
13886 dis_REP_op ( X86CondAlways, dis_LODS, sz, eip_orig,
13887 guest_EIP_bbstart+delta, "rep lods" );
13890 case 0xAE: sz = 1; /* REPE SCAS<sz> */
13892 dis_REP_op ( X86CondZ, dis_SCAS, sz, eip_orig,
13893 guest_EIP_bbstart+delta, "repe scas" );
13896 case 0x90: /* REP NOP (PAUSE) */
13897 /* a hint to the P4 re spin-wait loop */
13898 DIP("rep nop (P4 pause)\n");
13899 /* "observe" the hint. The Vex client needs to be careful not
13900 to cause very long delays as a result, though. */
13901 jmp_lit(Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
13902 dres.whatNext = Dis_StopHere;
13905 case 0xC3: /* REP RET -- same as normal ret? */
13907 dres.whatNext = Dis_StopHere;
13912 goto decode_failure;
13917 /* ------------------------ XCHG ----------------------- */
13919 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
13920 prefix; hence it must be translated with an IRCAS (at least, the
13921 memory variant). */
13922 case 0x86: /* XCHG Gb,Eb */
13924 /* Fall through ... */
13925 case 0x87: /* XCHG Gv,Ev */
13926 modrm = getIByte(delta);
13928 t1 = newTemp(ty); t2 = newTemp(ty);
13929 if (epartIsReg(modrm)) {
13930 assign(t1, getIReg(sz, eregOfRM(modrm)));
13931 assign(t2, getIReg(sz, gregOfRM(modrm)));
13932 putIReg(sz, gregOfRM(modrm), mkexpr(t1));
13933 putIReg(sz, eregOfRM(modrm), mkexpr(t2));
13935 DIP("xchg%c %s, %s\n",
13936 nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
13937 nameIReg(sz,eregOfRM(modrm)));
13939 *expect_CAS = True;
13940 addr = disAMode ( &alen, sorb, delta, dis_buf );
13941 assign( t1, loadLE(ty,mkexpr(addr)) );
13942 assign( t2, getIReg(sz,gregOfRM(modrm)) );
13943 casLE( mkexpr(addr),
13944 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
13945 putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
13947 DIP("xchg%c %s, %s\n", nameISize(sz),
13948 nameIReg(sz,gregOfRM(modrm)), dis_buf);
13952 case 0x90: /* XCHG eAX,eAX */
13955 case 0x91: /* XCHG eAX,eCX */
13956 case 0x92: /* XCHG eAX,eDX */
13957 case 0x93: /* XCHG eAX,eBX */
13958 case 0x94: /* XCHG eAX,eSP */
13959 case 0x95: /* XCHG eAX,eBP */
13960 case 0x96: /* XCHG eAX,eSI */
13961 case 0x97: /* XCHG eAX,eDI */
13962 codegen_xchg_eAX_Reg ( sz, opc - 0x90 );
13965 /* ------------------------ XLAT ----------------------- */
13967 case 0xD7: /* XLAT */
13968 if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */
13977 unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/))))));
13979 DIP("xlat%c [ebx]\n", nameISize(sz));
13982 /* ------------------------ IN / OUT ----------------------- */
13984 case 0xE4: /* IN imm8, AL */
13986 t1 = newTemp(Ity_I32);
13987 abyte = getIByte(delta); delta++;
13988 assign(t1, mkU32( abyte & 0xFF ));
13989 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
13991 case 0xE5: /* IN imm8, eAX */
13992 vassert(sz == 2 || sz == 4);
13993 t1 = newTemp(Ity_I32);
13994 abyte = getIByte(delta); delta++;
13995 assign(t1, mkU32( abyte & 0xFF ));
13996 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
13998 case 0xEC: /* IN %DX, AL */
14000 t1 = newTemp(Ity_I32);
14001 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
14002 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
14003 nameIReg(sz,R_EAX));
14005 case 0xED: /* IN %DX, eAX */
14006 vassert(sz == 2 || sz == 4);
14007 t1 = newTemp(Ity_I32);
14008 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
14009 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
14010 nameIReg(sz,R_EAX));
14013 /* At this point, sz indicates the width, and t1 is a 32-bit
14014 value giving port number. */
14016 vassert(sz == 1 || sz == 2 || sz == 4);
14018 t2 = newTemp(Ity_I32);
14019 d = unsafeIRDirty_1_N(
14022 "x86g_dirtyhelper_IN",
14023 &x86g_dirtyhelper_IN,
14024 mkIRExprVec_2( mkexpr(t1), mkU32(sz) )
14026 /* do the call, dumping the result in t2. */
14027 stmt( IRStmt_Dirty(d) );
14028 putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) );
14032 case 0xE6: /* OUT AL, imm8 */
14034 t1 = newTemp(Ity_I32);
14035 abyte = getIByte(delta); delta++;
14036 assign( t1, mkU32( abyte & 0xFF ) );
14037 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
14039 case 0xE7: /* OUT eAX, imm8 */
14040 vassert(sz == 2 || sz == 4);
14041 t1 = newTemp(Ity_I32);
14042 abyte = getIByte(delta); delta++;
14043 assign( t1, mkU32( abyte & 0xFF ) );
14044 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
14046 case 0xEE: /* OUT AL, %DX */
14048 t1 = newTemp(Ity_I32);
14049 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
14050 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
14051 nameIReg(2,R_EDX));
14053 case 0xEF: /* OUT eAX, %DX */
14054 vassert(sz == 2 || sz == 4);
14055 t1 = newTemp(Ity_I32);
14056 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
14057 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
14058 nameIReg(2,R_EDX));
14061 /* At this point, sz indicates the width, and t1 is a 32-bit
14062 value giving port number. */
14064 vassert(sz == 1 || sz == 2 || sz == 4);
14066 d = unsafeIRDirty_0_N(
14068 "x86g_dirtyhelper_OUT",
14069 &x86g_dirtyhelper_OUT,
14070 mkIRExprVec_3( mkexpr(t1),
14071 widenUto32( getIReg(sz, R_EAX) ),
14074 stmt( IRStmt_Dirty(d) );
14078 /* ------------------------ (Grp1 extensions) ---------- */
14080 case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as
14081 case 0x80, but only in 32-bit mode. */
14083 case 0x80: /* Grp1 Ib,Eb */
14084 modrm = getIByte(delta);
14085 am_sz = lengthAMode(delta);
14088 d32 = getUChar(delta + am_sz);
14089 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
14092 case 0x81: /* Grp1 Iv,Ev */
14093 modrm = getIByte(delta);
14094 am_sz = lengthAMode(delta);
14096 d32 = getUDisp(d_sz, delta + am_sz);
14097 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
14100 case 0x83: /* Grp1 Ib,Ev */
14101 modrm = getIByte(delta);
14102 am_sz = lengthAMode(delta);
14104 d32 = getSDisp8(delta + am_sz);
14105 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
14108 /* ------------------------ (Grp2 extensions) ---------- */
14110 case 0xC0: { /* Grp2 Ib,Eb */
14111 Bool decode_OK = True;
14112 modrm = getIByte(delta);
14113 am_sz = lengthAMode(delta);
14115 d32 = getUChar(delta + am_sz);
14117 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14118 mkU8(d32 & 0xFF), NULL, &decode_OK );
14120 goto decode_failure;
14123 case 0xC1: { /* Grp2 Ib,Ev */
14124 Bool decode_OK = True;
14125 modrm = getIByte(delta);
14126 am_sz = lengthAMode(delta);
14128 d32 = getUChar(delta + am_sz);
14129 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14130 mkU8(d32 & 0xFF), NULL, &decode_OK );
14132 goto decode_failure;
14135 case 0xD0: { /* Grp2 1,Eb */
14136 Bool decode_OK = True;
14137 modrm = getIByte(delta);
14138 am_sz = lengthAMode(delta);
14142 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14143 mkU8(d32), NULL, &decode_OK );
14145 goto decode_failure;
14148 case 0xD1: { /* Grp2 1,Ev */
14149 Bool decode_OK = True;
14150 modrm = getUChar(delta);
14151 am_sz = lengthAMode(delta);
14154 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14155 mkU8(d32), NULL, &decode_OK );
14157 goto decode_failure;
14160 case 0xD2: { /* Grp2 CL,Eb */
14161 Bool decode_OK = True;
14162 modrm = getUChar(delta);
14163 am_sz = lengthAMode(delta);
14166 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14167 getIReg(1,R_ECX), "%cl", &decode_OK );
14169 goto decode_failure;
14172 case 0xD3: { /* Grp2 CL,Ev */
14173 Bool decode_OK = True;
14174 modrm = getIByte(delta);
14175 am_sz = lengthAMode(delta);
14177 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14178 getIReg(1,R_ECX), "%cl", &decode_OK );
14180 goto decode_failure;
14184 /* ------------------------ (Grp3 extensions) ---------- */
14186 case 0xF6: { /* Grp3 Eb */
14187 Bool decode_OK = True;
14188 delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
14190 goto decode_failure;
14193 case 0xF7: { /* Grp3 Ev */
14194 Bool decode_OK = True;
14195 delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
14197 goto decode_failure;
14201 /* ------------------------ (Grp4 extensions) ---------- */
14203 case 0xFE: { /* Grp4 Eb */
14204 Bool decode_OK = True;
14205 delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
14207 goto decode_failure;
14211 /* ------------------------ (Grp5 extensions) ---------- */
14213 case 0xFF: { /* Grp5 Ev */
14214 Bool decode_OK = True;
14215 delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
14217 goto decode_failure;
14221 /* ------------------------ Escapes to 2-byte opcodes -- */
14224 opc = getIByte(delta); delta++;
14227 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
14229 case 0xBA: { /* Grp8 Ib,Ev */
14230 Bool decode_OK = False;
14231 modrm = getUChar(delta);
14232 am_sz = lengthAMode(delta);
14233 d32 = getSDisp8(delta + am_sz);
14234 delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
14235 am_sz, sz, d32, &decode_OK );
14237 goto decode_failure;
14241 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
14243 case 0xBC: /* BSF Gv,Ev */
14244 delta = dis_bs_E_G ( sorb, sz, delta, True );
14246 case 0xBD: /* BSR Gv,Ev */
14247 delta = dis_bs_E_G ( sorb, sz, delta, False );
14250 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
14252 case 0xC8: /* BSWAP %eax */
14259 case 0xCF: /* BSWAP %edi */
14260 /* AFAICS from the Intel docs, this only exists at size 4. */
14262 t1 = newTemp(Ity_I32);
14263 t2 = newTemp(Ity_I32);
14264 assign( t1, getIReg(4, opc-0xC8) );
14268 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
14270 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
14271 mkU32(0x00FF0000)),
14273 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
14274 mkU32(0x0000FF00)),
14275 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
14276 mkU32(0x000000FF) )
14280 putIReg(4, opc-0xC8, mkexpr(t2));
14281 DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
14284 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
14286 case 0xA3: /* BT Gv,Ev */
14287 delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpNone );
14289 case 0xB3: /* BTR Gv,Ev */
14290 delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpReset );
14292 case 0xAB: /* BTS Gv,Ev */
14293 delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpSet );
14295 case 0xBB: /* BTC Gv,Ev */
14296 delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpComp );
14299 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
14303 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
14304 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
14305 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
14306 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
14307 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
14308 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
14309 case 0x48: /* CMOVSb (cmov negative) */
14310       case 0x49: /* CMOVNSb (cmov not negative) */
14311 case 0x4A: /* CMOVP (cmov parity even) */
14312 case 0x4B: /* CMOVNP (cmov parity odd) */
14313 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
14314 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
14315 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
14316 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
14317 delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
14320 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
14322 case 0xB0: /* CMPXCHG Gb,Eb */
14323 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
14325 case 0xB1: /* CMPXCHG Gv,Ev */
14326 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
14329 case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
14330 IRTemp expdHi = newTemp(Ity_I32);
14331 IRTemp expdLo = newTemp(Ity_I32);
14332 IRTemp dataHi = newTemp(Ity_I32);
14333 IRTemp dataLo = newTemp(Ity_I32);
14334 IRTemp oldHi = newTemp(Ity_I32);
14335 IRTemp oldLo = newTemp(Ity_I32);
14336 IRTemp flags_old = newTemp(Ity_I32);
14337 IRTemp flags_new = newTemp(Ity_I32);
14338 IRTemp success = newTemp(Ity_I1);
14340 /* Translate this using a DCAS, even if there is no LOCK
14341 prefix. Life is too short to bother with generating two
14342 different translations for the with/without-LOCK-prefix
14344 *expect_CAS = True;
14346 /* Decode, and generate address. */
14347 if (sz != 4) goto decode_failure;
14348 modrm = getIByte(delta);
14349 if (epartIsReg(modrm)) goto decode_failure;
14350 if (gregOfRM(modrm) != 1) goto decode_failure;
14351 addr = disAMode ( &alen, sorb, delta, dis_buf );
14354 /* Get the expected and new values. */
14355 assign( expdHi, getIReg(4,R_EDX) );
14356 assign( expdLo, getIReg(4,R_EAX) );
14357 assign( dataHi, getIReg(4,R_ECX) );
14358 assign( dataLo, getIReg(4,R_EBX) );
14362 mkIRCAS( oldHi, oldLo,
14363 Iend_LE, mkexpr(addr),
14364 mkexpr(expdHi), mkexpr(expdLo),
14365 mkexpr(dataHi), mkexpr(dataLo)
14368 /* success when oldHi:oldLo == expdHi:expdLo */
14370 binop(Iop_CasCmpEQ32,
14372 binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
14373 binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
14378 /* If the DCAS is successful, that is to say oldHi:oldLo ==
14379 expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
14380 which is where they came from originally. Both the actual
14381 contents of these two regs, and any shadow values, are
14382 unchanged. If the DCAS fails then we're putting into
14383 EDX:EAX the value seen in memory. */
14385 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
14390 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
14395 /* Copy the success bit into the Z flag and leave the others
14397 assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
14401 binop(Iop_And32, mkexpr(flags_old),
14402 mkU32(~X86G_CC_MASK_Z)),
14405 unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
14406 mkU8(X86G_CC_SHIFT_Z)) ));
14408 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
14409 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
14410 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
14411 /* Set NDEP even though it isn't used. This makes
14412 redundant-PUT elimination of previous stores to this field
14414 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
14416 /* Sheesh. Aren't you glad it was me and not you that had to
14417 write and validate all this grunge? */
14419 DIP("cmpxchg8b %s\n", dis_buf);
14423 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
14425 case 0xA2: { /* CPUID */
14426 /* Uses dirty helper:
14427 void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
14428 declared to mod eax, wr ebx, ecx, edx
14431 HChar* fName = NULL;
14432 void* fAddr = NULL;
14433 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
14434 fName = "x86g_dirtyhelper_CPUID_sse2";
14435 fAddr = &x86g_dirtyhelper_CPUID_sse2;
14438 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
14439 fName = "x86g_dirtyhelper_CPUID_sse1";
14440 fAddr = &x86g_dirtyhelper_CPUID_sse1;
14443 if (archinfo->hwcaps == 0/*no SSE*/) {
14444 fName = "x86g_dirtyhelper_CPUID_sse0";
14445 fAddr = &x86g_dirtyhelper_CPUID_sse0;
14447 vpanic("disInstr(x86)(cpuid)");
14449 vassert(fName); vassert(fAddr);
14450 d = unsafeIRDirty_0_N ( 0/*regparms*/,
14451 fName, fAddr, mkIRExprVec_0() );
14452 /* declare guest state effects */
14453 d->needsBBP = True;
14455 d->fxState[0].fx = Ifx_Modify;
14456 d->fxState[0].offset = OFFB_EAX;
14457 d->fxState[0].size = 4;
14458 d->fxState[1].fx = Ifx_Write;
14459 d->fxState[1].offset = OFFB_EBX;
14460 d->fxState[1].size = 4;
14461 d->fxState[2].fx = Ifx_Modify;
14462 d->fxState[2].offset = OFFB_ECX;
14463 d->fxState[2].size = 4;
14464 d->fxState[3].fx = Ifx_Write;
14465 d->fxState[3].offset = OFFB_EDX;
14466 d->fxState[3].size = 4;
14467 /* execute the dirty call, side-effecting guest state */
14468 stmt( IRStmt_Dirty(d) );
14469 /* CPUID is a serialising insn. So, just in case someone is
14470 using it as a memory fence ... */
14471 stmt( IRStmt_MBE(Imbe_Fence) );
14476 //-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
14477 //-- goto decode_failure;
14479 //-- t1 = newTemp(cb);
14480 //-- t2 = newTemp(cb);
14481 //-- t3 = newTemp(cb);
14482 //-- t4 = newTemp(cb);
14483 //-- uInstr0(cb, CALLM_S, 0);
14485 //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1);
14486 //-- uInstr1(cb, PUSH, 4, TempReg, t1);
14488 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2);
14489 //-- uLiteral(cb, 0);
14490 //-- uInstr1(cb, PUSH, 4, TempReg, t2);
14492 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3);
14493 //-- uLiteral(cb, 0);
14494 //-- uInstr1(cb, PUSH, 4, TempReg, t3);
14496 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
14497 //-- uLiteral(cb, 0);
14498 //-- uInstr1(cb, PUSH, 4, TempReg, t4);
14500 //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID));
14501 //-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
14503 //-- uInstr1(cb, POP, 4, TempReg, t4);
14504 //-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX);
14506 //-- uInstr1(cb, POP, 4, TempReg, t3);
14507 //-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX);
14509 //-- uInstr1(cb, POP, 4, TempReg, t2);
14510 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX);
14512 //-- uInstr1(cb, POP, 4, TempReg, t1);
14513 //-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX);
14515 //-- uInstr0(cb, CALLM_E, 0);
14516 //-- DIP("cpuid\n");
14519 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
14521 case 0xB6: /* MOVZXb Eb,Gv */
14522 if (sz != 2 && sz != 4)
14523 goto decode_failure;
14524 delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
14527 case 0xB7: /* MOVZXw Ew,Gv */
14529 goto decode_failure;
14530 delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
14533 case 0xBE: /* MOVSXb Eb,Gv */
14534 if (sz != 2 && sz != 4)
14535 goto decode_failure;
14536 delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
14539 case 0xBF: /* MOVSXw Ew,Gv */
14541 goto decode_failure;
14542 delta = dis_movx_E_G ( sorb, delta, 2, 4, True );
14545 //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
14547 //-- case 0xC3: /* MOVNTI Gv,Ev */
14548 //-- vg_assert(sz == 4);
14549 //-- modrm = getUChar(eip);
14550 //-- vg_assert(!epartIsReg(modrm));
14551 //-- t1 = newTemp(cb);
14552 //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
14553 //-- pair = disAMode ( cb, sorb, eip, dis_buf );
14554 //-- t2 = LOW24(pair);
14555 //-- eip += HI8(pair);
14556 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
14557 //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
14560 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
14562 case 0xAF: /* IMUL Ev, Gv */
14563 delta = dis_mul_E_G ( sorb, sz, delta );
14566 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
14569 modrm = getUChar(delta);
14570 if (epartIsReg(modrm)) goto decode_failure;
14571 addr = disAMode ( &alen, sorb, delta, dis_buf );
14573 DIP("nop%c %s\n", nameISize(sz), dis_buf);
14576 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
14579 case 0x82: /* JBb/JNAEb (jump below) */
14580 case 0x83: /* JNBb/JAEb (jump not below) */
14581 case 0x84: /* JZb/JEb (jump zero) */
14582 case 0x85: /* JNZb/JNEb (jump not zero) */
14583 case 0x86: /* JBEb/JNAb (jump below or equal) */
14584 case 0x87: /* JNBEb/JAb (jump not below or equal) */
14585 case 0x88: /* JSb (jump negative) */
14586       case 0x89: /* JNSb (jump not negative) */
14587 case 0x8A: /* JP (jump parity even) */
14588 case 0x8B: /* JNP/JPO (jump parity odd) */
14589 case 0x8C: /* JLb/JNGEb (jump less) */
14590 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
14591 case 0x8E: /* JLEb/JNGb (jump less or equal) */
14592 case 0x8F: /* JGb/JNLEb (jump greater) */
14594 HChar* comment = "";
14595 jmpDelta = (Int)getUDisp32(delta);
14596 d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
14599 && vex_control.guest_chase_cond
14600 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
14602 && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
14603 /* Speculation: assume this backward branch is taken. So
14604 we need to emit a side-exit to the insn following this
14605 one, on the negation of the condition, and continue at
14606 the branch target address (d32). If we wind up back at
14607 the first instruction of the trace, just stop; it's
14608 better to let the IR loop unroller handle that case.*/
14610 mk_x86g_calculate_condition((X86Condcode)
14611 (1 ^ (opc - 0x80))),
14613 IRConst_U32(guest_EIP_bbstart+delta) ) );
14614 dres.whatNext = Dis_ResteerC;
14615 dres.continueAt = (Addr64)(Addr32)d32;
14616 comment = "(assumed taken)";
14620 && vex_control.guest_chase_cond
14621 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
14623 && resteerOkFn( callback_opaque,
14624 (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
14625 /* Speculation: assume this forward branch is not taken.
14626 So we need to emit a side-exit to d32 (the dest) and
14627 continue disassembling at the insn immediately
14628 following this one. */
14630 mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
14632 IRConst_U32(d32) ) );
14633 dres.whatNext = Dis_ResteerC;
14634 dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
14635 comment = "(assumed not taken)";
14638 /* Conservative default translation - end the block at
14640 jcc_01( (X86Condcode)(opc - 0x80),
14641 (Addr32)(guest_EIP_bbstart+delta), d32);
14642 dres.whatNext = Dis_StopHere;
14644 DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
14648 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
14649 case 0x31: { /* RDTSC */
14650 IRTemp val = newTemp(Ity_I64);
14651 IRExpr** args = mkIRExprVec_0();
14652 IRDirty* d = unsafeIRDirty_1_N (
14655 "x86g_dirtyhelper_RDTSC",
14656 &x86g_dirtyhelper_RDTSC,
14659 /* execute the dirty call, dumping the result in val. */
14660 stmt( IRStmt_Dirty(d) );
14661 putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
14662 putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
14667 /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
14669 case 0xA1: /* POP %FS */
14670 dis_pop_segreg( R_FS, sz ); break;
14671 case 0xA9: /* POP %GS */
14672 dis_pop_segreg( R_GS, sz ); break;
14674 case 0xA0: /* PUSH %FS */
14675 dis_push_segreg( R_FS, sz ); break;
14676 case 0xA8: /* PUSH %GS */
14677 dis_push_segreg( R_GS, sz ); break;
14679 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
14682 case 0x92: /* set-Bb/set-NAEb (jump below) */
14683 case 0x93: /* set-NBb/set-AEb (jump not below) */
14684 case 0x94: /* set-Zb/set-Eb (jump zero) */
14685 case 0x95: /* set-NZb/set-NEb (jump not zero) */
14686 case 0x96: /* set-BEb/set-NAb (jump below or equal) */
14687 case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */
14688 case 0x98: /* set-Sb (jump negative) */
14689       case 0x99: /* set-NSb (set if not negative) */
14690 case 0x9A: /* set-P (jump parity even) */
14691 case 0x9B: /* set-NP (jump parity odd) */
14692 case 0x9C: /* set-Lb/set-NGEb (jump less) */
14693 case 0x9D: /* set-GEb/set-NLb (jump greater or equal) */
14694 case 0x9E: /* set-LEb/set-NGb (jump less or equal) */
14695 case 0x9F: /* set-Gb/set-NLEb (jump greater) */
14696 t1 = newTemp(Ity_I8);
14697 assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
14698 modrm = getIByte(delta);
14699 if (epartIsReg(modrm)) {
14701 putIReg(1, eregOfRM(modrm), mkexpr(t1));
14702 DIP("set%s %s\n", name_X86Condcode(opc-0x90),
14703 nameIReg(1,eregOfRM(modrm)));
14705 addr = disAMode ( &alen, sorb, delta, dis_buf );
14707 storeLE( mkexpr(addr), mkexpr(t1) );
14708 DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
14712 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
14714 case 0xA4: /* SHLDv imm8,Gv,Ev */
14715 modrm = getIByte(delta);
14716 d32 = delta + lengthAMode(delta);
14717 vex_sprintf(dis_buf, "$%d", getIByte(d32));
14718 delta = dis_SHLRD_Gv_Ev (
14719 sorb, delta, modrm, sz,
14720 mkU8(getIByte(d32)), True, /* literal */
14723 case 0xA5: /* SHLDv %cl,Gv,Ev */
14724 modrm = getIByte(delta);
14725 delta = dis_SHLRD_Gv_Ev (
14726 sorb, delta, modrm, sz,
14727 getIReg(1,R_ECX), False, /* not literal */
14731 case 0xAC: /* SHRDv imm8,Gv,Ev */
14732 modrm = getIByte(delta);
14733 d32 = delta + lengthAMode(delta);
14734 vex_sprintf(dis_buf, "$%d", getIByte(d32));
14735 delta = dis_SHLRD_Gv_Ev (
14736 sorb, delta, modrm, sz,
14737 mkU8(getIByte(d32)), True, /* literal */
14740 case 0xAD: /* SHRDv %cl,Gv,Ev */
14741 modrm = getIByte(delta);
14742 delta = dis_SHLRD_Gv_Ev (
14743 sorb, delta, modrm, sz,
14744 getIReg(1,R_ECX), False, /* not literal */
14748 /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */
14751       /* Simple implementation needing a long explanation.
14753 sysenter is a kind of syscall entry. The key thing here
14754 is that the return address is not known -- that is
14755 something that is beyond Vex's knowledge. So this IR
14756 forces a return to the scheduler, which can do what it
14757          likes to simulate the sysenter, but it MUST set this
14758 thread's guest_EIP field with the continuation address
14759 before resuming execution. If that doesn't happen, the
14760 thread will jump to address zero, which is probably
14764 /* Note where we are, so we can back up the guest to this
14765 point if the syscall needs to be restarted. */
14766 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
14767 mkU32(guest_EIP_curr_instr) ) );
14768 jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
14769 dres.whatNext = Dis_StopHere;
14773 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
14775 case 0xC0: { /* XADD Gb,Eb */
14777 delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
14778 if (!decodeOK) goto decode_failure;
14781 case 0xC1: { /* XADD Gv,Ev */
14783 delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
14784 if (!decodeOK) goto decode_failure;
14788 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
14792 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
14794 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
14795 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
14796 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
14797 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
14801 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
14804 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
14807 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
14811 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
14814 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
14817 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
14819 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
14820 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
14822 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
14826 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
14830 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
14832 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
14833 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
14834 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
14838 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
14842 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
14844 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
14845 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
14846 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
14847 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
14849 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
14853 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
14857 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
14860 Int delta0 = delta-1;
14861 Bool decode_OK = False;
14863 /* If sz==2 this is SSE, and we assume sse idec has
14864 already spotted those cases by now. */
14866 goto decode_failure;
14868 delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
14871 goto decode_failure;
14876 case 0x77: /* EMMS */
14878 goto decode_failure;
14879 do_EMMS_preamble();
14883 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
14886 goto decode_failure;
14887 } /* switch (opc) for the 2-byte opcodes */
14888 goto decode_success;
14889 } /* case 0x0F: of primary opcode */
14891 /* ------------------------ ??? ------------------------ */
14895 /* All decode failures end up here. */
14896 vex_printf("vex x86->IR: unhandled instruction bytes: "
14897 "0x%x 0x%x 0x%x 0x%x\n",
14898 (Int)getIByte(delta_start+0),
14899 (Int)getIByte(delta_start+1),
14900 (Int)getIByte(delta_start+2),
14901 (Int)getIByte(delta_start+3) );
14903 /* Tell the dispatcher that this insn cannot be decoded, and so has
14904 not been executed, and (is currently) the next to be executed.
14905       EIP should be up-to-date since it was made so at the start of each
14906 insn, but nevertheless be paranoid and update it again right
14908 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
14909 jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
14910 dres.whatNext = Dis_StopHere;
14912 /* We also need to say that a CAS is not expected now, regardless
14913 of what it might have been set to at the start of the function,
14914       since the IR that we've emitted just above (to synthesise a
14915 SIGILL) does not involve any CAS, and presumably no other IR has
14916 been emitted for this (non-decoded) insn. */
14917 *expect_CAS = False;
14920 } /* switch (opc) for the main (primary) opcode switch. */
14923 /* All decode successes end up here. */
14925 dres.len = delta - delta_start;
14933 /*------------------------------------------------------------*/
14934 /*--- Top-level fn ---*/
14935 /*------------------------------------------------------------*/
14937 /* Disassemble a single instruction into IR. The instruction
14938 is located in host memory at &guest_code[delta]. */
14940 DisResult disInstr_X86 ( IRSB* irsb_IN,
14942 Bool (*resteerOkFn) ( void*, Addr64 ),
14944 void* callback_opaque,
14945 UChar* guest_code_IN,
14948 VexArch guest_arch,
14949 VexArchInfo* archinfo,
14950 VexAbiInfo* abiinfo,
14951 Bool host_bigendian_IN )
14954 Bool expect_CAS, has_CAS;
14957 /* Set globals (see top of this file) */
14958 vassert(guest_arch == VexArchX86);
14959 guest_code = guest_code_IN;
14961 host_is_bigendian = host_bigendian_IN;
14962 guest_EIP_curr_instr = (Addr32)guest_IP;
14963 guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta);
14965 x1 = irsb_IN->stmts_used;
14966 expect_CAS = False;
14967 dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
14969 callback_opaque, delta, archinfo );
14970 x2 = irsb_IN->stmts_used;
14973 /* See comment at the top of disInstr_X86_WRK for meaning of
14974 expect_CAS. Here, we (sanity-)check for the presence/absence of
14975 IRCAS as directed by the returned expect_CAS value. */
14977 for (i = x1; i < x2; i++) {
14978 if (irsb_IN->stmts[i]->tag == Ist_CAS)
14982 if (expect_CAS != has_CAS) {
14983 /* inconsistency detected. re-disassemble the instruction so as
14984 to generate a useful error message; then assert. */
14985 vex_traceflags |= VEX_TRACE_FE;
14986 dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
14988 callback_opaque, delta, archinfo );
14989 for (i = x1; i < x2; i++) {
14990 vex_printf("\t\t");
14991 ppIRStmt(irsb_IN->stmts[i]);
14994 /* Failure of this assertion is serious and denotes a bug in
14996 vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
15003 /*--------------------------------------------------------------------*/
15004 /*--- end guest_x86_toIR.c ---*/
15005 /*--------------------------------------------------------------------*/