2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_x86_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 /* Translates x86 code to IR. */
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 32-bit value is being written.
43 FUCOMI(P): what happens to A and S flags? Currently are forced
48 * all arithmetic done at 64 bits
50 * no FP exceptions, except for handling stack over/underflow
52 * FP rounding mode observed only for float->int conversions
53 and int->float conversions which could lose accuracy, and
54 for float-to-float rounding. For all other operations,
55 round-to-nearest is used, regardless.
57 * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
58 simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
61 * some of the FCOM cases could do with testing -- not convinced
62 that the args are the right way round.
64 * FSAVE does not re-initialise the FPU; it should do
66 * FINIT not only initialises the FPU environment, it also
67 zeroes all the FP registers. It should leave the registers
70 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
71 per Intel docs this bit has no meaning anyway. Since PUSHF is the
72 only way to observe eflags[1], a proper fix would be to make that
75 The state of %eflags.AC (alignment check, bit 18) is recorded by
76 the simulation (viz, if you set it with popf then a pushf produces
77 the value you set it to), but it is otherwise ignored. In
78 particular, setting it to 1 does NOT cause alignment checking to
79 happen. Programs that set it to 1 and then rely on the resulting
80 SIGBUSs to inform them of misaligned accesses will not work.
82 Implementation of sysenter is necessarily partial. sysenter is a
83 kind of system call entry. When doing a sysenter, the return
84 address is not known -- that is something that is beyond Vex's
85 knowledge. So the generated IR forces a return to the scheduler,
86 which can do what it likes to simulate the sysenter, but it MUST
87 set this thread's guest_EIP field with the continuation address
88 before resuming execution. If that doesn't happen, the thread will
89 jump to address zero, which is probably fatal.
91 This module uses global variables and so is not MT-safe (if that
92 should ever become relevant).
94 The delta values are 32-bit ints, not 64-bit ints. That means
95 this module may not work right if run on a 64-bit host. That should
96 be fixed properly, really -- if anyone ever wants to use Vex to
97 translate x86 code for execution on a 64-bit host.
99 casLE (implementation of lock-prefixed insns) and rep-prefixed
100 insns: the side-exit back to the start of the insn is done with
101 Ijk_Boring. This is quite wrong, it should be done with
102 Ijk_NoRedir, since otherwise the side exit, which is intended to
103 restart the instruction for whatever reason, could go somewhere
104 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
105 no-redir jumps performance critical, at least for rep-prefixed
106 instructions, since all iterations thereof would involve such a
107 jump. It's not such a big deal with casLE since the side exit is
108 only taken if the CAS fails, that is, the location is contended,
109 which is relatively unlikely.
111 XXXX: Nov 2009: handling of SWP on ARM suffers from the same
114 Note also, the test for CAS success vs failure is done using
115 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
116 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
117 shouldn't definedness-check these comparisons. See
118 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
119 background/rationale.
122 /* Performance holes:
124 - fcom ; fstsw %ax ; sahf
125 sahf does not update the O flag (sigh) and so O needs to
126 be computed. This is done expensively; it would be better
127 to have a calculate_eflags_o helper.
129 - emwarns; some FP codes can generate huge numbers of these
130 if the fpucw is changed in an inner loop. It would be
131 better for the guest state to have an emwarn-enable reg
132 which can be set zero or nonzero. If it is zero, emwarns
133 are not flagged, and instead control just flows all the
134 way through bbs as usual.
137 /* "Special" instructions.
139 This instruction decoder can decode three special instructions
140 which mean nothing natively (are no-ops as far as regs/mem are
141 concerned) but have meaning for supporting Valgrind. A special
142 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
143 C1C713 (in the standard interpretation, that means: roll $3, %edi;
144 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
145 one of the following 3 are allowed (standard interpretation in
148 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
149 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
150 87D2 (xchgl %edx,%edx) call-noredir *%EAX
152 Any other bytes following the 12-byte preamble are illegal and
153 constitute a failure in instruction decoding. This all assumes
154 that the preamble will never occur except in specific code
155 fragments designed for Valgrind to catch.
157 No prefixes may precede a "Special" instruction.
160 /* LOCK prefixed instructions. These are translated using IR-level
161 CAS statements (IRCAS) and are believed to preserve atomicity, even
162 from the point of view of some other process racing against a
163 simulated one (presumably they communicate via a shared memory
166 Handlers which are aware of LOCK prefixes are:
167 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
168 dis_cmpxchg_G_E (cmpxchg)
169 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
173 dis_Grp8_Imm (bts, btc, btr)
174 dis_bt_G_E (bts, btc, btr)
179 #include "libvex_basictypes.h"
180 #include "libvex_ir.h"
182 #include "libvex_guest_x86.h"
184 #include "main_util.h"
185 #include "main_globals.h"
186 #include "guest_generic_bb_to_IR.h"
187 #include "guest_generic_x87.h"
188 #include "guest_x86_defs.h"
191 /*------------------------------------------------------------*/
193 /*------------------------------------------------------------*/
195 /* These are set at the start of the translation of an insn, right
196 down in disInstr_X86, so that we don't have to pass them around
197 endlessly. They are all constant during the translation of any
200 /* We need to know this to do sub-register accesses correctly. */
201 static Bool host_is_bigendian;
203 /* Pointer to the guest code area (points to start of BB, not to the
204 insn being processed). */
205 static UChar* guest_code;
207 /* The guest address corresponding to guest_code[0]. */
208 static Addr32 guest_EIP_bbstart;
210 /* The guest address for the instruction currently being
212 static Addr32 guest_EIP_curr_instr;
214 /* The IRSB* into which we're generating code. */
218 /*------------------------------------------------------------*/
219 /*--- Debugging output ---*/
220 /*------------------------------------------------------------*/
/* Trace-output macros for the front end.  Active only when
   VEX_TRACE_FE is set in vex_traceflags.  Wrapped in do/while(0) so
   each macro expands to a single statement; the bare if-form would
   create a dangling-else hazard at call sites. */
#define DIP(format, args...)                       \
   do {                                            \
      if (vex_traceflags & VEX_TRACE_FE)           \
         vex_printf(format, ## args);              \
   } while (0)

#define DIS(buf, format, args...)                  \
   do {                                            \
      if (vex_traceflags & VEX_TRACE_FE)           \
         vex_sprintf(buf, format, ## args);        \
   } while (0)
/*------------------------------------------------------------*/
/*--- Offsets of various parts of the x86 guest state.     ---*/
/*------------------------------------------------------------*/

/* Integer registers. */
#define OFFB_EAX       offsetof(VexGuestX86State,guest_EAX)
#define OFFB_EBX       offsetof(VexGuestX86State,guest_EBX)
#define OFFB_ECX       offsetof(VexGuestX86State,guest_ECX)
#define OFFB_EDX       offsetof(VexGuestX86State,guest_EDX)
#define OFFB_ESP       offsetof(VexGuestX86State,guest_ESP)
#define OFFB_EBP       offsetof(VexGuestX86State,guest_EBP)
#define OFFB_ESI       offsetof(VexGuestX86State,guest_ESI)
#define OFFB_EDI       offsetof(VexGuestX86State,guest_EDI)

#define OFFB_EIP       offsetof(VexGuestX86State,guest_EIP)

/* Condition-code thunk. */
#define OFFB_CC_OP     offsetof(VexGuestX86State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestX86State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestX86State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestX86State,guest_CC_NDEP)

/* x87 FPU state and misc flag bits. */
#define OFFB_FPREGS    offsetof(VexGuestX86State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestX86State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestX86State,guest_DFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestX86State,guest_IDFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestX86State,guest_ACFLAG)
#define OFFB_FTOP      offsetof(VexGuestX86State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestX86State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestX86State,guest_FPROUND)

/* Segment registers and descriptor-table pointers. */
#define OFFB_CS        offsetof(VexGuestX86State,guest_CS)
#define OFFB_DS        offsetof(VexGuestX86State,guest_DS)
#define OFFB_ES        offsetof(VexGuestX86State,guest_ES)
#define OFFB_FS        offsetof(VexGuestX86State,guest_FS)
#define OFFB_GS        offsetof(VexGuestX86State,guest_GS)
#define OFFB_SS        offsetof(VexGuestX86State,guest_SS)
#define OFFB_LDT       offsetof(VexGuestX86State,guest_LDT)
#define OFFB_GDT       offsetof(VexGuestX86State,guest_GDT)

/* SSE state. */
#define OFFB_SSEROUND  offsetof(VexGuestX86State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestX86State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestX86State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestX86State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestX86State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestX86State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestX86State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestX86State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestX86State,guest_XMM7)

/* Emulation-warning and translation-invalidation plumbing. */
#define OFFB_EMWARN    offsetof(VexGuestX86State,guest_EMWARN)

#define OFFB_TISTART   offsetof(VexGuestX86State,guest_TISTART)
#define OFFB_TILEN     offsetof(VexGuestX86State,guest_TILEN)
#define OFFB_NRADDR    offsetof(VexGuestX86State,guest_NRADDR)

#define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)
288 /*------------------------------------------------------------*/
289 /*--- Helper bits and pieces for deconstructing the ---*/
290 /*--- x86 insn stream. ---*/
291 /*------------------------------------------------------------*/
/* This is the Intel register encoding -- integer regs.  These are
   the standard 3-bit encodings used in modRM reg/rm fields. */
#define R_EAX 0
#define R_ECX 1
#define R_EDX 2
#define R_EBX 3
#define R_ESP 4
#define R_EBP 5
#define R_ESI 6
#define R_EDI 7

/* 8-bit sub-register encodings: %al is the low byte of %eax, %ah the
   next byte up (encodings 4..7 name the high bytes of regs 0..3). */
#define R_AL (0+R_EAX)
#define R_AH (4+R_EAX)

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5
315 /* Add a statement to the list held by "irbb". */
316 static void stmt ( IRStmt* st )
318 addStmtToIRSB( irsb, st );
321 /* Generate a new temporary of the given type. */
322 static IRTemp newTemp ( IRType ty )
324 vassert(isPlausibleIRType(ty));
325 return newIRTemp( irsb->tyenv, ty );
328 /* Various simple conversions */
330 static UInt extend_s_8to32( UInt x )
332 return (UInt)((((Int)x) << 24) >> 24);
335 static UInt extend_s_16to32 ( UInt x )
337 return (UInt)((((Int)x) << 16) >> 16);
340 /* Fetch a byte from the guest insn stream. */
341 static UChar getIByte ( Int delta )
343 return guest_code[delta];
346 /* Extract the reg field from a modRM byte. */
347 static Int gregOfRM ( UChar mod_reg_rm )
349 return (Int)( (mod_reg_rm >> 3) & 7 );
352 /* Figure out whether the mod and rm parts of a modRM byte refer to a
353 register or memory. If so, the byte will have the form 11XXXYYY,
354 where YYY is the register number. */
355 static Bool epartIsReg ( UChar mod_reg_rm )
357 return toBool(0xC0 == (mod_reg_rm & 0xC0));
360 /* ... and extract the register number ... */
361 static Int eregOfRM ( UChar mod_reg_rm )
363 return (Int)(mod_reg_rm & 0x7);
366 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
368 static UChar getUChar ( Int delta )
370 UChar v = guest_code[delta+0];
374 static UInt getUDisp16 ( Int delta )
376 UInt v = guest_code[delta+1]; v <<= 8;
377 v |= guest_code[delta+0];
381 static UInt getUDisp32 ( Int delta )
383 UInt v = guest_code[delta+3]; v <<= 8;
384 v |= guest_code[delta+2]; v <<= 8;
385 v |= guest_code[delta+1]; v <<= 8;
386 v |= guest_code[delta+0];
390 static UInt getUDisp ( Int size, Int delta )
393 case 4: return getUDisp32(delta);
394 case 2: return getUDisp16(delta);
395 case 1: return (UInt)getUChar(delta);
396 default: vpanic("getUDisp(x86)");
398 return 0; /*notreached*/
402 /* Get a byte value out of the insn stream and sign-extend to 32
404 static UInt getSDisp8 ( Int delta )
406 return extend_s_8to32( (UInt) (guest_code[delta]) );
409 static UInt getSDisp16 ( Int delta0 )
411 UChar* eip = (UChar*)(&guest_code[delta0]);
413 d |= ((*eip++) << 8);
414 return extend_s_16to32(d);
417 static UInt getSDisp ( Int size, Int delta )
420 case 4: return getUDisp32(delta);
421 case 2: return getSDisp16(delta);
422 case 1: return getSDisp8(delta);
423 default: vpanic("getSDisp(x86)");
425 return 0; /*notreached*/
429 /*------------------------------------------------------------*/
430 /*--- Helpers for constructing IR. ---*/
431 /*------------------------------------------------------------*/
433 /* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit
434 register references, we need to take the host endianness into
435 account. Supplied value is 0 .. 7 and in the Intel instruction
438 static IRType szToITy ( Int n )
441 case 1: return Ity_I8;
442 case 2: return Ity_I16;
443 case 4: return Ity_I32;
444 default: vpanic("szToITy(x86)");
448 /* On a little-endian host, less significant bits of the guest
449 registers are at lower addresses. Therefore, if a reference to a
450 register low half has the safe guest state offset as a reference to
453 static Int integerGuestRegOffset ( Int sz, UInt archreg )
455 vassert(archreg < 8);
457 /* Correct for little-endian host only. */
458 vassert(!host_is_bigendian);
460 if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
462 case R_EAX: return OFFB_EAX;
463 case R_EBX: return OFFB_EBX;
464 case R_ECX: return OFFB_ECX;
465 case R_EDX: return OFFB_EDX;
466 case R_ESI: return OFFB_ESI;
467 case R_EDI: return OFFB_EDI;
468 case R_ESP: return OFFB_ESP;
469 case R_EBP: return OFFB_EBP;
470 default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
474 vassert(archreg >= 4 && archreg < 8 && sz == 1);
476 case R_EAX: return 1+ OFFB_EAX;
477 case R_EBX: return 1+ OFFB_EBX;
478 case R_ECX: return 1+ OFFB_ECX;
479 case R_EDX: return 1+ OFFB_EDX;
480 default: vpanic("integerGuestRegOffset(x86,le)(1h)");
484 vpanic("integerGuestRegOffset(x86,le)");
487 static Int segmentGuestRegOffset ( UInt sreg )
490 case R_ES: return OFFB_ES;
491 case R_CS: return OFFB_CS;
492 case R_SS: return OFFB_SS;
493 case R_DS: return OFFB_DS;
494 case R_FS: return OFFB_FS;
495 case R_GS: return OFFB_GS;
496 default: vpanic("segmentGuestRegOffset(x86)");
500 static Int xmmGuestRegOffset ( UInt xmmreg )
503 case 0: return OFFB_XMM0;
504 case 1: return OFFB_XMM1;
505 case 2: return OFFB_XMM2;
506 case 3: return OFFB_XMM3;
507 case 4: return OFFB_XMM4;
508 case 5: return OFFB_XMM5;
509 case 6: return OFFB_XMM6;
510 case 7: return OFFB_XMM7;
511 default: vpanic("xmmGuestRegOffset");
515 /* Lanes of vector registers are always numbered from zero being the
516 least significant lane (rightmost in the register). */
518 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
520 /* Correct for little-endian host only. */
521 vassert(!host_is_bigendian);
522 vassert(laneno >= 0 && laneno < 8);
523 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
526 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
528 /* Correct for little-endian host only. */
529 vassert(!host_is_bigendian);
530 vassert(laneno >= 0 && laneno < 4);
531 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
534 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
536 /* Correct for little-endian host only. */
537 vassert(!host_is_bigendian);
538 vassert(laneno >= 0 && laneno < 2);
539 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
542 static IRExpr* getIReg ( Int sz, UInt archreg )
544 vassert(sz == 1 || sz == 2 || sz == 4);
545 vassert(archreg < 8);
546 return IRExpr_Get( integerGuestRegOffset(sz,archreg),
550 /* Ditto, but write to a reg instead. */
551 static void putIReg ( Int sz, UInt archreg, IRExpr* e )
553 IRType ty = typeOfIRExpr(irsb->tyenv, e);
555 case 1: vassert(ty == Ity_I8); break;
556 case 2: vassert(ty == Ity_I16); break;
557 case 4: vassert(ty == Ity_I32); break;
558 default: vpanic("putIReg(x86)");
560 vassert(archreg < 8);
561 stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
564 static IRExpr* getSReg ( UInt sreg )
566 return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
569 static void putSReg ( UInt sreg, IRExpr* e )
571 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
572 stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
575 static IRExpr* getXMMReg ( UInt xmmreg )
577 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
580 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
582 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
585 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
587 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
590 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
592 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
595 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
597 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
600 static void putXMMReg ( UInt xmmreg, IRExpr* e )
602 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
603 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
606 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
608 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
609 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
612 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
614 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
615 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
618 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
620 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
621 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
624 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
626 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
627 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
630 static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
632 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
633 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
636 static void assign ( IRTemp dst, IRExpr* e )
638 stmt( IRStmt_WrTmp(dst, e) );
641 static void storeLE ( IRExpr* addr, IRExpr* data )
643 stmt( IRStmt_Store(Iend_LE, addr, data) );
646 static IRExpr* unop ( IROp op, IRExpr* a )
648 return IRExpr_Unop(op, a);
651 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
653 return IRExpr_Binop(op, a1, a2);
656 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
658 return IRExpr_Triop(op, a1, a2, a3);
661 static IRExpr* mkexpr ( IRTemp tmp )
663 return IRExpr_RdTmp(tmp);
666 static IRExpr* mkU8 ( UInt i )
669 return IRExpr_Const(IRConst_U8( (UChar)i ));
672 static IRExpr* mkU16 ( UInt i )
675 return IRExpr_Const(IRConst_U16( (UShort)i ));
678 static IRExpr* mkU32 ( UInt i )
680 return IRExpr_Const(IRConst_U32(i));
683 static IRExpr* mkU64 ( ULong i )
685 return IRExpr_Const(IRConst_U64(i));
688 static IRExpr* mkU ( IRType ty, UInt i )
690 if (ty == Ity_I8) return mkU8(i);
691 if (ty == Ity_I16) return mkU16(i);
692 if (ty == Ity_I32) return mkU32(i);
693 /* If this panics, it usually means you passed a size (1,2,4)
694 value as the IRType, rather than a real IRType. */
698 static IRExpr* mkV128 ( UShort mask )
700 return IRExpr_Const(IRConst_V128(mask));
703 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
705 return IRExpr_Load(Iend_LE, ty, addr);
708 static IROp mkSizedOp ( IRType ty, IROp op8 )
711 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
712 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
714 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
715 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
716 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
717 || op8 == Iop_CasCmpNE8
719 adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
723 static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
725 if (szSmall == 1 && szBig == 4) {
726 return signd ? Iop_8Sto32 : Iop_8Uto32;
728 if (szSmall == 1 && szBig == 2) {
729 return signd ? Iop_8Sto16 : Iop_8Uto16;
731 if (szSmall == 2 && szBig == 4) {
732 return signd ? Iop_16Sto32 : Iop_16Uto32;
734 vpanic("mkWidenOp(x86,guest)");
737 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
739 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
740 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
741 return unop(Iop_32to1,
744 unop(Iop_1Uto32,y)));
747 /* Generate a compare-and-swap operation, operating on memory at
748 'addr'. The expected value is 'expVal' and the new value is
749 'newVal'. If the operation fails, then transfer control (with a
750 no-redir jump (XXX no -- see comment at top of this file)) to
751 'restart_point', which is presumably the address of the guest
752 instruction again -- retrying, essentially. */
753 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
754 Addr32 restart_point )
757 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
758 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
759 IRTemp oldTmp = newTemp(tyE);
760 IRTemp expTmp = newTemp(tyE);
762 vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
763 assign(expTmp, expVal);
764 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
765 NULL, mkexpr(expTmp), NULL, newVal );
766 stmt( IRStmt_CAS(cas) );
768 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
769 mkexpr(oldTmp), mkexpr(expTmp) ),
770 Ijk_Boring, /*Ijk_NoRedir*/
771 IRConst_U32( restart_point )
776 /*------------------------------------------------------------*/
777 /*--- Helpers for %eflags. ---*/
778 /*------------------------------------------------------------*/
780 /* -------------- Evaluating the flags-thunk. -------------- */
782 /* Build IR to calculate all the eflags from stored
783 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
785 static IRExpr* mk_x86g_calculate_eflags_all ( void )
788 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
789 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
790 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
791 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
796 "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
799 /* Exclude OP and NDEP from definedness checking. We're only
800 interested in DEP1 and DEP2. */
801 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
805 /* Build IR to calculate some particular condition from stored
806 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
808 static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
811 = mkIRExprVec_5( mkU32(cond),
812 IRExpr_Get(OFFB_CC_OP, Ity_I32),
813 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
814 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
815 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
820 "x86g_calculate_condition", &x86g_calculate_condition,
823 /* Exclude the requested condition, OP and NDEP from definedness
824 checking. We're only interested in DEP1 and DEP2. */
825 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
826 return unop(Iop_32to1, call);
829 /* Build IR to calculate just the carry flag from stored
830 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */
831 static IRExpr* mk_x86g_calculate_eflags_c ( void )
834 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
835 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
836 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
837 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
842 "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
845 /* Exclude OP and NDEP from definedness checking. We're only
846 interested in DEP1 and DEP2. */
847 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
852 /* -------------- Building the flags-thunk. -------------- */
854 /* The machinery in this section builds the flag-thunk following a
855 flag-setting operation. Hence the various setFlags_* functions.
858 static Bool isAddSub ( IROp op8 )
860 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
863 static Bool isLogic ( IROp op8 )
865 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
868 /* U-widen 8/16/32 bit int expr to 32. */
869 static IRExpr* widenUto32 ( IRExpr* e )
871 switch (typeOfIRExpr(irsb->tyenv,e)) {
872 case Ity_I32: return e;
873 case Ity_I16: return unop(Iop_16Uto32,e);
874 case Ity_I8: return unop(Iop_8Uto32,e);
875 default: vpanic("widenUto32");
879 /* S-widen 8/16/32 bit int expr to 32. */
880 static IRExpr* widenSto32 ( IRExpr* e )
882 switch (typeOfIRExpr(irsb->tyenv,e)) {
883 case Ity_I32: return e;
884 case Ity_I16: return unop(Iop_16Sto32,e);
885 case Ity_I8: return unop(Iop_8Sto32,e);
886 default: vpanic("widenSto32");
890 /* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some
891 of these combinations make sense. */
892 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
894 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
895 if (src_ty == dst_ty)
897 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
898 return unop(Iop_32to16, e);
899 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
900 return unop(Iop_32to8, e);
902 vex_printf("\nsrc, dst tys are: ");
907 vpanic("narrowTo(x86)");
911 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
912 auto-sized up to the real op. */
915 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
917 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
919 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
922 case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
923 case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
924 default: ppIROp(op8);
925 vpanic("setFlags_DEP1_DEP2(x86)");
927 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
928 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
929 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
930 /* Set NDEP even though it isn't used. This makes redundant-PUT
931 elimination of previous stores to this field work better. */
932 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
936 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
939 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
941 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
943 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
948 case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
949 default: ppIROp(op8);
950 vpanic("setFlags_DEP1(x86)");
952 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
953 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
954 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
955 /* Set NDEP even though it isn't used. This makes redundant-PUT
956 elimination of previous stores to this field work better. */
957 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
961 /* For shift operations, we put in the result and the undershifted
962 result. Except if the shift amount is zero, the thunk is left
965 static void setFlags_DEP1_DEP2_shift ( IROp op32,
971 Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);
973 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
976 /* Both kinds of right shifts are handled by the same thunk
980 case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
981 case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
982 default: ppIROp(op32);
983 vpanic("setFlags_DEP1_DEP2_shift(x86)");
986 /* DEP1 contains the result, DEP2 contains the undershifted value. */
987 stmt( IRStmt_Put( OFFB_CC_OP,
988 IRExpr_Mux0X( mkexpr(guard),
989 IRExpr_Get(OFFB_CC_OP,Ity_I32),
991 stmt( IRStmt_Put( OFFB_CC_DEP1,
992 IRExpr_Mux0X( mkexpr(guard),
993 IRExpr_Get(OFFB_CC_DEP1,Ity_I32),
994 widenUto32(mkexpr(res)))) );
995 stmt( IRStmt_Put( OFFB_CC_DEP2,
996 IRExpr_Mux0X( mkexpr(guard),
997 IRExpr_Get(OFFB_CC_DEP2,Ity_I32),
998 widenUto32(mkexpr(resUS)))) );
999 /* Set NDEP even though it isn't used. This makes redundant-PUT
1000 elimination of previous stores to this field work better. */
1001 stmt( IRStmt_Put( OFFB_CC_NDEP,
1002 IRExpr_Mux0X( mkexpr(guard),
1003 IRExpr_Get(OFFB_CC_NDEP,Ity_I32),
1008 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1009 the former value of the carry flag, which unfortunately we have to
1012 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1014 Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;
1016 ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
1017 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
1019 /* This has to come first, because calculating the C flag
1020 may require reading all four thunk fields. */
1021 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
1022 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
1023 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
1024 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
1028 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1032 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
1036 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
1039 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
1042 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
1045 vpanic("setFlags_MUL(x86)");
1047 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
1048 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
1049 /* Set NDEP even though it isn't used. This makes redundant-PUT
1050 elimination of previous stores to this field work better. */
1051 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
1055 /* -------------- Condition codes. -------------- */
1057 /* Condition codes, using the Intel encoding. */
1059 static HChar* name_X86Condcode ( X86Condcode cond )
1062 case X86CondO: return "o";
1063 case X86CondNO: return "no";
1064 case X86CondB: return "b";
1065 case X86CondNB: return "nb";
1066 case X86CondZ: return "z";
1067 case X86CondNZ: return "nz";
1068 case X86CondBE: return "be";
1069 case X86CondNBE: return "nbe";
1070 case X86CondS: return "s";
1071 case X86CondNS: return "ns";
1072 case X86CondP: return "p";
1073 case X86CondNP: return "np";
1074 case X86CondL: return "l";
1075 case X86CondNL: return "nl";
1076 case X86CondLE: return "le";
1077 case X86CondNLE: return "nle";
1078 case X86CondAlways: return "ALWAYS";
1079 default: vpanic("name_X86Condcode");
1084 X86Condcode positiveIse_X86Condcode ( X86Condcode cond,
1087 vassert(cond >= X86CondO && cond <= X86CondNLE);
1092 *needInvert = False;
1098 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1100 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1103 Optionally, generate a store for the 'tres' value. This can either
1104 be a normal store, or it can be a cas-with-possible-failure style
1107 if taddr is IRTemp_INVALID, then no store is generated.
1109 if taddr is not IRTemp_INVALID, then a store (using taddr as
1110 the address) is generated:
1112 if texpVal is IRTemp_INVALID then a normal store is
1113 generated, and restart_point must be zero (it is irrelevant).
1115 if texpVal is not IRTemp_INVALID then a cas-style store is
1116 generated. texpVal is the expected value, restart_point
1117 is the restart point if the store fails, and texpVal must
1118 have the same type as tres.
/* See the big comment above: computes tres = ADC(ta1,ta2), sets the
   eflags thunk, and optionally emits a plain or cas-style store of
   tres to [taddr].  'thunkOp' is declared in an elided line. */
1120 static void helper_ADC ( Int sz,
1121 IRTemp tres, IRTemp ta1, IRTemp ta2,
1122 /* info about optional store: */
1123 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1126 IRType ty = szToITy(sz);
1127 IRTemp oldc = newTemp(Ity_I32);
1128 IRTemp oldcn = newTemp(ty);
1129 IROp plus = mkSizedOp(ty, Iop_Add8);
1130 IROp xor = mkSizedOp(ty, Iop_Xor8);
1132 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1133 vassert(sz == 1 || sz == 2 || sz == 4);
1134 thunkOp = sz==4 ? X86G_CC_OP_ADCL
1135 : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);
1137 /* oldc = old carry flag, 0 or 1 */
/* The And32 mask operand (presumably mkU32(1)) is on an elided line. */
1138 assign( oldc, binop(Iop_And32,
1139 mk_x86g_calculate_eflags_c(),
1142 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
/* tres = ta1 + ta2 + oldcn (third addend on an elided line). */
1144 assign( tres, binop(plus,
1145 binop(plus,mkexpr(ta1),mkexpr(ta2)),
1148 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1149 start of this function. */
1150 if (taddr != IRTemp_INVALID) {
1151 if (texpVal == IRTemp_INVALID) {
1152 vassert(restart_point == 0);
1153 storeLE( mkexpr(taddr), mkexpr(tres) );
1155 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1156 /* .. and hence 'texpVal' has the same type as 'tres'. */
1157 casLE( mkexpr(taddr),
1158 mkexpr(texpVal), mkexpr(tres), restart_point );
/* Flags thunk: DEP1 = ta1, DEP2 = ta2 ^ oldcn, NDEP = old carry. */
1162 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
1163 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
1164 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
1165 mkexpr(oldcn)) )) );
1166 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1170 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
1171 appropriately. As with helper_ADC, possibly generate a store of
1172 the result -- see comments on helper_ADC for details.
/* Mirror image of helper_ADC for subtract-with-borrow: computes
   tres = SBB(ta1,ta2), sets the eflags thunk, and optionally emits a
   plain or cas-style store.  'thunkOp' is declared in an elided line. */
1174 static void helper_SBB ( Int sz,
1175 IRTemp tres, IRTemp ta1, IRTemp ta2,
1176 /* info about optional store: */
1177 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1180 IRType ty = szToITy(sz);
1181 IRTemp oldc = newTemp(Ity_I32);
1182 IRTemp oldcn = newTemp(ty);
1183 IROp minus = mkSizedOp(ty, Iop_Sub8);
1184 IROp xor = mkSizedOp(ty, Iop_Xor8);
1186 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1187 vassert(sz == 1 || sz == 2 || sz == 4);
1188 thunkOp = sz==4 ? X86G_CC_OP_SBBL
1189 : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);
1191 /* oldc = old carry flag, 0 or 1 */
/* The And32 mask operand (presumably mkU32(1)) is on an elided line. */
1192 assign( oldc, binop(Iop_And32,
1193 mk_x86g_calculate_eflags_c(),
1196 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
/* tres = ta1 - ta2 - oldcn (subtrahend on an elided line). */
1198 assign( tres, binop(minus,
1199 binop(minus,mkexpr(ta1),mkexpr(ta2)),
1202 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1203 start of this function. */
1204 if (taddr != IRTemp_INVALID) {
1205 if (texpVal == IRTemp_INVALID) {
1206 vassert(restart_point == 0);
1207 storeLE( mkexpr(taddr), mkexpr(tres) );
1209 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1210 /* .. and hence 'texpVal' has the same type as 'tres'. */
1211 casLE( mkexpr(taddr),
1212 mkexpr(texpVal), mkexpr(tres), restart_point );
/* Flags thunk: DEP1 = ta1, DEP2 = ta2 ^ oldcn, NDEP = old carry. */
1216 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
1217 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
1218 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
1219 mkexpr(oldcn)) )) );
1220 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1224 /* -------------- Helpers for disassembly printing. -------------- */
/* Mnemonic for a Group-1 opcode extension (the /0../7 'reg' field). */
1226 static HChar* nameGrp1 ( Int opc_aux )
1228 static HChar* grp1_names[8]
1229 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1230 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
1231 return grp1_names[opc_aux];
/* Mnemonic for a Group-2 (shift/rotate) opcode extension.
   NOTE(review): "shl" appears at both /4 and /6 -- presumably
   intentional, as x86 /6 is an undocumented alias of SHL; confirm. */
1234 static HChar* nameGrp2 ( Int opc_aux )
1236 static HChar* grp2_names[8]
1237 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1238 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
1239 return grp2_names[opc_aux];
/* Mnemonic for a Group-4 opcode extension; only /0 and /1 are valid. */
1242 static HChar* nameGrp4 ( Int opc_aux )
1244 static HChar* grp4_names[8]
1245 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1246 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
1247 return grp4_names[opc_aux];
/* Mnemonic for a Group-5 opcode extension; /0../6 are valid. */
1250 static HChar* nameGrp5 ( Int opc_aux )
1252 static HChar* grp5_names[8]
1253 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1254 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
1255 return grp5_names[opc_aux];
/* Mnemonic for a Group-8 (bit test) opcode extension; /4../7 valid. */
1258 static HChar* nameGrp8 ( Int opc_aux )
1260 static HChar* grp8_names[8]
1261 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
1262 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
1263 return grp8_names[opc_aux];
/* AT&T-syntax name of integer register 'reg' (0..7) at the given
   operand size (4/2/1 bytes).  For 8-bit registers 4..7 the braced
   annotation shows which 32-bit register shares the encoding slot. */
1266 static HChar* nameIReg ( Int size, Int reg )
1268 static HChar* ireg32_names[8]
1269 = { "%eax", "%ecx", "%edx", "%ebx",
1270 "%esp", "%ebp", "%esi", "%edi" };
1271 static HChar* ireg16_names[8]
1272 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
1273 static HChar* ireg8_names[8]
1274 = { "%al", "%cl", "%dl", "%bl",
1275 "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
1276 if (reg < 0 || reg > 7) goto bad;
1278 case 4: return ireg32_names[reg];
1279 case 2: return ireg16_names[reg];
1280 case 1: return ireg8_names[reg];
/* 'bad' label / switch scaffolding elided from this excerpt. */
1283 vpanic("nameIReg(X86)");
1284 return NULL; /*notreached*/
/* AT&T-syntax name of a segment register. */
1287 static HChar* nameSReg ( UInt sreg )
1290 case R_ES: return "%es";
1291 case R_CS: return "%cs";
1292 case R_SS: return "%ss";
1293 case R_DS: return "%ds";
1294 case R_FS: return "%fs";
1295 case R_GS: return "%gs";
1296 default: vpanic("nameSReg(x86)");
/* Name of MMX register 0..7. */
1300 static HChar* nameMMXReg ( Int mmxreg )
1302 static HChar* mmx_names[8]
1303 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
1304 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
1305 return mmx_names[mmxreg];
/* Name of XMM register 0..7. */
1308 static HChar* nameXMMReg ( Int xmmreg )
1310 static HChar* xmm_names[8]
1311 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
1312 "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
1313 if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
1314 return xmm_names[xmmreg];
/* Textual name of an MMX granularity (lane width) selector.
   The valid cases are elided from this excerpt; only the panic
   default is visible. */
1317 static HChar* nameMMXGran ( Int gran )
1324 default: vpanic("nameMMXGran(x86,guest)");
/* Single-character AT&T size suffix for an operand size in bytes
   (presumably 4->'l', 2->'w', 1->'b' -- the cases are elided). */
1328 static HChar nameISize ( Int size )
1334 default: vpanic("nameISize(x86)");
1339 /*------------------------------------------------------------*/
1340 /*--- JMP helpers ---*/
1341 /*------------------------------------------------------------*/
/* End the current IRSB with an unconditional jump to the literal
   guest address d32, with the given jump kind. */
1343 static void jmp_lit( IRJumpKind kind, Addr32 d32 )
1345 irsb->next = mkU32(d32);
1346 irsb->jumpkind = kind;
/* End the current IRSB with a jump to the address held in temp 't'. */
1349 static void jmp_treg( IRJumpKind kind, IRTemp t )
1351 irsb->next = mkexpr(t);
1352 irsb->jumpkind = kind;
/* Emit a conditional branch: if 'cond' holds go to d32_true, else
   d32_false.  The condition is first positive-ised; in the inverted
   case the side-exit tests condPos and targets d32_false, with
   fall-through to d32_true -- and vice versa otherwise.  (The
   if(invert) scaffolding and 'invert' declaration are elided here.) */
1356 void jcc_01( X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
1359 X86Condcode condPos;
1360 condPos = positiveIse_X86Condcode ( cond, &invert );
1362 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1364 IRConst_U32(d32_false) ) );
1365 irsb->next = mkU32(d32_true);
1366 irsb->jumpkind = Ijk_Boring;
1368 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1370 IRConst_U32(d32_true) ) );
1371 irsb->next = mkU32(d32_false);
1372 irsb->jumpkind = Ijk_Boring;
1377 /*------------------------------------------------------------*/
1378 /*--- Disassembling addressing modes ---*/
1379 /*------------------------------------------------------------*/
1382 HChar* sorbTxt ( UChar sorb )
1385 case 0: return ""; /* no override */
1386 case 0x3E: return "%ds";
1387 case 0x26: return "%es:";
1388 case 0x64: return "%fs:";
1389 case 0x65: return "%gs:";
1390 default: vpanic("sorbTxt(x86,guest)");
1395 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
1396 linear address by adding any required segment override as indicated
/* ... by 'sorb'.  With no override the virtual address is returned
   unchanged; otherwise the LDT/GDT are consulted via the
   x86g_use_seg_selector helper and a failed translation side-exits
   back to the start of the current instruction. */
1399 IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual )
1403 IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
1406 /* the common case - no override */
1410 case 0x3E: sreg = R_DS; break;
1411 case 0x26: sreg = R_ES; break;
1412 case 0x64: sreg = R_FS; break;
1413 case 0x65: sreg = R_GS; break;
1414 default: vpanic("handleSegOverride(x86,guest)");
/* Host word type: descriptor-table base pointers are host words. */
1417 hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
1419 seg_selector = newTemp(Ity_I32);
1420 ldt_ptr = newTemp(hWordTy);
1421 gdt_ptr = newTemp(hWordTy);
1422 r64 = newTemp(Ity_I64);
1424 assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
1425 assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
1426 assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
1429 Call this to do the translation and limit checks:
1430 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
1431 UInt seg_selector, UInt virtual_addr )
/* (The mkIRExprCCall wrapper around this argument vector is elided.) */
1438 "x86g_use_seg_selector",
1439 &x86g_use_seg_selector,
1440 mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
1441 mkexpr(seg_selector), virtual)
1445 /* If the high 32 of the result are non-zero, there was a
1446 failure in address translation. In which case, make a
/* ... side-exit back to the current instruction (SegFault-style). */
1451 binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
1453 IRConst_U32( guest_EIP_curr_instr )
1457 /* otherwise, here's the translated result. */
1458 return unop(Iop_64to32, mkexpr(r64));
1462 /* Generate IR to calculate an address indicated by a ModRM and
1463 following SIB bytes. The expression, and the number of bytes in
1464 the address mode, are returned. Note that this fn should not be
1465 called if the R/M part of the address denotes a register instead of
1466 memory. If print_codegen is true, text of the addressing mode is
1469 The computed address is stored in a new tempreg, and the
1470 identity of the tempreg is returned. */
/* Copy the computed address expression into a fresh Ity_I32 temp and
   return that temp's identity (the 'return tmp;' line is elided). */
1472 static IRTemp disAMode_copy2tmp ( IRExpr* addr32 )
1474 IRTemp tmp = newTemp(Ity_I32);
1475 assign( tmp, addr32 );
/* Decode a ModRM(+SIB, +displacement) addressing mode starting at
   'delta', emit IR computing the effective (segment-adjusted) address
   into a new temp, write the disassembly text into 'buf', and report
   the encoded length via *len (the *len assignments themselves sit on
   elided lines).  Must not be called when the R/M field denotes a
   register. */
1480 IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf )
1482 UChar mod_reg_rm = getIByte(delta);
1487 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1488 jump table seems a bit excessive.
1490 mod_reg_rm &= 0xC7; /* is now XX000YYY */
1491 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
1492 /* is now XX0XXYYY */
1493 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
1494 switch (mod_reg_rm) {
1496 /* (%eax) .. (%edi), not including (%esp) or (%ebp).
1499 case 0x00: case 0x01: case 0x02: case 0x03:
1500 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1501 { UChar rm = mod_reg_rm;
1502 DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
1504 return disAMode_copy2tmp(
1505 handleSegOverride(sorb, getIReg(4,rm)));
1508 /* d8(%eax) ... d8(%edi), not including d8(%esp)
1509 --> GET %reg, t ; ADDL d8, t
1511 case 0x08: case 0x09: case 0x0A: case 0x0B:
1512 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1513 { UChar rm = toUChar(mod_reg_rm & 7);
1514 UInt d = getSDisp8(delta);
1515 DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
1517 return disAMode_copy2tmp(
1518 handleSegOverride(sorb,
1519 binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
1522 /* d32(%eax) ... d32(%edi), not including d32(%esp)
1523 --> GET %reg, t ; ADDL d8, t
1525 case 0x10: case 0x11: case 0x12: case 0x13:
1526 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1527 { UChar rm = toUChar(mod_reg_rm & 7);
1528 UInt d = getUDisp32(delta);
1529 DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
1531 return disAMode_copy2tmp(
1532 handleSegOverride(sorb,
1533 binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
1536 /* a register, %eax .. %edi. This shouldn't happen. */
1537 case 0x18: case 0x19: case 0x1A: case 0x1B:
1538 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1539 vpanic("disAMode(x86): not an addr!");
1541 /* a 32-bit literal address
1545 { UInt d = getUDisp32(delta);
1547 DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
1548 return disAMode_copy2tmp(
1549 handleSegOverride(sorb, mkU32(d)));
1553 /* SIB, with no displacement. Special cases:
1554 -- %esp cannot act as an index value.
1555 If index_r indicates %esp, zero is used for the index.
1556 -- when mod is zero and base indicates EBP, base is instead
1558 It's all madness, I tell you. Extract %index, %base and
1559 scale from the SIB byte. The value denoted is then:
1560 | %index == %ESP && %base == %EBP
1561 = d32 following SIB byte
1562 | %index == %ESP && %base != %EBP
1564 | %index != %ESP && %base == %EBP
1565 = d32 following SIB byte + (%index << scale)
1566 | %index != %ESP && %base != %ESP
1567 = %base + (%index << scale)
1569 What happens to the souls of CPU architects who dream up such
1570 horrendous schemes, do you suppose?
1572 UChar sib = getIByte(delta);
1573 UChar scale = toUChar((sib >> 6) & 3);
1574 UChar index_r = toUChar((sib >> 3) & 7);
1575 UChar base_r = toUChar(sib & 7);
1578 if (index_r != R_ESP && base_r != R_EBP) {
1579 DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
1580 nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
1584 handleSegOverride(sorb,
1587 binop(Iop_Shl32, getIReg(4,index_r),
1591 if (index_r != R_ESP && base_r == R_EBP) {
1592 UInt d = getUDisp32(delta);
1593 DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
1594 nameIReg(4,index_r), 1<<scale);
1598 handleSegOverride(sorb,
1600 binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
1604 if (index_r == R_ESP && base_r != R_EBP) {
1605 DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
1607 return disAMode_copy2tmp(
1608 handleSegOverride(sorb, getIReg(4,base_r)));
1611 if (index_r == R_ESP && base_r == R_EBP) {
1612 UInt d = getUDisp32(delta);
1613 DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
1615 return disAMode_copy2tmp(
1616 handleSegOverride(sorb, mkU32(d)));
1622 /* SIB, with 8-bit displacement. Special cases:
1623 -- %esp cannot act as an index value.
1624 If index_r indicates %esp, zero is used for the index.
1629 = d8 + %base + (%index << scale)
1632 UChar sib = getIByte(delta);
1633 UChar scale = toUChar((sib >> 6) & 3);
1634 UChar index_r = toUChar((sib >> 3) & 7);
1635 UChar base_r = toUChar(sib & 7);
1636 UInt d = getSDisp8(delta+1);
1638 if (index_r == R_ESP) {
1639 DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
1640 (Int)d, nameIReg(4,base_r));
1642 return disAMode_copy2tmp(
1643 handleSegOverride(sorb,
1644 binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
1646 DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
1647 nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
1651 handleSegOverride(sorb,
1656 getIReg(4,index_r), mkU8(scale))),
1663 /* SIB, with 32-bit displacement. Special cases:
1664 -- %esp cannot act as an index value.
1665 If index_r indicates %esp, zero is used for the index.
1670 = d32 + %base + (%index << scale)
1673 UChar sib = getIByte(delta);
1674 UChar scale = toUChar((sib >> 6) & 3);
1675 UChar index_r = toUChar((sib >> 3) & 7);
1676 UChar base_r = toUChar(sib & 7);
1677 UInt d = getUDisp32(delta+1);
1679 if (index_r == R_ESP) {
1680 DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
1681 (Int)d, nameIReg(4,base_r));
1683 return disAMode_copy2tmp(
1684 handleSegOverride(sorb,
1685 binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
1687 DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
1688 nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
1692 handleSegOverride(sorb,
1697 getIReg(4,index_r), mkU8(scale))),
1705 vpanic("disAMode(x86)");
1706 return 0; /*notreached*/
1711 /* Figure out the number of (insn-stream) bytes constituting the amode
1712 beginning at delta. Is useful for getting hold of literals beyond
1713 the end of the amode before it has been disassembled. */
/* Compute the byte length of the addressing mode at 'delta' without
   generating any IR.  Uses the same reg-field squeeze as disAMode so
   the two must be kept in sync.  (Return statements for the simple
   cases sit on elided lines.) */
1715 static UInt lengthAMode ( Int delta )
1717 UChar mod_reg_rm = getIByte(delta); delta++;
1719 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1720 jump table seems a bit excessive.
1722 mod_reg_rm &= 0xC7; /* is now XX000YYY */
1723 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
1724 /* is now XX0XXYYY */
1725 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
1726 switch (mod_reg_rm) {
1728 /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
1729 case 0x00: case 0x01: case 0x02: case 0x03:
1730 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1733 /* d8(%eax) ... d8(%edi), not including d8(%esp). */
1734 case 0x08: case 0x09: case 0x0A: case 0x0B:
1735 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1738 /* d32(%eax) ... d32(%edi), not including d32(%esp). */
1739 case 0x10: case 0x11: case 0x12: case 0x13:
1740 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1743 /* a register, %eax .. %edi. (Not an addr, but still handled.) */
1744 case 0x18: case 0x19: case 0x1A: case 0x1B:
1745 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1748 /* a 32-bit literal address. */
1749 case 0x05: return 5;
1751 /* SIB, no displacement. */
1753 UChar sib = getIByte(delta);
1754 UChar base_r = toUChar(sib & 7);
/* mod=00 with base==EBP means a d32 follows the SIB byte. */
1755 if (base_r == R_EBP) return 6; else return 2;
1757 /* SIB, with 8-bit displacement. */
1758 case 0x0C: return 3;
1760 /* SIB, with 32-bit displacement. */
1761 case 0x14: return 6;
1764 vpanic("lengthAMode");
1765 return 0; /*notreached*/
1769 /*------------------------------------------------------------*/
1770 /*--- Disassembling common idioms ---*/
1771 /*------------------------------------------------------------*/
1773 /* Handle binary integer instructions of the form
1776 Is passed a ptr to the modRM byte, the actual operation, and the
1777 data size. Returns the address advanced completely over this
1780 E(src) is reg-or-mem
1783 If E is reg, --> GET %G, tmp
1787 If E is mem and OP is not reversible,
1788 --> (getAddr E) -> tmpa
1794 If E is mem and OP is reversible
1795 --> (getAddr E) -> tmpa
/* Disassemble "OP E,G" (reg-or-mem source, register destination);
   see the big comment above.  Returns delta advanced over the insn
   (return statements are on elided lines).  ADC/SBB are routed
   through helper_ADC/helper_SBB; plain ops set the DEP1/DEP2 or
   DEP1-only flags thunk depending on whether op8 is add/sub-like. */
1801 UInt dis_op2_E_G ( UChar sorb,
1811 IRType ty = szToITy(size);
1812 IRTemp dst1 = newTemp(ty);
1813 IRTemp src = newTemp(ty);
1814 IRTemp dst0 = newTemp(ty);
1815 UChar rm = getUChar(delta0);
1816 IRTemp addr = IRTemp_INVALID;
1818 /* addSubCarry == True indicates the intended operation is
1819 add-with-carry or subtract-with-borrow. */
1821 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
1825 if (epartIsReg(rm)) {
1826 /* Specially handle XOR reg,reg, because that doesn't really
1827 depend on reg, and doing the obvious thing potentially
1828 generates a spurious value check failure due to the bogus
1829 dependency. Ditto SBB reg,reg. */
1830 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
1831 && gregOfRM(rm) == eregOfRM(rm)) {
1832 putIReg(size, gregOfRM(rm), mkU(ty,0));
1834 assign( dst0, getIReg(size,gregOfRM(rm)) );
1835 assign( src, getIReg(size,eregOfRM(rm)) );
1837 if (addSubCarry && op8 == Iop_Add8) {
1838 helper_ADC( size, dst1, dst0, src,
1839 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1840 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1842 if (addSubCarry && op8 == Iop_Sub8) {
1843 helper_SBB( size, dst1, dst0, src,
1844 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1845 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1847 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
1849 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1851 setFlags_DEP1(op8, dst1, ty);
/* G is only written when the op keeps its result (keep_dst test
   is elided). */
1853 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1856 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1857 nameIReg(size,eregOfRM(rm)),
1858 nameIReg(size,gregOfRM(rm)));
1861 /* E refers to memory */
1862 addr = disAMode ( &len, sorb, delta0, dis_buf);
1863 assign( dst0, getIReg(size,gregOfRM(rm)) );
1864 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
1866 if (addSubCarry && op8 == Iop_Add8) {
1867 helper_ADC( size, dst1, dst0, src,
1868 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1869 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1871 if (addSubCarry && op8 == Iop_Sub8) {
1872 helper_SBB( size, dst1, dst0, src,
1873 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1874 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1876 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
1878 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1880 setFlags_DEP1(op8, dst1, ty);
1882 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1885 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1886 dis_buf,nameIReg(size,gregOfRM(rm)));
1893 /* Handle binary integer instructions of the form
1896 Is passed a ptr to the modRM byte, the actual operation, and the
1897 data size. Returns the address advanced completely over this
1901 E(dst) is reg-or-mem
1903 If E is reg, --> GET %E, tmp
1907 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble "OP G,E" (register source, reg-or-mem destination);
   see the big comment above.  Returns delta advanced over the insn.
   The memory-destination path honours LOCK by using a cas-style
   store (either inside helper_ADC/SBB or via casLE directly);
   the 'locked' tests themselves sit on elided lines. */
1913 UInt dis_op2_G_E ( UChar sorb,
1924 IRType ty = szToITy(size);
1925 IRTemp dst1 = newTemp(ty);
1926 IRTemp src = newTemp(ty);
1927 IRTemp dst0 = newTemp(ty);
1928 UChar rm = getIByte(delta0);
1929 IRTemp addr = IRTemp_INVALID;
1931 /* addSubCarry == True indicates the intended operation is
1932 add-with-carry or subtract-with-borrow. */
1934 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
1938 if (epartIsReg(rm)) {
1939 /* Specially handle XOR reg,reg, because that doesn't really
1940 depend on reg, and doing the obvious thing potentially
1941 generates a spurious value check failure due to the bogus
1942 dependency. Ditto SBB reg,reg.*/
1943 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
1944 && gregOfRM(rm) == eregOfRM(rm)) {
1945 putIReg(size, eregOfRM(rm), mkU(ty,0));
1947 assign(dst0, getIReg(size,eregOfRM(rm)));
1948 assign(src, getIReg(size,gregOfRM(rm)));
1950 if (addSubCarry && op8 == Iop_Add8) {
1951 helper_ADC( size, dst1, dst0, src,
1952 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1953 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1955 if (addSubCarry && op8 == Iop_Sub8) {
1956 helper_SBB( size, dst1, dst0, src,
1957 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1958 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1960 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
1962 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1964 setFlags_DEP1(op8, dst1, ty);
1966 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1969 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1970 nameIReg(size,gregOfRM(rm)),
1971 nameIReg(size,eregOfRM(rm)));
1975 /* E refers to memory */
1977 addr = disAMode ( &len, sorb, delta0, dis_buf);
1978 assign(dst0, loadLE(ty,mkexpr(addr)));
1979 assign(src, getIReg(size,gregOfRM(rm)));
1981 if (addSubCarry && op8 == Iop_Add8) {
1983 /* cas-style store */
1984 helper_ADC( size, dst1, dst0, src,
1985 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
1988 helper_ADC( size, dst1, dst0, src,
1989 /*store*/addr, IRTemp_INVALID, 0 );
1992 if (addSubCarry && op8 == Iop_Sub8) {
1994 /* cas-style store */
1995 helper_SBB( size, dst1, dst0, src,
1996 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
1999 helper_SBB( size, dst1, dst0, src,
2000 /*store*/addr, IRTemp_INVALID, 0 );
2003 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2006 if (0) vex_printf("locked case\n" );
2007 casLE( mkexpr(addr),
2008 mkexpr(dst0)/*expval*/,
2009 mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
2011 if (0) vex_printf("nonlocked case\n");
2012 storeLE(mkexpr(addr), mkexpr(dst1));
2016 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2018 setFlags_DEP1(op8, dst1, ty);
2021 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
2022 nameIReg(size,gregOfRM(rm)), dis_buf);
2028 /* Handle move instructions of the form
2031 Is passed a ptr to the modRM byte, and the data size. Returns
2032 the address advanced completely over this instruction.
2034 E(src) is reg-or-mem
2037 If E is reg, --> GET %E, tmpv
2040 If E is mem --> (getAddr E) -> tmpa
/* Disassemble "MOV E,G" (reg-or-mem source into register G); see the
   comment above.  Returns delta advanced over the insn (returns are
   on elided lines). */
2045 UInt dis_mov_E_G ( UChar sorb,
2050 UChar rm = getIByte(delta0);
2053 if (epartIsReg(rm)) {
2054 putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
2055 DIP("mov%c %s,%s\n", nameISize(size),
2056 nameIReg(size,eregOfRM(rm)),
2057 nameIReg(size,gregOfRM(rm)));
2061 /* E refers to memory */
2063 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
2064 putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
2065 DIP("mov%c %s,%s\n", nameISize(size),
2066 dis_buf,nameIReg(size,gregOfRM(rm)));
2072 /* Handle move instructions of the form
2075 Is passed a ptr to the modRM byte, and the data size. Returns
2076 the address advanced completely over this instruction.
2079 E(dst) is reg-or-mem
2081 If E is reg, --> GET %G, tmp
2084 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble "MOV G,E" (register G into reg-or-mem destination);
   see the comment above.  Returns delta advanced over the insn. */
2089 UInt dis_mov_G_E ( UChar sorb,
2094 UChar rm = getIByte(delta0);
2097 if (epartIsReg(rm)) {
2098 putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
2099 DIP("mov%c %s,%s\n", nameISize(size),
2100 nameIReg(size,gregOfRM(rm)),
2101 nameIReg(size,eregOfRM(rm)));
2105 /* E refers to memory */
2107 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
2108 storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
2109 DIP("mov%c %s,%s\n", nameISize(size),
2110 nameIReg(size,gregOfRM(rm)), dis_buf);
2116 /* op $immediate, AL/AX/EAX. */
/* Disassemble "OP $imm, AL/AX/EAX".  Plain add/sub set the two-dep
   thunk, logic ops set the one-dep thunk, and ADC/SBB go through the
   carry helpers; CMP-style ops skip the final writeback (the keep
   test around the putIReg is elided). */
2118 UInt dis_op_imm_A ( Int size,
2125 IRType ty = szToITy(size);
2126 IRTemp dst0 = newTemp(ty);
2127 IRTemp src = newTemp(ty);
2128 IRTemp dst1 = newTemp(ty);
2129 UInt lit = getUDisp(size,delta);
2130 assign(dst0, getIReg(size,R_EAX));
2131 assign(src, mkU(ty,lit));
2133 if (isAddSub(op8) && !carrying) {
2134 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2135 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2140 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2141 setFlags_DEP1(op8, dst1, ty);
2144 if (op8 == Iop_Add8 && carrying) {
2145 helper_ADC( size, dst1, dst0, src,
2146 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2149 if (op8 == Iop_Sub8 && carrying) {
2150 helper_SBB( size, dst1, dst0, src,
2151 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2154 vpanic("dis_op_imm_A(x86,guest)");
2157 putIReg(size, R_EAX, mkexpr(dst1));
2159 DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size),
2160 lit, nameIReg(size,R_EAX));
2165 /* Sign- and Zero-extending moves. */
/* Sign/zero-extending MOV (movsx/movzx) from reg-or-mem E into
   register G, widening from szs to szd bytes.  The szs==szd branch
   handles the "mutant" same-size encoding (#250799) by copying
   without a widening op; the condition itself is on elided lines. */
2167 UInt dis_movx_E_G ( UChar sorb,
2168 Int delta, Int szs, Int szd, Bool sign_extend )
2170 UChar rm = getIByte(delta);
2171 if (epartIsReg(rm)) {
2173 // mutant case. See #250799
2174 putIReg(szd, gregOfRM(rm),
2175 getIReg(szs,eregOfRM(rm)));
2178 putIReg(szd, gregOfRM(rm),
2179 unop(mkWidenOp(szs,szd,sign_extend),
2180 getIReg(szs,eregOfRM(rm))));
2182 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2183 nameISize(szs), nameISize(szd),
2184 nameIReg(szs,eregOfRM(rm)),
2185 nameIReg(szd,gregOfRM(rm)));
2189 /* E refers to memory */
2193 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf );
2195 // mutant case. See #250799
2196 putIReg(szd, gregOfRM(rm),
2197 loadLE(szToITy(szs),mkexpr(addr)));
2200 putIReg(szd, gregOfRM(rm),
2201 unop(mkWidenOp(szs,szd,sign_extend),
2202 loadLE(szToITy(szs),mkexpr(addr))));
2204 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2205 nameISize(szs), nameISize(szd),
2206 dis_buf, nameIReg(szd,gregOfRM(rm)));
2212 /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
2213 16 / 8 bit quantity in the given IRTemp. */
/* Emit IR for DIV/IDIV: divide EDX:EAX (sz==4), DX:AX (sz==2) or AX
   (sz==1) by the value in temp 't', writing quotient and remainder
   back to the architectural registers.  All work is done through a
   64/32 DivMod op, with the narrower sizes widened first (the
   switch(sz) scaffolding is on elided lines). */
2215 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
2217 IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32;
2218 IRTemp src64 = newTemp(Ity_I64);
2219 IRTemp dst64 = newTemp(Ity_I64);
/* sz==4: EDX:EAX / t32 -> EAX=quotient, EDX=remainder. */
2222 assign( src64, binop(Iop_32HLto64,
2223 getIReg(4,R_EDX), getIReg(4,R_EAX)) );
2224 assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) );
2225 putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) );
2226 putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) );
/* sz==2: DX:AX / t16, widened to 64/32 bits. */
2229 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
2230 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
2231 assign( src64, unop(widen3264,
2233 getIReg(2,R_EDX), getIReg(2,R_EAX))) );
2234 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
2235 putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
2236 putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
/* sz==1: AX / t8 -> AL=quotient, AH=remainder. */
2240 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
2241 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
2242 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
2243 assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) );
2245 binop(op, mkexpr(src64),
2246 unop(widen1632, unop(widen816, mkexpr(t)))) );
2247 putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16,
2248 unop(Iop_64to32,mkexpr(dst64)))) );
2249 putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16,
2250 unop(Iop_64HIto32,mkexpr(dst64)))) );
2253 default: vpanic("codegen_div(x86)");
/* Disassemble a Group-1 "OP $imm, E" instruction (add/or/adc/sbb/
   and/sub/xor/cmp selected by the reg field of modrm).  d32 is the
   immediate, masked to the operand size.  CMP (/7) computes flags
   but never writes the destination, hence the 'gregOfRM(modrm) < 7'
   guards.  Memory destinations honour LOCK via cas-style stores. */
2259 UInt dis_Grp1 ( UChar sorb, Bool locked,
2260 Int delta, UChar modrm,
2261 Int am_sz, Int d_sz, Int sz, UInt d32 )
2265 IRType ty = szToITy(sz);
2266 IRTemp dst1 = newTemp(ty);
2267 IRTemp src = newTemp(ty);
2268 IRTemp dst0 = newTemp(ty);
2269 IRTemp addr = IRTemp_INVALID;
2270 IROp op8 = Iop_INVALID;
2271 UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF);
2273 switch (gregOfRM(modrm)) {
2274 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
2275 case 2: break; // ADC
2276 case 3: break; // SBB
2277 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
/* /7 is CMP: same op as SUB, result discarded below. */
2278 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
2280 default: vpanic("dis_Grp1: unhandled case");
2283 if (epartIsReg(modrm)) {
2284 vassert(am_sz == 1);
2286 assign(dst0, getIReg(sz,eregOfRM(modrm)));
2287 assign(src, mkU(ty,d32 & mask));
2289 if (gregOfRM(modrm) == 2 /* ADC */) {
2290 helper_ADC( sz, dst1, dst0, src,
2291 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2293 if (gregOfRM(modrm) == 3 /* SBB */) {
2294 helper_SBB( sz, dst1, dst0, src,
2295 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2297 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2299 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2301 setFlags_DEP1(op8, dst1, ty);
2304 if (gregOfRM(modrm) < 7)
2305 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2307 delta += (am_sz + d_sz);
2308 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32,
2309 nameIReg(sz,eregOfRM(modrm)));
2311 addr = disAMode ( &len, sorb, delta, dis_buf);
2313 assign(dst0, loadLE(ty,mkexpr(addr)));
2314 assign(src, mkU(ty,d32 & mask));
2316 if (gregOfRM(modrm) == 2 /* ADC */) {
2318 /* cas-style store */
2319 helper_ADC( sz, dst1, dst0, src,
2320 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2323 helper_ADC( sz, dst1, dst0, src,
2324 /*store*/addr, IRTemp_INVALID, 0 );
2327 if (gregOfRM(modrm) == 3 /* SBB */) {
2329 /* cas-style store */
2330 helper_SBB( sz, dst1, dst0, src,
2331 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2334 helper_SBB( sz, dst1, dst0, src,
2335 /*store*/addr, IRTemp_INVALID, 0 );
2338 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2339 if (gregOfRM(modrm) < 7) {
2341 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
2342 mkexpr(dst1)/*newVal*/,
2343 guest_EIP_curr_instr );
2345 storeLE(mkexpr(addr), mkexpr(dst1));
2349 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2351 setFlags_DEP1(op8, dst1, ty);
2354 delta += (len+d_sz);
2355 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
2362 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
/* Grp2: decode a "group 2" insn -- shifts and rotates (ROL/ROR/RCL/
   RCR/SHL/SHR/SAR), selected by the reg field of the modRM byte.
   Returns the updated instruction delta; failure is reported through
   *decode_OK.
   NOTE(review): the embedded line numbers in this listing skip values,
   so some statements (closing braces, else-arms, the final return)
   are elided from this excerpt -- verify against the full file. */
2366 UInt dis_Grp2 ( UChar sorb,
2367 Int delta, UChar modrm,
2368 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
2369 HChar* shift_expr_txt, Bool* decode_OK )
2371 /* delta on entry points at the modrm byte. */
2374 Bool isShift, isRotate, isRotateC;
2375 IRType ty = szToITy(sz);
2376 IRTemp dst0 = newTemp(ty);
2377 IRTemp dst1 = newTemp(ty);
2378 IRTemp addr = IRTemp_INVALID;
2382 vassert(sz == 1 || sz == 2 || sz == 4);
2384 /* Put value to shift/rotate in dst0. */
2385 if (epartIsReg(modrm)) {
2386 assign(dst0, getIReg(sz, eregOfRM(modrm)));
2387 delta += (am_sz + d_sz);
2389 addr = disAMode ( &len, sorb, delta, dis_buf);
2390 assign(dst0, loadLE(ty,mkexpr(addr)));
2391 delta += len + d_sz;
/* Classify the sub-opcode: 4/5/7 = SHL/SHR/SAR, 0/1 = ROL/ROR,
   2/3 = RCL/RCR; sub-opcode 6 is undefined. */
2395 switch (gregOfRM(modrm)) { case 4: case 5: case 7: isShift = True; }
2398 switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; }
2401 switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; }
2403 if (gregOfRM(modrm) == 6) {
2408 if (!isShift && !isRotate && !isRotateC) {
2410 vpanic("dis_Grp2(Reg): unhandled case(x86)");
/* RCL/RCR: done entirely by a clean helper, which returns both the
   rotated value (lo half of r64) and the new eflags (hi half). */
2414 /* call a helper; these insns are so ridiculous they do not
2416 Bool left = toBool(gregOfRM(modrm) == 2);
2417 IRTemp r64 = newTemp(Ity_I64);
2419 = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */
2420 widenUto32(shift_expr), /* rotate amount */
2421 widenUto32(mk_x86g_calculate_eflags_all()),
2423 assign( r64, mkIRExprCCall(
2426 left ? "x86g_calculate_RCL" : "x86g_calculate_RCR",
2427 left ? &x86g_calculate_RCL : &x86g_calculate_RCR,
2431 /* new eflags in hi half r64; new value in lo half r64 */
2432 assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) );
2433 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
2434 stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) ));
2435 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
2436 /* Set NDEP even though it isn't used. This makes redundant-PUT
2437 elimination of previous stores to this field work better. */
2438 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
/* Plain shifts (SHL/SHR/SAR): performed at 32 bits regardless of
   operand size -- see the long comment below for why. */
2443 IRTemp pre32 = newTemp(Ity_I32);
2444 IRTemp res32 = newTemp(Ity_I32);
2445 IRTemp res32ss = newTemp(Ity_I32);
2446 IRTemp shift_amt = newTemp(Ity_I8);
2449 switch (gregOfRM(modrm)) {
2450 case 4: op32 = Iop_Shl32; break;
2451 case 5: op32 = Iop_Shr32; break;
2452 case 7: op32 = Iop_Sar32; break;
2454 default: vpanic("dis_Grp2:shift"); break;
2457 /* Widen the value to be shifted to 32 bits, do the shift, and
2458 narrow back down. This seems surprisingly long-winded, but
2459 unfortunately the Intel semantics requires that 8/16-bit
2460 shifts give defined results for shift values all the way up
2461 to 31, and this seems the simplest way to do it. It has the
2462 advantage that the only IR level shifts generated are of 32
2463 bit values, and the shift amount is guaranteed to be in the
2464 range 0 .. 31, thereby observing the IR semantics requiring
2465 all shift values to be in the range 0 .. 2^word_size-1. */
2467 /* shift_amt = shift_expr & 31, regardless of operation size */
2468 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) );
2470 /* suitably widen the value to be shifted to 32 bits. */
2471 assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0))
2472 : widenUto32(mkexpr(dst0)) );
2474 /* res32 = pre32 `shift` shift_amt */
2475 assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) );
2477 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
2483 mkexpr(shift_amt), mkU8(1)),
2486 /* Build the flags thunk. */
2487 setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt);
2489 /* Narrow the result back down. */
2490 assign( dst1, narrowTo(ty, mkexpr(res32)) );
2492 } /* if (isShift) */
/* Plain rotates (ROL/ROR): synthesised from two opposing shifts
   OR'd together; the flag thunk is only updated when the rotate
   count is non-zero (Mux0X below keeps the old thunk otherwise). */
2496 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
2497 Bool left = toBool(gregOfRM(modrm) == 0);
2498 IRTemp rot_amt = newTemp(Ity_I8);
2499 IRTemp rot_amt32 = newTemp(Ity_I8);
2500 IRTemp oldFlags = newTemp(Ity_I32);
2502 /* rot_amt = shift_expr & mask */
2503 /* By masking the rotate amount thusly, the IR-level Shl/Shr
2504 expressions never shift beyond the word size and thus remain
2506 assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31)));
2509 assign(rot_amt, mkexpr(rot_amt32));
2511 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1)));
2515 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
2517 binop( mkSizedOp(ty,Iop_Or8),
2518 binop( mkSizedOp(ty,Iop_Shl8),
2522 binop( mkSizedOp(ty,Iop_Shr8),
2524 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
2528 ccOp += X86G_CC_OP_ROLB;
2530 } else { /* right */
2532 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
2534 binop( mkSizedOp(ty,Iop_Or8),
2535 binop( mkSizedOp(ty,Iop_Shr8),
2539 binop( mkSizedOp(ty,Iop_Shl8),
2541 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
2545 ccOp += X86G_CC_OP_RORB;
2549 /* dst1 now holds the rotated value. Build flag thunk. We
2550 need the resulting value for this, and the previous flags.
2551 Except don't set it if the rotate count is zero. */
2553 assign(oldFlags, mk_x86g_calculate_eflags_all());
2555 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
2556 stmt( IRStmt_Put( OFFB_CC_OP,
2557 IRExpr_Mux0X( mkexpr(rot_amt32),
2558 IRExpr_Get(OFFB_CC_OP,Ity_I32),
2560 stmt( IRStmt_Put( OFFB_CC_DEP1,
2561 IRExpr_Mux0X( mkexpr(rot_amt32),
2562 IRExpr_Get(OFFB_CC_DEP1,Ity_I32),
2563 widenUto32(mkexpr(dst1)))) );
2564 stmt( IRStmt_Put( OFFB_CC_DEP2,
2565 IRExpr_Mux0X( mkexpr(rot_amt32),
2566 IRExpr_Get(OFFB_CC_DEP2,Ity_I32),
2568 stmt( IRStmt_Put( OFFB_CC_NDEP,
2569 IRExpr_Mux0X( mkexpr(rot_amt32),
2570 IRExpr_Get(OFFB_CC_NDEP,Ity_I32),
2571 mkexpr(oldFlags))) );
2572 } /* if (isRotate) */
/* Write the result back to reg or memory, with optional FE trace. */
2574 /* Save result, and finish up. */
2575 if (epartIsReg(modrm)) {
2576 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2577 if (vex_traceflags & VEX_TRACE_FE) {
2579 nameGrp2(gregOfRM(modrm)), nameISize(sz) );
2581 vex_printf("%s", shift_expr_txt);
2583 ppIRExpr(shift_expr);
2584 vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm)));
2587 storeLE(mkexpr(addr), mkexpr(dst1));
2588 if (vex_traceflags & VEX_TRACE_FE) {
2590 nameGrp2(gregOfRM(modrm)), nameISize(sz) );
2592 vex_printf("%s", shift_expr_txt);
2594 ppIRExpr(shift_expr);
2595 vex_printf(", %s\n", dis_buf);
2602 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
/* Grp8 with an immediate bit offset: BT/BTS/BTR/BTC $imm8, E.
   Tests (and for non-BT, updates) the selected bit, putting the
   original bit value into the carry flag.  Memory destinations use a
   CAS when a write-back is needed.
   NOTE(review): this listing is elided (embedded line numbers skip);
   some statements such as the switch header for the size mask and
   the final return are not visible here. */
2604 UInt dis_Grp8_Imm ( UChar sorb,
2606 Int delta, UChar modrm,
2607 Int am_sz, Int sz, UInt src_val,
2610 /* src_val denotes a d8.
2611 And delta on entry points at the modrm byte. */
2613 IRType ty = szToITy(sz);
2614 IRTemp t2 = newTemp(Ity_I32);
2615 IRTemp t2m = newTemp(Ity_I32);
2616 IRTemp t_addr = IRTemp_INVALID;
2620 /* we're optimists :-) */
2623 /* Limit src_val -- the bit offset -- to something within a word.
2624 The Intel docs say that literal offsets larger than a word are
2625 masked in this way. */
2627 case 2: src_val &= 15; break;
2628 case 4: src_val &= 31; break;
2629 default: *decode_OK = False; return delta;
2632 /* Invent a mask suitable for the operation. */
2633 switch (gregOfRM(modrm)) {
2634 case 4: /* BT */ mask = 0; break;
2635 case 5: /* BTS */ mask = 1 << src_val; break;
2636 case 6: /* BTR */ mask = ~(1 << src_val); break;
2637 case 7: /* BTC */ mask = 1 << src_val; break;
2638 /* If this needs to be extended, probably simplest to make a
2639 new function to handle the other cases (0 .. 3). The
2640 Intel docs do however not indicate any use for 0 .. 3, so
2641 we don't expect this to happen. */
2642 default: *decode_OK = False; return delta;
2645 /* Fetch the value to be tested and modified into t2, which is
2646 32-bits wide regardless of sz. */
2647 if (epartIsReg(modrm)) {
2648 vassert(am_sz == 1);
2649 assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) );
2650 delta += (am_sz + 1);
2651 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
2652 src_val, nameIReg(sz,eregOfRM(modrm)));
2655 t_addr = disAMode ( &len, sorb, delta, dis_buf);
2657 assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) );
2658 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
2662 /* Compute the new value into t2m, if non-BT. */
2663 switch (gregOfRM(modrm)) {
2667 assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) );
2670 assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) );
2673 assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) );
2676 /*NOTREACHED*/ /*the previous switch guards this*/
2680 /* Write the result back, if non-BT. If the CAS fails then we
2681 side-exit from the trace at this point, and so the flag state is
2682 not affected. This is of course as required. */
2683 if (gregOfRM(modrm) != 4 /* BT */) {
2684 if (epartIsReg(modrm)) {
2685 putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
2688 casLE( mkexpr(t_addr),
2689 narrowTo(ty, mkexpr(t2))/*expd*/,
2690 narrowTo(ty, mkexpr(t2m))/*new*/,
2691 guest_EIP_curr_instr );
2693 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
2698 /* Copy relevant bit from t2 into the carry flag. */
2699 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
2700 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
2701 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
2705 binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
2708 /* Set NDEP even though it isn't used. This makes redundant-PUT
2709 elimination of previous stores to this field work better. */
2710 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
2716 /* Signed/unsigned widening multiply. Generate IR to multiply the
2717 value in EAX/AX/AL by the given IRTemp, and park the result in
/* ... EDX:EAX / DX:AX / AX, per operand size.  Also sets the flag
   thunk via setFlags_MUL with the size-appropriate base op.
   NOTE(review): elided listing -- the switch-on-sz headers and
   closing braces are missing from this excerpt. */
2720 static void codegen_mulL_A_D ( Int sz, Bool syned,
2721 IRTemp tmp, HChar* tmp_txt )
2723 IRType ty = szToITy(sz);
2724 IRTemp t1 = newTemp(ty);
2726 assign( t1, getIReg(sz, R_EAX) );
/* 32-bit case: 32x32 -> 64, hi half to EDX, lo half to EAX. */
2730 IRTemp res64 = newTemp(Ity_I64);
2731 IRTemp resHi = newTemp(Ity_I32);
2732 IRTemp resLo = newTemp(Ity_I32);
2733 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
2734 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2735 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
2736 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2737 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
2738 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
2739 putIReg(4, R_EDX, mkexpr(resHi));
2740 putIReg(4, R_EAX, mkexpr(resLo));
/* 16-bit case: 16x16 -> 32, hi half to DX, lo half to AX. */
2744 IRTemp res32 = newTemp(Ity_I32);
2745 IRTemp resHi = newTemp(Ity_I16);
2746 IRTemp resLo = newTemp(Ity_I16);
2747 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
2748 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2749 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
2750 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2751 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
2752 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
2753 putIReg(2, R_EDX, mkexpr(resHi));
2754 putIReg(2, R_EAX, mkexpr(resLo));
/* 8-bit case: 8x8 -> 16; the whole 16-bit product goes to AX. */
2758 IRTemp res16 = newTemp(Ity_I16);
2759 IRTemp resHi = newTemp(Ity_I8);
2760 IRTemp resLo = newTemp(Ity_I8);
2761 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
2762 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2763 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
2764 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2765 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
2766 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
2767 putIReg(2, R_EAX, mkexpr(res16));
2771 vpanic("codegen_mulL_A_D(x86)");
2773 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
2777 /* Group 3 extended opcodes. */
/* Grp3: TEST/NOT/NEG/MUL/IMUL/DIV/IDIV on E, selected by the reg
   field of modrm.  LOCK is accepted only for NOT (2) and NEG (3).
   Register and memory forms are handled in separate switches; memory
   write-backs for NOT/NEG use a CAS when locked.
   NOTE(review): elided listing -- temp allocations for dst0/dst1/src
   and several case terminators are not visible in this excerpt. */
2779 UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
2786 IRType ty = szToITy(sz);
2787 IRTemp t1 = newTemp(ty);
2788 IRTemp dst1, src, dst0;
2790 *decode_OK = True; /* may change this later */
2792 modrm = getIByte(delta);
2794 if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
2795 /* LOCK prefix only allowed with not and neg subopcodes */
/* ---- register destination ---- */
2800 if (epartIsReg(modrm)) {
2801 switch (gregOfRM(modrm)) {
2802 case 0: { /* TEST */
2803 delta++; d32 = getUDisp(sz, delta); delta += sz;
2805 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
2806 getIReg(sz,eregOfRM(modrm)),
2808 setFlags_DEP1( Iop_And8, dst1, ty );
2809 DIP("test%c $0x%x, %s\n", nameISize(sz), d32,
2810 nameIReg(sz, eregOfRM(modrm)));
2813 case 1: /* UNDEFINED */
2814 /* The Intel docs imply this insn is undefined and binutils
2815 agrees. Unfortunately Core 2 will run it (with who
2816 knows what result?) sandpile.org reckons it's an alias
2817 for case 0. We play safe. */
2822 putIReg(sz, eregOfRM(modrm),
2823 unop(mkSizedOp(ty,Iop_Not8),
2824 getIReg(sz, eregOfRM(modrm))));
2825 DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
/* NEG: computed as 0 - src so the standard SUB flag thunk applies. */
2832 assign(dst0, mkU(ty,0));
2833 assign(src, getIReg(sz,eregOfRM(modrm)));
2834 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
2835 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
2836 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2837 DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2839 case 4: /* MUL (unsigned widening) */
2842 assign(src, getIReg(sz,eregOfRM(modrm)));
2843 codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) );
2845 case 5: /* IMUL (signed widening) */
2848 assign(src, getIReg(sz,eregOfRM(modrm)));
2849 codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) );
2853 assign( t1, getIReg(sz, eregOfRM(modrm)) );
2854 codegen_div ( sz, t1, False );
2855 DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2859 assign( t1, getIReg(sz, eregOfRM(modrm)) );
2860 codegen_div ( sz, t1, True );
2861 DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2864 /* This can't happen - gregOfRM should return 0 .. 7 only */
2865 vpanic("Grp3(x86)");
/* ---- memory destination ---- */
2868 addr = disAMode ( &len, sorb, delta, dis_buf );
2871 assign(t1, loadLE(ty,mkexpr(addr)));
2872 switch (gregOfRM(modrm)) {
2873 case 0: { /* TEST */
2874 d32 = getUDisp(sz, delta); delta += sz;
2876 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
2877 mkexpr(t1), mkU(ty,d32)));
2878 setFlags_DEP1( Iop_And8, dst1, ty );
2879 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
2882 case 1: /* UNDEFINED */
2883 /* See comment above on R case */
2888 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
2890 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
2891 guest_EIP_curr_instr );
2893 storeLE( mkexpr(addr), mkexpr(dst1) );
2895 DIP("not%c %s\n", nameISize(sz), dis_buf);
2901 assign(dst0, mkU(ty,0));
2902 assign(src, mkexpr(t1));
2903 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
2904 mkexpr(dst0), mkexpr(src)));
2906 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
2907 guest_EIP_curr_instr );
2909 storeLE( mkexpr(addr), mkexpr(dst1) );
2911 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
2912 DIP("neg%c %s\n", nameISize(sz), dis_buf);
2915 codegen_mulL_A_D ( sz, False, t1, dis_buf );
2918 codegen_mulL_A_D ( sz, True, t1, dis_buf );
2921 codegen_div ( sz, t1, False );
2922 DIP("div%c %s\n", nameISize(sz), dis_buf);
2925 codegen_div ( sz, t1, True );
2926 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
2929 /* This can't happen - gregOfRM should return 0 .. 7 only */
2930 vpanic("Grp3(x86)");
2937 /* Group 4 extended opcodes. */
/* Grp4: byte-sized INC (0) / DEC (1) of E.  LOCK is allowed only for
   those two sub-opcodes; locked memory forms use a CAS.
   NOTE(review): elided listing -- the 'ty' declaration, case labels
   and the final return are not visible in this excerpt. */
2939 UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
2945 IRTemp t1 = newTemp(ty);
2946 IRTemp t2 = newTemp(ty);
2950 modrm = getIByte(delta);
2952 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
2953 /* LOCK prefix only allowed with inc and dec subopcodes */
/* ---- register destination ---- */
2958 if (epartIsReg(modrm)) {
2959 assign(t1, getIReg(1, eregOfRM(modrm)));
2960 switch (gregOfRM(modrm)) {
2962 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
2963 putIReg(1, eregOfRM(modrm), mkexpr(t2));
2964 setFlags_INC_DEC( True, t2, ty );
2967 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
2968 putIReg(1, eregOfRM(modrm), mkexpr(t2));
2969 setFlags_INC_DEC( False, t2, ty );
2976 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)),
2977 nameIReg(1, eregOfRM(modrm)));
/* ---- memory destination; CAS when locked ---- */
2979 IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf );
2980 assign( t1, loadLE(ty, mkexpr(addr)) );
2981 switch (gregOfRM(modrm)) {
2983 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
2985 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
2986 guest_EIP_curr_instr );
2988 storeLE( mkexpr(addr), mkexpr(t2) );
2990 setFlags_INC_DEC( True, t2, ty );
2993 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
2995 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
2996 guest_EIP_curr_instr );
2998 storeLE( mkexpr(addr), mkexpr(t2) );
3000 setFlags_INC_DEC( False, t2, ty );
3007 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf);
3013 /* Group 5 extended opcodes. */
/* Grp5: INC/DEC/CALL/JMP/PUSH on E.  CALL and JMP end the block
   (dres->whatNext = Dis_StopHere); PUSH decrements ESP by sz first.
   LOCK is allowed only for INC (0) and DEC (1).
   NOTE(review): elided listing -- some case labels, vassert lines and
   the final return are not visible in this excerpt. */
3015 UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
3016 DisResult* dres, Bool* decode_OK )
3021 IRTemp addr = IRTemp_INVALID;
3022 IRType ty = szToITy(sz);
3023 IRTemp t1 = newTemp(ty);
3024 IRTemp t2 = IRTemp_INVALID;
3028 modrm = getIByte(delta);
3030 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
3031 /* LOCK prefix only allowed with inc and dec subopcodes */
/* ---- register operand ---- */
3036 if (epartIsReg(modrm)) {
3037 assign(t1, getIReg(sz,eregOfRM(modrm)));
3038 switch (gregOfRM(modrm)) {
3040 vassert(sz == 2 || sz == 4);
3042 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3043 mkexpr(t1), mkU(ty,1)));
3044 setFlags_INC_DEC( True, t2, ty );
3045 putIReg(sz,eregOfRM(modrm),mkexpr(t2));
3048 vassert(sz == 2 || sz == 4);
3050 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3051 mkexpr(t1), mkU(ty,1)));
3052 setFlags_INC_DEC( False, t2, ty );
3053 putIReg(sz,eregOfRM(modrm),mkexpr(t2));
3055 case 2: /* call Ev */
/* Push the return address (next insn), then jump to the target. */
3057 t2 = newTemp(Ity_I32);
3058 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
3059 putIReg(4, R_ESP, mkexpr(t2));
3060 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
3061 jmp_treg(Ijk_Call,t1);
3062 dres->whatNext = Dis_StopHere;
3064 case 4: /* jmp Ev */
3066 jmp_treg(Ijk_Boring,t1);
3067 dres->whatNext = Dis_StopHere;
3069 case 6: /* PUSH Ev */
3070 vassert(sz == 4 || sz == 2);
3071 t2 = newTemp(Ity_I32);
3072 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
3073 putIReg(4, R_ESP, mkexpr(t2) );
3074 storeLE( mkexpr(t2), mkexpr(t1) );
3081 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
3082 nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
/* ---- memory operand; INC/DEC use CAS when locked ---- */
3084 addr = disAMode ( &len, sorb, delta, dis_buf );
3085 assign(t1, loadLE(ty,mkexpr(addr)));
3086 switch (gregOfRM(modrm)) {
3089 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3090 mkexpr(t1), mkU(ty,1)));
3092 casLE( mkexpr(addr),
3093 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
3095 storeLE(mkexpr(addr),mkexpr(t2));
3097 setFlags_INC_DEC( True, t2, ty );
3101 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3102 mkexpr(t1), mkU(ty,1)));
3104 casLE( mkexpr(addr),
3105 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
3107 storeLE(mkexpr(addr),mkexpr(t2));
3109 setFlags_INC_DEC( False, t2, ty );
3111 case 2: /* call Ev */
3113 t2 = newTemp(Ity_I32);
3114 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
3115 putIReg(4, R_ESP, mkexpr(t2));
3116 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
3117 jmp_treg(Ijk_Call,t1);
3118 dres->whatNext = Dis_StopHere;
3120 case 4: /* JMP Ev */
3122 jmp_treg(Ijk_Boring,t1);
3123 dres->whatNext = Dis_StopHere;
3125 case 6: /* PUSH Ev */
3126 vassert(sz == 4 || sz == 2);
3127 t2 = newTemp(Ity_I32);
3128 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
3129 putIReg(4, R_ESP, mkexpr(t2) );
3130 storeLE( mkexpr(t2), mkexpr(t1) );
3137 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
3138 nameISize(sz), dis_buf);
3144 /*------------------------------------------------------------*/
3145 /*--- Disassembling string ops (including REP prefixes) ---*/
3146 /*------------------------------------------------------------*/
3148 /* Code shared by all the string ops */
/* Compute into t_inc the per-iteration pointer delta: +/-sz scaled
   from the guest DFLAG (+1/-1).  For sz 2/4 the DFLAG is shifted
   left to form +/-2 or +/-4; for sz 1 it is used directly.
   NOTE(review): elided listing -- the assign wrappers and shift
   amounts are partly missing from this excerpt. */
3150 void dis_string_op_increment(Int sz, Int t_inc)
3152 if (sz == 4 || sz == 2) {
3154 binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
3158 IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
/* Dispatch a single (non-REP) string op: compute the direction
   increment, then invoke the per-op worker (dis_MOVS etc). */
3163 void dis_string_op( void (*dis_OP)( Int, IRTemp ),
3164 Int sz, HChar* name, UChar sorb )
3166 IRTemp t_inc = newTemp(Ity_I32);
3167 vassert(sorb == 0); /* hmm. so what was the point of passing it in? */
3168 dis_string_op_increment(sz, t_inc);
3169 dis_OP( sz, t_inc );
3170 DIP("%s%c\n", name, nameISize(sz));
/* MOVS: copy one element from [ESI] to [EDI], then advance both
   pointers by t_inc (direction-flag scaled). */
3174 void dis_MOVS ( Int sz, IRTemp t_inc )
3176 IRType ty = szToITy(sz);
3177 IRTemp td = newTemp(Ity_I32); /* EDI */
3178 IRTemp ts = newTemp(Ity_I32); /* ESI */
3180 assign( td, getIReg(4, R_EDI) );
3181 assign( ts, getIReg(4, R_ESI) );
3183 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
3185 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3186 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
/* LODS: load one element from [ESI] into EAX/AX/AL, then advance
   ESI by t_inc. */
3190 void dis_LODS ( Int sz, IRTemp t_inc )
3192 IRType ty = szToITy(sz);
3193 IRTemp ts = newTemp(Ity_I32); /* ESI */
3195 assign( ts, getIReg(4, R_ESI) );
3197 putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );
3199 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
/* STOS: store EAX/AX/AL to [EDI], then advance EDI by t_inc. */
3203 void dis_STOS ( Int sz, IRTemp t_inc )
3205 IRType ty = szToITy(sz);
3206 IRTemp ta = newTemp(ty); /* EAX */
3207 IRTemp td = newTemp(Ity_I32); /* EDI */
3209 assign( ta, getIReg(sz, R_EAX) );
3210 assign( td, getIReg(4, R_EDI) );
3212 storeLE( mkexpr(td), mkexpr(ta) );
3214 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
/* CMPS: compare [ESI] with [EDI] (flags set as for SUB of the two
   loaded values), then advance both pointers by t_inc. */
3218 void dis_CMPS ( Int sz, IRTemp t_inc )
3220 IRType ty = szToITy(sz);
3221 IRTemp tdv = newTemp(ty); /* (EDI) */
3222 IRTemp tsv = newTemp(ty); /* (ESI) */
3223 IRTemp td = newTemp(Ity_I32); /* EDI */
3224 IRTemp ts = newTemp(Ity_I32); /* ESI */
3226 assign( td, getIReg(4, R_EDI) );
3227 assign( ts, getIReg(4, R_ESI) );
3229 assign( tdv, loadLE(ty,mkexpr(td)) );
3230 assign( tsv, loadLE(ty,mkexpr(ts)) );
3232 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
3234 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3235 putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
/* SCAS: compare EAX/AX/AL with [EDI] (flags as for SUB), then
   advance EDI by t_inc. */
3239 void dis_SCAS ( Int sz, IRTemp t_inc )
3241 IRType ty = szToITy(sz);
3242 IRTemp ta = newTemp(ty); /* EAX */
3243 IRTemp td = newTemp(Ity_I32); /* EDI */
3244 IRTemp tdv = newTemp(ty); /* (EDI) */
3246 assign( ta, getIReg(sz, R_EAX) );
3247 assign( td, getIReg(4, R_EDI) );
3249 assign( tdv, loadLE(ty,mkexpr(td)) );
3250 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
3252 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3256 /* Wrap the appropriate string op inside a REP/REPE/REPNE.
3257 We assume the insn is the last one in the basic block, and so emit a jump
3258 to the next insn, rather than just falling through. */
/* One IR iteration per guest iteration: exit to eip_next when ECX==0,
   else decrement ECX, do the op, then either loop back to eip
   unconditionally (plain REP) or exit/loop on 'cond' (REPE/REPNE). */
3260 void dis_REP_op ( X86Condcode cond,
3261 void (*dis_OP)(Int, IRTemp),
3262 Int sz, Addr32 eip, Addr32 eip_next, HChar* name )
3264 IRTemp t_inc = newTemp(Ity_I32);
3265 IRTemp tc = newTemp(Ity_I32); /* ECX */
3267 assign( tc, getIReg(4,R_ECX) );
3269 stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
3271 IRConst_U32(eip_next) ) );
3273 putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
3275 dis_string_op_increment(sz, t_inc);
3278 if (cond == X86CondAlways) {
3279 jmp_lit(Ijk_Boring,eip);
3281 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
3283 IRConst_U32(eip) ) );
3284 jmp_lit(Ijk_Boring,eip_next);
3286 DIP("%s%c\n", name, nameISize(sz));
3290 /*------------------------------------------------------------*/
3291 /*--- Arithmetic, etc. ---*/
3292 /*------------------------------------------------------------*/
3294 /* IMUL E, G. Supplied eip points to the modR/M byte. */
/* Two-operand IMUL: G = G * E (low half only kept); flags set via
   setFlags_MUL with the signed base op.
   NOTE(review): elided listing -- the signature's remaining
   parameters and the return statement are not visible here. */
3296 UInt dis_mul_E_G ( UChar sorb,
3302 UChar rm = getIByte(delta0);
3303 IRType ty = szToITy(size);
3304 IRTemp te = newTemp(ty);
3305 IRTemp tg = newTemp(ty);
3306 IRTemp resLo = newTemp(ty);
3308 assign( tg, getIReg(size, gregOfRM(rm)) );
3309 if (epartIsReg(rm)) {
3310 assign( te, getIReg(size, eregOfRM(rm)) );
3312 IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf );
3313 assign( te, loadLE(ty,mkexpr(addr)) );
3316 setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB );
3318 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
3320 putIReg(size, gregOfRM(rm), mkexpr(resLo) );
3322 if (epartIsReg(rm)) {
3323 DIP("imul%c %s, %s\n", nameISize(size),
3324 nameIReg(size,eregOfRM(rm)),
3325 nameIReg(size,gregOfRM(rm)));
3328 DIP("imul%c %s, %s\n", nameISize(size),
3329 dis_buf, nameIReg(size,gregOfRM(rm)));
3335 /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */
/* Three-operand IMUL with immediate: G = E * imm (low half kept);
   the immediate is sign-extended then truncated to the operand size.
   NOTE(review): elided listing -- remaining signature parameters and
   the return statement are not visible here. */
3337 UInt dis_imul_I_E_G ( UChar sorb,
3344 UChar rm = getIByte(delta);
3345 IRType ty = szToITy(size);
3346 IRTemp te = newTemp(ty);
3347 IRTemp tl = newTemp(ty);
3348 IRTemp resLo = newTemp(ty);
3350 vassert(size == 1 || size == 2 || size == 4);
3352 if (epartIsReg(rm)) {
3353 assign(te, getIReg(size, eregOfRM(rm)));
3356 IRTemp addr = disAMode( &alen, sorb, delta, dis_buf );
3357 assign(te, loadLE(ty, mkexpr(addr)));
3360 d32 = getSDisp(litsize,delta);
/* Truncate the sign-extended immediate to the operand width. */
3363 if (size == 1) d32 &= 0xFF;
3364 if (size == 2) d32 &= 0xFFFF;
3366 assign(tl, mkU(ty,d32));
3368 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
3370 setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB );
3372 putIReg(size, gregOfRM(rm), mkexpr(resLo));
3374 DIP("imul %d, %s, %s\n", d32,
3375 ( epartIsReg(rm) ? nameIReg(size,eregOfRM(rm)) : dis_buf ),
3376 nameIReg(size,gregOfRM(rm)) );
3381 /* Generate an IR sequence to do a count-leading-zeroes operation on
3382 the supplied IRTemp, and return a new IRTemp holding the result.
3383 'ty' may be Ity_I16 or Ity_I32 only. In the case where the
3384 argument is zero, return the number of bits in the word (the
3385 natural semantics). */
3386 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
3388 vassert(ty == Ity_I32 || ty == Ity_I16);
3390 IRTemp src32 = newTemp(Ity_I32);
3391 assign(src32, widenUto32( mkexpr(src) ));
/* Left-align the value so a 32-bit Clz counts only the operand's
   own leading zeroes. */
3393 IRTemp src32x = newTemp(Ity_I32);
3395 binop(Iop_Shl32, mkexpr(src32),
3396 mkU8(32 - 8 * sizeofIRType(ty)))); 
3398 // Clz32 has undefined semantics when its input is zero, so
3399 // special-case around that.
3400 IRTemp res32 = newTemp(Ity_I32);
3404 binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0))),
3405 unop(Iop_Clz32, mkexpr(src32x)),
3406 mkU32(8 * sizeofIRType(ty))
3409 IRTemp res = newTemp(ty);
3410 assign(res, narrowTo(ty, mkexpr(res32)));
3415 /*------------------------------------------------------------*/
3417 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
3419 /*------------------------------------------------------------*/
3421 /* --- Helper functions for dealing with the register stack. --- */
3423 /* --- Set the emulation-warning pseudo-register. --- */
/* Store a 32-bit emulation-warning code into guest state. */
3425 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
3427 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
3428 stmt( IRStmt_Put( OFFB_EMWARN, e ) );
3431 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
/* Used as the value of out-of-range / empty x87 registers. */
3433 static IRExpr* mkQNaN64 ( void )
3435 /* QNaN is 0 2047 1 0(51times)
3436 == 0b 11111111111b 1 0(51times)
3437 == 0x7FF8 0000 0000 0000
3439 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
3442 /* --------- Get/put the top-of-stack pointer. --------- */
/* Read the x87 stack-top index (FTOP) from guest state as I32. */
3444 static IRExpr* get_ftop ( void )
3446 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
/* Write a new (I32) x87 stack-top index into guest state. */
3449 static void put_ftop ( IRExpr* e )
3451 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
3452 stmt( IRStmt_Put( OFFB_FTOP, e ) );
3455 /* --------- Get/put the C3210 bits. --------- */
/* Read the FPU condition-code bits (C3..C0) from guest state. */
3457 static IRExpr* get_C3210 ( void )
3459 return IRExpr_Get( OFFB_FC3210, Ity_I32 );
/* Write the FPU condition-code bits (C3..C0) into guest state. */
3462 static void put_C3210 ( IRExpr* e )
3464 stmt( IRStmt_Put( OFFB_FC3210, e ) );
3467 /* --------- Get/put the FPU rounding mode. --------- */
/* Read the guest FPU rounding-mode field (IRRoundingMode encoding). */
3468 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
3470 return IRExpr_Get( OFFB_FPROUND, Ity_I32 );
/* Write the guest FPU rounding-mode field. */
3473 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
3475 stmt( IRStmt_Put( OFFB_FPROUND, e ) );
3479 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
3480 /* Produces a value in 0 .. 3, which is encoded as per the type
3481 IRRoundingMode. Since the guest_FPROUND value is also encoded as
3482 per IRRoundingMode, we merely need to get it and mask it for
/* ... safety (low two bits only). */
3485 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
3487 return binop( Iop_And32, get_fpround(), mkU32(3) );
/* Constant round-to-nearest, used where this front end deliberately
   ignores the real guest rounding mode (see XXXROUNDINGFIXME notes). */
3490 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
3492 return mkU32(Irrm_NEAREST);
3496 /* --------- Get/set FP register tag bytes. --------- */
3498 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
/* The tag array is indexed circularly via FTOP (IRStmt_PutI). */
3500 static void put_ST_TAG ( Int i, IRExpr* value )
3503 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
3504 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3505 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
3508 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
3509 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
/* Circular read of the tag array, biased by FTOP. */
3511 static IRExpr* get_ST_TAG ( Int i )
3513 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3514 return IRExpr_GetI( descr, get_ftop(), i );
3518 /* --------- Get/set FP registers. --------- */
3520 /* Given i, and some expression e, emit 'ST(i) = e' and set the
3521 register's tag to indicate the register is full. The previous
3522 state of the register is not checked. */
3524 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
3527 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
3528 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3529 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
3530 /* Mark the register as in-use. */
3531 put_ST_TAG(i, mkU8(1));
3534 /* Given i, and some expression e, emit
3535 ST(i) = is_full(i) ? NaN : e
3536 and set the tag accordingly.
/* Models the x87 stack-overflow behaviour: writing to an already-full
   slot yields a QNaN instead of the intended value.
   NOTE(review): elided listing -- the Mux0X arms are partly missing
   from this excerpt. */
3539 static void put_ST ( Int i, IRExpr* value )
3541 put_ST_UNCHECKED( i,
3542 IRExpr_Mux0X( get_ST_TAG(i),
3545 /* non-0 means full */
3552 /* Given i, generate an expression yielding 'ST(i)'. */
/* Circular read of the F64 register array, biased by FTOP; the tag
   is NOT consulted (see get_ST for the checked variant). */
3554 static IRExpr* get_ST_UNCHECKED ( Int i )
3556 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3557 return IRExpr_GetI( descr, get_ftop(), i );
3561 /* Given i, generate an expression yielding
3562 is_full(i) ? ST(i) : NaN
/* Tag-checked read: an empty slot reads as a QNaN (stack underflow
   behaviour).  NOTE(review): elided listing -- the Mux0X's 'empty'
   arm is not visible in this excerpt. */
3565 static IRExpr* get_ST ( Int i )
3568 IRExpr_Mux0X( get_ST_TAG(i),
3571 /* non-0 means full */
3572 get_ST_UNCHECKED(i));
3576 /* Adjust FTOP downwards by one register. */
/* x87 push: decrement the stack-top index (the new ST(0) slot). */
3578 static void fp_push ( void )
3580 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
3583 /* Adjust FTOP upwards by one register, and mark the vacated register
/* ... as empty: tag ST(0) empty first, then increment FTOP. */
3586 static void fp_pop ( void )
3588 put_ST_TAG(0, mkU8(0));
3589 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
3592 /* Clear the C2 bit of the FPU status register, for
3593 sin/cos/tan/sincos. */
/* Claims the argument was in range (see the header note at the top
   of this file). */
3595 static void clear_C2 ( void )
3597 put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) );
3600 /* Invent a plausible-looking FPU status word value:
3601 ((ftop & 7) << 11) | (c3210 & 0x4700)
/* NOTE(review): elided listing -- the Or32/Shl32 wrappers around the
   two operands below are not visible in this excerpt. */
3603 static IRExpr* get_FPU_sw ( void )
3609 binop(Iop_And32, get_ftop(), mkU32(7)),
3611 binop(Iop_And32, get_C3210(), mkU32(0x4700))
3616 /* ------------------------------------------------------- */
3617 /* Given all that stack-mangling junk, we can now go ahead
3618 and describe FP instructions.
3621 /* ST(0) = ST(0) `op` mem64/32(addr)
3622 Need to check ST(0)'s tag on read, but not on write.
/* dbl selects a 64-bit (True) or 32-bit (False) memory operand; the
   32-bit operand is widened to F64 first.  Rounding mode is faked as
   nearest (XXXROUNDINGFIXME).
   NOTE(review): elided listing -- the put_ST/triop wrappers are
   partly missing from this excerpt. */
3625 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
3628 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3632 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3634 loadLE(Ity_F64,mkexpr(addr))
3639 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3641 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
3647 /* ST(0) = mem64/32(addr) `op` ST(0)
3648 Need to check ST(0)'s tag on read, but not on write.
/* Reversed-operand variant of fp_do_op_mem_ST_0 (memory operand on
   the left).  NOTE(review): elided listing -- put_ST/triop wrappers
   are partly missing from this excerpt. */
3651 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
3654 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3658 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3659 loadLE(Ity_F64,mkexpr(addr)),
3665 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3666 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
3673 /* ST(dst) = ST(dst) `op` ST(src).
3674 Check dst and src tags when reading but not on write.
/* pop_after additionally pops the stack after the operation.
   NOTE(review): elided listing -- the put_ST_UNCHECKED/triop body
   is mostly missing from this excerpt. */
3677 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
3680 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"",
3681 (Int)st_src, (Int)st_dst );
3685 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3693 /* ST(dst) = ST(src) `op` ST(dst).
3694 Check dst and src tags when reading but not on write.
/* Reversed-operand variant of fp_do_op_ST_ST.
   NOTE(review): elided listing -- the put_ST_UNCHECKED/triop body
   is mostly missing from this excerpt. */
3697 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
3700 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"",
3701 (Int)st_src, (Int)st_dst );
3705 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3713 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
/* FUCOMI/FCOMI: compare ST(0) with ST(i) and deposit the result into
   the eflags thunk via CC_OP_COPY.  See header note: A and S are
   forced to zero here, which is not what real hardware does. */
3714 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
3716 DIP("fucomi%s %%st(0),%%st(%d)\n", pop_after ? "p" : "", (Int)i );
3717 /* This is a bit of a hack (and isn't really right). It sets
3718 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
3719 documentation implies A and S are unchanged.
3721 /* It's also fishy in that it is used both for COMIP and
3722 UCOMIP, and they aren't the same (although similar). */
3723 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
3724 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
3725 stmt( IRStmt_Put( OFFB_CC_DEP1,
3727 binop(Iop_CmpF64, get_ST(0), get_ST(i)),
3730 /* Set NDEP even though it isn't used. This makes redundant-PUT
3731 elimination of previous stores to this field work better. */
3732 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
/* NOTE(review): decoder for all x87 FPU opcodes (first byte
   0xD8..0xDF).  Structure: one top-level `if` per first_opcode;
   within each, memory-operand forms (modRM < 0xC0, dispatched on
   gregOfRM) and register forms (modRM >= 0xC0).  This listing is
   heavily elided -- original line numbers jump -- so many
   statements, braces and `break`s are not visible below. */
3739 UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta )
3746 /* On entry, delta points at the second byte of the insn (the modrm
3748 UChar first_opcode = getIByte(delta-1);
3749 UChar modrm = getIByte(delta+0);
3751 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
3753 if (first_opcode == 0xD8) {
3756 /* bits 5,4,3 are an opcode extension, and the modRM also
3757 specifies an address. */
3758 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
3761 switch (gregOfRM(modrm)) {
3763 case 0: /* FADD single-real */
3764 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
3767 case 1: /* FMUL single-real */
3768 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
3771 case 2: /* FCOM single-real */
3772 DIP("fcoms %s\n", dis_buf);
3773 /* This forces C1 to zero, which isn't right. */
3780 loadLE(Ity_F32,mkexpr(addr)))),
3786 case 3: /* FCOMP single-real */
3787 DIP("fcomps %s\n", dis_buf);
3788 /* This forces C1 to zero, which isn't right. */
3795 loadLE(Ity_F32,mkexpr(addr)))),
3802 case 4: /* FSUB single-real */
3803 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
3806 case 5: /* FSUBR single-real */
3807 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
3810 case 6: /* FDIV single-real */
3811 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
3814 case 7: /* FDIVR single-real */
3815 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
3819 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
3820 vex_printf("first_opcode == 0xD8\n");
/* 0xD8 register forms: ST(0) is always the destination. */
3827 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
3828 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
3831 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
3832 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
3835 /* Dunno if this is right */
3836 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
3837 r_dst = (UInt)modrm - 0xD0;
3838 DIP("fcom %%st(0),%%st(%d)\n", (Int)r_dst);
3839 /* This forces C1 to zero, which isn't right. */
3843 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
3849 /* Dunno if this is right */
3850 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
3851 r_dst = (UInt)modrm - 0xD8;
3852 DIP("fcomp %%st(0),%%st(%d)\n", (Int)r_dst);
3853 /* This forces C1 to zero, which isn't right. */
3857 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
3864 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
3865 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
3868 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
3869 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
3872 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
3873 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
3876 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
3877 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
3886 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
3888 if (first_opcode == 0xD9) {
3891 /* bits 5,4,3 are an opcode extension, and the modRM also
3892 specifies an address. */
3893 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
3896 switch (gregOfRM(modrm)) {
3898 case 0: /* FLD single-real */
3899 DIP("flds %s\n", dis_buf);
3901 put_ST(0, unop(Iop_F32toF64,
3902 loadLE(Ity_F32, mkexpr(addr))));
3905 case 2: /* FST single-real */
3906 DIP("fsts %s\n", dis_buf);
3907 storeLE(mkexpr(addr),
3908 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
3911 case 3: /* FSTP single-real */
3912 DIP("fstps %s\n", dis_buf);
3913 storeLE(mkexpr(addr),
3914 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
3918 case 4: { /* FLDENV m28 */
3919 /* Uses dirty helper:
3920 VexEmWarn x86g_do_FLDENV ( VexGuestX86State*, HWord ) */
3921 IRTemp ew = newTemp(Ity_I32);
3922 IRDirty* d = unsafeIRDirty_0_N (
3924 "x86g_dirtyhelper_FLDENV",
3925 &x86g_dirtyhelper_FLDENV,
3926 mkIRExprVec_1( mkexpr(addr) )
3930 /* declare we're reading memory */
3932 d->mAddr = mkexpr(addr);
3935 /* declare we're writing guest state */
3938 d->fxState[0].fx = Ifx_Write;
3939 d->fxState[0].offset = OFFB_FTOP;
3940 d->fxState[0].size = sizeof(UInt);
3942 d->fxState[1].fx = Ifx_Write;
3943 d->fxState[1].offset = OFFB_FPTAGS;
3944 d->fxState[1].size = 8 * sizeof(UChar);
3946 d->fxState[2].fx = Ifx_Write;
3947 d->fxState[2].offset = OFFB_FPROUND;
3948 d->fxState[2].size = sizeof(UInt);
3950 d->fxState[3].fx = Ifx_Write;
3951 d->fxState[3].offset = OFFB_FC3210;
3952 d->fxState[3].size = sizeof(UInt);
3954 stmt( IRStmt_Dirty(d) );
3956 /* ew contains any emulation warning we may need to
3957 issue. If needed, side-exit to the next insn,
3958 reporting the warning, so that Valgrind's dispatcher
3959 sees the warning. */
3960 put_emwarn( mkexpr(ew) );
3963 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
3965 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
3969 DIP("fldenv %s\n", dis_buf);
3973 case 5: {/* FLDCW */
3974 /* The only thing we observe in the control word is the
3975 rounding mode. Therefore, pass the 16-bit value
3976 (x87 native-format control word) to a clean helper,
3977 getting back a 64-bit value, the lower half of which
3978 is the FPROUND value to store, and the upper half of
3979 which is the emulation-warning token which may be
3982 /* ULong x86h_check_fldcw ( UInt ); */
3983 IRTemp t64 = newTemp(Ity_I64);
3984 IRTemp ew = newTemp(Ity_I32);
3985 DIP("fldcw %s\n", dis_buf);
3986 assign( t64, mkIRExprCCall(
3987 Ity_I64, 0/*regparms*/,
3992 loadLE(Ity_I16, mkexpr(addr)))
3997 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
3998 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
3999 put_emwarn( mkexpr(ew) );
4000 /* Finally, if an emulation warning was reported,
4001 side-exit to the next insn, reporting the warning,
4002 so that Valgrind's dispatcher sees the warning. */
4005 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4007 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
4013 case 6: { /* FNSTENV m28 */
4014 /* Uses dirty helper:
4015 void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */
4016 IRDirty* d = unsafeIRDirty_0_N (
4018 "x86g_dirtyhelper_FSTENV",
4019 &x86g_dirtyhelper_FSTENV,
4020 mkIRExprVec_1( mkexpr(addr) )
4023 /* declare we're writing memory */
4025 d->mAddr = mkexpr(addr);
4028 /* declare we're reading guest state */
4031 d->fxState[0].fx = Ifx_Read;
4032 d->fxState[0].offset = OFFB_FTOP;
4033 d->fxState[0].size = sizeof(UInt);
4035 d->fxState[1].fx = Ifx_Read;
4036 d->fxState[1].offset = OFFB_FPTAGS;
4037 d->fxState[1].size = 8 * sizeof(UChar);
4039 d->fxState[2].fx = Ifx_Read;
4040 d->fxState[2].offset = OFFB_FPROUND;
4041 d->fxState[2].size = sizeof(UInt);
4043 d->fxState[3].fx = Ifx_Read;
4044 d->fxState[3].offset = OFFB_FC3210;
4045 d->fxState[3].size = sizeof(UInt);
4047 stmt( IRStmt_Dirty(d) );
4049 DIP("fnstenv %s\n", dis_buf);
4053 case 7: /* FNSTCW */
4054 /* Fake up a native x87 FPU control word. The only
4055 thing it depends on is FPROUND[1:0], so call a clean
4056 helper to cook it up. */
4057 /* UInt x86h_create_fpucw ( UInt fpround ) */
4058 DIP("fnstcw %s\n", dis_buf);
4064 "x86g_create_fpucw", &x86g_create_fpucw,
4065 mkIRExprVec_1( get_fpround() )
4072 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4073 vex_printf("first_opcode == 0xD9\n");
/* 0xD9 register forms: stack manipulation, constants, transcendentals. */
4081 case 0xC0 ... 0xC7: /* FLD %st(?) */
4082 r_src = (UInt)modrm - 0xC0;
4083 DIP("fld %%st(%d)\n", (Int)r_src);
4084 t1 = newTemp(Ity_F64);
4085 assign(t1, get_ST(r_src));
4087 put_ST(0, mkexpr(t1));
4090 case 0xC8 ... 0xCF: /* FXCH %st(?) */
4091 r_src = (UInt)modrm - 0xC8;
4092 DIP("fxch %%st(%d)\n", (Int)r_src);
4093 t1 = newTemp(Ity_F64);
4094 t2 = newTemp(Ity_F64);
4095 assign(t1, get_ST(0));
4096 assign(t2, get_ST(r_src));
4097 put_ST_UNCHECKED(0, mkexpr(t2));
4098 put_ST_UNCHECKED(r_src, mkexpr(t1));
4101 case 0xE0: /* FCHS */
4103 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
4106 case 0xE1: /* FABS */
4108 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
4111 case 0xE4: /* FTST */
4113 /* This forces C1 to zero, which isn't right. */
4114 /* Well, in fact the Intel docs say (bizarrely): "C1 is
4115 set to 0 if stack underflow occurred; otherwise, set
4116 to 0" which is pretty nonsensical. I guess it's a
4123 IRExpr_Const(IRConst_F64i(0x0ULL))),
4129 case 0xE5: { /* FXAM */
4130 /* This is an interesting one. It examines %st(0),
4131 regardless of whether the tag says it's empty or not.
4132 Here, just pass both the tag (in our format) and the
4133 value (as a double, actually a ULong) to a helper
4136 = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)),
4137 unop(Iop_ReinterpF64asI64,
4138 get_ST_UNCHECKED(0)) );
4139 put_C3210(mkIRExprCCall(
4142 "x86g_calculate_FXAM", &x86g_calculate_FXAM,
4149 case 0xE8: /* FLD1 */
4152 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
4153 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
4156 case 0xE9: /* FLDL2T */
4159 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
4160 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
4163 case 0xEA: /* FLDL2E */
4166 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
4167 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
4170 case 0xEB: /* FLDPI */
4173 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
4174 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
4177 case 0xEC: /* FLDLG2 */
4180 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
4181 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
4184 case 0xED: /* FLDLN2 */
4187 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
4188 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
4191 case 0xEE: /* FLDZ */
4194 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
4195 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
4198 case 0xF0: /* F2XM1 */
4202 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4206 case 0xF1: /* FYL2X */
4210 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4216 case 0xF2: /* FPTAN */
4220 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4223 put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
4224 clear_C2(); /* HACK */
4227 case 0xF3: /* FPATAN */
4231 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4237 case 0xF4: { /* FXTRACT */
4238 IRTemp argF = newTemp(Ity_F64);
4239 IRTemp sigF = newTemp(Ity_F64);
4240 IRTemp expF = newTemp(Ity_F64);
4241 IRTemp argI = newTemp(Ity_I64);
4242 IRTemp sigI = newTemp(Ity_I64);
4243 IRTemp expI = newTemp(Ity_I64);
4245 assign( argF, get_ST(0) );
4246 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
/* Two helper calls: flag 0 extracts the significand, 1 the exponent. */
4249 Ity_I64, 0/*regparms*/,
4250 "x86amd64g_calculate_FXTRACT",
4251 &x86amd64g_calculate_FXTRACT,
4252 mkIRExprVec_2( mkexpr(argI),
4253 mkIRExpr_HWord(0)/*sig*/ ))
4257 Ity_I64, 0/*regparms*/,
4258 "x86amd64g_calculate_FXTRACT",
4259 &x86amd64g_calculate_FXTRACT,
4260 mkIRExprVec_2( mkexpr(argI),
4261 mkIRExpr_HWord(1)/*exp*/ ))
4263 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
4264 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
4266 put_ST_UNCHECKED(0, mkexpr(expF) );
4269 put_ST(0, mkexpr(sigF) );
4273 case 0xF5: { /* FPREM1 -- IEEE compliant */
4274 IRTemp a1 = newTemp(Ity_F64);
4275 IRTemp a2 = newTemp(Ity_F64);
4277 /* Do FPREM1 twice, once to get the remainder, and once
4278 to get the C3210 flag values. */
4279 assign( a1, get_ST(0) );
4280 assign( a2, get_ST(1) );
4283 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4287 triop(Iop_PRem1C3210F64,
4288 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4294 case 0xF7: /* FINCSTP */
4296 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4299 case 0xF8: { /* FPREM -- not IEEE compliant */
4300 IRTemp a1 = newTemp(Ity_F64);
4301 IRTemp a2 = newTemp(Ity_F64);
4303 /* Do FPREM twice, once to get the remainder, and once
4304 to get the C3210 flag values. */
4305 assign( a1, get_ST(0) );
4306 assign( a2, get_ST(1) );
4309 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4313 triop(Iop_PRemC3210F64,
4314 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4320 case 0xF9: /* FYL2XP1 */
4323 triop(Iop_Yl2xp1F64,
4324 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4330 case 0xFA: /* FSQRT */
4334 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4338 case 0xFB: { /* FSINCOS */
4339 IRTemp a1 = newTemp(Ity_F64);
4340 assign( a1, get_ST(0) );
4344 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4349 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4351 clear_C2(); /* HACK */
4355 case 0xFC: /* FRNDINT */
4358 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
4361 case 0xFD: /* FSCALE */
4365 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4370 case 0xFE: /* FSIN */
4374 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4376 clear_C2(); /* HACK */
4379 case 0xFF: /* FCOS */
4383 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4385 clear_C2(); /* HACK */
4394 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
4396 if (first_opcode == 0xDA) {
4400 /* bits 5,4,3 are an opcode extension, and the modRM also
4401 specifies an address. */
4403 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4405 switch (gregOfRM(modrm)) {
4407 case 0: /* FIADD m32int */ /* ST(0) += m32int */
4408 DIP("fiaddl %s\n", dis_buf);
4412 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
4413 DIP("fimull %s\n", dis_buf);
4417 case 2: /* FICOM m32int */
4418 DIP("ficoml %s\n", dis_buf);
4419 /* This forces C1 to zero, which isn't right. */
4426 loadLE(Ity_I32,mkexpr(addr)))),
4432 case 3: /* FICOMP m32int */
4433 DIP("ficompl %s\n", dis_buf);
4434 /* This forces C1 to zero, which isn't right. */
4441 loadLE(Ity_I32,mkexpr(addr)))),
4448 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
4449 DIP("fisubl %s\n", dis_buf);
4453 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
4454 DIP("fisubrl %s\n", dis_buf);
4458 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
4459 DIP("fidivl %s\n", dis_buf);
4463 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
4464 DIP("fidivrl %s\n", dis_buf);
4471 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4474 loadLE(Ity_I32, mkexpr(addr)))));
4480 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4482 loadLE(Ity_I32, mkexpr(addr))),
4487 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4488 vex_printf("first_opcode == 0xDA\n");
/* 0xDA register forms: conditional moves keyed off guest eflags. */
4497 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
4498 r_src = (UInt)modrm - 0xC0;
4499 DIP("fcmovb %%st(%d), %%st(0)\n", (Int)r_src);
4503 mk_x86g_calculate_condition(X86CondB)),
4504 get_ST(0), get_ST(r_src)) );
4507 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
4508 r_src = (UInt)modrm - 0xC8;
4509 DIP("fcmovz %%st(%d), %%st(0)\n", (Int)r_src);
4513 mk_x86g_calculate_condition(X86CondZ)),
4514 get_ST(0), get_ST(r_src)) );
4517 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
4518 r_src = (UInt)modrm - 0xD0;
4519 DIP("fcmovbe %%st(%d), %%st(0)\n", (Int)r_src);
4523 mk_x86g_calculate_condition(X86CondBE)),
4524 get_ST(0), get_ST(r_src)) );
4527 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
4528 r_src = (UInt)modrm - 0xD8;
4529 DIP("fcmovu %%st(%d), %%st(0)\n", (Int)r_src);
4533 mk_x86g_calculate_condition(X86CondP)),
4534 get_ST(0), get_ST(r_src)) );
4537 case 0xE9: /* FUCOMPP %st(0),%st(1) */
4538 DIP("fucompp %%st(0),%%st(1)\n");
4539 /* This forces C1 to zero, which isn't right. */
4543 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
4558 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
4560 if (first_opcode == 0xDB) {
4563 /* bits 5,4,3 are an opcode extension, and the modRM also
4564 specifies an address. */
4565 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4568 switch (gregOfRM(modrm)) {
4570 case 0: /* FILD m32int */
4571 DIP("fildl %s\n", dis_buf);
4573 put_ST(0, unop(Iop_I32StoF64,
4574 loadLE(Ity_I32, mkexpr(addr))));
4577 case 1: /* FISTTPL m32 (SSE3) */
4578 DIP("fisttpl %s\n", dis_buf);
4579 storeLE( mkexpr(addr),
4580 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
4584 case 2: /* FIST m32 */
4585 DIP("fistl %s\n", dis_buf);
4586 storeLE( mkexpr(addr),
4587 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4590 case 3: /* FISTP m32 */
4591 DIP("fistpl %s\n", dis_buf);
4592 storeLE( mkexpr(addr),
4593 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4597 case 5: { /* FLD extended-real */
4598 /* Uses dirty helper:
4599 ULong x86g_loadF80le ( UInt )
4600 addr holds the address. First, do a dirty call to
4601 get hold of the data. */
4602 IRTemp val = newTemp(Ity_I64);
4603 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
4605 IRDirty* d = unsafeIRDirty_1_N (
4608 "x86g_dirtyhelper_loadF80le",
4609 &x86g_dirtyhelper_loadF80le,
4612 /* declare that we're reading memory */
4614 d->mAddr = mkexpr(addr);
4617 /* execute the dirty call, dumping the result in val. */
4618 stmt( IRStmt_Dirty(d) );
4620 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
4622 DIP("fldt %s\n", dis_buf);
4626 case 7: { /* FSTP extended-real */
4627 /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */
4629 = mkIRExprVec_2( mkexpr(addr),
4630 unop(Iop_ReinterpF64asI64, get_ST(0)) );
4632 IRDirty* d = unsafeIRDirty_0_N (
4634 "x86g_dirtyhelper_storeF80le",
4635 &x86g_dirtyhelper_storeF80le,
4638 /* declare we're writing memory */
4640 d->mAddr = mkexpr(addr);
4643 /* execute the dirty call. */
4644 stmt( IRStmt_Dirty(d) );
/* NOTE(review): format string below looks garbled -- the newline
   sits before the operand ("fstpt\n %s" vs the "fstpt %s\n" pattern
   used everywhere else).  Runtime string, so flagged not fixed. */
4647 DIP("fstpt\n %s", dis_buf);
4652 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4653 vex_printf("first_opcode == 0xDB\n");
4662 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
4663 r_src = (UInt)modrm - 0xC0;
4664 DIP("fcmovnb %%st(%d), %%st(0)\n", (Int)r_src);
4668 mk_x86g_calculate_condition(X86CondNB)),
4669 get_ST(0), get_ST(r_src)) );
4672 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
4673 r_src = (UInt)modrm - 0xC8;
4674 DIP("fcmovnz %%st(%d), %%st(0)\n", (Int)r_src);
4678 mk_x86g_calculate_condition(X86CondNZ)),
4679 get_ST(0), get_ST(r_src)) );
4682 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
4683 r_src = (UInt)modrm - 0xD0;
4684 DIP("fcmovnbe %%st(%d), %%st(0)\n", (Int)r_src);
4688 mk_x86g_calculate_condition(X86CondNBE)),
4689 get_ST(0), get_ST(r_src)) );
4692 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
4693 r_src = (UInt)modrm - 0xD8;
4694 DIP("fcmovnu %%st(%d), %%st(0)\n", (Int)r_src);
4698 mk_x86g_calculate_condition(X86CondNP)),
4699 get_ST(0), get_ST(r_src)) );
/* presumably the FNINIT case -- its case label is elided here. */
4707 /* Uses dirty helper:
4708 void x86g_do_FINIT ( VexGuestX86State* ) */
4709 IRDirty* d = unsafeIRDirty_0_N (
4711 "x86g_dirtyhelper_FINIT",
4712 &x86g_dirtyhelper_FINIT,
4717 /* declare we're writing guest state */
4720 d->fxState[0].fx = Ifx_Write;
4721 d->fxState[0].offset = OFFB_FTOP;
4722 d->fxState[0].size = sizeof(UInt);
4724 d->fxState[1].fx = Ifx_Write;
4725 d->fxState[1].offset = OFFB_FPREGS;
4726 d->fxState[1].size = 8 * sizeof(ULong);
4728 d->fxState[2].fx = Ifx_Write;
4729 d->fxState[2].offset = OFFB_FPTAGS;
4730 d->fxState[2].size = 8 * sizeof(UChar);
4732 d->fxState[3].fx = Ifx_Write;
4733 d->fxState[3].offset = OFFB_FPROUND;
4734 d->fxState[3].size = sizeof(UInt);
4736 d->fxState[4].fx = Ifx_Write;
4737 d->fxState[4].offset = OFFB_FC3210;
4738 d->fxState[4].size = sizeof(UInt);
4740 stmt( IRStmt_Dirty(d) );
4746 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
4747 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
4750 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
4751 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
4760 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
4762 if (first_opcode == 0xDC) {
4765 /* bits 5,4,3 are an opcode extension, and the modRM also
4766 specifies an address. */
4767 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4770 switch (gregOfRM(modrm)) {
4772 case 0: /* FADD double-real */
4773 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
4776 case 1: /* FMUL double-real */
4777 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
4780 case 2: /* FCOM double-real */
4781 DIP("fcoml %s\n", dis_buf);
4782 /* This forces C1 to zero, which isn't right. */
4788 loadLE(Ity_F64,mkexpr(addr))),
4794 case 3: /* FCOMP double-real */
4795 DIP("fcompl %s\n", dis_buf);
4796 /* This forces C1 to zero, which isn't right. */
4802 loadLE(Ity_F64,mkexpr(addr))),
4809 case 4: /* FSUB double-real */
4810 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
4813 case 5: /* FSUBR double-real */
4814 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
4817 case 6: /* FDIV double-real */
4818 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
4821 case 7: /* FDIVR double-real */
4822 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
4826 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4827 vex_printf("first_opcode == 0xDC\n");
/* 0xDC register forms: destination is ST(i), not ST(0) -- note the
   swapped src/dst arguments relative to the 0xD8 cases. */
4836 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
4837 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
4840 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
4841 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
4844 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
4845 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
4848 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
4849 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
4852 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
4853 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
4856 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
4857 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
4867 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
4869 if (first_opcode == 0xDD) {
4873 /* bits 5,4,3 are an opcode extension, and the modRM also
4874 specifies an address. */
4875 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4878 switch (gregOfRM(modrm)) {
4880 case 0: /* FLD double-real */
4881 DIP("fldl %s\n", dis_buf);
4883 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
4886 case 1: /* FISTTPQ m64 (SSE3) */
/* NOTE(review): mnemonic in the debug string below looks like a typo
   ("fistppll"; expected "fisttpll" for FISTTP).  Runtime string,
   flagged not fixed. */
4887 DIP("fistppll %s\n", dis_buf);
4888 storeLE( mkexpr(addr),
4889 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
4893 case 2: /* FST double-real */
4894 DIP("fstl %s\n", dis_buf);
4895 storeLE(mkexpr(addr), get_ST(0));
4898 case 3: /* FSTP double-real */
4899 DIP("fstpl %s\n", dis_buf);
4900 storeLE(mkexpr(addr), get_ST(0));
4904 case 4: { /* FRSTOR m108 */
4905 /* Uses dirty helper:
4906 VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
4907 IRTemp ew = newTemp(Ity_I32);
4908 IRDirty* d = unsafeIRDirty_0_N (
4910 "x86g_dirtyhelper_FRSTOR",
4911 &x86g_dirtyhelper_FRSTOR,
4912 mkIRExprVec_1( mkexpr(addr) )
4916 /* declare we're reading memory */
4918 d->mAddr = mkexpr(addr);
4921 /* declare we're writing guest state */
4924 d->fxState[0].fx = Ifx_Write;
4925 d->fxState[0].offset = OFFB_FTOP;
4926 d->fxState[0].size = sizeof(UInt);
4928 d->fxState[1].fx = Ifx_Write;
4929 d->fxState[1].offset = OFFB_FPREGS;
4930 d->fxState[1].size = 8 * sizeof(ULong);
4932 d->fxState[2].fx = Ifx_Write;
4933 d->fxState[2].offset = OFFB_FPTAGS;
4934 d->fxState[2].size = 8 * sizeof(UChar);
4936 d->fxState[3].fx = Ifx_Write;
4937 d->fxState[3].offset = OFFB_FPROUND;
4938 d->fxState[3].size = sizeof(UInt);
4940 d->fxState[4].fx = Ifx_Write;
4941 d->fxState[4].offset = OFFB_FC3210;
4942 d->fxState[4].size = sizeof(UInt);
4944 stmt( IRStmt_Dirty(d) );
4946 /* ew contains any emulation warning we may need to
4947 issue. If needed, side-exit to the next insn,
4948 reporting the warning, so that Valgrind's dispatcher
4949 sees the warning. */
4950 put_emwarn( mkexpr(ew) );
4953 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4955 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
4959 DIP("frstor %s\n", dis_buf);
4963 case 6: { /* FNSAVE m108 */
4964 /* Uses dirty helper:
4965 void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
4966 IRDirty* d = unsafeIRDirty_0_N (
4968 "x86g_dirtyhelper_FSAVE",
4969 &x86g_dirtyhelper_FSAVE,
4970 mkIRExprVec_1( mkexpr(addr) )
4973 /* declare we're writing memory */
4975 d->mAddr = mkexpr(addr);
4978 /* declare we're reading guest state */
4981 d->fxState[0].fx = Ifx_Read;
4982 d->fxState[0].offset = OFFB_FTOP;
4983 d->fxState[0].size = sizeof(UInt);
4985 d->fxState[1].fx = Ifx_Read;
4986 d->fxState[1].offset = OFFB_FPREGS;
4987 d->fxState[1].size = 8 * sizeof(ULong);
4989 d->fxState[2].fx = Ifx_Read;
4990 d->fxState[2].offset = OFFB_FPTAGS;
4991 d->fxState[2].size = 8 * sizeof(UChar);
4993 d->fxState[3].fx = Ifx_Read;
4994 d->fxState[3].offset = OFFB_FPROUND;
4995 d->fxState[3].size = sizeof(UInt);
4997 d->fxState[4].fx = Ifx_Read;
4998 d->fxState[4].offset = OFFB_FC3210;
4999 d->fxState[4].size = sizeof(UInt);
5001 stmt( IRStmt_Dirty(d) );
5003 DIP("fnsave %s\n", dis_buf);
5007 case 7: { /* FNSTSW m16 */
5008 IRExpr* sw = get_FPU_sw();
5009 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
5010 storeLE( mkexpr(addr), sw );
5011 DIP("fnstsw %s\n", dis_buf);
5016 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5017 vex_printf("first_opcode == 0xDD\n");
5024 case 0xC0 ... 0xC7: /* FFREE %st(?) */
5025 r_dst = (UInt)modrm - 0xC0;
5026 DIP("ffree %%st(%d)\n", (Int)r_dst);
5027 put_ST_TAG ( r_dst, mkU8(0) );
5030 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
5031 r_dst = (UInt)modrm - 0xD0;
5032 DIP("fst %%st(0),%%st(%d)\n", (Int)r_dst);
5033 /* P4 manual says: "If the destination operand is a
5034 non-empty register, the invalid-operation exception
5035 is not generated. Hence put_ST_UNCHECKED. */
5036 put_ST_UNCHECKED(r_dst, get_ST(0));
5039 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
5040 r_dst = (UInt)modrm - 0xD8;
5041 DIP("fstp %%st(0),%%st(%d)\n", (Int)r_dst);
5042 /* P4 manual says: "If the destination operand is a
5043 non-empty register, the invalid-operation exception
5044 is not generated. Hence put_ST_UNCHECKED. */
5045 put_ST_UNCHECKED(r_dst, get_ST(0));
5049 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
5050 r_dst = (UInt)modrm - 0xE0;
5051 DIP("fucom %%st(0),%%st(%d)\n", (Int)r_dst);
5052 /* This forces C1 to zero, which isn't right. */
5056 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5062 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
5063 r_dst = (UInt)modrm - 0xE8;
5064 DIP("fucomp %%st(0),%%st(%d)\n", (Int)r_dst);
5065 /* This forces C1 to zero, which isn't right. */
5069 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5082 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
5084 if (first_opcode == 0xDE) {
5088 /* bits 5,4,3 are an opcode extension, and the modRM also
5089 specifies an address. */
5091 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5094 switch (gregOfRM(modrm)) {
5096 case 0: /* FIADD m16int */ /* ST(0) += m16int */
5097 DIP("fiaddw %s\n", dis_buf);
5101 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
5102 DIP("fimulw %s\n", dis_buf);
5106 case 2: /* FICOM m16int */
5107 DIP("ficomw %s\n", dis_buf);
5108 /* This forces C1 to zero, which isn't right. */
5116 loadLE(Ity_I16,mkexpr(addr))))),
5122 case 3: /* FICOMP m16int */
5123 DIP("ficompw %s\n", dis_buf);
5124 /* This forces C1 to zero, which isn't right. */
5132 loadLE(Ity_I16,mkexpr(addr))))),
5139 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
5140 DIP("fisubw %s\n", dis_buf);
5144 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
5145 DIP("fisubrw %s\n", dis_buf);
5149 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
/* NOTE(review): wrong mnemonic in the debug string below -- this is
   the FIDIV case but it prints "fisubw" (copy-paste from case 4;
   expected "fidivw").  Runtime string, flagged not fixed here. */
5150 DIP("fisubw %s\n", dis_buf);
5154 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
5155 DIP("fidivrw %s\n", dis_buf);
5162 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5166 loadLE(Ity_I16, mkexpr(addr))))));
5172 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5175 loadLE(Ity_I16, mkexpr(addr)))),
5180 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5181 vex_printf("first_opcode == 0xDE\n");
/* 0xDE register forms: the pop-after variants (FADDP etc). */
5190 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
5191 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
5194 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
5195 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
5198 case 0xD9: /* FCOMPP %st(0),%st(1) */
/* NOTE(review): debug string below says "fuompp"; this is the FCOMPP
   case, so "fcompp" looks intended.  Runtime string, flagged not
   fixed here. */
5199 DIP("fuompp %%st(0),%%st(1)\n");
5200 /* This forces C1 to zero, which isn't right. */
5204 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5212 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
5213 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
5216 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
5217 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
5220 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
5221 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
5224 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
5225 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
5235 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
5237 if (first_opcode == 0xDF) {
5241 /* bits 5,4,3 are an opcode extension, and the modRM also
5242 specifies an address. */
5243 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5246 switch (gregOfRM(modrm)) {
5248 case 0: /* FILD m16int */
5249 DIP("fildw %s\n", dis_buf);
5251 put_ST(0, unop(Iop_I32StoF64,
5253 loadLE(Ity_I16, mkexpr(addr)))));
5256 case 1: /* FISTTPS m16 (SSE3) */
5257 DIP("fisttps %s\n", dis_buf);
5258 storeLE( mkexpr(addr),
5259 binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
5263 case 2: /* FIST m16 */
/* NOTE(review): this is the non-popping FIST m16 case but the debug
   string says "fistp"; "fists"/"fistw" looks intended (compare the
   popping case 3 below, which prints "fistps").  Runtime string,
   flagged not fixed here. */
5264 DIP("fistp %s\n", dis_buf);
5265 storeLE( mkexpr(addr),
5266 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5269 case 3: /* FISTP m16 */
5270 DIP("fistps %s\n", dis_buf);
5271 storeLE( mkexpr(addr),
5272 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5276 case 5: /* FILD m64 */
5277 DIP("fildll %s\n", dis_buf);
5279 put_ST(0, binop(Iop_I64StoF64,
5281 loadLE(Ity_I64, mkexpr(addr))));
5284 case 7: /* FISTP m64 */
5285 DIP("fistpll %s\n", dis_buf);
5286 storeLE( mkexpr(addr),
5287 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
5292 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5293 vex_printf("first_opcode == 0xDF\n");
5302 case 0xC0: /* FFREEP %st(0) */
5303 DIP("ffreep %%st(%d)\n", 0);
5304 put_ST_TAG ( 0, mkU8(0) );
5308 case 0xE0: /* FNSTSW %ax */
5309 DIP("fnstsw %%ax\n");
5310 /* Get the FPU status word value and dump it in %AX. */
5312 /* The obvious thing to do is simply dump the 16-bit
5313 status word value in %AX. However, due to a
5314 limitation in Memcheck's origin tracking
5315 machinery, this causes Memcheck not to track the
5316 origin of any undefinedness into %AH (only into
5317 %AL/%AX/%EAX), which means origins are lost in
5318 the sequence "fnstsw %ax; test $M,%ah; jcond .." */
5319 putIReg(2, R_EAX, get_FPU_sw());
5321 /* So a somewhat lame kludge is to make it very
5322 clear to Memcheck that the value is written to
5323 both %AH and %AL. This generates marginally
5324 worse code, but I don't think it matters much. */
5325 IRTemp t16 = newTemp(Ity_I16);
5326 assign(t16, get_FPU_sw());
5327 putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) );
5328 putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
5332 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
5333 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
5336 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
5337 /* not really right since COMIP != UCOMIP */
5338 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
5349 vpanic("dis_FPU(x86): invalid primary opcode");
5360 /*------------------------------------------------------------*/
5362 /*--- MMX INSTRUCTIONS ---*/
5364 /*------------------------------------------------------------*/
5366 /* Effect of MMX insns on x87 FPU state (table 11-2 of
5367 IA32 arch manual, volume 3):
5369 Read from, or write to MMX register (viz, any insn except EMMS):
5370 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
5371 * FP stack pointer set to zero
5374 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
5375 * FP stack pointer set to zero
/* Run before any (non-EMMS) MMX insn: mark all 8 x87 tag bytes as
   Valid (nonzero), per the table-11-2 comment above.  NOTE(review):
   interior lines are elided here (declaration of `i`, braces, and --
   per the comment above -- presumably the FTOP := 0 reset; confirm
   against the full source). */
5378 static void do_MMX_preamble ( void )
5381 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5382 IRExpr* zero = mkU32(0);
5383 IRExpr* tag1 = mkU8(1);
/* Write tag1 to FPTAGS[0..7] via indexed PutI (base expr `zero`). */
5385 for (i = 0; i < 8; i++)
5386 stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
/* Run for EMMS: mark all 8 x87 tag bytes as Invalid (zero) -- the
   mirror image of do_MMX_preamble.  NOTE(review): interior lines are
   elided here (declaration of `i`, braces, and presumably the
   FTOP := 0 reset; confirm against the full source). */
5389 static void do_EMMS_preamble ( void )
5392 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5393 IRExpr* zero = mkU32(0);
5394 IRExpr* tag0 = mkU8(0);
/* Write tag0 to FPTAGS[0..7] via indexed PutI (base expr `zero`). */
5396 for (i = 0; i < 8; i++)
5397 stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
/* Read MMX register 'archreg' (0 .. 7) as a 64-bit IR expression.
   MMX registers alias the x87 register file: they are stored at
   consecutive 8-byte slots starting at OFFB_FPREGS. */
5401 static IRExpr* getMMXReg ( UInt archreg )
5403 vassert(archreg < 8);
5404 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
/* Write the 64-bit expression 'e' into MMX register 'archreg'
   (0 .. 7), at the same guest-state slots that getMMXReg reads. */
5408 static void putMMXReg ( UInt archreg, IRExpr* e )
5410 vassert(archreg < 8);
/* only a 64-bit value may be stored into an MMX register */
5411 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
5412 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
5416 /* Helper for non-shift MMX insns. Note this is incomplete in the
5417 sense that it does not first call do_MMX_preamble() -- that is the
5418 responsibility of its caller. */
/* Decode and translate one non-shift MMX binary insn of the form
   "op E(mmxreg-or-mem), G(mmxreg)".  'opc' selects either a native
   IROp ('op') or a clean-helper call ('hName'/'hAddr', via XXX).
   Flags set by the decode table (visible on lines not all shown):
   'eLeft' puts E on the left of the binop, 'invG' complements G
   first (used for PANDN).  Returns the updated insn delta. */
5421 UInt dis_MMXop_regmem_to_reg ( UChar sorb,
5425 Bool show_granularity )
5428 UChar modrm = getIByte(delta);
5429 Bool isReg = epartIsReg(modrm);
5430 IRExpr* argL = NULL;
5431 IRExpr* argR = NULL;
5432 IRExpr* argG = NULL;
5433 IRExpr* argE = NULL;
5434 IRTemp res = newTemp(Ity_I64);
5437 IROp op = Iop_INVALID;
5439 HChar* hName = NULL;
/* XXX: route this opcode through a clean helper instead of an IROp */
5442 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
5445 /* Original MMX ones */
5446 case 0xFC: op = Iop_Add8x8; break;
5447 case 0xFD: op = Iop_Add16x4; break;
5448 case 0xFE: op = Iop_Add32x2; break;
5450 case 0xEC: op = Iop_QAdd8Sx8; break;
5451 case 0xED: op = Iop_QAdd16Sx4; break;
5453 case 0xDC: op = Iop_QAdd8Ux8; break;
5454 case 0xDD: op = Iop_QAdd16Ux4; break;
5456 case 0xF8: op = Iop_Sub8x8; break;
5457 case 0xF9: op = Iop_Sub16x4; break;
5458 case 0xFA: op = Iop_Sub32x2; break;
5460 case 0xE8: op = Iop_QSub8Sx8; break;
5461 case 0xE9: op = Iop_QSub16Sx4; break;
5463 case 0xD8: op = Iop_QSub8Ux8; break;
5464 case 0xD9: op = Iop_QSub16Ux4; break;
5466 case 0xE5: op = Iop_MulHi16Sx4; break;
5467 case 0xD5: op = Iop_Mul16x4; break;
/* PMADDWD has no single IROp; done by helper */
5468 case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break;
5470 case 0x74: op = Iop_CmpEQ8x8; break;
5471 case 0x75: op = Iop_CmpEQ16x4; break;
5472 case 0x76: op = Iop_CmpEQ32x2; break;
5474 case 0x64: op = Iop_CmpGT8Sx8; break;
5475 case 0x65: op = Iop_CmpGT16Sx4; break;
5476 case 0x66: op = Iop_CmpGT32Sx2; break;
/* pack/unpack ops take E on the left (eLeft = True) */
5478 case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
5479 case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
5480 case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
5482 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
5483 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
5484 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
5486 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
5487 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
5488 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
5490 case 0xDB: op = Iop_And64; break;
/* PANDN: complement G before ANDing (invG) */
5491 case 0xDF: op = Iop_And64; invG = True; break;
5492 case 0xEB: op = Iop_Or64; break;
5493 case 0xEF: /* Possibly do better here if argL and argR are the
5495 op = Iop_Xor64; break;
5497 /* Introduced in SSE1 */
5498 case 0xE0: op = Iop_Avg8Ux8; break;
5499 case 0xE3: op = Iop_Avg16Ux4; break;
5500 case 0xEE: op = Iop_Max16Sx4; break;
5501 case 0xDE: op = Iop_Max8Ux8; break;
5502 case 0xEA: op = Iop_Min16Sx4; break;
5503 case 0xDA: op = Iop_Min8Ux8; break;
5504 case 0xE4: op = Iop_MulHi16Ux4; break;
5505 case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;
5507 /* Introduced in SSE2 */
5508 case 0xD4: op = Iop_Add64; break;
5509 case 0xFB: op = Iop_Sub64; break;
5512 vex_printf("\n0x%x\n", (Int)opc);
5513 vpanic("dis_MMXop_regmem_to_reg");
/* fetch G, possibly complemented (PANDN) */
5518 argG = getMMXReg(gregOfRM(modrm));
5520 argG = unop(Iop_Not64, argG);
/* fetch E: either an MMX register or a 64-bit load */
5524 argE = getMMXReg(eregOfRM(modrm));
5527 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5529 argE = loadLE(Ity_I64, mkexpr(addr));
/* Compute: either a native binop, or a call to a clean helper.
   Exactly one of (op) and (hName/hAddr) must be set. */
5540 if (op != Iop_INVALID) {
5541 vassert(hName == NULL);
5542 vassert(hAddr == NULL);
5543 assign(res, binop(op, argL, argR));
5545 vassert(hName != NULL);
5546 vassert(hAddr != NULL);
5550 0/*regparms*/, hName, hAddr,
5551 mkIRExprVec_2( argL, argR )
5556 putMMXReg( gregOfRM(modrm), mkexpr(res) );
5558 DIP("%s%s %s, %s\n",
5559 name, show_granularity ? nameMMXGran(opc & 3) : "",
5560 ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
5561 nameMMXReg(gregOfRM(modrm)) );
5567 /* Vector by scalar shift of G by the amount specified at the bottom
5568 of E. This is a straight copy of dis_SSE_shiftG_byE. */
/* Translate "Gmmx <<= / >>= amount-from-E": vector-by-scalar shift
   of G by the shift count held in the low 32 bits of E (register or
   memory).  x86 semantics: a count >= the lane width does not wrap;
   shl/shr produce 0, and sar saturates at a shift of (size-1).
   Returns the updated insn delta (on lines not shown). */
5570 static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta,
5571 HChar* opname, IROp op )
5577 UChar rm = getIByte(delta);
5578 IRTemp g0 = newTemp(Ity_I64);
5579 IRTemp g1 = newTemp(Ity_I64);
5580 IRTemp amt = newTemp(Ity_I32);
5581 IRTemp amt8 = newTemp(Ity_I8);
/* Shift count: low 32 bits of an MMX reg, or a 32-bit load. */
5583 if (epartIsReg(rm)) {
5584 assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
5585 DIP("%s %s,%s\n", opname,
5586 nameMMXReg(eregOfRM(rm)),
5587 nameMMXReg(gregOfRM(rm)) );
5590 addr = disAMode ( &alen, sorb, delta, dis_buf );
5591 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
5592 DIP("%s %s,%s\n", opname,
5594 nameMMXReg(gregOfRM(rm)) );
5597 assign( g0, getMMXReg(gregOfRM(rm)) );
5598 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
/* Classify the IROp so out-of-range counts can be special-cased.
   NOTE(review): the 0xF1/ShlN16x4 row says size = 32; the parallel
   table in dis_MMX_shiftE_imm uses size = 16 for the same op --
   looks like a typo here; confirm against upstream. */
5600 shl = shr = sar = False;
5603 case Iop_ShlN16x4: shl = True; size = 32; break;
5604 case Iop_ShlN32x2: shl = True; size = 32; break;
5605 case Iop_Shl64: shl = True; size = 64; break;
5606 case Iop_ShrN16x4: shr = True; size = 16; break;
5607 case Iop_ShrN32x2: shr = True; size = 32; break;
5608 case Iop_Shr64: shr = True; size = 64; break;
5609 case Iop_SarN16x4: sar = True; size = 16; break;
5610 case Iop_SarN32x2: sar = True; size = 32; break;
5611 default: vassert(0);
/* shl/shr: result is the shift if amt < size, else 0 (mux arm on
   lines not shown). */
5618 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
5620 binop(op, mkexpr(g0), mkexpr(amt8))
/* sar: an out-of-range count behaves as a shift by (size-1). */
5628 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
5629 binop(op, mkexpr(g0), mkU8(size-1)),
5630 binop(op, mkexpr(g0), mkexpr(amt8))
5638 putMMXReg( gregOfRM(rm), mkexpr(g1) );
5643 /* Vector by scalar shift of E by an immediate byte. This is a
5644 straight copy of dis_SSE_shiftE_imm. */
/* Translate "Emmx <<= / >>= imm8": vector shift of E (must be a
   register here) by a literal byte.  The greg field of the modrm is
   the sub-opcode (2=SRL, 4=SRA, 6=SLL).  Out-of-range immediates are
   resolved at translation time: shl/shr give 0, sar acts as a shift
   by (size-1).  Returns the updated insn delta (line not shown). */
5647 UInt dis_MMX_shiftE_imm ( Int delta, HChar* opname, IROp op )
5650 UChar rm = getIByte(delta);
5651 IRTemp e0 = newTemp(Ity_I64);
5652 IRTemp e1 = newTemp(Ity_I64);
5654 vassert(epartIsReg(rm));
/* greg is the shift sub-opcode, not a register number */
5655 vassert(gregOfRM(rm) == 2
5656 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
5657 amt = getIByte(delta+1);
5659 DIP("%s $%d,%s\n", opname,
5661 nameMMXReg(eregOfRM(rm)) );
5663 assign( e0, getMMXReg(eregOfRM(rm)) );
/* classify the op to get its lane width */
5665 shl = shr = sar = False;
5668 case Iop_ShlN16x4: shl = True; size = 16; break;
5669 case Iop_ShlN32x2: shl = True; size = 32; break;
5670 case Iop_Shl64: shl = True; size = 64; break;
5671 case Iop_SarN16x4: sar = True; size = 16; break;
5672 case Iop_SarN32x2: sar = True; size = 32; break;
5673 case Iop_ShrN16x4: shr = True; size = 16; break;
5674 case Iop_ShrN32x2: shr = True; size = 32; break;
5675 case Iop_Shr64: shr = True; size = 64; break;
5676 default: vassert(0);
/* shl/shr: amt >= size gives zero (the mkU64(0) arm is on a line
   not shown) */
5680 assign( e1, amt >= size
5682 : binop(op, mkexpr(e0), mkU8(amt))
/* sar: amt >= size saturates to a shift of (size-1) */
5686 assign( e1, amt >= size
5687 ? binop(op, mkexpr(e0), mkU8(size-1))
5688 : binop(op, mkexpr(e0), mkU8(amt))
5695 putMMXReg( eregOfRM(rm), mkexpr(e1) );
5700 /* Completely handle all MMX instructions except emms. */
/* Top-level dispatcher for every MMX instruction except EMMS.
   On entry 'delta' points at the primary opcode byte; 'sorb' is any
   segment-override prefix, 'sz' the operand-size.  On an undecodable
   insn, *decode_ok is presumably set False at mmx_decode_failure
   (lines not shown).  Returns the updated delta. */
5703 UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta )
5708 UChar opc = getIByte(delta);
5711 /* dis_MMX handles all insns except emms. */
5717 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
5719 goto mmx_decode_failure;
5720 modrm = getIByte(delta);
5721 if (epartIsReg(modrm)) {
/* zero-extend the 32-bit source into the 64-bit MMX reg */
5725 binop( Iop_32HLto64,
5727 getIReg(4, eregOfRM(modrm)) ) );
5728 DIP("movd %s, %s\n",
5729 nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
5731 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5735 binop( Iop_32HLto64,
5737 loadLE(Ity_I32, mkexpr(addr)) ) );
5738 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm)));
5742 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
5744 goto mmx_decode_failure;
5745 modrm = getIByte(delta);
5746 if (epartIsReg(modrm)) {
/* only the low 32 bits of the MMX reg are moved out */
5748 putIReg( 4, eregOfRM(modrm),
5749 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
5750 DIP("movd %s, %s\n",
5751 nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
5753 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5755 storeLE( mkexpr(addr),
5756 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
5757 DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf);
5762 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
5764 goto mmx_decode_failure;
5765 modrm = getIByte(delta);
5766 if (epartIsReg(modrm)) {
5768 putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) );
5769 DIP("movq %s, %s\n",
5770 nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
5772 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5774 putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
5775 DIP("movq %s, %s\n",
5776 dis_buf, nameMMXReg(gregOfRM(modrm)));
5781 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
5783 goto mmx_decode_failure;
5784 modrm = getIByte(delta);
5785 if (epartIsReg(modrm)) {
5787 putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) );
5788 DIP("movq %s, %s\n",
5789 nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm)));
5791 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
/* MOVNTQ's non-temporal hint is ignored; plain store */
5793 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
5794 DIP("mov(nt)q %s, %s\n",
5795 nameMMXReg(gregOfRM(modrm)), dis_buf);
/* All the two-operand arithmetic/logic forms below are handled
   uniformly by dis_MMXop_regmem_to_reg; the final Bool says whether
   to print a per-lane granularity suffix. */
5801 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
5803 goto mmx_decode_failure;
5804 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True );
5808 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
5810 goto mmx_decode_failure;
5811 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True );
5815 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
5817 goto mmx_decode_failure;
5818 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True );
5823 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
5825 goto mmx_decode_failure;
5826 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True );
5830 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
5832 goto mmx_decode_failure;
5833 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True );
5837 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
5839 goto mmx_decode_failure;
5840 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True );
5843 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
5845 goto mmx_decode_failure;
5846 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False );
5849 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
5851 goto mmx_decode_failure;
5852 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False );
5855 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
5857 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False );
5862 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
5864 goto mmx_decode_failure;
5865 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True );
5870 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
5872 goto mmx_decode_failure;
5873 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True );
5876 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
5878 goto mmx_decode_failure;
5879 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False );
5882 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
5884 goto mmx_decode_failure;
5885 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False );
5888 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
5890 goto mmx_decode_failure;
5891 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False );
5896 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
5898 goto mmx_decode_failure;
5899 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True );
5904 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
5906 goto mmx_decode_failure;
5907 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True );
5910 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
5912 goto mmx_decode_failure;
5913 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False );
5916 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
5918 goto mmx_decode_failure;
5919 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False );
5922 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
5924 goto mmx_decode_failure;
5925 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False );
5928 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
5930 goto mmx_decode_failure;
5931 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False );
/* Shift-by-register forms: count comes from an MMX reg or memory. */
5934 # define SHIFT_BY_REG(_name,_op) \
5935 delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \
5938 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
5939 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
5940 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
5941 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
5943 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
5944 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
5945 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
5946 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
5948 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
5949 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
5950 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
5952 # undef SHIFT_BY_REG
/* Shift-by-immediate group: opcodes 0x71/0x72/0x73; the modrm greg
   field is the sub-opcode selecting SRL/SRA/SLL. */
5957 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
5958 UChar byte2, subopc;
5960 goto mmx_decode_failure;
5961 byte2 = getIByte(delta); /* amode / sub-opcode */
5962 subopc = toUChar( (byte2 >> 3) & 7 );
5964 # define SHIFT_BY_IMM(_name,_op) \
5965 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
5968 if (subopc == 2 /*SRL*/ && opc == 0x71)
5969 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
5970 else if (subopc == 2 /*SRL*/ && opc == 0x72)
5971 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
5972 else if (subopc == 2 /*SRL*/ && opc == 0x73)
5973 SHIFT_BY_IMM("psrlq", Iop_Shr64);
5975 else if (subopc == 4 /*SAR*/ && opc == 0x71)
5976 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
5977 else if (subopc == 4 /*SAR*/ && opc == 0x72)
5978 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
5980 else if (subopc == 6 /*SHL*/ && opc == 0x71)
5981 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
5982 else if (subopc == 6 /*SHL*/ && opc == 0x72)
5983 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
5984 else if (subopc == 6 /*SHL*/ && opc == 0x73)
5985 SHIFT_BY_IMM("psllq", Iop_Shl64);
5987 else goto mmx_decode_failure;
5989 # undef SHIFT_BY_IMM
/* MASKMOVQ: byte-masked store of G to [EDI].  Each byte of G is
   written only where the corresponding byte of E has its top bit
   set; SarN8x8 by 7 turns those sign bits into 0x00/0xFF masks. */
5994 IRTemp addr = newTemp(Ity_I32);
5995 IRTemp regD = newTemp(Ity_I64);
5996 IRTemp regM = newTemp(Ity_I64);
5997 IRTemp mask = newTemp(Ity_I64);
5998 IRTemp olddata = newTemp(Ity_I64);
5999 IRTemp newdata = newTemp(Ity_I64);
6001 modrm = getIByte(delta);
6002 if (sz != 4 || (!epartIsReg(modrm)))
6003 goto mmx_decode_failure;
6006 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
6007 assign( regM, getMMXReg( eregOfRM(modrm) ));
6008 assign( regD, getMMXReg( gregOfRM(modrm) ));
6009 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
/* read-modify-write: merge masked bytes of regD into old memory */
6010 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
6018 unop(Iop_Not64, mkexpr(mask)))) );
6019 storeLE( mkexpr(addr), mkexpr(newdata) );
6020 DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
6021 nameMMXReg( gregOfRM(modrm) ) );
6025 /* --- MMX decode failure --- */
6029 return delta; /* ignored */
6038 /*------------------------------------------------------------*/
6039 /*--- More misc arithmetic and other obscure insns. ---*/
6040 /*------------------------------------------------------------*/
6042 /* Double length left and right shifts. Apparently only required in
6043 v-size (no b- variant). */
/* Translate SHLD/SHRD (double-length shift) "sh{l,r}d G, E, amt".
   E is the destination; G supplies the bits shifted in.  Implemented
   by forming a double-width value from E and G, shifting that, and
   keeping the half that corresponds to E.  Only 16- and 32-bit
   operand sizes exist (no byte form). */
6045 UInt dis_SHLRD_Gv_Ev ( UChar sorb,
6046 Int delta, UChar modrm,
6049 Bool amt_is_literal,
6050 HChar* shift_amt_txt,
6053 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
6054 for printing it. And eip on entry points at the modrm byte. */
6058 IRType ty = szToITy(sz);
6059 IRTemp gsrc = newTemp(ty);
6060 IRTemp esrc = newTemp(ty);
6061 IRTemp addr = IRTemp_INVALID;
6062 IRTemp tmpSH = newTemp(Ity_I8);
6063 IRTemp tmpL = IRTemp_INVALID;
6064 IRTemp tmpRes = IRTemp_INVALID;
6065 IRTemp tmpSubSh = IRTemp_INVALID;
6069 IRExpr* mask = NULL;
6071 vassert(sz == 2 || sz == 4);
/* Fetch the operands: G always a register, E register or memory. */
6087 assign( gsrc, getIReg(sz, gregOfRM(modrm)) );
6089 if (epartIsReg(modrm)) {
6091 assign( esrc, getIReg(sz, eregOfRM(modrm)) );
6092 DIP("sh%cd%c %s, %s, %s\n",
6093 ( left_shift ? 'l' : 'r' ), nameISize(sz),
6095 nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm)));
6097 addr = disAMode ( &len, sorb, delta, dis_buf );
6099 assign( esrc, loadLE(ty, mkexpr(addr)) );
6100 DIP("sh%cd%c %s, %s, %s\n",
6101 ( left_shift ? 'l' : 'r' ), nameISize(sz),
6103 nameIReg(sz, gregOfRM(modrm)), dis_buf);
6106 /* Round up the relevant primops. */
/* sz==4: shift a 64-bit pair; keep high half for shl, low for shr */
6109 tmpL = newTemp(Ity_I64);
6110 tmpRes = newTemp(Ity_I32);
6111 tmpSubSh = newTemp(Ity_I32);
6112 mkpair = Iop_32HLto64;
6113 getres = left_shift ? Iop_64HIto32 : Iop_64to32;
6114 shift = left_shift ? Iop_Shl64 : Iop_Shr64;
/* sz==2: same scheme with a 32-bit pair */
6118 tmpL = newTemp(Ity_I32);
6119 tmpRes = newTemp(Ity_I16);
6120 tmpSubSh = newTemp(Ity_I16);
6121 mkpair = Iop_16HLto32;
6122 getres = left_shift ? Iop_32HIto16 : Iop_32to16;
6123 shift = left_shift ? Iop_Shl32 : Iop_Shr32;
6127 /* Do the shift, calculate the subshift value, and set
/* mask the count as hardware does (mask built on a line not shown) */
6130 assign( tmpSH, binop(Iop_And8, shift_amt, mask) );
/* pair order: E on top when shifting left, G on top when right */
6133 assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) );
6135 assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) );
6137 assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) );
/* tmpSubSh = result of shifting by (amt-1), needed for the C flag */
6143 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
6146 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32,
6147 tmpRes, tmpSubSh, ty, tmpSH );
6149 /* Put result back. */
6151 if (epartIsReg(modrm)) {
6152 putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes));
6154 storeLE( mkexpr(addr), mkexpr(tmpRes) );
/* a literal amount consumed one extra opcode byte */
6157 if (amt_is_literal) delta++;
6162 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
/* Variants of the bit-test family: BT (test only), BTS (set),
   BTR (reset), BTC (complement). */
6165 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
/* Mnemonic suffix for a BtOp, used when printing "bt%s". */
6167 static HChar* nameBtOp ( BtOp op )
6170 case BtOpNone: return "";
6171 case BtOpSet: return "s";
6172 case BtOpReset: return "r";
6173 case BtOpComp: return "c";
6174 default: vpanic("nameBtOp(x86)");
/* Translate BT/BTS/BTR/BTC "btX G, E" (v-size only; no byte form).
   Strategy: always operate on a byte in memory.  If E is a register,
   it is first spilled to a scratch slot 128 bytes below ESP on the
   client's stack, operated on there, then reloaded.  The selected
   bit lands in the carry flag; O/S/Z/A/P are forced to zero.
   'locked' memory forms go through casLE. */
6180 UInt dis_bt_G_E ( VexAbiInfo* vbi,
6181 UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
6186 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
6187 t_addr1, t_esp, t_mask, t_new;
6189 vassert(sz == 2 || sz == 4);
6191 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
6192 = t_addr0 = t_addr1 = t_esp
6193 = t_mask = t_new = IRTemp_INVALID;
6195 t_fetched = newTemp(Ity_I8);
6196 t_new = newTemp(Ity_I8);
6197 t_bitno0 = newTemp(Ity_I32);
6198 t_bitno1 = newTemp(Ity_I32);
6199 t_bitno2 = newTemp(Ity_I8);
6200 t_addr1 = newTemp(Ity_I32);
6201 modrm = getIByte(delta);
/* bit number comes from G, sign-widened to 32 bits */
6203 assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) );
6205 if (epartIsReg(modrm)) {
6207 /* Get it onto the client's stack. */
6208 t_esp = newTemp(Ity_I32);
6209 t_addr0 = newTemp(Ity_I32);
6211 /* For the choice of the value 128, see comment in dis_bt_G_E in
6212 guest_amd64_toIR.c. We point out here only that 128 is
6213 fast-cased in Memcheck and is > 0, so seems like a good
6215 vassert(vbi->guest_stack_redzone_size == 0);
6216 assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) );
6217 putIReg(4, R_ESP, mkexpr(t_esp));
6219 storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) );
6221 /* Make t_addr0 point at it. */
6222 assign( t_addr0, mkexpr(t_esp) );
6224 /* Mask out upper bits of the shift amount, since we're doing a
6226 assign( t_bitno1, binop(Iop_And32,
6228 mkU32(sz == 4 ? 31 : 15)) );
/* memory form: bit number is NOT masked; it may index any byte */
6231 t_addr0 = disAMode ( &len, sorb, delta, dis_buf );
6233 assign( t_bitno1, mkexpr(t_bitno0) );
6236 /* At this point: t_addr0 is the address being operated on. If it
6237 was a reg, we will have pushed it onto the client's stack.
6238 t_bitno1 is the bit number, suitably masked in the case of a
6241 /* Now the main sequence. */
/* t_addr1 = t_addr0 + (bitno >> 3): address of the target byte */
6245 binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) );
6247 /* t_addr1 now holds effective address */
6251 binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) );
6253 /* t_bitno2 contains offset of bit within byte */
6255 if (op != BtOpNone) {
6256 t_mask = newTemp(Ity_I8);
6257 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
6260 /* t_mask is now a suitable byte mask */
6262 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
/* compute the updated byte: set / complement / reset the bit */
6264 if (op != BtOpNone) {
6268 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
6272 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
6276 binop(Iop_And8, mkexpr(t_fetched),
6277 unop(Iop_Not8, mkexpr(t_mask))) );
6280 vpanic("dis_bt_G_E(x86)");
/* LOCKed memory form updates via compare-and-swap */
6282 if (locked && !epartIsReg(modrm)) {
6283 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
6284 mkexpr(t_new)/*new*/,
6285 guest_EIP_curr_instr );
6287 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
6291 /* Side effect done; now get selected bit into Carry flag */
6292 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
6293 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
6294 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
6299 unop(Iop_8Uto32, mkexpr(t_fetched)),
6303 /* Set NDEP even though it isn't used. This makes redundant-PUT
6304 elimination of previous stores to this field work better. */
6305 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
6307 /* Move reg operand from stack back to reg */
6308 if (epartIsReg(modrm)) {
6309 /* t_esp still points at it. */
6310 putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) );
6311 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) );
6314 DIP("bt%s%c %s, %s\n",
6315 nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)),
6316 ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) );
6323 /* Handle BSF/BSR. Only v-size seems necessary. */
/* Translate BSF/BSR "bs{f,r} E, G" ('fwds' True for BSF).  Z flag is
   set iff the source is zero; all other flags are forced to zero.
   When the source is zero the destination register is left unchanged
   (VIA Nehemiah behaviour, see note below) rather than being
   undefined. */
6325 UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds )
6331 IRType ty = szToITy(sz);
6332 IRTemp src = newTemp(ty);
6333 IRTemp dst = newTemp(ty);
6335 IRTemp src32 = newTemp(Ity_I32);
6336 IRTemp dst32 = newTemp(Ity_I32);
6337 IRTemp src8 = newTemp(Ity_I8);
6339 vassert(sz == 4 || sz == 2);
6341 modrm = getIByte(delta);
6343 isReg = epartIsReg(modrm);
6346 assign( src, getIReg(sz, eregOfRM(modrm)) );
6349 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
6351 assign( src, loadLE(ty, mkexpr(addr)) );
6354 DIP("bs%c%c %s, %s\n",
6355 fwds ? 'f' : 'r', nameISize(sz),
6356 ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
6357 nameIReg(sz, gregOfRM(modrm)));
6359 /* Generate an 8-bit expression which is zero iff the
6360 original is zero, and nonzero otherwise */
6362 unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_CmpNE8),
6363 mkexpr(src), mkU(ty,0))) );
6365 /* Flags: Z is 1 iff source value is zero. All others
6366 are undefined -- we force them to zero. */
6367 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
6368 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
/* DEP1 := Z mask if src == 0, else 0 (mux selects on src8) */
6371 IRExpr_Mux0X( mkexpr(src8),
6373 mkU32(X86G_CC_MASK_Z),
6378 /* Set NDEP even though it isn't used. This makes redundant-PUT
6379 elimination of previous stores to this field work better. */
6380 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
6382 /* Result: iff source value is zero, we can't use
6383 Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
6384 But anyway, Intel x86 semantics say the result is undefined in
6385 such situations. Hence handle the zero case specially. */
6387 /* Bleh. What we compute:
6389 bsf32: if src == 0 then 0 else Ctz32(src)
6390 bsr32: if src == 0 then 0 else 31 - Clz32(src)
6392 bsf16: if src == 0 then 0 else Ctz32(16Uto32(src))
6393 bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src))
6395 First, widen src to 32 bits if it is not already.
6397 Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
6398 dst register unchanged when src == 0. Hence change accordingly.
6401 assign( src32, unop(Iop_16Uto32, mkexpr(src)) );
6403 assign( src32, mkexpr(src) );
6405 /* The main computation, guarding against zero. */
6409 /* src == 0 -- leave dst unchanged */
6410 widenUto32( getIReg( sz, gregOfRM(modrm) ) ),
/* src != 0: Ctz32 for BSF; 31 - Clz32 for BSR */
6412 fwds ? unop(Iop_Ctz32, mkexpr(src32))
6415 unop(Iop_Clz32, mkexpr(src32)))
/* narrow back to 16 bits if needed */
6420 assign( dst, unop(Iop_32to16, mkexpr(dst32)) );
6422 assign( dst, mkexpr(dst32) );
6424 /* dump result back */
6425 putIReg( sz, gregOfRM(modrm), mkexpr(dst) );
/* Translate XCHG eAX, reg (the one-byte 0x90+r forms): swap the
   accumulator with 'reg' at operand size 'sz' (2 or 4), via two
   temporaries so neither read sees the other's write. */
6432 void codegen_xchg_eAX_Reg ( Int sz, Int reg )
6434 IRType ty = szToITy(sz);
6435 IRTemp t1 = newTemp(ty);
6436 IRTemp t2 = newTemp(ty);
6437 vassert(sz == 2 || sz == 4);
6438 assign( t1, getIReg(sz, R_EAX) );
6439 assign( t2, getIReg(sz, reg) );
6440 putIReg( sz, R_EAX, mkexpr(t2) );
6441 putIReg( sz, reg, mkexpr(t1) );
6442 DIP("xchg%c %s, %s\n",
6443 nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg));
/* Translate SAHF: load S,Z,A,P,C from %AH into the flags thunk,
   keeping the existing O flag.  The thunk is switched to COPY mode
   with DEP1 = (old flags & O-mask) | (%AH & SZACP-mask). */
6448 void codegen_SAHF ( void )
6450 /* Set the flags to:
6451 (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag
6452 | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6453 |X86G_CC_MASK_P|X86G_CC_MASK_C)
6455 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6456 |X86G_CC_MASK_C|X86G_CC_MASK_P;
6457 IRTemp oldflags = newTemp(Ity_I32);
/* evaluate the current (lazy) flags before overwriting the thunk */
6458 assign( oldflags, mk_x86g_calculate_eflags_all() );
6459 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
6460 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
6461 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
6462 stmt( IRStmt_Put( OFFB_CC_DEP1,
6464 binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)),
/* %AH = bits 15:8 of EAX */
6466 binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)),
6470 /* Set NDEP even though it isn't used. This makes redundant-PUT
6471 elimination of previous stores to this field work better. */
6472 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
/* Translate LAHF: AH := EFLAGS(SF:ZF:0:AF:0:PF:1:CF).  Computes the
   full flags, keeps the SZACP bits, and merges the resulting byte
   into bits 15:8 of EAX, preserving the rest of EAX. */
6477 void codegen_LAHF ( void )
6479 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
6480 IRExpr* eax_with_hole;
6483 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6484 |X86G_CC_MASK_C|X86G_CC_MASK_P;
6486 IRTemp flags = newTemp(Ity_I32);
6487 assign( flags, mk_x86g_calculate_eflags_all() );
/* clear bits 15:8 of EAX, ready to receive the new AH */
6490 = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF));
6492 = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)),
6495 = binop(Iop_Or32, eax_with_hole,
6496 binop(Iop_Shl32, new_byte, mkU8(8)));
6497 putIReg(4, R_EAX, new_eax);
/* Translate CMPXCHG G, E: compare EAX with dest; if equal, dest :=
   src and ZF=1, else EAX := dest.  Three code shapes: reg-reg and
   unlocked reg-mem use a plain compare-and-mux sequence; the LOCKed
   reg-mem form is expressed with IRCAS so the backend emits a real
   atomic compare-and-swap. */
6502 UInt dis_cmpxchg_G_E ( UChar sorb,
6510 IRType ty = szToITy(size);
6511 IRTemp acc = newTemp(ty);
6512 IRTemp src = newTemp(ty);
6513 IRTemp dest = newTemp(ty);
6514 IRTemp dest2 = newTemp(ty);
6515 IRTemp acc2 = newTemp(ty);
6516 IRTemp cond8 = newTemp(Ity_I8);
6517 IRTemp addr = IRTemp_INVALID;
6518 UChar rm = getUChar(delta0);
6520 /* There are 3 cases to consider:
6522 reg-reg: ignore any lock prefix, generate sequence based
6525 reg-mem, not locked: ignore any lock prefix, generate sequence
6528 reg-mem, locked: use IRCAS
6530 if (epartIsReg(rm)) {
6532 assign( dest, getIReg(size, eregOfRM(rm)) );
6534 assign( src, getIReg(size, gregOfRM(rm)) );
6535 assign( acc, getIReg(size, R_EAX) );
/* flags come from the comparison acc - dest */
6536 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
6537 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
6538 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
6539 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
6540 putIReg(size, R_EAX, mkexpr(acc2));
6541 putIReg(size, eregOfRM(rm), mkexpr(dest2));
6542 DIP("cmpxchg%c %s,%s\n", nameISize(size),
6543 nameIReg(size,gregOfRM(rm)),
6544 nameIReg(size,eregOfRM(rm)) );
6546 else if (!epartIsReg(rm) && !locked) {
6548 addr = disAMode ( &len, sorb, delta0, dis_buf );
6549 assign( dest, loadLE(ty, mkexpr(addr)) );
6551 assign( src, getIReg(size, gregOfRM(rm)) );
6552 assign( acc, getIReg(size, R_EAX) );
6553 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
6554 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
6555 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
6556 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
6557 putIReg(size, R_EAX, mkexpr(acc2));
6558 storeLE( mkexpr(addr), mkexpr(dest2) );
6559 DIP("cmpxchg%c %s,%s\n", nameISize(size),
6560 nameIReg(size,gregOfRM(rm)), dis_buf);
6562 else if (!epartIsReg(rm) && locked) {
6564 /* src is new value. acc is expected value. dest is old value.
6565 Compute success from the output of the IRCAS, and steer the
6566 new value for EAX accordingly: in case of success, EAX is
6568 addr = disAMode ( &len, sorb, delta0, dis_buf );
6570 assign( src, getIReg(size, gregOfRM(rm)) );
6571 assign( acc, getIReg(size, R_EAX) );
/* 'dest' receives the old memory value from the CAS */
6573 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
6574 NULL, mkexpr(acc), NULL, mkexpr(src) )
6576 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
6577 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
6578 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
6579 putIReg(size, R_EAX, mkexpr(acc2));
6580 DIP("cmpxchg%c %s,%s\n", nameISize(size),
6581 nameIReg(size,gregOfRM(rm)), dis_buf);
6589 /* Handle conditional move instructions of the form
6590 cmovcc E(reg-or-mem), G(reg)
6592 E(src) is reg-or-mem
6595 If E is reg, --> GET %E, tmps
6600 If E is mem --> (getAddr E) -> tmpa
/* Translate CMOVcc E, G.  The move is made unconditional in IR:
   both old G and new E values are read, and a Mux0X on the condition
   selects which one is written back to G.  Note that for a memory E
   the load happens regardless of the condition. */
6607 UInt dis_cmov_E_G ( UChar sorb,
6612 UChar rm = getIByte(delta0);
6616 IRType ty = szToITy(sz);
6617 IRTemp tmps = newTemp(ty);
6618 IRTemp tmpd = newTemp(ty);
6620 if (epartIsReg(rm)) {
6621 assign( tmps, getIReg(sz, eregOfRM(rm)) );
6622 assign( tmpd, getIReg(sz, gregOfRM(rm)) );
/* G := cond ? E : old G */
6624 putIReg(sz, gregOfRM(rm),
6625 IRExpr_Mux0X( unop(Iop_1Uto8,
6626 mk_x86g_calculate_condition(cond)),
6630 DIP("cmov%c%s %s,%s\n", nameISize(sz),
6631 name_X86Condcode(cond),
6632 nameIReg(sz,eregOfRM(rm)),
6633 nameIReg(sz,gregOfRM(rm)));
6637 /* E refers to memory */
6639 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
6640 assign( tmps, loadLE(ty, mkexpr(addr)) );
6641 assign( tmpd, getIReg(sz, gregOfRM(rm)) );
6643 putIReg(sz, gregOfRM(rm),
6644 IRExpr_Mux0X( unop(Iop_1Uto8,
6645 mk_x86g_calculate_condition(cond)),
6650 DIP("cmov%c%s %s,%s\n", nameISize(sz),
6651 name_X86Condcode(cond),
6653 nameIReg(sz,gregOfRM(rm)));
/* Translate XADD G, E: tmp := E + G; G := old E; E := tmp; flags
   from the add.  As with cmpxchg, reg-reg and unlocked reg-mem use a
   naive (non-atomic) sequence, while the LOCKed memory form goes
   through casLE for atomicity. */
6660 UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
6664 UChar rm = getIByte(delta0);
6667 IRType ty = szToITy(sz);
6668 IRTemp tmpd = newTemp(ty);
6669 IRTemp tmpt0 = newTemp(ty);
6670 IRTemp tmpt1 = newTemp(ty);
6672 /* There are 3 cases to consider:
6674 reg-reg: ignore any lock prefix,
6675 generate 'naive' (non-atomic) sequence
6677 reg-mem, not locked: ignore any lock prefix, generate 'naive'
6678 (non-atomic) sequence
6680 reg-mem, locked: use IRCAS
6683 if (epartIsReg(rm)) {
6685 assign( tmpd, getIReg(sz, eregOfRM(rm)));
6686 assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
6687 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
6688 mkexpr(tmpd), mkexpr(tmpt0)) );
6689 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
6690 putIReg(sz, eregOfRM(rm), mkexpr(tmpt1));
6691 putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
6692 DIP("xadd%c %s, %s\n",
6693 nameISize(sz), nameIReg(sz,gregOfRM(rm)),
6694 nameIReg(sz,eregOfRM(rm)));
6698 else if (!epartIsReg(rm) && !locked) {
6700 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
6701 assign( tmpd, loadLE(ty, mkexpr(addr)) );
6702 assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
6703 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
6704 mkexpr(tmpd), mkexpr(tmpt0)) );
6705 storeLE( mkexpr(addr), mkexpr(tmpt1) );
6706 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
6707 putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
6708 DIP("xadd%c %s, %s\n",
6709 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
6713 else if (!epartIsReg(rm) && locked) {
6715 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
6716 assign( tmpd, loadLE(ty, mkexpr(addr)) );
6717 assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
6718 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
6719 mkexpr(tmpd), mkexpr(tmpt0)) );
/* atomic update: expect tmpd, install tmpt1, restart on failure */
6720 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
6721 mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
6722 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
6723 putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
6724 DIP("xadd%c %s, %s\n",
6725 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
6733 /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
6736 UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 )
6740 UChar rm = getIByte(delta0);
6743 if (epartIsReg(rm)) {
     /* Register source: low 16 bits of the E register. */
6744 putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
6745 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
     /* Memory source: load 16 bits little-endian from the EA. */
6748 addr = disAMode ( &len, sorb, delta0, dis_buf );
6749 putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
6750 DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
6755 /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
6756 dst is ireg and sz==4, zero out top half of it. */
6759 UInt dis_mov_Sw_Ew ( UChar sorb,
6765 UChar rm = getIByte(delta0);
6768 vassert(sz == 2 || sz == 4);
6770 if (epartIsReg(rm)) {
     /* sz==4: zero-extend the 16-bit selector into the full 32-bit
        destination register, else write just the low 16 bits. */
6772 putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
6774 putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
6776 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
     /* Memory destination: always a 16-bit store, regardless of sz. */
6779 addr = disAMode ( &len, sorb, delta0, dis_buf );
6780 storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
6781 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
/* Push a segment register: ESP -= sz (2 or 4), then store the 16-bit
   selector at the new ESP.  Note only 16 bits are written even when
   sz==4, so the upper half of the slot is left unchanged. */
6788 void dis_push_segreg ( UInt sreg, Int sz )
6790 IRTemp t1 = newTemp(Ity_I16);
6791 IRTemp ta = newTemp(Ity_I32);
6792 vassert(sz == 2 || sz == 4);
6794 assign( t1, getSReg(sreg) );
6795 assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
6796 putIReg(4, R_ESP, mkexpr(ta));
6797 storeLE( mkexpr(ta), mkexpr(t1) );
6799 DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
/* Pop a segment register: load 16 bits from [ESP], bump ESP by sz
   (2 or 4), and write the selector into the segment register. */
6803 void dis_pop_segreg ( UInt sreg, Int sz )
6805 IRTemp t1 = newTemp(Ity_I16);
6806 IRTemp ta = newTemp(Ity_I32);
6807 vassert(sz == 2 || sz == 4);
6809 assign( ta, getIReg(4, R_ESP) );
6810 assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
     /* ESP is advanced by the full operand size even though only 16
        bits were read. */
6812 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
6813 putSReg( sreg, mkexpr(t1) );
6814 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
/* RET / RET d32: pop the return address from [ESP], add 4+d32 to ESP
   (d32 is the optional immediate stack adjustment), and jump to the
   popped address with jumpkind Ijk_Ret. */
6818 void dis_ret ( UInt d32 )
6820 IRTemp t1 = newTemp(Ity_I32), t2 = newTemp(Ity_I32);
6821 assign(t1, getIReg(4,R_ESP));
6822 assign(t2, loadLE(Ity_I32,mkexpr(t1)));
6823 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
6824 jmp_treg(Ijk_Ret,t2);
6827 /*------------------------------------------------------------*/
6828 /*--- SSE/SSE2/SSE3 helpers ---*/
6829 /*------------------------------------------------------------*/
6831 /* Worker function; do not call directly.
6832 Handles full width G = G `op` E and G = (not G) `op` E.
6835 static UInt dis_SSE_E_to_G_all_wrk (
6836 UChar sorb, Int delta,
6837 HChar* opname, IROp op,
6844 UChar rm = getIByte(delta);
     /* gpart is the (possibly complemented) current value of G. */
6846 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm)))
6847 : getXMMReg(gregOfRM(rm));
6848 if (epartIsReg(rm)) {
6849 putXMMReg( gregOfRM(rm),
6851 getXMMReg(eregOfRM(rm))) );
6852 DIP("%s %s,%s\n", opname,
6853 nameXMMReg(eregOfRM(rm)),
6854 nameXMMReg(gregOfRM(rm)) );
     /* Memory form: full 128-bit load of the E operand. */
6857 addr = disAMode ( &alen, sorb, delta, dis_buf );
6858 putXMMReg( gregOfRM(rm),
6860 loadLE(Ity_V128, mkexpr(addr))) );
6861 DIP("%s %s,%s\n", opname,
6863 nameXMMReg(gregOfRM(rm)) );
6869 /* All lanes SSE binary operation, G = G `op` E. */
6872 UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, HChar* opname, IROp op )
6874 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False );
6877 /* All lanes SSE binary operation, G = (not G) `op` E.
     Used for e.g. ANDNPS/ANDNPD, where the G operand is complemented
     before the op is applied. */
6880 UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta,
6881 HChar* opname, IROp op )
6883 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True );
6887 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
6889 static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta,
6890 HChar* opname, IROp op )
6895 UChar rm = getIByte(delta);
6896 IRExpr* gpart = getXMMReg(gregOfRM(rm));
6897 if (epartIsReg(rm)) {
6898 putXMMReg( gregOfRM(rm),
6900 getXMMRegIReg(eregOfRM(rm))) );
6901 DIP("%s %s,%s\n", opname,
6902 nameXMMReg(eregOfRM(rm)),
6903 nameXMMReg(gregOfRM(rm)) );
6906 /* We can only do a 32-bit memory read, so the upper 3/4 of the
6907 E operand needs to be made simply of zeroes. */
6908 IRTemp epart = newTemp(Ity_V128);
6909 addr = disAMode ( &alen, sorb, delta, dis_buf );
     /* Iop_32UtoV128 zero-extends the 32-bit load into a V128. */
6910 assign( epart, unop( Iop_32UtoV128,
6911 loadLE(Ity_I32, mkexpr(addr))) );
6912 putXMMReg( gregOfRM(rm),
6913 binop(op, gpart, mkexpr(epart)) );
6914 DIP("%s %s,%s\n", opname,
6916 nameXMMReg(gregOfRM(rm)) );
6922 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
6924 static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta,
6925 HChar* opname, IROp op )
6930 UChar rm = getIByte(delta);
6931 IRExpr* gpart = getXMMReg(gregOfRM(rm));
6932 if (epartIsReg(rm)) {
6933 putXMMReg( gregOfRM(rm),
6935 getXMMReg(eregOfRM(rm))) );
6936 DIP("%s %s,%s\n", opname,
6937 nameXMMReg(eregOfRM(rm)),
6938 nameXMMReg(gregOfRM(rm)) );
6941 /* We can only do a 64-bit memory read, so the upper half of the
6942 E operand needs to be made simply of zeroes. */
6943 IRTemp epart = newTemp(Ity_V128);
6944 addr = disAMode ( &alen, sorb, delta, dis_buf );
     /* Iop_64UtoV128 zero-extends the 64-bit load into a V128. */
6945 assign( epart, unop( Iop_64UtoV128,
6946 loadLE(Ity_I64, mkexpr(addr))) );
6947 putXMMReg( gregOfRM(rm),
6948 binop(op, gpart, mkexpr(epart)) );
6949 DIP("%s %s,%s\n", opname,
6951 nameXMMReg(gregOfRM(rm)) );
6957 /* All lanes unary SSE operation, G = op(E). */
6959 static UInt dis_SSE_E_to_G_unary_all (
6960 UChar sorb, Int delta,
6961 HChar* opname, IROp op
6967 UChar rm = getIByte(delta);
6968 if (epartIsReg(rm)) {
6969 putXMMReg( gregOfRM(rm),
6970 unop(op, getXMMReg(eregOfRM(rm))) );
6971 DIP("%s %s,%s\n", opname,
6972 nameXMMReg(eregOfRM(rm)),
6973 nameXMMReg(gregOfRM(rm)) );
     /* Memory form: full 128-bit load of the E operand. */
6976 addr = disAMode ( &alen, sorb, delta, dis_buf );
6977 putXMMReg( gregOfRM(rm),
6978 unop(op, loadLE(Ity_V128, mkexpr(addr))) );
6979 DIP("%s %s,%s\n", opname,
6981 nameXMMReg(gregOfRM(rm)) );
6987 /* Lowest 32-bit lane only unary SSE operation, G = op(E).
     The upper 96 bits of the result come from the old G value. */
6989 static UInt dis_SSE_E_to_G_unary_lo32 (
6990 UChar sorb, Int delta,
6991 HChar* opname, IROp op
6994 /* First we need to get the old G value and patch the low 32 bits
6995 of the E operand into it. Then apply op and write back to G. */
6999 UChar rm = getIByte(delta);
7000 IRTemp oldG0 = newTemp(Ity_V128);
7001 IRTemp oldG1 = newTemp(Ity_V128);
7003 assign( oldG0, getXMMReg(gregOfRM(rm)) );
7005 if (epartIsReg(rm)) {
     /* oldG1 = oldG0 with its low 32 bits replaced by E's lane 0. */
7007 binop( Iop_SetV128lo32,
7009 getXMMRegLane32(eregOfRM(rm), 0)) );
7010 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
7011 DIP("%s %s,%s\n", opname,
7012 nameXMMReg(eregOfRM(rm)),
7013 nameXMMReg(gregOfRM(rm)) );
7016 addr = disAMode ( &alen, sorb, delta, dis_buf );
7018 binop( Iop_SetV128lo32,
7020 loadLE(Ity_I32, mkexpr(addr)) ));
7021 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
7022 DIP("%s %s,%s\n", opname,
7024 nameXMMReg(gregOfRM(rm)) );
7030 /* Lowest 64-bit lane only unary SSE operation, G = op(E).
     The upper 64 bits of the result come from the old G value. */
7032 static UInt dis_SSE_E_to_G_unary_lo64 (
7033 UChar sorb, Int delta,
7034 HChar* opname, IROp op
7037 /* First we need to get the old G value and patch the low 64 bits
7038 of the E operand into it. Then apply op and write back to G. */
7042 UChar rm = getIByte(delta);
7043 IRTemp oldG0 = newTemp(Ity_V128);
7044 IRTemp oldG1 = newTemp(Ity_V128);
7046 assign( oldG0, getXMMReg(gregOfRM(rm)) );
7048 if (epartIsReg(rm)) {
     /* oldG1 = oldG0 with its low 64 bits replaced by E's lane 0. */
7050 binop( Iop_SetV128lo64,
7052 getXMMRegLane64(eregOfRM(rm), 0)) );
7053 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
7054 DIP("%s %s,%s\n", opname,
7055 nameXMMReg(eregOfRM(rm)),
7056 nameXMMReg(gregOfRM(rm)) );
7059 addr = disAMode ( &alen, sorb, delta, dis_buf );
7061 binop( Iop_SetV128lo64,
7063 loadLE(Ity_I64, mkexpr(addr)) ));
7064 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
7065 DIP("%s %s,%s\n", opname,
7067 nameXMMReg(gregOfRM(rm)) );
7073 /* SSE integer binary operation:
7074 G = G `op` E (eLeft == False)
7075 G = E `op` G (eLeft == True)
7077 static UInt dis_SSEint_E_to_G(
7078 UChar sorb, Int delta,
7079 HChar* opname, IROp op,
7086 UChar rm = getIByte(delta);
7087 IRExpr* gpart = getXMMReg(gregOfRM(rm));
7088 IRExpr* epart = NULL;
7089 if (epartIsReg(rm)) {
7090 epart = getXMMReg(eregOfRM(rm));
7091 DIP("%s %s,%s\n", opname,
7092 nameXMMReg(eregOfRM(rm)),
7093 nameXMMReg(gregOfRM(rm)) );
7096 addr = disAMode ( &alen, sorb, delta, dis_buf );
7097 epart = loadLE(Ity_V128, mkexpr(addr));
7098 DIP("%s %s,%s\n", opname,
7100 nameXMMReg(gregOfRM(rm)) );
     /* eLeft selects operand order; needed for non-commutative ops
        (e.g. subtracts and saturating subtracts). */
7103 putXMMReg( gregOfRM(rm),
7104 eLeft ? binop(op, epart, gpart)
7105 : binop(op, gpart, epart) );
7110 /* Helper for doing SSE FP comparisons.  Maps the CMPPS/CMPSS/
     CMPPD/CMPSD imm8 predicate to an IR comparison op, selected by
     operand size (4 = single, 8 = double) and whether all lanes or
     only the lowest lane are compared.  NOTE(review): only predicates
     0..3 (eq/lt/le/unord) are visible here; the not-variants (4..7)
     are presumably expressed via *needNot -- confirm in full source. */
7112 static void findSSECmpOp ( Bool* needNot, IROp* op,
7113 Int imm8, Bool all_lanes, Int sz )
7123 if (sz == 4 && all_lanes) {
7125 case 0: *op = Iop_CmpEQ32Fx4; return;
7126 case 1: *op = Iop_CmpLT32Fx4; return;
7127 case 2: *op = Iop_CmpLE32Fx4; return;
7128 case 3: *op = Iop_CmpUN32Fx4; return;
7132 if (sz == 4 && !all_lanes) {
7134 case 0: *op = Iop_CmpEQ32F0x4; return;
7135 case 1: *op = Iop_CmpLT32F0x4; return;
7136 case 2: *op = Iop_CmpLE32F0x4; return;
7137 case 3: *op = Iop_CmpUN32F0x4; return;
7141 if (sz == 8 && all_lanes) {
7143 case 0: *op = Iop_CmpEQ64Fx2; return;
7144 case 1: *op = Iop_CmpLT64Fx2; return;
7145 case 2: *op = Iop_CmpLE64Fx2; return;
7146 case 3: *op = Iop_CmpUN64Fx2; return;
7150 if (sz == 8 && !all_lanes) {
7152 case 0: *op = Iop_CmpEQ64F0x2; return;
7153 case 1: *op = Iop_CmpLT64F0x2; return;
7154 case 2: *op = Iop_CmpLE64F0x2; return;
7155 case 3: *op = Iop_CmpUN64F0x2; return;
     /* Unhandled (predicate, size, lanes) combination. */
7159 vpanic("findSSECmpOp(x86,guest)");
7162 /* Handles SSE 32F/64F comparisons (CMPPS/CMPSS/CMPPD/CMPSD).
     Computes the 'plain' comparison via findSSECmpOp, then applies a
     complement if the predicate requires one (needNot). */
7164 static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta,
7165 HChar* opname, Bool all_lanes, Int sz )
7170 Bool needNot = False;
7171 IROp op = Iop_INVALID;
7172 IRTemp plain = newTemp(Ity_V128);
7173 UChar rm = getIByte(delta);
7175 vassert(sz == 4 || sz == 8);
7176 if (epartIsReg(rm)) {
     /* imm8 predicate byte follows the modrm byte. */
7177 imm8 = getIByte(delta+1);
7178 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
7179 assign( plain, binop(op, getXMMReg(gregOfRM(rm)),
7180 getXMMReg(eregOfRM(rm))) );
7182 DIP("%s $%d,%s,%s\n", opname,
7184 nameXMMReg(eregOfRM(rm)),
7185 nameXMMReg(gregOfRM(rm)) );
7187 addr = disAMode ( &alen, sorb, delta, dis_buf );
7188 imm8 = getIByte(delta+alen);
7189 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
7193 getXMMReg(gregOfRM(rm)),
     /* Scalar forms read only 4 or 8 bytes; zero-extend to V128. */
7194 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
7195 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
7196 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
7200 DIP("%s $%d,%s,%s\n", opname,
7203 nameXMMReg(gregOfRM(rm)) );
7206 if (needNot && all_lanes) {
7207 putXMMReg( gregOfRM(rm),
7208 unop(Iop_NotV128, mkexpr(plain)) );
7211 if (needNot && !all_lanes) {
     /* Scalar form: only complement the low lane; mkV128's mask is
        one bit per byte, so 0x000F = low 32 bits, 0x00FF = low 64. */
7212 mask = toUShort( sz==4 ? 0x000F : 0x00FF );
7213 putXMMReg( gregOfRM(rm),
7214 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
7217 putXMMReg( gregOfRM(rm), mkexpr(plain) );
7224 /* Vector by scalar shift of G by the amount specified at the bottom
7227 static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta,
7228 HChar* opname, IROp op )
7234 UChar rm = getIByte(delta);
7235 IRTemp g0 = newTemp(Ity_V128);
7236 IRTemp g1 = newTemp(Ity_V128);
7237 IRTemp amt = newTemp(Ity_I32);
7238 IRTemp amt8 = newTemp(Ity_I8);
7239 if (epartIsReg(rm)) {
7240 assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
7241 DIP("%s %s,%s\n", opname,
7242 nameXMMReg(eregOfRM(rm)),
7243 nameXMMReg(gregOfRM(rm)) );
7246 addr = disAMode ( &alen, sorb, delta, dis_buf );
7247 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
7248 DIP("%s %s,%s\n", opname,
7250 nameXMMReg(gregOfRM(rm)) );
7253 assign( g0, getXMMReg(gregOfRM(rm)) );
7254 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
7256 shl = shr = sar = False;
7259 case Iop_ShlN16x8: shl = True; size = 32; break;
7260 case Iop_ShlN32x4: shl = True; size = 32; break;
7261 case Iop_ShlN64x2: shl = True; size = 64; break;
7262 case Iop_SarN16x8: sar = True; size = 16; break;
7263 case Iop_SarN32x4: sar = True; size = 32; break;
7264 case Iop_ShrN16x8: shr = True; size = 16; break;
7265 case Iop_ShrN32x4: shr = True; size = 32; break;
7266 case Iop_ShrN64x2: shr = True; size = 64; break;
7267 default: vassert(0);
7274 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
7276 binop(op, mkexpr(g0), mkexpr(amt8))
7284 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
7285 binop(op, mkexpr(g0), mkU8(size-1)),
7286 binop(op, mkexpr(g0), mkexpr(amt8))
7294 putXMMReg( gregOfRM(rm), mkexpr(g1) );
7299 /* Vector by scalar shift of E by an immediate byte (PSLL/PSRL/PSRA
     with /2, /4 or /6 group encoding).  The count is known at decode
     time, so oversized counts are resolved statically rather than
     with a Mux0X. */
7302 UInt dis_SSE_shiftE_imm ( Int delta, HChar* opname, IROp op )
7305 UChar rm = getIByte(delta);
7306 IRTemp e0 = newTemp(Ity_V128);
7307 IRTemp e1 = newTemp(Ity_V128);
     /* These encodings only exist in the register form, with the
        group number (2/4/6) in the greg field. */
7309 vassert(epartIsReg(rm));
7310 vassert(gregOfRM(rm) == 2
7311 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
7312 amt = getIByte(delta+1);
7314 DIP("%s $%d,%s\n", opname,
7316 nameXMMReg(eregOfRM(rm)) );
7317 assign( e0, getXMMReg(eregOfRM(rm)) );
7319 shl = shr = sar = False;
7322 case Iop_ShlN16x8: shl = True; size = 16; break;
7323 case Iop_ShlN32x4: shl = True; size = 32; break;
7324 case Iop_ShlN64x2: shl = True; size = 64; break;
7325 case Iop_SarN16x8: sar = True; size = 16; break;
7326 case Iop_SarN32x4: sar = True; size = 32; break;
7327 case Iop_ShrN16x8: shr = True; size = 16; break;
7328 case Iop_ShrN32x4: shr = True; size = 32; break;
7329 case Iop_ShrN64x2: shr = True; size = 64; break;
7330 default: vassert(0);
     /* Logical shifts by >= lane size give zero (resolved here at
        decode time since amt is an immediate). */
7334 assign( e1, amt >= size
7336 : binop(op, mkexpr(e0), mkU8(amt))
     /* Arithmetic shifts by >= lane size clamp to size-1. */
7340 assign( e1, amt >= size
7341 ? binop(op, mkexpr(e0), mkU8(size-1))
7342 : binop(op, mkexpr(e0), mkU8(amt))
7349 putXMMReg( eregOfRM(rm), mkexpr(e1) );
7354 /* Get the current SSE rounding mode from the guest state, masked
     down to its low bits. */
7356 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
7358 return binop( Iop_And32,
7359 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ),
/* Write a new SSE rounding mode (an Ity_I32 expression) into the
   guest state. */
7363 static void put_sse_roundingmode ( IRExpr* sseround )
7365 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
7366 stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) );
7369 /* Break a 128-bit value up into four 32-bit ints.  *t3 receives the
     most significant word, *t0 the least significant.  The output
     temps must be passed in as IRTemp_INVALID and are allocated here. */
7371 static void breakup128to32s ( IRTemp t128,
7373 IRTemp* t3, IRTemp* t2,
7374 IRTemp* t1, IRTemp* t0 )
7376 IRTemp hi64 = newTemp(Ity_I64);
7377 IRTemp lo64 = newTemp(Ity_I64);
7378 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
7379 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
7381 vassert(t0 && *t0 == IRTemp_INVALID);
7382 vassert(t1 && *t1 == IRTemp_INVALID);
7383 vassert(t2 && *t2 == IRTemp_INVALID);
7384 vassert(t3 && *t3 == IRTemp_INVALID);
7386 *t0 = newTemp(Ity_I32);
7387 *t1 = newTemp(Ity_I32);
7388 *t2 = newTemp(Ity_I32);
7389 *t3 = newTemp(Ity_I32);
7390 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
7391 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
7392 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
7393 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
7396 /* Construct a 128-bit value from four 32-bit ints; t3 is the most
     significant word, t0 the least.  Inverse of breakup128to32s. */
7398 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
7399 IRTemp t1, IRTemp t0 )
7402 binop( Iop_64HLtoV128,
7403 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
7404 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
7408 /* Break a 64-bit value up into four 16-bit ints.  *t3 receives the
     most significant halfword, *t0 the least.  The output temps must
     be passed in as IRTemp_INVALID and are allocated here. */
7410 static void breakup64to16s ( IRTemp t64,
7412 IRTemp* t3, IRTemp* t2,
7413 IRTemp* t1, IRTemp* t0 )
7415 IRTemp hi32 = newTemp(Ity_I32);
7416 IRTemp lo32 = newTemp(Ity_I32);
7417 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
7418 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
7420 vassert(t0 && *t0 == IRTemp_INVALID);
7421 vassert(t1 && *t1 == IRTemp_INVALID);
7422 vassert(t2 && *t2 == IRTemp_INVALID);
7423 vassert(t3 && *t3 == IRTemp_INVALID);
7425 *t0 = newTemp(Ity_I16);
7426 *t1 = newTemp(Ity_I16);
7427 *t2 = newTemp(Ity_I16);
7428 *t3 = newTemp(Ity_I16);
7429 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
7430 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
7431 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
7432 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
7435 /* Construct a 64-bit value from four 16-bit ints; t3 is the most
     significant halfword, t0 the least.  Inverse of breakup64to16s. */
7437 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
7438 IRTemp t1, IRTemp t0 )
7441 binop( Iop_32HLto64,
7442 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
7443 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
7447 /* Generate IR to set the guest %EFLAGS from the pushfl-format image
7448 in the given 32-bit temporary. The flags that are set are: O S Z A
7451 In all cases, code to set AC is generated. However, VEX actually
7452 ignores the AC value and so can optionally emit an emulation
7453 warning when it is enabled. In this routine, an emulation warning
7454 is only emitted if emit_AC_emwarn is True, in which case
7455 next_insn_EIP must be correct (this allows for correct code
7456 generation for popfl/popfw). If emit_AC_emwarn is False,
7457 next_insn_EIP is unimportant (this allows for easy if kludgey code
7458 generation for IRET.) */
7461 void set_EFLAGS_from_value ( IRTemp t1,
7462 Bool emit_AC_emwarn,
7463 Addr32 next_insn_EIP )
7465 vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);
7467 /* t1 is the flag word. Mask out everything except OSZACP and set
7468 the flags thunk to X86G_CC_OP_COPY. */
7469 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
7470 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
7471 stmt( IRStmt_Put( OFFB_CC_DEP1,
7474 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
7475 | X86G_CC_MASK_A | X86G_CC_MASK_Z
7476 | X86G_CC_MASK_S| X86G_CC_MASK_O )
7480 /* Set NDEP even though it isn't used. This makes redundant-PUT
7481 elimination of previous stores to this field work better. */
7482 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
7484 /* Also need to set the D flag, which is held in bit 10 of t1.
7485 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
7491 binop(Iop_Shr32, mkexpr(t1), mkU8(10)),
7497 /* Set the ID flag, held in bit 21 of t1. */
7503 binop(Iop_Shr32, mkexpr(t1), mkU8(21)),
7509 /* And set the AC flag (bit 18 of t1). If setting it to 1, possibly
7510 emit an emulation warning. */
7516 binop(Iop_Shr32, mkexpr(t1), mkU8(18)),
7522 if (emit_AC_emwarn) {
7523 put_emwarn( mkU32(EmWarn_X86_acFlag) );
     /* Only raise the warning (side-exit) when AC is actually set. */
7527 binop(Iop_And32, mkexpr(t1), mkU32(1<<18)),
7530 IRConst_U32( next_insn_EIP )
7537 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
7538 values (aa,bb), computes, for each of the 4 16-bit lanes:
7540 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
7542 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
7544 IRTemp aa = newTemp(Ity_I64);
7545 IRTemp bb = newTemp(Ity_I64);
7546 IRTemp aahi32s = newTemp(Ity_I64);
7547 IRTemp aalo32s = newTemp(Ity_I64);
7548 IRTemp bbhi32s = newTemp(Ity_I64);
7549 IRTemp bblo32s = newTemp(Ity_I64);
7550 IRTemp rHi = newTemp(Ity_I64);
7551 IRTemp rLo = newTemp(Ity_I64);
7552 IRTemp one32x2 = newTemp(Ity_I64);
     /* Widen the 16-bit lanes to 32 bits by interleaving each input
        with itself (the sign-extension step is presumably completed
        by shifts on lines outside this excerpt). */
7557 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
7561 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
7565 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
7569 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
     /* Constant 1 in each 32-bit lane, for the rounding add. */
7571 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
7580 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
7596 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
     /* Collect the low halves of the four 32-bit results back into
        four 16-bit lanes. */
7605 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
7608 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
7609 values (aa,bb), computes, for each lane:
7611 if aa_lane < 0 then - bb_lane
7612 else if aa_lane > 0 then bb_lane
7615 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
7617 IRTemp aa = newTemp(Ity_I64);
7618 IRTemp bb = newTemp(Ity_I64);
7619 IRTemp zero = newTemp(Ity_I64);
7620 IRTemp bbNeg = newTemp(Ity_I64);
7621 IRTemp negMask = newTemp(Ity_I64);
7622 IRTemp posMask = newTemp(Ity_I64);
7623 IROp opSub = Iop_INVALID;
7624 IROp opCmpGTS = Iop_INVALID;
     /* Pick lane-sized subtract and signed-greater-than ops. */
7627 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
7628 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
7629 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
7630 default: vassert(0);
7635 assign( zero, mkU64(0) );
     /* bbNeg = -bb, per lane; masks select bb, -bb or 0 (aa==0 lanes
        match neither mask, giving 0). */
7636 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
7637 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
7638 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
7642 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
7643 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
7647 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
7648 value aa, computes, for each lane
7650 if aa < 0 then -aa else aa
7652 Note that the result is interpreted as unsigned, so that the
7653 absolute value of the most negative signed input can be
7656 static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
7658 IRTemp aa = newTemp(Ity_I64);
7659 IRTemp zero = newTemp(Ity_I64);
7660 IRTemp aaNeg = newTemp(Ity_I64);
7661 IRTemp negMask = newTemp(Ity_I64);
7662 IRTemp posMask = newTemp(Ity_I64);
7663 IROp opSub = Iop_INVALID;
7664 IROp opSarN = Iop_INVALID;
     /* Pick lane-sized subtract and arithmetic-right-shift ops. */
7667 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
7668 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
7669 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
7670 default: vassert(0);
     /* negMask = per-lane sign bit replicated across the lane;
        posMask is its complement. */
7674 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
7675 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
7676 assign( zero, mkU64(0) );
7677 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
7680 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
7681 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
/* Helper for PALIGNR: form a 64-bit value by taking the bottom
   (8-byteShift) bytes of hi64 above the top byteShift bytes of lo64.
   byteShift must be in 1..7; the 0 and 8 cases are handled by the
   caller without shifting. */
7684 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
7685 IRTemp lo64, Int byteShift )
7687 vassert(byteShift >= 1 && byteShift <= 7);
7690 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
7691 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
7695 /* Generate a SIGSEGV followed by a restart of the current instruction
7696 if effective_addr is not 16-aligned. This is required behaviour
7697 for some SSE3 instructions and all 128-bit SSSE3 instructions.
7698 This assumes that guest_RIP_curr_instr is set correctly! */
7699 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
     /* Side-exit when any of the low 4 address bits are set. */
7704 binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
7707 IRConst_U32(guest_EIP_curr_instr)
7713 /* Helper for deciding whether a given insn (starting at the opcode
7714 byte) may validly be used with a LOCK prefix. The following insns
7715 may be used with LOCK when their destination operand is in memory.
7716 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
7718 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
7719 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
7720 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
7721 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
7722 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
7723 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
7724 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
7738 CMPXCHG 0F B0, 0F B1
7743 ------------------------------
7745 80 /0 = addb $imm8, rm8
7746 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
7747 82 /0 = addb $imm8, rm8
7748 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
7751 01 = addl r32, rm32 and addw r16, rm16
7753 Same for ADD OR ADC SBB AND SUB XOR
7756 FF /1 = dec rm32 and dec rm16
7759 FF /0 = inc rm32 and inc rm16
7762 F7 /3 = neg rm32 and neg rm16
7765 F7 /2 = not rm32 and not rm16
7767 0F BB = btcw r16, rm16 and btcl r32, rm32
7768 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
/* Decide whether the insn whose opcode bytes start at opc[0] is one
   that validly accepts a LOCK prefix (see the table in the comment
   above).  opc[1] is the modrm byte for one-byte opcodes; for 0F
   two-byte opcodes the modrm is opc[2].  In every case the
   destination must be memory (epart not a register). */
7772 static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
     /* ADD/OR/ADC/SBB/AND/SUB/XOR, r->m forms. */
7775 case 0x00: case 0x01: case 0x08: case 0x09:
7776 case 0x10: case 0x11: case 0x18: case 0x19:
7777 case 0x20: case 0x21: case 0x28: case 0x29:
7778 case 0x30: case 0x31:
7779 if (!epartIsReg(opc[1]))
     /* Group-1 immediate forms: /0../6 (ADD..XOR); /7 is CMP, which
        is not lockable.  (gregOfRM >= 0 always holds; kept for
        symmetry.) */
7783 case 0x80: case 0x81: case 0x82: case 0x83:
7784 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
7785 && !epartIsReg(opc[1]))
     /* INC (/0) and DEC (/1). */
7789 case 0xFE: case 0xFF:
7790 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
7791 && !epartIsReg(opc[1]))
     /* NOT (/2) and NEG (/3). */
7795 case 0xF6: case 0xF7:
7796 if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
7797 && !epartIsReg(opc[1]))
     /* XCHG r,m (implicitly locked anyway). */
7801 case 0x86: case 0x87:
7802 if (!epartIsReg(opc[1]))
     /* 0F BB/B3/AB = BTC/BTR/BTS r,m. */
7808 case 0xBB: case 0xB3: case 0xAB:
7809 if (!epartIsReg(opc[2]))
     /* 0F BA /5../7 = BTS/BTR/BTC $imm8,m. */
7813 if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
7814 && !epartIsReg(opc[2]))
     /* 0F B0/B1 = CMPXCHG. */
7817 case 0xB0: case 0xB1:
7818 if (!epartIsReg(opc[2]))
     /* 0F C7 /1 = CMPXCHG8B. */
7822 if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
     /* 0F C0/C1 = XADD. */
7825 case 0xC0: case 0xC1:
7826 if (!epartIsReg(opc[2]))
7831 } /* switch (opc[1]) */
7837 } /* switch (opc[0]) */
7843 /*------------------------------------------------------------*/
7844 /*--- Disassemble a single instruction ---*/
7845 /*------------------------------------------------------------*/
7847 /* Disassemble a single instruction into IR. The instruction is
7848 located in host memory at &guest_code[delta]. *expect_CAS is set
7849 to True if the resulting IR is expected to contain an IRCAS
7850 statement, and False if it's not expected to. This makes it
7851 possible for the caller of disInstr_X86_WRK to check that
7852 LOCK-prefixed instructions are at least plausibly translated, in
7853 that it becomes possible to check that a (validly) LOCK-prefixed
7854 instruction generates a translation containing an IRCAS, and
7855 instructions without LOCK prefixes don't generate translations
7856 containing an IRCAS.
7859 DisResult disInstr_X86_WRK (
7860 /*OUT*/Bool* expect_CAS,
7862 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
7864 void* callback_opaque,
7866 VexArchInfo* archinfo,
7871 IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
7873 UChar opc, modrm, abyte, pre;
7876 Int am_sz, d_sz, n_prefixes;
7878 UChar* insn; /* used in SSE decoders */
7880 /* The running delta */
7881 Int delta = (Int)delta64;
7883 /* Holds eip at the start of the insn, so that we can print
7884 consistent error messages for unimplemented insns. */
7885 Int delta_start = delta;
7887 /* sz denotes the nominal data-op size of the insn; we change it to
7888 2 if an 0x66 prefix is seen */
7891 /* sorb holds the segment-override-prefix byte, if any. Zero if no
7892 prefix has been seen, else one of {0x26, 0x3E, 0x64, 0x65}
7893 indicating the prefix. */
7896 /* Gets set to True if a LOCK prefix is seen. */
7897 Bool pfx_lock = False;
7899 /* Set result defaults. */
7900 dres.whatNext = Dis_Continue;
7902 dres.continueAt = 0;
7904 *expect_CAS = False;
7906 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
7908 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
7909 DIP("\t0x%x: ", guest_EIP_bbstart+delta);
7911 /* We may be asked to update the guest EIP before going further. */
7913 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr)) );
7915 /* Spot "Special" instructions (see comment at top of file). */
7917 UChar* code = (UChar*)(guest_code + delta);
7918 /* Spot the 12-byte preamble:
7919 C1C703 roll $3, %edi
7920 C1C70D roll $13, %edi
7921 C1C71D roll $29, %edi
7922 C1C713 roll $19, %edi
7924 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 &&
7925 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D &&
7926 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D &&
7927 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) {
7928 /* Got a "Special" instruction preamble. Which one is it? */
7929 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) {
7930 /* %EDX = client_request ( %EAX ) */
7931 DIP("%%edx = client_request ( %%eax )\n");
7933 jmp_lit(Ijk_ClientReq, guest_EIP_bbstart+delta);
7934 dres.whatNext = Dis_StopHere;
7935 goto decode_success;
7938 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) {
7939 /* %EAX = guest_NRADDR */
7940 DIP("%%eax = guest_NRADDR\n");
7942 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
7943 goto decode_success;
7946 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) {
7947 /* call-noredir *%EAX */
7948 DIP("call-noredir *%%eax\n");
7950 t1 = newTemp(Ity_I32);
7951 assign(t1, getIReg(4,R_EAX));
7952 t2 = newTemp(Ity_I32);
7953 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
7954 putIReg(4, R_ESP, mkexpr(t2));
7955 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
7956 jmp_treg(Ijk_NoRedir,t1);
7957 dres.whatNext = Dis_StopHere;
7958 goto decode_success;
7960 /* We don't know what it is. */
7961 goto decode_failure;
7965 /* To make a syscall in L4Re the guest must access its UTCB.
7966 * The address of the UTCB is stored in %fs, so we look for
7967 * the following instructions:
7968 * 64 a1 00 00 00 00 mov %fs:0x0,%eax
7969 * 64 8b 0d 00 00 00 00 mov %fs:0x0,%ecx
7970 * 64 8b 15 00 00 00 00 mov %fs:0x0,%edx
7971 * 64 8b 3d 00 00 00 00 mov %fs:0x0,%edi
7972 * 64 8b 35 00 00 00 00 mov %fs:0x0,%esi
7974 if (code[ 0] == 0x64 && code[ 1] == 0xa1 && code[ 2] == 0x0 &&
7975 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0) {
7976 // printf("%x\n", code);
7977 // printf("0x%x\n", guest_EIP_bbstart+delta);
7978 // DIP("%%edx = client_request ( %%eax )\n");
7979 // putIReg(4, R_EAX, mkexpr(0xdeadbeef));
7980 // vg_enter_kdebug();
7982 jmp_lit(Ijk_l4_utcb_eax, guest_EIP_bbstart+delta);
7983 dres.whatNext = Dis_StopHere;
7984 goto decode_success;
7987 // TODO implement me
7989 if (code[ 0] == 0x64 && code[ 1] == 0x8b && code[ 2] == 0x0d &&
7990 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0 && code[ 6] == 0x0) {
7992 jmp_lit(Ijk_l4_utcb_ecx, guest_EIP_bbstart+delta);
7993 dres.whatNext = Dis_StopHere;
7994 goto decode_success;
7997 if (code[ 0] == 0x64 && code[ 1] == 0x8b && code[ 2] == 0x15 &&
7998 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0 && code[ 6] == 0x0) {
8000 jmp_lit(Ijk_l4_utcb_edx, guest_EIP_bbstart+delta);
8001 dres.whatNext = Dis_StopHere;
8002 goto decode_success;
8005 if (code[ 0] == 0x64 && code[ 1] == 0x8b && code[ 2] == 0x3d &&
8006 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0 && code[ 6] == 0x0) {
8008 jmp_lit(Ijk_l4_utcb_edi, guest_EIP_bbstart+delta);
8009 dres.whatNext = Dis_StopHere;
8010 goto decode_success;
8014 if (code[ 0] == 0x64 && code[ 1] == 0x8b && code[ 2] == 0x35 &&
8015 code[ 3] == 0x0 && code[ 4] == 0x0 && code[ 5] == 0x0 && code[ 6] == 0x0) {
8017 jmp_lit(Ijk_l4_utcb_esi, guest_EIP_bbstart+delta);
8018 dres.whatNext = Dis_StopHere;
8019 goto decode_success;
8023 if (code[ 0] == 0x0F && code[ 1] == 0x0B) {
8025 jmp_lit(Ijk_l4_ud2, guest_EIP_bbstart+delta);
8026 dres.whatNext = Dis_StopHere;
8027 goto decode_success;
8031 * The L4Re artificial trap instruction:
8034 * 0xc1 0xc0 0x42 rol eax, 0x42
8035 * 0xc1 0xc8 0x42 ror eax, 0x42
8038 if (code[ 0] == 0x50 && code[ 1] == 0xc1 &&
8039 code[ 2] == 0xc0 && code[ 3] == 0x42 &&
8040 code[ 4] == 0xc1 && code[ 5] == 0xc8 &&
8041 code[ 6] == 0x42 && code[ 7] == 0x58) {
8043 jmp_lit(Ijk_l4_artificial, guest_EIP_bbstart + delta);
8044 dres.whatNext = Dis_StopHere;
8045 goto decode_success;
8050 /* Handle a couple of weird-ass NOPs that have been observed in the
8053 UChar* code = (UChar*)(guest_code + delta);
8054 /* Sun's JVM 1.5.0 uses the following as a NOP:
8055 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
8056 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
8057 && code[3] == 0x65 && code[4] == 0x90) {
8058 DIP("%%es:%%cs:%%fs:%%gs:nop\n");
8060 goto decode_success;
8062 /* Don't barf on recent binutils padding,
8063 all variants of which are: nopw %cs:0x0(%eax,%eax,1)
8064 66 2e 0f 1f 84 00 00 00 00 00
8065 66 66 2e 0f 1f 84 00 00 00 00 00
8066 66 66 66 2e 0f 1f 84 00 00 00 00 00
8067 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8068 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8069 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8071 if (code[0] == 0x66) {
8073 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++)
8074 if (code[data16_cnt] != 0x66)
8076 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F
8077 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84
8078 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00
8079 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00
8080 && code[data16_cnt + 8] == 0x00 ) {
8081 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
8082 delta += 9 + data16_cnt;
8083 goto decode_success;
8088 /* Normal instruction handling starts here. */
8090 /* Deal with some but not all prefixes:
8093 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
8094 Not dealt with (left in place):
8099 if (n_prefixes > 7) goto decode_failure;
8100 pre = getUChar(delta);
8109 case 0x3E: /* %DS: */
8110 case 0x26: /* %ES: */
8111 case 0x64: /* %FS: */
8112 case 0x65: /* %GS: */
8114 goto decode_failure; /* only one seg override allowed */
8117 case 0x2E: { /* %CS: */
8118 /* 2E prefix on a conditional branch instruction is a
8119 branch-prediction hint, which can safely be ignored. */
8120 UChar op1 = getIByte(delta+1);
8121 UChar op2 = getIByte(delta+2);
8122 if ((op1 >= 0x70 && op1 <= 0x7F)
8124 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
8125 if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
8127 /* All other CS override cases are not handled */
8128 goto decode_failure;
8132 case 0x36: /* %SS: */
8133 /* SS override cases are not handled */
8134 goto decode_failure;
8144 /* Now we should be looking at the primary opcode byte or the
8145 leading F2 or F3. Check that any LOCK prefix is actually
8149 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
8152 *expect_CAS = False;
8153 goto decode_failure;
8158 /* ---------------------------------------------------- */
8159 /* --- The SSE decoder. --- */
8160 /* ---------------------------------------------------- */
8162 /* What did I do to deserve SSE ? Perhaps I was really bad in a
8165 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a
8166 later section, further on. */
8168 insn = (UChar*)&guest_code[delta];
8170 /* Treat fxsave specially. It should be doable even on an SSE0
8171 (Pentium-II class) CPU. Hence be prepared to handle it on
8172 any subarchitecture variant.
8175 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
8176 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8177 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) {
8179 modrm = getIByte(delta+2);
8181 vassert(!epartIsReg(modrm));
8183 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8185 gen_SEGV_if_not_16_aligned(addr);
8187 DIP("fxsave %s\n", dis_buf);
8189 /* Uses dirty helper:
8190 void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */
8191 d = unsafeIRDirty_0_N (
8193 "x86g_dirtyhelper_FXSAVE",
8194 &x86g_dirtyhelper_FXSAVE,
8195 mkIRExprVec_1( mkexpr(addr) )
8199 /* declare we're writing memory */
8201 d->mAddr = mkexpr(addr);
8204 /* declare we're reading guest state */
8207 d->fxState[0].fx = Ifx_Read;
8208 d->fxState[0].offset = OFFB_FTOP;
8209 d->fxState[0].size = sizeof(UInt);
8211 d->fxState[1].fx = Ifx_Read;
8212 d->fxState[1].offset = OFFB_FPREGS;
8213 d->fxState[1].size = 8 * sizeof(ULong);
8215 d->fxState[2].fx = Ifx_Read;
8216 d->fxState[2].offset = OFFB_FPTAGS;
8217 d->fxState[2].size = 8 * sizeof(UChar);
8219 d->fxState[3].fx = Ifx_Read;
8220 d->fxState[3].offset = OFFB_FPROUND;
8221 d->fxState[3].size = sizeof(UInt);
8223 d->fxState[4].fx = Ifx_Read;
8224 d->fxState[4].offset = OFFB_FC3210;
8225 d->fxState[4].size = sizeof(UInt);
8227 d->fxState[5].fx = Ifx_Read;
8228 d->fxState[5].offset = OFFB_XMM0;
8229 d->fxState[5].size = 8 * sizeof(U128);
8231 d->fxState[6].fx = Ifx_Read;
8232 d->fxState[6].offset = OFFB_SSEROUND;
8233 d->fxState[6].size = sizeof(UInt);
8235 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8236 images are packed back-to-back. If not, the value of
8237 d->fxState[5].size is wrong. */
8238 vassert(16 == sizeof(U128));
8239 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8241 stmt( IRStmt_Dirty(d) );
8243 goto decode_success;
8246 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
8247 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8248 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) {
8250 modrm = getIByte(delta+2);
8252 vassert(!epartIsReg(modrm));
8254 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8256 gen_SEGV_if_not_16_aligned(addr);
8258 DIP("fxrstor %s\n", dis_buf);
8260 /* Uses dirty helper:
8261 VexEmWarn x86g_do_FXRSTOR ( VexGuestX86State*, UInt )
8263 the VexEmWarn value is simply ignored (unlike for FRSTOR)
8265 d = unsafeIRDirty_0_N (
8267 "x86g_dirtyhelper_FXRSTOR",
8268 &x86g_dirtyhelper_FXRSTOR,
8269 mkIRExprVec_1( mkexpr(addr) )
8273 /* declare we're reading memory */
8275 d->mAddr = mkexpr(addr);
8278 /* declare we're writing guest state */
8281 d->fxState[0].fx = Ifx_Write;
8282 d->fxState[0].offset = OFFB_FTOP;
8283 d->fxState[0].size = sizeof(UInt);
8285 d->fxState[1].fx = Ifx_Write;
8286 d->fxState[1].offset = OFFB_FPREGS;
8287 d->fxState[1].size = 8 * sizeof(ULong);
8289 d->fxState[2].fx = Ifx_Write;
8290 d->fxState[2].offset = OFFB_FPTAGS;
8291 d->fxState[2].size = 8 * sizeof(UChar);
8293 d->fxState[3].fx = Ifx_Write;
8294 d->fxState[3].offset = OFFB_FPROUND;
8295 d->fxState[3].size = sizeof(UInt);
8297 d->fxState[4].fx = Ifx_Write;
8298 d->fxState[4].offset = OFFB_FC3210;
8299 d->fxState[4].size = sizeof(UInt);
8301 d->fxState[5].fx = Ifx_Write;
8302 d->fxState[5].offset = OFFB_XMM0;
8303 d->fxState[5].size = 8 * sizeof(U128);
8305 d->fxState[6].fx = Ifx_Write;
8306 d->fxState[6].offset = OFFB_SSEROUND;
8307 d->fxState[6].size = sizeof(UInt);
8309 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8310 images are packed back-to-back. If not, the value of
8311 d->fxState[5].size is wrong. */
8312 vassert(16 == sizeof(U128));
8313 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8315 stmt( IRStmt_Dirty(d) );
8317 goto decode_success;
8320 /* ------ SSE decoder main ------ */
8322 /* Skip parts of the decoder which don't apply given the stated
8323 guest subarchitecture. */
8324 if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
8325 goto after_sse_decoders;
8327 /* Otherwise we must be doing sse1 or sse2, so we can at least try
8330 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
8331 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) {
8332 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 );
8333 goto decode_success;
8336 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
8337 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) {
8339 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 );
8340 goto decode_success;
8343 /* 0F 55 = ANDNPS -- G = (not G) and E */
8344 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) {
8345 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 );
8346 goto decode_success;
8349 /* 0F 54 = ANDPS -- G = G and E */
8350 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) {
8351 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 );
8352 goto decode_success;
8355 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
8356 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) {
8357 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 );
8358 goto decode_success;
8361 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
8362 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) {
8364 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 );
8365 goto decode_success;
8368 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
8369 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
8370 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
8371 IRTemp argL = newTemp(Ity_F32);
8372 IRTemp argR = newTemp(Ity_F32);
8373 modrm = getIByte(delta+2);
8374 if (epartIsReg(modrm)) {
8375 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) );
8377 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8378 nameXMMReg(gregOfRM(modrm)) );
8380 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8381 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
8383 DIP("[u]comiss %s,%s\n", dis_buf,
8384 nameXMMReg(gregOfRM(modrm)) );
8386 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) );
8388 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
8389 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
8394 unop(Iop_F32toF64,mkexpr(argL)),
8395 unop(Iop_F32toF64,mkexpr(argR))),
8398 /* Set NDEP even though it isn't used. This makes redundant-PUT
8399 elimination of previous stores to this field work better. */
8400 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
8401 goto decode_success;
8404 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
8406 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) {
8407 IRTemp arg64 = newTemp(Ity_I64);
8408 IRTemp rmode = newTemp(Ity_I32);
8411 modrm = getIByte(delta+2);
8413 if (epartIsReg(modrm)) {
8414 assign( arg64, getMMXReg(eregOfRM(modrm)) );
8416 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)),
8417 nameXMMReg(gregOfRM(modrm)));
8419 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8420 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
8422 DIP("cvtpi2ps %s,%s\n", dis_buf,
8423 nameXMMReg(gregOfRM(modrm)) );
8426 assign( rmode, get_sse_roundingmode() );
8433 unop(Iop_64to32, mkexpr(arg64)) )) );
8440 unop(Iop_64HIto32, mkexpr(arg64)) )) );
8442 goto decode_success;
8445 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
8447 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) {
8448 IRTemp arg32 = newTemp(Ity_I32);
8449 IRTemp rmode = newTemp(Ity_I32);
8452 modrm = getIByte(delta+3);
8453 if (epartIsReg(modrm)) {
8454 assign( arg32, getIReg(4, eregOfRM(modrm)) );
8456 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)),
8457 nameXMMReg(gregOfRM(modrm)));
8459 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8460 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
8462 DIP("cvtsi2ss %s,%s\n", dis_buf,
8463 nameXMMReg(gregOfRM(modrm)) );
8466 assign( rmode, get_sse_roundingmode() );
8472 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
8474 goto decode_success;
8477 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8478 I32 in mmx, according to prevailing SSE rounding mode */
8479 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8480 I32 in mmx, rounding towards zero */
8481 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
8482 IRTemp dst64 = newTemp(Ity_I64);
8483 IRTemp rmode = newTemp(Ity_I32);
8484 IRTemp f32lo = newTemp(Ity_F32);
8485 IRTemp f32hi = newTemp(Ity_F32);
8486 Bool r2zero = toBool(insn[1] == 0x2C);
8489 modrm = getIByte(delta+2);
8491 if (epartIsReg(modrm)) {
8493 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8494 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1));
8495 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8496 nameXMMReg(eregOfRM(modrm)),
8497 nameMMXReg(gregOfRM(modrm)));
8499 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8500 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8501 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32,
8505 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8507 nameMMXReg(gregOfRM(modrm)));
8511 assign(rmode, mkU32((UInt)Irrm_ZERO) );
8513 assign( rmode, get_sse_roundingmode() );
8518 binop( Iop_32HLto64,
8519 binop( Iop_F64toI32S,
8521 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
8522 binop( Iop_F64toI32S,
8524 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8528 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
8529 goto decode_success;
8532 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
8533 I32 in ireg, according to prevailing SSE rounding mode */
8534 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
8535 I32 in ireg, rounding towards zero */
8536 if (insn[0] == 0xF3 && insn[1] == 0x0F
8537 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
8538 IRTemp rmode = newTemp(Ity_I32);
8539 IRTemp f32lo = newTemp(Ity_F32);
8540 Bool r2zero = toBool(insn[2] == 0x2C);
8543 modrm = getIByte(delta+3);
8544 if (epartIsReg(modrm)) {
8546 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8547 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8548 nameXMMReg(eregOfRM(modrm)),
8549 nameIReg(4, gregOfRM(modrm)));
8551 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8552 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8554 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8556 nameIReg(4, gregOfRM(modrm)));
8560 assign( rmode, mkU32((UInt)Irrm_ZERO) );
8562 assign( rmode, get_sse_roundingmode() );
8565 putIReg(4, gregOfRM(modrm),
8566 binop( Iop_F64toI32S,
8568 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8571 goto decode_success;
8574 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
8575 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) {
8576 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 );
8577 goto decode_success;
8580 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
8581 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) {
8583 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 );
8584 goto decode_success;
8587 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
8588 if (insn[0] == 0x0F && insn[1] == 0xAE
8589 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) {
8591 IRTemp t64 = newTemp(Ity_I64);
8592 IRTemp ew = newTemp(Ity_I32);
8594 modrm = getIByte(delta+2);
8595 vassert(!epartIsReg(modrm));
8598 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8600 DIP("ldmxcsr %s\n", dis_buf);
8602 /* The only thing we observe in %mxcsr is the rounding mode.
8603 Therefore, pass the 32-bit value (SSE native-format control
8604 word) to a clean helper, getting back a 64-bit value, the
8605 lower half of which is the SSEROUND value to store, and the
8606 upper half of which is the emulation-warning token which may
8609 /* ULong x86g_check_ldmxcsr ( UInt ); */
8610 assign( t64, mkIRExprCCall(
8611 Ity_I64, 0/*regparms*/,
8612 "x86g_check_ldmxcsr",
8613 &x86g_check_ldmxcsr,
8614 mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) )
8618 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
8619 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
8620 put_emwarn( mkexpr(ew) );
8621 /* Finally, if an emulation warning was reported, side-exit to
8622 the next insn, reporting the warning, so that Valgrind's
8623 dispatcher sees the warning. */
8626 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
8628 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
8631 goto decode_success;
8634 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8635 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
8636 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
8638 delta = dis_MMX( &ok, sorb, sz, delta+1 );
8640 goto decode_failure;
8641 goto decode_success;
8644 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
8645 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
8646 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
8647 goto decode_success;
8650 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
8651 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
8653 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
8654 goto decode_success;
8657 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
8658 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
8659 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
8660 goto decode_success;
8663 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
8664 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
8666 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
8667 goto decode_success;
8670 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
8671 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
8672 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
8673 modrm = getIByte(delta+2);
8674 if (epartIsReg(modrm)) {
8675 putXMMReg( gregOfRM(modrm),
8676 getXMMReg( eregOfRM(modrm) ));
8677 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8678 nameXMMReg(gregOfRM(modrm)));
8681 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8682 if (insn[1] == 0x28/*movaps*/)
8683 gen_SEGV_if_not_16_aligned( addr );
8684 putXMMReg( gregOfRM(modrm),
8685 loadLE(Ity_V128, mkexpr(addr)) );
8686 DIP("mov[ua]ps %s,%s\n", dis_buf,
8687 nameXMMReg(gregOfRM(modrm)));
8690 goto decode_success;
8693 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
8694 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
8695 if (sz == 4 && insn[0] == 0x0F
8696 && (insn[1] == 0x29 || insn[1] == 0x11)) {
8697 modrm = getIByte(delta+2);
8698 if (epartIsReg(modrm)) {
8699 /* fall through; awaiting test case */
8701 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8702 if (insn[1] == 0x29/*movaps*/)
8703 gen_SEGV_if_not_16_aligned( addr );
8704 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
8705 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
8708 goto decode_success;
8712 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
8713 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
8714 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
8715 modrm = getIByte(delta+2);
8716 if (epartIsReg(modrm)) {
8718 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
8719 getXMMRegLane64( eregOfRM(modrm), 0 ) );
8720 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8721 nameXMMReg(gregOfRM(modrm)));
8723 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8725 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
8726 loadLE(Ity_I64, mkexpr(addr)) );
8727 DIP("movhps %s,%s\n", dis_buf,
8728 nameXMMReg( gregOfRM(modrm) ));
8730 goto decode_success;
8733 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
8734 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
8735 if (!epartIsReg(insn[2])) {
8737 addr = disAMode ( &alen, sorb, delta, dis_buf );
8739 storeLE( mkexpr(addr),
8740 getXMMRegLane64( gregOfRM(insn[2]),
8741 1/*upper lane*/ ) );
8742 DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
8744 goto decode_success;
8746 /* else fall through */
8749 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
8750 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
8751 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
8752 modrm = getIByte(delta+2);
8753 if (epartIsReg(modrm)) {
8755 putXMMRegLane64( gregOfRM(modrm),
8757 getXMMRegLane64( eregOfRM(modrm), 1 ));
8758 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
8759 nameXMMReg(gregOfRM(modrm)));
8761 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8763 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
8764 loadLE(Ity_I64, mkexpr(addr)) );
8765 DIP("movlps %s, %s\n",
8766 dis_buf, nameXMMReg( gregOfRM(modrm) ));
8768 goto decode_success;
8771 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
8772 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
8773 if (!epartIsReg(insn[2])) {
8775 addr = disAMode ( &alen, sorb, delta, dis_buf );
8777 storeLE( mkexpr(addr),
8778 getXMMRegLane64( gregOfRM(insn[2]),
8779 0/*lower lane*/ ) );
8780 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
8782 goto decode_success;
8784 /* else fall through */
8787 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
8788 to 4 lowest bits of ireg(G) */
8789 if (insn[0] == 0x0F && insn[1] == 0x50) {
8790 modrm = getIByte(delta+2);
8791 if (sz == 4 && epartIsReg(modrm)) {
8793 t0 = newTemp(Ity_I32);
8794 t1 = newTemp(Ity_I32);
8795 t2 = newTemp(Ity_I32);
8796 t3 = newTemp(Ity_I32);
8798 src = eregOfRM(modrm);
8799 assign( t0, binop( Iop_And32,
8800 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
8802 assign( t1, binop( Iop_And32,
8803 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
8805 assign( t2, binop( Iop_And32,
8806 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
8808 assign( t3, binop( Iop_And32,
8809 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
8811 putIReg(4, gregOfRM(modrm),
8813 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
8814 binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
8817 DIP("movmskps %s,%s\n", nameXMMReg(src),
8818 nameIReg(4, gregOfRM(modrm)));
8819 goto decode_success;
8821 /* else fall through */
8824 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
8825 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
8826 if (insn[0] == 0x0F && insn[1] == 0x2B) {
8827 modrm = getIByte(delta+2);
8828 if (!epartIsReg(modrm)) {
8829 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8830 gen_SEGV_if_not_16_aligned( addr );
8831 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
8832 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
8834 nameXMMReg(gregOfRM(modrm)));
8836 goto decode_success;
8838 /* else fall through */
8841 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8842 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
8843 Intel manual does not say anything about the usual business of
8844 the FP reg tags getting trashed whenever an MMX insn happens.
8845 So we just leave them alone.
8847 if (insn[0] == 0x0F && insn[1] == 0xE7) {
8848 modrm = getIByte(delta+2);
8849 if (sz == 4 && !epartIsReg(modrm)) {
8850 /* do_MMX_preamble(); Intel docs don't specify this */
8851 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8852 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
8853 DIP("movntq %s,%s\n", dis_buf,
8854 nameMMXReg(gregOfRM(modrm)));
8856 goto decode_success;
8858 /* else fall through */
8861 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
8862 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
8863 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
8865 modrm = getIByte(delta+3);
8866 if (epartIsReg(modrm)) {
8867 putXMMRegLane32( gregOfRM(modrm), 0,
8868 getXMMRegLane32( eregOfRM(modrm), 0 ));
8869 DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8870 nameXMMReg(gregOfRM(modrm)));
8873 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8874 /* zero bits 127:64 */
8875 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
8876 /* zero bits 63:32 */
8877 putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
8878 /* write bits 31:0 */
8879 putXMMRegLane32( gregOfRM(modrm), 0,
8880 loadLE(Ity_I32, mkexpr(addr)) );
8881 DIP("movss %s,%s\n", dis_buf,
8882 nameXMMReg(gregOfRM(modrm)));
8885 goto decode_success;
8888 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
8890 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
8892 modrm = getIByte(delta+3);
8893 if (epartIsReg(modrm)) {
8894 /* fall through, we don't yet have a test case */
8896 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8897 storeLE( mkexpr(addr),
8898 getXMMRegLane32(gregOfRM(modrm), 0) );
8899 DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
8902 goto decode_success;
8906 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
8907 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
8908 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
8909 goto decode_success;
8912 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
8913 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
8915 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
8916 goto decode_success;
8919 /* 0F 56 = ORPS -- G = G or E */
8920 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
8921 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
8922 goto decode_success;
8925 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8926 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
8927 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
8929 delta = dis_MMXop_regmem_to_reg (
8930 sorb, delta+2, insn[1], "pavgb", False );
8931 goto decode_success;
8934 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8935 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
8936 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
8938 delta = dis_MMXop_regmem_to_reg (
8939 sorb, delta+2, insn[1], "pavgw", False );
8940 goto decode_success;
8943 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8944 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
8945 zero-extend of it in ireg(G). */
8946 if (insn[0] == 0x0F && insn[1] == 0xC5) {
8948 if (sz == 4 && epartIsReg(modrm)) {
8949 IRTemp sV = newTemp(Ity_I64);
8950 t5 = newTemp(Ity_I16);
8952 assign(sV, getMMXReg(eregOfRM(modrm)));
8953 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
8954 switch (insn[3] & 3) {
8955 case 0: assign(t5, mkexpr(t0)); break;
8956 case 1: assign(t5, mkexpr(t1)); break;
8957 case 2: assign(t5, mkexpr(t2)); break;
8958 case 3: assign(t5, mkexpr(t3)); break;
8959 default: vassert(0); /*NOTREACHED*/
8961 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
8962 DIP("pextrw $%d,%s,%s\n",
8963 (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
8964 nameIReg(4,gregOfRM(modrm)));
8966 goto decode_success;
8968 /* else fall through */
8971 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8972 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
8973 put it into the specified lane of mmx(G). */
8974 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) {
8975 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
8976 mmx reg. t4 is the new lane value. t5 is the original
8977 mmx value. t6 is the new mmx value. */
8979 t4 = newTemp(Ity_I16);
8980 t5 = newTemp(Ity_I64);
8981 t6 = newTemp(Ity_I64);
8985 assign(t5, getMMXReg(gregOfRM(modrm)));
8986 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
8988 if (epartIsReg(modrm)) {
8989 assign(t4, getIReg(2, eregOfRM(modrm)));
8992 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
8993 nameIReg(2,eregOfRM(modrm)),
8994 nameMMXReg(gregOfRM(modrm)));
8996 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8998 lane = insn[3+alen-1];
8999 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
9000 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
9002 nameMMXReg(gregOfRM(modrm)));
9006 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
9007 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
9008 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
9009 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
9010 default: vassert(0); /*NOTREACHED*/
9012 putMMXReg(gregOfRM(modrm), mkexpr(t6));
9013 goto decode_success;
9016 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9017 /* 0F EE = PMAXSW -- 16x4 signed max */
9018 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) {
9020 delta = dis_MMXop_regmem_to_reg (
9021 sorb, delta+2, insn[1], "pmaxsw", False );
9022 goto decode_success;
9025 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9026 /* 0F DE = PMAXUB -- 8x8 unsigned max */
9027 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) {
9029 delta = dis_MMXop_regmem_to_reg (
9030 sorb, delta+2, insn[1], "pmaxub", False );
9031 goto decode_success;
9034 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9035 /* 0F EA = PMINSW -- 16x4 signed min */
9036 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) {
9038 delta = dis_MMXop_regmem_to_reg (
9039 sorb, delta+2, insn[1], "pminsw", False );
9040 goto decode_success;
9043 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9044 /* 0F DA = PMINUB -- 8x8 unsigned min */
9045 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) {
9047 delta = dis_MMXop_regmem_to_reg (
9048 sorb, delta+2, insn[1], "pminub", False );
9049 goto decode_success;
9052 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9053 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
9054 mmx(G), turn them into a byte, and put zero-extend of it in
9056 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
9058 if (epartIsReg(modrm)) {
9060 t0 = newTemp(Ity_I64);
9061 t1 = newTemp(Ity_I32);
9062 assign(t0, getMMXReg(eregOfRM(modrm)));
9063 assign(t1, mkIRExprCCall(
9064 Ity_I32, 0/*regparms*/,
9065 "x86g_calculate_mmx_pmovmskb",
9066 &x86g_calculate_mmx_pmovmskb,
9067 mkIRExprVec_1(mkexpr(t0))));
9068 putIReg(4, gregOfRM(modrm), mkexpr(t1));
9069 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9070 nameIReg(4,gregOfRM(modrm)));
9072 goto decode_success;
9074 /* else fall through */
9077 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9078 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
9079 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
9081 delta = dis_MMXop_regmem_to_reg (
9082 sorb, delta+2, insn[1], "pmuluh", False );
9083 goto decode_success;
9086 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
9087 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
9088 /* 0F 18 /2 = PREFETCHT1 */
9089 /* 0F 18 /3 = PREFETCHT2 */
9090 if (insn[0] == 0x0F && insn[1] == 0x18
9091 && !epartIsReg(insn[2])
9092 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
9093 HChar* hintstr = "??";
9095 modrm = getIByte(delta+2);
9096 vassert(!epartIsReg(modrm));
9098 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9101 switch (gregOfRM(modrm)) {
9102 case 0: hintstr = "nta"; break;
9103 case 1: hintstr = "t0"; break;
9104 case 2: hintstr = "t1"; break;
9105 case 3: hintstr = "t2"; break;
9106 default: vassert(0); /*NOTREACHED*/
9109 DIP("prefetch%s %s\n", hintstr, dis_buf);
9110 goto decode_success;
9113 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
9114 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
9115 if (insn[0] == 0x0F && insn[1] == 0x0D
9116 && !epartIsReg(insn[2])
9117 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
9118 HChar* hintstr = "??";
9120 modrm = getIByte(delta+2);
9121 vassert(!epartIsReg(modrm));
9123 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9126 switch (gregOfRM(modrm)) {
9127 case 0: hintstr = ""; break;
9128 case 1: hintstr = "w"; break;
9129 default: vassert(0); /*NOTREACHED*/
9132 DIP("prefetch%s %s\n", hintstr, dis_buf);
9133 goto decode_success;
9136 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9137 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
9138 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) {
9140 delta = dis_MMXop_regmem_to_reg (
9141 sorb, delta+2, insn[1], "psadbw", False );
9142 goto decode_success;
9145 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9146 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
9147 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) {
9149 IRTemp sV, dV, s3, s2, s1, s0;
9150 s3 = s2 = s1 = s0 = IRTemp_INVALID;
9151 sV = newTemp(Ity_I64);
9152 dV = newTemp(Ity_I64);
9155 if (epartIsReg(modrm)) {
9156 assign( sV, getMMXReg(eregOfRM(modrm)) );
9157 order = (Int)insn[3];
9159 DIP("pshufw $%d,%s,%s\n", order,
9160 nameMMXReg(eregOfRM(modrm)),
9161 nameMMXReg(gregOfRM(modrm)));
9163 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9164 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
9165 order = (Int)insn[2+alen];
9167 DIP("pshufw $%d,%s,%s\n", order,
9169 nameMMXReg(gregOfRM(modrm)));
9171 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
9174 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9176 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
9177 SEL((order>>2)&3), SEL((order>>0)&3) )
9179 putMMXReg(gregOfRM(modrm), mkexpr(dV));
9181 goto decode_success;
9184 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
9185 if (insn[0] == 0x0F && insn[1] == 0x53) {
9187 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9188 "rcpps", Iop_Recip32Fx4 );
9189 goto decode_success;
9192 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
9193 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
9195 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9196 "rcpss", Iop_Recip32F0x4 );
9197 goto decode_success;
9200 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
9201 if (insn[0] == 0x0F && insn[1] == 0x52) {
9203 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9204 "rsqrtps", Iop_RSqrt32Fx4 );
9205 goto decode_success;
9208 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
9209 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) {
9211 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9212 "rsqrtss", Iop_RSqrt32F0x4 );
9213 goto decode_success;
9216 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
9217 if (insn[0] == 0x0F && insn[1] == 0xAE
9218 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
9221 /* Insert a memory fence. It's sometimes important that these
9222 are carried through to the generated code. */
9223 stmt( IRStmt_MBE(Imbe_Fence) );
9225 goto decode_success;
9228 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
9229 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
9232 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9233 sV = newTemp(Ity_V128);
9234 dV = newTemp(Ity_V128);
9235 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9237 assign( dV, getXMMReg(gregOfRM(modrm)) );
9239 if (epartIsReg(modrm)) {
9240 assign( sV, getXMMReg(eregOfRM(modrm)) );
9241 select = (Int)insn[3];
9243 DIP("shufps $%d,%s,%s\n", select,
9244 nameXMMReg(eregOfRM(modrm)),
9245 nameXMMReg(gregOfRM(modrm)));
9247 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9248 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9249 select = (Int)insn[2+alen];
9251 DIP("shufps $%d,%s,%s\n", select,
9253 nameXMMReg(gregOfRM(modrm)));
9256 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9257 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9259 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
9260 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9264 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
9265 SELD((select>>2)&3), SELD((select>>0)&3) )
9271 goto decode_success;
9274 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
9275 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) {
9276 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9277 "sqrtps", Iop_Sqrt32Fx4 );
9278 goto decode_success;
9281 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
9282 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) {
9284 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9285 "sqrtss", Iop_Sqrt32F0x4 );
9286 goto decode_success;
9289 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
9290 if (insn[0] == 0x0F && insn[1] == 0xAE
9291 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) {
9292 modrm = getIByte(delta+2);
9294 vassert(!epartIsReg(modrm));
9296 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9299 /* Fake up a native SSE mxcsr word. The only thing it depends
9300 on is SSEROUND[1:0], so call a clean helper to cook it up.
9302 /* UInt x86h_create_mxcsr ( UInt sseround ) */
9303 DIP("stmxcsr %s\n", dis_buf);
9304 storeLE( mkexpr(addr),
9307 "x86g_create_mxcsr", &x86g_create_mxcsr,
9308 mkIRExprVec_1( get_sse_roundingmode() )
9311 goto decode_success;
9314 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
9315 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) {
9316 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 );
9317 goto decode_success;
9320 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
9321 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) {
9323 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 );
9324 goto decode_success;
9327 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
9328 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
9329 /* These just appear to be special cases of SHUFPS */
9330 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
9332 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9333 Bool hi = toBool(insn[1] == 0x15);
9334 sV = newTemp(Ity_V128);
9335 dV = newTemp(Ity_V128);
9336 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9338 assign( dV, getXMMReg(gregOfRM(modrm)) );
9340 if (epartIsReg(modrm)) {
9341 assign( sV, getXMMReg(eregOfRM(modrm)) );
9343 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9344 nameXMMReg(eregOfRM(modrm)),
9345 nameXMMReg(gregOfRM(modrm)));
9347 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9348 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9350 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9352 nameXMMReg(gregOfRM(modrm)));
9355 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9356 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9359 putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) );
9361 putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) );
9364 goto decode_success;
9367 /* 0F 57 = XORPS -- G = G xor E */
9368 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) {
9369 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 );
9370 goto decode_success;
9373 /* ---------------------------------------------------- */
9374 /* --- end of the SSE decoder. --- */
9375 /* ---------------------------------------------------- */
9377 /* ---------------------------------------------------- */
9378 /* --- start of the SSE2 decoder. --- */
9379 /* ---------------------------------------------------- */
9381 /* Skip parts of the decoder which don't apply given the stated
9382 guest subarchitecture. */
9383 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
9384 goto after_sse_decoders; /* no SSE2 capabilities */
9386 insn = (UChar*)&guest_code[delta];
9388 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
9389 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) {
9390 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
9391 goto decode_success;
9394 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
9395 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
9397 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 );
9398 goto decode_success;
9401 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
9402 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) {
9403 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 );
9404 goto decode_success;
9407 /* 66 0F 54 = ANDPD -- G = G and E */
9408 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) {
9409 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 );
9410 goto decode_success;
9413 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
9414 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) {
9415 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
9416 goto decode_success;
9419 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
9420 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
9422 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 );
9423 goto decode_success;
9426 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
9427 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
9428 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
9429 IRTemp argL = newTemp(Ity_F64);
9430 IRTemp argR = newTemp(Ity_F64);
9431 modrm = getIByte(delta+2);
9432 if (epartIsReg(modrm)) {
9433 assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) );
9435 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9436 nameXMMReg(gregOfRM(modrm)) );
9438 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9439 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
9441 DIP("[u]comisd %s,%s\n", dis_buf,
9442 nameXMMReg(gregOfRM(modrm)) );
9444 assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) );
9446 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
9447 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
9451 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)),
9454 /* Set NDEP even though it isn't used. This makes redundant-PUT
9455 elimination of previous stores to this field work better. */
9456 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
9457 goto decode_success;
9460 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
9462 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
9463 IRTemp arg64 = newTemp(Ity_I64);
9466 modrm = getIByte(delta+3);
9467 if (epartIsReg(modrm)) {
9468 assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) );
9470 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9471 nameXMMReg(gregOfRM(modrm)));
9473 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9474 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9476 DIP("cvtdq2pd %s,%s\n", dis_buf,
9477 nameXMMReg(gregOfRM(modrm)) );
9482 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
9487 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
9490 goto decode_success;
9493 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
9495 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) {
9496 IRTemp argV = newTemp(Ity_V128);
9497 IRTemp rmode = newTemp(Ity_I32);
9499 modrm = getIByte(delta+2);
9500 if (epartIsReg(modrm)) {
9501 assign( argV, getXMMReg(eregOfRM(modrm)) );
9503 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9504 nameXMMReg(gregOfRM(modrm)));
9506 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9507 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9509 DIP("cvtdq2ps %s,%s\n", dis_buf,
9510 nameXMMReg(gregOfRM(modrm)) );
9513 assign( rmode, get_sse_roundingmode() );
9514 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9516 # define CVT(_t) binop( Iop_F64toF32, \
9518 unop(Iop_I32StoF64,mkexpr(_t)))
9520 putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
9521 putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
9522 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9523 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9527 goto decode_success;
9530 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
9531 lo half xmm(G), and zero upper half */
9532 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
9533 IRTemp argV = newTemp(Ity_V128);
9534 IRTemp rmode = newTemp(Ity_I32);
9537 modrm = getIByte(delta+3);
9538 if (epartIsReg(modrm)) {
9539 assign( argV, getXMMReg(eregOfRM(modrm)) );
9541 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9542 nameXMMReg(gregOfRM(modrm)));
9544 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9545 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9547 DIP("cvtpd2dq %s,%s\n", dis_buf,
9548 nameXMMReg(gregOfRM(modrm)) );
9551 assign( rmode, get_sse_roundingmode() );
9552 t0 = newTemp(Ity_F64);
9553 t1 = newTemp(Ity_F64);
9554 assign( t0, unop(Iop_ReinterpI64asF64,
9555 unop(Iop_V128to64, mkexpr(argV))) );
9556 assign( t1, unop(Iop_ReinterpI64asF64,
9557 unop(Iop_V128HIto64, mkexpr(argV))) );
9559 # define CVT(_t) binop( Iop_F64toI32S, \
9563 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9564 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9565 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9566 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9570 goto decode_success;
9573 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9574 I32 in mmx, according to prevailing SSE rounding mode */
9575 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9576 I32 in mmx, rounding towards zero */
9577 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9578 IRTemp dst64 = newTemp(Ity_I64);
9579 IRTemp rmode = newTemp(Ity_I32);
9580 IRTemp f64lo = newTemp(Ity_F64);
9581 IRTemp f64hi = newTemp(Ity_F64);
9582 Bool r2zero = toBool(insn[1] == 0x2C);
9585 modrm = getIByte(delta+2);
9587 if (epartIsReg(modrm)) {
9589 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9590 assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
9591 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
9592 nameXMMReg(eregOfRM(modrm)),
9593 nameMMXReg(gregOfRM(modrm)));
9595 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9596 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9597 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,
/* Fixed: mnemonic was misspelled "cvt%spf2pi" in this (memory-operand)
   trace; the register-operand path above prints "cvt%spd2pi". */
9601 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
9603 nameMMXReg(gregOfRM(modrm)));
/* CVTTPD2PI (0x2C) truncates; CVTPD2PI (0x2D) uses the guest's
   current SSE rounding mode. */
9607 assign(rmode, mkU32((UInt)Irrm_ZERO) );
9609 assign( rmode, get_sse_roundingmode() );
9614 binop( Iop_32HLto64,
9615 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
9616 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
9620 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
9621 goto decode_success;
9624 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
9625 lo half xmm(G), and zero upper half */
9626 /* Note, this is practically identical to CVTPD2DQ. It would have
9627 been nicer to merge them together, but the insn[] offsets differ
9629 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) {
9630 IRTemp argV = newTemp(Ity_V128);
9631 IRTemp rmode = newTemp(Ity_I32);
9633 modrm = getIByte(delta+2);
9634 if (epartIsReg(modrm)) {
9635 assign( argV, getXMMReg(eregOfRM(modrm)) );
9637 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9638 nameXMMReg(gregOfRM(modrm)));
9640 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9641 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9643 DIP("cvtpd2ps %s,%s\n", dis_buf,
9644 nameXMMReg(gregOfRM(modrm)) );
9647 assign( rmode, get_sse_roundingmode() );
9648 t0 = newTemp(Ity_F64);
9649 t1 = newTemp(Ity_F64);
9650 assign( t0, unop(Iop_ReinterpI64asF64,
9651 unop(Iop_V128to64, mkexpr(argV))) );
9652 assign( t1, unop(Iop_ReinterpI64asF64,
9653 unop(Iop_V128HIto64, mkexpr(argV))) );
9655 # define CVT(_t) binop( Iop_F64toF32, \
9659 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9660 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9661 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9662 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9666 goto decode_success;
9669 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
9671 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) {
9672 IRTemp arg64 = newTemp(Ity_I64);
9674 modrm = getIByte(delta+2);
9675 if (epartIsReg(modrm)) {
9676 /* Only switch to MMX mode if the source is a MMX register.
9677 This is inconsistent with all other instructions which
9678 convert between XMM and (M64 or MMX), which always switch
9679 to MMX mode even if 64-bit operand is M64 and not MMX. At
9680 least, that's what the Intel docs seem to me to say.
9683 assign( arg64, getMMXReg(eregOfRM(modrm)) );
9685 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9686 nameXMMReg(gregOfRM(modrm)));
9688 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9689 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9691 DIP("cvtpi2pd %s,%s\n", dis_buf,
9692 nameXMMReg(gregOfRM(modrm)) );
9697 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
9702 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
9705 goto decode_success;
9708 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
9710 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) {
9711 IRTemp argV = newTemp(Ity_V128);
9712 IRTemp rmode = newTemp(Ity_I32);
9714 modrm = getIByte(delta+2);
9715 if (epartIsReg(modrm)) {
9716 assign( argV, getXMMReg(eregOfRM(modrm)) );
9718 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9719 nameXMMReg(gregOfRM(modrm)));
9721 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9722 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9724 DIP("cvtps2dq %s,%s\n", dis_buf,
9725 nameXMMReg(gregOfRM(modrm)) );
9728 assign( rmode, get_sse_roundingmode() );
9729 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9731 /* This is less than ideal. If it turns out to be a performance
9732 bottleneck it can be improved. */
9734 binop( Iop_F64toI32S, \
9736 unop( Iop_F32toF64, \
9737 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9739 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
9740 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
9741 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9742 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9746 goto decode_success;
9749 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
9751 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) {
9752 IRTemp f32lo = newTemp(Ity_F32);
9753 IRTemp f32hi = newTemp(Ity_F32);
9755 modrm = getIByte(delta+2);
9756 if (epartIsReg(modrm)) {
9757 assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
9758 assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
9760 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9761 nameXMMReg(gregOfRM(modrm)));
9763 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9764 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
9765 assign( f32hi, loadLE(Ity_F32,
9766 binop(Iop_Add32,mkexpr(addr),mkU32(4))) );
9768 DIP("cvtps2pd %s,%s\n", dis_buf,
9769 nameXMMReg(gregOfRM(modrm)) );
9772 putXMMRegLane64F( gregOfRM(modrm), 1,
9773 unop(Iop_F32toF64, mkexpr(f32hi)) );
9774 putXMMRegLane64F( gregOfRM(modrm), 0,
9775 unop(Iop_F32toF64, mkexpr(f32lo)) );
9777 goto decode_success;
9780 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
9781 I32 in ireg, according to prevailing SSE rounding mode */
9782 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
9783 I32 in ireg, rounding towards zero */
9784 if (insn[0] == 0xF2 && insn[1] == 0x0F
9785 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
9786 IRTemp rmode = newTemp(Ity_I32);
9787 IRTemp f64lo = newTemp(Ity_F64);
9788 Bool r2zero = toBool(insn[2] == 0x2C);
9791 modrm = getIByte(delta+3);
9792 if (epartIsReg(modrm)) {
9794 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9795 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
9796 nameXMMReg(eregOfRM(modrm)),
9797 nameIReg(4, gregOfRM(modrm)));
9799 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9800 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9802 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
9804 nameIReg(4, gregOfRM(modrm)));
9808 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9810 assign( rmode, get_sse_roundingmode() );
9813 putIReg(4, gregOfRM(modrm),
9814 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
9816 goto decode_success;
9819 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
9820 low 1/4 xmm(G), according to prevailing SSE rounding mode */
9821 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) {
9822 IRTemp rmode = newTemp(Ity_I32);
9823 IRTemp f64lo = newTemp(Ity_F64);
9826 modrm = getIByte(delta+3);
9827 if (epartIsReg(modrm)) {
9829 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9830 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9831 nameXMMReg(gregOfRM(modrm)));
9833 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9834 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9836 DIP("cvtsd2ss %s,%s\n", dis_buf,
9837 nameXMMReg(gregOfRM(modrm)));
9840 assign( rmode, get_sse_roundingmode() );
9843 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
9846 goto decode_success;
9849 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
9851 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) {
9852 IRTemp arg32 = newTemp(Ity_I32);
9855 modrm = getIByte(delta+3);
9856 if (epartIsReg(modrm)) {
9857 assign( arg32, getIReg(4, eregOfRM(modrm)) );
9859 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)),
9860 nameXMMReg(gregOfRM(modrm)));
9862 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9863 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
9865 DIP("cvtsi2sd %s,%s\n", dis_buf,
9866 nameXMMReg(gregOfRM(modrm)) );
9871 unop(Iop_I32StoF64, mkexpr(arg32)) );
9873 goto decode_success;
9876 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
9878 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
9879 IRTemp f32lo = newTemp(Ity_F32);
9882 modrm = getIByte(delta+3);
9883 if (epartIsReg(modrm)) {
9885 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
9886 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9887 nameXMMReg(gregOfRM(modrm)));
9889 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9890 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
9892 DIP("cvtss2sd %s,%s\n", dis_buf,
9893 nameXMMReg(gregOfRM(modrm)));
9896 putXMMRegLane64F( gregOfRM(modrm), 0,
9897 unop( Iop_F32toF64, mkexpr(f32lo) ) );
9899 goto decode_success;
9902 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
9903 lo half xmm(G), and zero upper half, rounding towards zero */
9904 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) {
9905 IRTemp argV = newTemp(Ity_V128);
9906 IRTemp rmode = newTemp(Ity_I32);
9908 modrm = getIByte(delta+2);
9909 if (epartIsReg(modrm)) {
9910 assign( argV, getXMMReg(eregOfRM(modrm)) );
9912 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9913 nameXMMReg(gregOfRM(modrm)));
9915 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9916 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9918 DIP("cvttpd2dq %s,%s\n", dis_buf,
9919 nameXMMReg(gregOfRM(modrm)) );
9922 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9924 t0 = newTemp(Ity_F64);
9925 t1 = newTemp(Ity_F64);
9926 assign( t0, unop(Iop_ReinterpI64asF64,
9927 unop(Iop_V128to64, mkexpr(argV))) );
9928 assign( t1, unop(Iop_ReinterpI64asF64,
9929 unop(Iop_V128HIto64, mkexpr(argV))) );
9931 # define CVT(_t) binop( Iop_F64toI32S, \
9935 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9936 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9937 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9938 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9942 goto decode_success;
9945 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
9946 xmm(G), rounding towards zero */
9947 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
9948 IRTemp argV = newTemp(Ity_V128);
9949 IRTemp rmode = newTemp(Ity_I32);
9952 modrm = getIByte(delta+3);
9953 if (epartIsReg(modrm)) {
9954 assign( argV, getXMMReg(eregOfRM(modrm)) );
9956 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9957 nameXMMReg(gregOfRM(modrm)));
9959 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9960 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9962 DIP("cvttps2dq %s,%s\n", dis_buf,
9963 nameXMMReg(gregOfRM(modrm)) );
9966 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9967 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9969 /* This is less than ideal. If it turns out to be a performance
9970 bottleneck it can be improved. */
9972 binop( Iop_F64toI32S, \
9974 unop( Iop_F32toF64, \
9975 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9977 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
9978 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
9979 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9980 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9984 goto decode_success;
9987 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
9988 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) {
9989 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 );
9990 goto decode_success;
9993 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
9994 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) {
9996 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 );
9997 goto decode_success;
10000 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
10001 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
10002 if (insn[0] == 0x0F && insn[1] == 0xAE
10003 && epartIsReg(insn[2])
10004 && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) {
10007 /* Insert a memory fence. It's sometimes important that these
10008 are carried through to the generated code. */
10009 stmt( IRStmt_MBE(Imbe_Fence) );
10010 DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
10011 goto decode_success;
10014 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
10015 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) {
10016 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 );
10017 goto decode_success;
10020 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
10021 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) {
10023 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 );
10024 goto decode_success;
10027 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
10028 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) {
10029 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 );
10030 goto decode_success;
10033 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
10034 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) {
10036 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 );
10037 goto decode_success;
10040 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
10041 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
10042 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
10043 if (sz == 2 && insn[0] == 0x0F
10044 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
10045 HChar* wot = insn[1]==0x28 ? "apd" :
10046 insn[1]==0x10 ? "upd" : "dqa";
10047 modrm = getIByte(delta+2);
10048 if (epartIsReg(modrm)) {
10049 putXMMReg( gregOfRM(modrm),
10050 getXMMReg( eregOfRM(modrm) ));
10051 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)),
10052 nameXMMReg(gregOfRM(modrm)));
10055 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10056 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/)
10057 gen_SEGV_if_not_16_aligned( addr );
10058 putXMMReg( gregOfRM(modrm),
10059 loadLE(Ity_V128, mkexpr(addr)) );
10060 DIP("mov%s %s,%s\n", wot, dis_buf,
10061 nameXMMReg(gregOfRM(modrm)));
10064 goto decode_success;
10067 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
10068 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
10069 if (sz == 2 && insn[0] == 0x0F
10070 && (insn[1] == 0x29 || insn[1] == 0x11)) {
10071 HChar* wot = insn[1]==0x29 ? "apd" : "upd";
10072 modrm = getIByte(delta+2);
10073 if (epartIsReg(modrm)) {
10074 /* fall through; awaiting test case */
10076 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10077 if (insn[1] == 0x29/*movapd*/)
10078 gen_SEGV_if_not_16_aligned( addr );
10079 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10080 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)),
10083 goto decode_success;
10087 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
10088 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) {
10089 modrm = getIByte(delta+2);
10090 if (epartIsReg(modrm)) {
10094 unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) )
10096 DIP("movd %s, %s\n",
10097 nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm)));
10099 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10103 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
10105 DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm)));
10107 goto decode_success;
10110 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
10111 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) {
10112 modrm = getIByte(delta+2);
10113 if (epartIsReg(modrm)) {
10115 putIReg( 4, eregOfRM(modrm),
10116 getXMMRegLane32(gregOfRM(modrm), 0) );
10117 DIP("movd %s, %s\n",
10118 nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
10120 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10122 storeLE( mkexpr(addr),
10123 getXMMRegLane32(gregOfRM(modrm), 0) );
10124 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10126 goto decode_success;
10129 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
10130 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) {
10131 modrm = getIByte(delta+2);
10132 if (epartIsReg(modrm)) {
10134 putXMMReg( eregOfRM(modrm),
10135 getXMMReg(gregOfRM(modrm)) );
10136 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10137 nameXMMReg(eregOfRM(modrm)));
10139 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10141 gen_SEGV_if_not_16_aligned( addr );
10142 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10143 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10145 goto decode_success;
10148 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
10149 /* Unfortunately can't simply use the MOVDQA case since the
10150 prefix lengths are different (66 vs F3) */
10151 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) {
10153 modrm = getIByte(delta+3);
10154 if (epartIsReg(modrm)) {
10155 putXMMReg( gregOfRM(modrm),
10156 getXMMReg( eregOfRM(modrm) ));
10157 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10158 nameXMMReg(gregOfRM(modrm)));
10161 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10162 putXMMReg( gregOfRM(modrm),
10163 loadLE(Ity_V128, mkexpr(addr)) );
10164 DIP("movdqu %s,%s\n", dis_buf,
10165 nameXMMReg(gregOfRM(modrm)));
10168 goto decode_success;
10171 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
10172 /* Unfortunately can't simply use the MOVDQA case since the
10173 prefix lengths are different (66 vs F3) */
10174 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) {
10176 modrm = getIByte(delta+3);
10177 if (epartIsReg(modrm)) {
10179 putXMMReg( eregOfRM(modrm),
10180 getXMMReg(gregOfRM(modrm)) );
10181 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10182 nameXMMReg(eregOfRM(modrm)));
10184 addr = disAMode( &alen, sorb, delta+3, dis_buf );
10186 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10187 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10189 goto decode_success;
10192 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
10193 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) {
10195 modrm = getIByte(delta+3);
10196 if (epartIsReg(modrm)) {
10198 putMMXReg( gregOfRM(modrm),
10199 getXMMRegLane64( eregOfRM(modrm), 0 ));
10200 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10201 nameMMXReg(gregOfRM(modrm)));
10203 goto decode_success;
10205 /* fall through, apparently no mem case for this insn */
10209 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
9210 /* This seems identical to MOVHPS. This instruction encoding is
9211 completely crazy. */
10212 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
10213 modrm = getIByte(delta+2);
10214 if (epartIsReg(modrm)) {
10215 /* fall through; apparently reg-reg is not possible */
10217 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10219 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
10220 loadLE(Ity_I64, mkexpr(addr)) );
10221 DIP("movhpd %s,%s\n", dis_buf,
10222 nameXMMReg( gregOfRM(modrm) ));
10223 goto decode_success;
10227 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
10228 /* Again, this seems identical to MOVHPS. */
10229 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
10230 if (!epartIsReg(insn[2])) {
10232 addr = disAMode ( &alen, sorb, delta, dis_buf );
10234 storeLE( mkexpr(addr),
10235 getXMMRegLane64( gregOfRM(insn[2]),
10236 1/*upper lane*/ ) );
10237 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
10239 goto decode_success;
10241 /* else fall through */
10244 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
10245 /* Identical to MOVLPS ? */
10246 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) {
10247 modrm = getIByte(delta+2);
10248 if (epartIsReg(modrm)) {
10249 /* fall through; apparently reg-reg is not possible */
10251 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10253 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
10254 loadLE(Ity_I64, mkexpr(addr)) );
10255 DIP("movlpd %s, %s\n",
10256 dis_buf, nameXMMReg( gregOfRM(modrm) ));
10257 goto decode_success;
10261 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
10262 /* Identical to MOVLPS ? */
10263 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) {
10264 if (!epartIsReg(insn[2])) {
10266 addr = disAMode ( &alen, sorb, delta, dis_buf );
10268 storeLE( mkexpr(addr),
10269 getXMMRegLane64( gregOfRM(insn[2]),
10270 0/*lower lane*/ ) );
10271 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
10273 goto decode_success;
10275 /* else fall through */
10278 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
10279 2 lowest bits of ireg(G) */
10280 if (insn[0] == 0x0F && insn[1] == 0x50) {
10281 modrm = getIByte(delta+2);
10282 if (sz == 2 && epartIsReg(modrm)) {
10284 t0 = newTemp(Ity_I32);
10285 t1 = newTemp(Ity_I32);
10287 src = eregOfRM(modrm);
10288 assign( t0, binop( Iop_And32,
10289 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
10291 assign( t1, binop( Iop_And32,
10292 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
10294 putIReg(4, gregOfRM(modrm),
10295 binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
10297 DIP("movmskpd %s,%s\n", nameXMMReg(src),
10298 nameIReg(4, gregOfRM(modrm)));
10299 goto decode_success;
10301 /* else fall through */
10304 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
10305 if (insn[0] == 0x0F && insn[1] == 0xF7) {
10306 modrm = getIByte(delta+2);
10307 if (sz == 2 && epartIsReg(modrm)) {
10308 IRTemp regD = newTemp(Ity_V128);
10309 IRTemp mask = newTemp(Ity_V128);
10310 IRTemp olddata = newTemp(Ity_V128);
10311 IRTemp newdata = newTemp(Ity_V128);
10312 addr = newTemp(Ity_I32);
10314 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
10315 assign( regD, getXMMReg( gregOfRM(modrm) ));
10317 /* Unfortunately can't do the obvious thing with SarN8x16
10318 here since that can't be re-emitted as SSE2 code - no such
10322 binop(Iop_64HLtoV128,
10324 getXMMRegLane64( eregOfRM(modrm), 1 ),
10327 getXMMRegLane64( eregOfRM(modrm), 0 ),
10329 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
10337 unop(Iop_NotV128, mkexpr(mask)))) );
10338 storeLE( mkexpr(addr), mkexpr(newdata) );
10341 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
10342 nameXMMReg( gregOfRM(modrm) ) );
10343 goto decode_success;
10345 /* else fall through */
10348 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
10349 if (insn[0] == 0x0F && insn[1] == 0xE7) {
10350 modrm = getIByte(delta+2);
10351 if (sz == 2 && !epartIsReg(modrm)) {
10352 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10353 gen_SEGV_if_not_16_aligned( addr );
10354 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10355 DIP("movntdq %s,%s\n", dis_buf,
10356 nameXMMReg(gregOfRM(modrm)));
10358 goto decode_success;
10360 /* else fall through */
10363 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
10364 if (insn[0] == 0x0F && insn[1] == 0xC3) {
10366 modrm = getIByte(delta+2);
10367 if (!epartIsReg(modrm)) {
10368 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10369 storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) );
10370 DIP("movnti %s,%s\n", dis_buf,
10371 nameIReg(4, gregOfRM(modrm)));
10373 goto decode_success;
10375 /* else fall through */
10378 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
10379 or lo half xmm). */
10380 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) {
10381 modrm = getIByte(delta+2);
10382 if (epartIsReg(modrm)) {
10383 /* fall through, awaiting test case */
10384 /* dst: lo half copied, hi half zeroed */
10386 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10387 storeLE( mkexpr(addr),
10388 getXMMRegLane64( gregOfRM(modrm), 0 ));
10389 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf );
10391 goto decode_success;
10395 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
10397 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) {
10399 modrm = getIByte(delta+3);
10400 if (epartIsReg(modrm)) {
10402 putXMMReg( gregOfRM(modrm),
10403 unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) );
10404 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
10405 nameXMMReg(gregOfRM(modrm)));
10407 goto decode_success;
10409 /* fall through, apparently no mem case for this insn */
10413 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
10414 G (lo half xmm). Upper half of G is zeroed out. */
10415 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
10416 G (lo half xmm). If E is mem, upper half of G is zeroed out.
10417 If E is reg, upper half of G is unchanged. */
10418 if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10)
10419 || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) {
10421 modrm = getIByte(delta+3);
10422 if (epartIsReg(modrm)) {
10423 putXMMRegLane64( gregOfRM(modrm), 0,
10424 getXMMRegLane64( eregOfRM(modrm), 0 ));
10425 if (insn[0] == 0xF3/*MOVQ*/) {
10426 /* zero bits 127:64 */
10427 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10429 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10430 nameXMMReg(gregOfRM(modrm)));
10433 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10434 /* zero bits 127:64 */
10435 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10436 /* write bits 63:0 */
10437 putXMMRegLane64( gregOfRM(modrm), 0,
10438 loadLE(Ity_I64, mkexpr(addr)) );
10439 DIP("movsd %s,%s\n", dis_buf,
10440 nameXMMReg(gregOfRM(modrm)));
10443 goto decode_success;
10446 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
10447 or lo half xmm). */
10448 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
10450 modrm = getIByte(delta+3);
10451 if (epartIsReg(modrm)) {
10452 putXMMRegLane64( eregOfRM(modrm), 0,
10453 getXMMRegLane64( gregOfRM(modrm), 0 ));
10454 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10455 nameXMMReg(eregOfRM(modrm)));
10458 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10459 storeLE( mkexpr(addr),
10460 getXMMRegLane64(gregOfRM(modrm), 0) );
10461 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10465 goto decode_success;
10468 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
10469 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
10470 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
10471 goto decode_success;
10474 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
10475 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
10477 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
10478 goto decode_success;
10481 /* 66 0F 56 = ORPD -- G = G or E */
10482 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) {
10483 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
10484 goto decode_success;
10487 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
10488 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
10490 IRTemp sV = newTemp(Ity_V128);
10491 IRTemp dV = newTemp(Ity_V128);
10492 IRTemp s1 = newTemp(Ity_I64);
10493 IRTemp s0 = newTemp(Ity_I64);
10494 IRTemp d1 = newTemp(Ity_I64);
10495 IRTemp d0 = newTemp(Ity_I64);
10498 assign( dV, getXMMReg(gregOfRM(modrm)) );
10500 if (epartIsReg(modrm)) {
10501 assign( sV, getXMMReg(eregOfRM(modrm)) );
10502 select = (Int)insn[3];
10504 DIP("shufpd $%d,%s,%s\n", select,
10505 nameXMMReg(eregOfRM(modrm)),
10506 nameXMMReg(gregOfRM(modrm)));
10508 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10509 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10510 select = (Int)insn[2+alen];
10512 DIP("shufpd $%d,%s,%s\n", select,
10514 nameXMMReg(gregOfRM(modrm)));
10517 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10518 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10519 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10520 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10522 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
10523 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
10527 binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
10533 goto decode_success;
10536 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
10537 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) {
10538 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
10539 "sqrtpd", Iop_Sqrt64Fx2 );
10540 goto decode_success;
10543 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
10544 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
10546 delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3,
10547 "sqrtsd", Iop_Sqrt64F0x2 );
10548 goto decode_success;
10551 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
10552 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
10553 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
10554 goto decode_success;
10557 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
10558 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
10560 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
10561 goto decode_success;
10564 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
10565 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
10566 /* These just appear to be special cases of SHUFPD */
10567 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
10568 IRTemp s1 = newTemp(Ity_I64);
10569 IRTemp s0 = newTemp(Ity_I64);
10570 IRTemp d1 = newTemp(Ity_I64);
10571 IRTemp d0 = newTemp(Ity_I64);
10572 IRTemp sV = newTemp(Ity_V128);
10573 IRTemp dV = newTemp(Ity_V128);
10574 Bool hi = toBool(insn[1] == 0x15);
10577 assign( dV, getXMMReg(gregOfRM(modrm)) );
10579 if (epartIsReg(modrm)) {
10580 assign( sV, getXMMReg(eregOfRM(modrm)) );
10582 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10583 nameXMMReg(eregOfRM(modrm)),
10584 nameXMMReg(gregOfRM(modrm)));
10586 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10587 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10589 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10591 nameXMMReg(gregOfRM(modrm)));
10594 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10595 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10596 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10597 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10600 putXMMReg( gregOfRM(modrm),
10601 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
10603 putXMMReg( gregOfRM(modrm),
10604 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
10607 goto decode_success;
10610 /* 66 0F 57 = XORPD -- G = G xor E */
10611 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) {
10612 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 );
10613 goto decode_success;
10616 /* 66 0F 6B = PACKSSDW */
10617 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
10618 delta = dis_SSEint_E_to_G( sorb, delta+2,
10619 "packssdw", Iop_QNarrow32Sx4, True );
10620 goto decode_success;
10623 /* 66 0F 63 = PACKSSWB */
10624 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
10625 delta = dis_SSEint_E_to_G( sorb, delta+2,
10626 "packsswb", Iop_QNarrow16Sx8, True );
10627 goto decode_success;
10630 /* 66 0F 67 = PACKUSWB */
10631 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
10632 delta = dis_SSEint_E_to_G( sorb, delta+2,
10633 "packuswb", Iop_QNarrow16Ux8, True );
10634 goto decode_success;
10637 /* 66 0F FC = PADDB */
10638 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) {
10639 delta = dis_SSEint_E_to_G( sorb, delta+2,
10640 "paddb", Iop_Add8x16, False );
10641 goto decode_success;
10644 /* 66 0F FE = PADDD */
10645 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) {
10646 delta = dis_SSEint_E_to_G( sorb, delta+2,
10647 "paddd", Iop_Add32x4, False );
10648 goto decode_success;
10651 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
10652 /* 0F D4 = PADDQ -- add 64x1 */
10653 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) {
10655 delta = dis_MMXop_regmem_to_reg (
10656 sorb, delta+2, insn[1], "paddq", False );
10657 goto decode_success;
10660 /* 66 0F D4 = PADDQ */
10661 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) {
10662 delta = dis_SSEint_E_to_G( sorb, delta+2,
10663 "paddq", Iop_Add64x2, False );
10664 goto decode_success;
10667 /* 66 0F FD = PADDW */
10668 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) {
10669 delta = dis_SSEint_E_to_G( sorb, delta+2,
10670 "paddw", Iop_Add16x8, False );
10671 goto decode_success;
10674 /* 66 0F EC = PADDSB */
10675 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) {
10676 delta = dis_SSEint_E_to_G( sorb, delta+2,
10677 "paddsb", Iop_QAdd8Sx16, False );
10678 goto decode_success;
10681 /* 66 0F ED = PADDSW */
10682 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) {
10683 delta = dis_SSEint_E_to_G( sorb, delta+2,
10684 "paddsw", Iop_QAdd16Sx8, False );
10685 goto decode_success;
10688 /* 66 0F DC = PADDUSB */
10689 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) {
10690 delta = dis_SSEint_E_to_G( sorb, delta+2,
10691 "paddusb", Iop_QAdd8Ux16, False );
10692 goto decode_success;
10695 /* 66 0F DD = PADDUSW */
10696 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) {
10697 delta = dis_SSEint_E_to_G( sorb, delta+2,
10698 "paddusw", Iop_QAdd16Ux8, False );
10699 goto decode_success;
10702 /* 66 0F DB = PAND */
10703 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) {
10704 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 );
10705 goto decode_success;
10708 /* 66 0F DF = PANDN */
10709 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) {
10710 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 );
10711 goto decode_success;
10714 /* 66 0F E0 = PAVGB */
10715 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) {
10716 delta = dis_SSEint_E_to_G( sorb, delta+2,
10717 "pavgb", Iop_Avg8Ux16, False );
10718 goto decode_success;
10721 /* 66 0F E3 = PAVGW */
10722 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) {
10723 delta = dis_SSEint_E_to_G( sorb, delta+2,
10724 "pavgw", Iop_Avg16Ux8, False );
10725 goto decode_success;
10728 /* 66 0F 74 = PCMPEQB */
10729 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) {
10730 delta = dis_SSEint_E_to_G( sorb, delta+2,
10731 "pcmpeqb", Iop_CmpEQ8x16, False );
10732 goto decode_success;
10735 /* 66 0F 76 = PCMPEQD */
10736 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) {
10737 delta = dis_SSEint_E_to_G( sorb, delta+2,
10738 "pcmpeqd", Iop_CmpEQ32x4, False );
10739 goto decode_success;
10742 /* 66 0F 75 = PCMPEQW */
10743 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) {
10744 delta = dis_SSEint_E_to_G( sorb, delta+2,
10745 "pcmpeqw", Iop_CmpEQ16x8, False );
10746 goto decode_success;
10749 /* 66 0F 64 = PCMPGTB */
10750 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) {
10751 delta = dis_SSEint_E_to_G( sorb, delta+2,
10752 "pcmpgtb", Iop_CmpGT8Sx16, False );
10753 goto decode_success;
10756 /* 66 0F 66 = PCMPGTD */
10757 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) {
10758 delta = dis_SSEint_E_to_G( sorb, delta+2,
10759 "pcmpgtd", Iop_CmpGT32Sx4, False );
10760 goto decode_success;
10763 /* 66 0F 65 = PCMPGTW */
10764 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) {
10765 delta = dis_SSEint_E_to_G( sorb, delta+2,
10766 "pcmpgtw", Iop_CmpGT16Sx8, False );
10767 goto decode_success;
10770 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
10771 zero-extend of it in ireg(G). */
10772 if (insn[0] == 0x0F && insn[1] == 0xC5) {
10774 if (sz == 2 && epartIsReg(modrm)) {
10775 t5 = newTemp(Ity_V128);
10776 t4 = newTemp(Ity_I16);
10777 assign(t5, getXMMReg(eregOfRM(modrm)));
10778 breakup128to32s( t5, &t3, &t2, &t1, &t0 );
10779 switch (insn[3] & 7) {
10780 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
10781 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
10782 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
10783 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
10784 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
10785 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
10786 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
10787 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
10788 default: vassert(0); /*NOTREACHED*/
10790 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)));
10791 DIP("pextrw $%d,%s,%s\n",
10792 (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
10793 nameIReg(4,gregOfRM(modrm)));
10795 goto decode_success;
10797 /* else fall through */
10800 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
10801 put it into the specified lane of xmm(G). */
10802 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) {
10804 t4 = newTemp(Ity_I16);
10807 if (epartIsReg(modrm)) {
10808 assign(t4, getIReg(2, eregOfRM(modrm)));
10810 lane = insn[3+1-1];
10811 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
10812 nameIReg(2,eregOfRM(modrm)),
10813 nameXMMReg(gregOfRM(modrm)));
10815 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10817 lane = insn[3+alen-1];
10818 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
10819 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
10821 nameXMMReg(gregOfRM(modrm)));
10824 putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
10825 goto decode_success;
10828 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
10829 E(xmm or mem) to G(xmm) */
10830 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) {
10831 IRTemp s1V = newTemp(Ity_V128);
10832 IRTemp s2V = newTemp(Ity_V128);
10833 IRTemp dV = newTemp(Ity_V128);
10834 IRTemp s1Hi = newTemp(Ity_I64);
10835 IRTemp s1Lo = newTemp(Ity_I64);
10836 IRTemp s2Hi = newTemp(Ity_I64);
10837 IRTemp s2Lo = newTemp(Ity_I64);
10838 IRTemp dHi = newTemp(Ity_I64);
10839 IRTemp dLo = newTemp(Ity_I64);
10841 if (epartIsReg(modrm)) {
10842 assign( s1V, getXMMReg(eregOfRM(modrm)) );
10844 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10845 nameXMMReg(gregOfRM(modrm)));
10847 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10848 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
10850 DIP("pmaddwd %s,%s\n", dis_buf,
10851 nameXMMReg(gregOfRM(modrm)));
10853 assign( s2V, getXMMReg(gregOfRM(modrm)) );
10854 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
10855 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
10856 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
10857 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
10858 assign( dHi, mkIRExprCCall(
10859 Ity_I64, 0/*regparms*/,
10860 "x86g_calculate_mmx_pmaddwd",
10861 &x86g_calculate_mmx_pmaddwd,
10862 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
10864 assign( dLo, mkIRExprCCall(
10865 Ity_I64, 0/*regparms*/,
10866 "x86g_calculate_mmx_pmaddwd",
10867 &x86g_calculate_mmx_pmaddwd,
10868 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
10870 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
10871 putXMMReg(gregOfRM(modrm), mkexpr(dV));
10872 goto decode_success;
10875 /* 66 0F EE = PMAXSW -- 16x8 signed max */
10876 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) {
10877 delta = dis_SSEint_E_to_G( sorb, delta+2,
10878 "pmaxsw", Iop_Max16Sx8, False );
10879 goto decode_success;
10882 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
10883 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) {
10884 delta = dis_SSEint_E_to_G( sorb, delta+2,
10885 "pmaxub", Iop_Max8Ux16, False );
10886 goto decode_success;
10889 /* 66 0F EA = PMINSW -- 16x8 signed min */
10890 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) {
10891 delta = dis_SSEint_E_to_G( sorb, delta+2,
10892 "pminsw", Iop_Min16Sx8, False );
10893 goto decode_success;
10896 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
10897 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) {
10898 delta = dis_SSEint_E_to_G( sorb, delta+2,
10899 "pminub", Iop_Min8Ux16, False );
10900 goto decode_success;
10903 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in
10904 xmm(E), turn them into a byte, and put zero-extend of it in
10905 ireg(G). Doing this directly is just too cumbersome; give up
10906 therefore and call a helper. */
10907 /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
10908 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
10910 if (epartIsReg(modrm)) {
10911 t0 = newTemp(Ity_I64);
10912 t1 = newTemp(Ity_I64);
10913 assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
10914 assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
10915 t5 = newTemp(Ity_I32);
10916 assign(t5, mkIRExprCCall(
10917 Ity_I32, 0/*regparms*/,
10918 "x86g_calculate_sse_pmovmskb",
10919 &x86g_calculate_sse_pmovmskb,
10920 mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
10921 putIReg(4, gregOfRM(modrm), mkexpr(t5));
10922 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10923 nameIReg(4,gregOfRM(modrm)));
10925 goto decode_success;
10927 /* else fall through */
10930 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
10931 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) {
10932 delta = dis_SSEint_E_to_G( sorb, delta+2,
10933 "pmulhuw", Iop_MulHi16Ux8, False );
10934 goto decode_success;
10937 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
10938 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) {
10939 delta = dis_SSEint_E_to_G( sorb, delta+2,
10940 "pmulhw", Iop_MulHi16Sx8, False );
10941 goto decode_success;
10944 /* 66 0F D5 = PMULLW -- 16x8 multiply */
10945 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) {
10946 delta = dis_SSEint_E_to_G( sorb, delta+2,
10947 "pmullw", Iop_Mul16x8, False );
10948 goto decode_success;
10951 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
10952 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
10953 0 to form 64-bit result */
10954 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) {
10955 IRTemp sV = newTemp(Ity_I64);
10956 IRTemp dV = newTemp(Ity_I64);
10957 t1 = newTemp(Ity_I32);
10958 t0 = newTemp(Ity_I32);
10962 assign( dV, getMMXReg(gregOfRM(modrm)) );
10964 if (epartIsReg(modrm)) {
10965 assign( sV, getMMXReg(eregOfRM(modrm)) );
10967 DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
10968 nameMMXReg(gregOfRM(modrm)));
10970 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10971 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
10973 DIP("pmuludq %s,%s\n", dis_buf,
10974 nameMMXReg(gregOfRM(modrm)));
10977 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
10978 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
10979 putMMXReg( gregOfRM(modrm),
10980 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
10981 goto decode_success;
10984 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
10985 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
10987 /* This is a really poor translation -- could be improved if
10988 performance critical */
10989 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) {
10991 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10992 sV = newTemp(Ity_V128);
10993 dV = newTemp(Ity_V128);
10994 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10995 t1 = newTemp(Ity_I64);
10996 t0 = newTemp(Ity_I64);
10998 assign( dV, getXMMReg(gregOfRM(modrm)) );
11000 if (epartIsReg(modrm)) {
11001 assign( sV, getXMMReg(eregOfRM(modrm)) );
11003 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11004 nameXMMReg(gregOfRM(modrm)));
11006 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11007 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11009 DIP("pmuludq %s,%s\n", dis_buf,
11010 nameXMMReg(gregOfRM(modrm)));
11013 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
11014 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11016 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
11017 putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
11018 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
11019 putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
11020 goto decode_success;
11023 /* 66 0F EB = POR */
11024 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) {
11025 delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 );
11026 goto decode_success;
11029 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
11030 from E(xmm or mem) to G(xmm) */
11031 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) {
11032 IRTemp s1V = newTemp(Ity_V128);
11033 IRTemp s2V = newTemp(Ity_V128);
11034 IRTemp dV = newTemp(Ity_V128);
11035 IRTemp s1Hi = newTemp(Ity_I64);
11036 IRTemp s1Lo = newTemp(Ity_I64);
11037 IRTemp s2Hi = newTemp(Ity_I64);
11038 IRTemp s2Lo = newTemp(Ity_I64);
11039 IRTemp dHi = newTemp(Ity_I64);
11040 IRTemp dLo = newTemp(Ity_I64);
11042 if (epartIsReg(modrm)) {
11043 assign( s1V, getXMMReg(eregOfRM(modrm)) );
11045 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11046 nameXMMReg(gregOfRM(modrm)));
11048 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11049 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
11051 DIP("psadbw %s,%s\n", dis_buf,
11052 nameXMMReg(gregOfRM(modrm)));
11054 assign( s2V, getXMMReg(gregOfRM(modrm)) );
11055 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
11056 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
11057 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
11058 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
11059 assign( dHi, mkIRExprCCall(
11060 Ity_I64, 0/*regparms*/,
11061 "x86g_calculate_mmx_psadbw",
11062 &x86g_calculate_mmx_psadbw,
11063 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
11065 assign( dLo, mkIRExprCCall(
11066 Ity_I64, 0/*regparms*/,
11067 "x86g_calculate_mmx_psadbw",
11068 &x86g_calculate_mmx_psadbw,
11069 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
11071 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
11072 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11073 goto decode_success;
11076 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
11077 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) {
11079 IRTemp sV, dV, s3, s2, s1, s0;
11080 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11081 sV = newTemp(Ity_V128);
11082 dV = newTemp(Ity_V128);
11084 if (epartIsReg(modrm)) {
11085 assign( sV, getXMMReg(eregOfRM(modrm)) );
11086 order = (Int)insn[3];
11088 DIP("pshufd $%d,%s,%s\n", order,
11089 nameXMMReg(eregOfRM(modrm)),
11090 nameXMMReg(gregOfRM(modrm)));
11092 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11093 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11094 order = (Int)insn[2+alen];
11096 DIP("pshufd $%d,%s,%s\n", order,
11098 nameXMMReg(gregOfRM(modrm)));
11100 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11103 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11105 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
11106 SEL((order>>2)&3), SEL((order>>0)&3) )
11108 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11110 goto decode_success;
11113 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
11114 mem) to G(xmm), and copy lower half */
11115 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
11117 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
11118 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11119 sV = newTemp(Ity_V128);
11120 dV = newTemp(Ity_V128);
11121 sVhi = newTemp(Ity_I64);
11122 dVhi = newTemp(Ity_I64);
11124 if (epartIsReg(modrm)) {
11125 assign( sV, getXMMReg(eregOfRM(modrm)) );
11126 order = (Int)insn[4];
11128 DIP("pshufhw $%d,%s,%s\n", order,
11129 nameXMMReg(eregOfRM(modrm)),
11130 nameXMMReg(gregOfRM(modrm)));
11132 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11133 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11134 order = (Int)insn[3+alen];
11136 DIP("pshufhw $%d,%s,%s\n", order,
11138 nameXMMReg(gregOfRM(modrm)));
11140 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
11141 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
11144 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11146 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
11147 SEL((order>>2)&3), SEL((order>>0)&3) )
11149 assign(dV, binop( Iop_64HLtoV128,
11151 unop(Iop_V128to64, mkexpr(sV))) );
11152 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11154 goto decode_success;
11157 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
11158 mem) to G(xmm), and copy upper half */
11159 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) {
11161 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
11162 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11163 sV = newTemp(Ity_V128);
11164 dV = newTemp(Ity_V128);
11165 sVlo = newTemp(Ity_I64);
11166 dVlo = newTemp(Ity_I64);
11168 if (epartIsReg(modrm)) {
11169 assign( sV, getXMMReg(eregOfRM(modrm)) );
11170 order = (Int)insn[4];
11172 DIP("pshuflw $%d,%s,%s\n", order,
11173 nameXMMReg(eregOfRM(modrm)),
11174 nameXMMReg(gregOfRM(modrm)));
11176 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11177 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11178 order = (Int)insn[3+alen];
11180 DIP("pshuflw $%d,%s,%s\n", order,
11182 nameXMMReg(gregOfRM(modrm)));
11184 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
11185 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
11188 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11190 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
11191 SEL((order>>2)&3), SEL((order>>0)&3) )
11193 assign(dV, binop( Iop_64HLtoV128,
11194 unop(Iop_V128HIto64, mkexpr(sV)),
11196 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11198 goto decode_success;
11201 /* 66 0F 72 /6 ib = PSLLD by immediate */
11202 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11203 && epartIsReg(insn[2])
11204 && gregOfRM(insn[2]) == 6) {
11205 delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 );
11206 goto decode_success;
11209 /* 66 0F F2 = PSLLD by E */
11210 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) {
11211 delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 );
11212 goto decode_success;
11215 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
11216 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11217 && epartIsReg(insn[2])
11218 && gregOfRM(insn[2]) == 7) {
11219 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
11220 Int imm = (Int)insn[3];
11221 Int reg = eregOfRM(insn[2]);
11222 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
11223 vassert(imm >= 0 && imm <= 255);
11226 sV = newTemp(Ity_V128);
11227 dV = newTemp(Ity_V128);
11228 hi64 = newTemp(Ity_I64);
11229 lo64 = newTemp(Ity_I64);
11230 hi64r = newTemp(Ity_I64);
11231 lo64r = newTemp(Ity_I64);
11234 putXMMReg(reg, mkV128(0x0000));
11235 goto decode_success;
11238 assign( sV, getXMMReg(reg) );
11239 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
11240 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
11243 assign( lo64r, mkexpr(lo64) );
11244 assign( hi64r, mkexpr(hi64) );
11248 assign( lo64r, mkU64(0) );
11249 assign( hi64r, mkexpr(lo64) );
11253 assign( lo64r, mkU64(0) );
11254 assign( hi64r, binop( Iop_Shl64,
11256 mkU8( 8*(imm-8) ) ));
11258 assign( lo64r, binop( Iop_Shl64,
11263 binop(Iop_Shl64, mkexpr(hi64),
11265 binop(Iop_Shr64, mkexpr(lo64),
11266 mkU8(8 * (8 - imm)) )
11270 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
11271 putXMMReg(reg, mkexpr(dV));
11272 goto decode_success;
11275 /* 66 0F 73 /6 ib = PSLLQ by immediate */
11276 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11277 && epartIsReg(insn[2])
11278 && gregOfRM(insn[2]) == 6) {
11279 delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 );
11280 goto decode_success;
11283 /* 66 0F F3 = PSLLQ by E */
11284 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) {
11285 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 );
11286 goto decode_success;
11289 /* 66 0F 71 /6 ib = PSLLW by immediate */
11290 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11291 && epartIsReg(insn[2])
11292 && gregOfRM(insn[2]) == 6) {
11293 delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 );
11294 goto decode_success;
11297 /* 66 0F F1 = PSLLW by E */
11298 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) {
11299 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 );
11300 goto decode_success;
11303 /* 66 0F 72 /4 ib = PSRAD by immediate */
11304 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11305 && epartIsReg(insn[2])
11306 && gregOfRM(insn[2]) == 4) {
11307 delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 );
11308 goto decode_success;
11311 /* 66 0F E2 = PSRAD by E */
11312 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) {
11313 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 );
11314 goto decode_success;
11317 /* 66 0F 71 /4 ib = PSRAW by immediate */
11318 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11319 && epartIsReg(insn[2])
11320 && gregOfRM(insn[2]) == 4) {
11321 delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 );
11322 goto decode_success;
11325 /* 66 0F E1 = PSRAW by E */
11326 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) {
11327 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 );
11328 goto decode_success;
11331 /* 66 0F 72 /2 ib = PSRLD by immediate */
11332 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11333 && epartIsReg(insn[2])
11334 && gregOfRM(insn[2]) == 2) {
11335 delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 );
11336 goto decode_success;
11339 /* 66 0F D2 = PSRLD by E */
11340 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) {
11341 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 );
11342 goto decode_success;
11345 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
11346 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11347 && epartIsReg(insn[2])
11348 && gregOfRM(insn[2]) == 3) {
11349 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
11350 Int imm = (Int)insn[3];
11351 Int reg = eregOfRM(insn[2]);
11352 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
11353 vassert(imm >= 0 && imm <= 255);
11356 sV = newTemp(Ity_V128);
11357 dV = newTemp(Ity_V128);
11358 hi64 = newTemp(Ity_I64);
11359 lo64 = newTemp(Ity_I64);
11360 hi64r = newTemp(Ity_I64);
11361 lo64r = newTemp(Ity_I64);
11364 putXMMReg(reg, mkV128(0x0000));
11365 goto decode_success;
11368 assign( sV, getXMMReg(reg) );
11369 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
11370 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
11373 assign( lo64r, mkexpr(lo64) );
11374 assign( hi64r, mkexpr(hi64) );
11378 assign( hi64r, mkU64(0) );
11379 assign( lo64r, mkexpr(hi64) );
11383 assign( hi64r, mkU64(0) );
11384 assign( lo64r, binop( Iop_Shr64,
11386 mkU8( 8*(imm-8) ) ));
11388 assign( hi64r, binop( Iop_Shr64,
11393 binop(Iop_Shr64, mkexpr(lo64),
11395 binop(Iop_Shl64, mkexpr(hi64),
11396 mkU8(8 * (8 - imm)) )
11401 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
11402 putXMMReg(reg, mkexpr(dV));
11403 goto decode_success;
11406 /* 66 0F 73 /2 ib = PSRLQ by immediate */
11407 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11408 && epartIsReg(insn[2])
11409 && gregOfRM(insn[2]) == 2) {
11410 delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 );
11411 goto decode_success;
11414 /* 66 0F D3 = PSRLQ by E */
11415 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) {
11416 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 );
11417 goto decode_success;
11420 /* 66 0F 71 /2 ib = PSRLW by immediate */
11421 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11422 && epartIsReg(insn[2])
11423 && gregOfRM(insn[2]) == 2) {
11424 delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
11425 goto decode_success;
11428 /* 66 0F D1 = PSRLW by E */
11429 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) {
11430 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
11431 goto decode_success;
11434 /* 66 0F F8 = PSUBB */
11435 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) {
11436 delta = dis_SSEint_E_to_G( sorb, delta+2,
11437 "psubb", Iop_Sub8x16, False );
11438 goto decode_success;
11441 /* 66 0F FA = PSUBD */
11442 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) {
11443 delta = dis_SSEint_E_to_G( sorb, delta+2,
11444 "psubd", Iop_Sub32x4, False );
11445 goto decode_success;
11448 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11449 /* 0F FB = PSUBQ -- sub 64x1 */
11450 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) {
11452 delta = dis_MMXop_regmem_to_reg (
11453 sorb, delta+2, insn[1], "psubq", False );
11454 goto decode_success;
11457 /* 66 0F FB = PSUBQ */
11458 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) {
11459 delta = dis_SSEint_E_to_G( sorb, delta+2,
11460 "psubq", Iop_Sub64x2, False );
11461 goto decode_success;
11464 /* 66 0F F9 = PSUBW */
11465 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) {
11466 delta = dis_SSEint_E_to_G( sorb, delta+2,
11467 "psubw", Iop_Sub16x8, False );
11468 goto decode_success;
11471 /* 66 0F E8 = PSUBSB */
11472 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) {
11473 delta = dis_SSEint_E_to_G( sorb, delta+2,
11474 "psubsb", Iop_QSub8Sx16, False );
11475 goto decode_success;
11478 /* 66 0F E9 = PSUBSW */
11479 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) {
11480 delta = dis_SSEint_E_to_G( sorb, delta+2,
11481 "psubsw", Iop_QSub16Sx8, False );
11482 goto decode_success;
11485 /* 66 0F D8 = PSUBUSB */
11486 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) {
11487 delta = dis_SSEint_E_to_G( sorb, delta+2,
11488 "psubusb", Iop_QSub8Ux16, False );
11489 goto decode_success;
11492 /* 66 0F D9 = PSUBUSW */
11493 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) {
11494 delta = dis_SSEint_E_to_G( sorb, delta+2,
11495 "psubusw", Iop_QSub16Ux8, False );
11496 goto decode_success;
11499 /* 66 0F 68 = PUNPCKHBW */
11500 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) {
11501 delta = dis_SSEint_E_to_G( sorb, delta+2,
11503 Iop_InterleaveHI8x16, True );
11504 goto decode_success;
11507 /* 66 0F 6A = PUNPCKHDQ */
11508 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) {
11509 delta = dis_SSEint_E_to_G( sorb, delta+2,
11511 Iop_InterleaveHI32x4, True );
11512 goto decode_success;
11515 /* 66 0F 6D = PUNPCKHQDQ */
11516 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) {
11517 delta = dis_SSEint_E_to_G( sorb, delta+2,
11519 Iop_InterleaveHI64x2, True );
11520 goto decode_success;
11523 /* 66 0F 69 = PUNPCKHWD */
11524 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) {
11525 delta = dis_SSEint_E_to_G( sorb, delta+2,
11527 Iop_InterleaveHI16x8, True );
11528 goto decode_success;
11531 /* 66 0F 60 = PUNPCKLBW */
11532 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
11533 delta = dis_SSEint_E_to_G( sorb, delta+2,
11535 Iop_InterleaveLO8x16, True );
11536 goto decode_success;
11539 /* 66 0F 62 = PUNPCKLDQ */
11540 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) {
11541 delta = dis_SSEint_E_to_G( sorb, delta+2,
11543 Iop_InterleaveLO32x4, True );
11544 goto decode_success;
11547 /* 66 0F 6C = PUNPCKLQDQ */
11548 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) {
11549 delta = dis_SSEint_E_to_G( sorb, delta+2,
11551 Iop_InterleaveLO64x2, True );
11552 goto decode_success;
11555 /* 66 0F 61 = PUNPCKLWD */
11556 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) {
11557 delta = dis_SSEint_E_to_G( sorb, delta+2,
11559 Iop_InterleaveLO16x8, True );
11560 goto decode_success;
11563 /* 66 0F EF = PXOR */
11564 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) {
11565 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 );
11566 goto decode_success;
11569 //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
11570 //-- if (insn[0] == 0x0F && insn[1] == 0xAE
11571 //-- && (!epartIsReg(insn[2]))
11572 //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
11573 //-- Bool store = gregOfRM(insn[2]) == 0;
11574 //-- vg_assert(sz == 4);
11575 //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
11576 //-- t1 = LOW24(pair);
11577 //-- eip += 2+HI8(pair);
11578 //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
11579 //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
11580 //-- Lit16, (UShort)insn[2],
11581 //-- TempReg, t1 );
11582 //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
11583 //-- goto decode_success;
11586 /* 0F AE /7 = CLFLUSH -- flush cache line */
11587 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
11588 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
11590 /* This is something of a hack. We need to know the size of the
11591 cache line containing addr. Since we don't (easily), assume
11592 256 on the basis that no real cache would have a line that
11593 big. It's safe to invalidate more stuff than we need, just
11595 UInt lineszB = 256;
11597 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11600 /* Round addr down to the start of the containing block. */
11605 mkU32( ~(lineszB-1) ))) );
11607 stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) );
11609 irsb->jumpkind = Ijk_TInval;
11610 irsb->next = mkU32(guest_EIP_bbstart+delta);
11611 dres.whatNext = Dis_StopHere;
11613 DIP("clflush %s\n", dis_buf);
11614 goto decode_success;
11617 /* ---------------------------------------------------- */
11618 /* --- end of the SSE2 decoder. --- */
11619 /* ---------------------------------------------------- */
11621 /* ---------------------------------------------------- */
11622 /* --- start of the SSE3 decoder. --- */
11623 /* ---------------------------------------------------- */
11625 /* Skip parts of the decoder which don't apply given the stated
11626 guest subarchitecture. */
11627 /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */
11628 /* In fact this is highly bogus; we accept SSE3 insns even on a
11629 SSE2-only guest since they turn into IR which can be re-emitted
11630 successfully on an SSE2 host. */
11631 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
11632 goto after_sse_decoders; /* no SSE3 capabilities */
11634 insn = (UChar*)&guest_code[delta];
11636 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
11637 duplicating some lanes (2:2:0:0). */
11638 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
11639 duplicating some lanes (3:3:1:1). */
11640 if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F
11641 && (insn[2] == 0x12 || insn[2] == 0x16)) {
11642 IRTemp s3, s2, s1, s0;
11643 IRTemp sV = newTemp(Ity_V128);
11644 Bool isH = insn[2] == 0x16;
11645 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11648 if (epartIsReg(modrm)) {
11649 assign( sV, getXMMReg( eregOfRM(modrm)) );
11650 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11651 nameXMMReg(eregOfRM(modrm)),
11652 nameXMMReg(gregOfRM(modrm)));
11655 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11656 gen_SEGV_if_not_16_aligned( addr );
11657 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11658 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11660 nameXMMReg(gregOfRM(modrm)));
11664 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11665 putXMMReg( gregOfRM(modrm),
11666 isH ? mk128from32s( s3, s3, s1, s1 )
11667 : mk128from32s( s2, s2, s0, s0 ) );
11668 goto decode_success;
11671 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
11672 duplicating some lanes (0:1:0:1). */
11673 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) {
11674 IRTemp sV = newTemp(Ity_V128);
11675 IRTemp d0 = newTemp(Ity_I64);
11678 if (epartIsReg(modrm)) {
11679 assign( sV, getXMMReg( eregOfRM(modrm)) );
11680 DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11681 nameXMMReg(gregOfRM(modrm)));
11683 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
11685 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11686 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
11687 DIP("movddup %s,%s\n", dis_buf,
11688 nameXMMReg(gregOfRM(modrm)));
11692 putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
11693 goto decode_success;
11696 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
11697 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
11698 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11699 IRTemp eV = newTemp(Ity_V128);
11700 IRTemp gV = newTemp(Ity_V128);
11701 IRTemp addV = newTemp(Ity_V128);
11702 IRTemp subV = newTemp(Ity_V128);
11703 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11706 if (epartIsReg(modrm)) {
11707 assign( eV, getXMMReg( eregOfRM(modrm)) );
11708 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11709 nameXMMReg(gregOfRM(modrm)));
11712 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11713 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11714 DIP("addsubps %s,%s\n", dis_buf,
11715 nameXMMReg(gregOfRM(modrm)));
11719 assign( gV, getXMMReg(gregOfRM(modrm)) );
11721 assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
11722 assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
11724 breakup128to32s( addV, &a3, &a2, &a1, &a0 );
11725 breakup128to32s( subV, &s3, &s2, &s1, &s0 );
11727 putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
11728 goto decode_success;
11731 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
11732 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) {
11733 IRTemp eV = newTemp(Ity_V128);
11734 IRTemp gV = newTemp(Ity_V128);
11735 IRTemp addV = newTemp(Ity_V128);
11736 IRTemp subV = newTemp(Ity_V128);
11737 IRTemp a1 = newTemp(Ity_I64);
11738 IRTemp s0 = newTemp(Ity_I64);
11741 if (epartIsReg(modrm)) {
11742 assign( eV, getXMMReg( eregOfRM(modrm)) );
11743 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11744 nameXMMReg(gregOfRM(modrm)));
11747 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11748 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11749 DIP("addsubpd %s,%s\n", dis_buf,
11750 nameXMMReg(gregOfRM(modrm)));
11754 assign( gV, getXMMReg(gregOfRM(modrm)) );
11756 assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
11757 assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
11759 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11760 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11762 putXMMReg( gregOfRM(modrm),
11763 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11764 goto decode_success;
11767 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
11768 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
11769 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F
11770 && (insn[2] == 0x7C || insn[2] == 0x7D)) {
11771 IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
11772 IRTemp eV = newTemp(Ity_V128);
11773 IRTemp gV = newTemp(Ity_V128);
11774 IRTemp leftV = newTemp(Ity_V128);
11775 IRTemp rightV = newTemp(Ity_V128);
11776 Bool isAdd = insn[2] == 0x7C;
11777 HChar* str = isAdd ? "add" : "sub";
11778 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
11781 if (epartIsReg(modrm)) {
11782 assign( eV, getXMMReg( eregOfRM(modrm)) );
11783 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
11784 nameXMMReg(gregOfRM(modrm)));
11787 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11788 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11789 DIP("h%sps %s,%s\n", str, dis_buf,
11790 nameXMMReg(gregOfRM(modrm)));
11794 assign( gV, getXMMReg(gregOfRM(modrm)) );
11796 breakup128to32s( eV, &e3, &e2, &e1, &e0 );
11797 breakup128to32s( gV, &g3, &g2, &g1, &g0 );
11799 assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
11800 assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
11802 putXMMReg( gregOfRM(modrm),
11803 binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
11804 mkexpr(leftV), mkexpr(rightV) ) );
11805 goto decode_success;
11808 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
11809 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
11810 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
11811 IRTemp e1 = newTemp(Ity_I64);
11812 IRTemp e0 = newTemp(Ity_I64);
11813 IRTemp g1 = newTemp(Ity_I64);
11814 IRTemp g0 = newTemp(Ity_I64);
11815 IRTemp eV = newTemp(Ity_V128);
11816 IRTemp gV = newTemp(Ity_V128);
11817 IRTemp leftV = newTemp(Ity_V128);
11818 IRTemp rightV = newTemp(Ity_V128);
11819 Bool isAdd = insn[1] == 0x7C;
11820 HChar* str = isAdd ? "add" : "sub";
11823 if (epartIsReg(modrm)) {
11824 assign( eV, getXMMReg( eregOfRM(modrm)) );
11825 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
11826 nameXMMReg(gregOfRM(modrm)));
11829 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11830 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11831 DIP("h%spd %s,%s\n", str, dis_buf,
11832 nameXMMReg(gregOfRM(modrm)));
11836 assign( gV, getXMMReg(gregOfRM(modrm)) );
11838 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
11839 assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
11840 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
11841 assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
11843 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
11844 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
11846 putXMMReg( gregOfRM(modrm),
11847 binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
11848 mkexpr(leftV), mkexpr(rightV) ) );
11849 goto decode_success;
11852 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
11853 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) {
11854 modrm = getIByte(delta+3);
11855 if (epartIsReg(modrm)) {
11856 goto decode_failure;
11858 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11859 putXMMReg( gregOfRM(modrm),
11860 loadLE(Ity_V128, mkexpr(addr)) );
11861 DIP("lddqu %s,%s\n", dis_buf,
11862 nameXMMReg(gregOfRM(modrm)));
11865 goto decode_success;
11868 /* ---------------------------------------------------- */
11869 /* --- end of the SSE3 decoder. --- */
11870 /* ---------------------------------------------------- */
11872 /* ---------------------------------------------------- */
11873 /* --- start of the SSSE3 decoder. --- */
11874 /* ---------------------------------------------------- */
11876 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
11877 Unsigned Bytes (MMX) */
11879 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
11880 IRTemp sV = newTemp(Ity_I64);
11881 IRTemp dV = newTemp(Ity_I64);
11882 IRTemp sVoddsSX = newTemp(Ity_I64);
11883 IRTemp sVevensSX = newTemp(Ity_I64);
11884 IRTemp dVoddsZX = newTemp(Ity_I64);
11885 IRTemp dVevensZX = newTemp(Ity_I64);
11889 assign( dV, getMMXReg(gregOfRM(modrm)) );
11891 if (epartIsReg(modrm)) {
11892 assign( sV, getMMXReg(eregOfRM(modrm)) );
11894 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
11895 nameMMXReg(gregOfRM(modrm)));
11897 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11898 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
11900 DIP("pmaddubsw %s,%s\n", dis_buf,
11901 nameMMXReg(gregOfRM(modrm)));
11904 /* compute dV unsigned x sV signed */
11906 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
11908 binop(Iop_SarN16x4,
11909 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
11912 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
11914 binop(Iop_ShrN16x4,
11915 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
11920 binop(Iop_QAdd16Sx4,
11921 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
11922 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
11925 goto decode_success;
11928 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
11929 Unsigned Bytes (XMM) */
11931 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
11932 IRTemp sV = newTemp(Ity_V128);
11933 IRTemp dV = newTemp(Ity_V128);
11934 IRTemp sVoddsSX = newTemp(Ity_V128);
11935 IRTemp sVevensSX = newTemp(Ity_V128);
11936 IRTemp dVoddsZX = newTemp(Ity_V128);
11937 IRTemp dVevensZX = newTemp(Ity_V128);
11940 assign( dV, getXMMReg(gregOfRM(modrm)) );
11942 if (epartIsReg(modrm)) {
11943 assign( sV, getXMMReg(eregOfRM(modrm)) );
11945 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11946 nameXMMReg(gregOfRM(modrm)));
11948 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11949 gen_SEGV_if_not_16_aligned( addr );
11950 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11952 DIP("pmaddubsw %s,%s\n", dis_buf,
11953 nameXMMReg(gregOfRM(modrm)));
11956 /* compute dV unsigned x sV signed */
11958 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
11960 binop(Iop_SarN16x8,
11961 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
11964 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
11966 binop(Iop_ShrN16x8,
11967 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
11972 binop(Iop_QAdd16Sx8,
11973 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
11974 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
11977 goto decode_success;
11980 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
11981 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
11982 mmx) and G to G (mmx). */
11983 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
11984 mmx) and G to G (mmx). */
11985 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
11987 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
11989 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
11991 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
11995 && insn[0] == 0x0F && insn[1] == 0x38
11996 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
11997 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
11998 HChar* str = "???";
11999 IROp opV64 = Iop_INVALID;
12000 IROp opCatO = Iop_CatOddLanes16x4;
12001 IROp opCatE = Iop_CatEvenLanes16x4;
12002 IRTemp sV = newTemp(Ity_I64);
12003 IRTemp dV = newTemp(Ity_I64);
12008 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
12009 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
12010 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
12011 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
12012 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
12013 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
12014 default: vassert(0);
12016 if (insn[2] == 0x02 || insn[2] == 0x06) {
12017 opCatO = Iop_InterleaveHI32x2;
12018 opCatE = Iop_InterleaveLO32x2;
12022 assign( dV, getMMXReg(gregOfRM(modrm)) );
12024 if (epartIsReg(modrm)) {
12025 assign( sV, getMMXReg(eregOfRM(modrm)) );
12027 DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12028 nameMMXReg(gregOfRM(modrm)));
12030 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12031 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12033 DIP("ph%s %s,%s\n", str, dis_buf,
12034 nameMMXReg(gregOfRM(modrm)));
12040 binop(opCatE,mkexpr(sV),mkexpr(dV)),
12041 binop(opCatO,mkexpr(sV),mkexpr(dV))
12044 goto decode_success;
12047 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
12048 xmm) and G to G (xmm). */
12049 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
12050 xmm) and G to G (xmm). */
12051 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
12053 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
12055 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
12057 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
12061 && insn[0] == 0x0F && insn[1] == 0x38
12062 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
12063 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
12064 HChar* str = "???";
12065 IROp opV64 = Iop_INVALID;
12066 IROp opCatO = Iop_CatOddLanes16x4;
12067 IROp opCatE = Iop_CatEvenLanes16x4;
12068 IRTemp sV = newTemp(Ity_V128);
12069 IRTemp dV = newTemp(Ity_V128);
12070 IRTemp sHi = newTemp(Ity_I64);
12071 IRTemp sLo = newTemp(Ity_I64);
12072 IRTemp dHi = newTemp(Ity_I64);
12073 IRTemp dLo = newTemp(Ity_I64);
12078 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
12079 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
12080 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
12081 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
12082 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
12083 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
12084 default: vassert(0);
12086 if (insn[2] == 0x02 || insn[2] == 0x06) {
12087 opCatO = Iop_InterleaveHI32x2;
12088 opCatE = Iop_InterleaveLO32x2;
12091 assign( dV, getXMMReg(gregOfRM(modrm)) );
12093 if (epartIsReg(modrm)) {
12094 assign( sV, getXMMReg( eregOfRM(modrm)) );
12095 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12096 nameXMMReg(gregOfRM(modrm)));
12099 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12100 gen_SEGV_if_not_16_aligned( addr );
12101 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12102 DIP("ph%s %s,%s\n", str, dis_buf,
12103 nameXMMReg(gregOfRM(modrm)));
12107 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12108 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12109 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12110 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12112 /* This isn't a particularly efficient way to compute the
12113 result, but at least it avoids a proliferation of IROps,
12114 hence avoids complication all the backends. */
12117 binop(Iop_64HLtoV128,
12119 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
12120 binop(opCatO,mkexpr(sHi),mkexpr(sLo))
12123 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
12124 binop(opCatO,mkexpr(dHi),mkexpr(dLo))
12128 goto decode_success;
12131 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
12134 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12135 IRTemp sV = newTemp(Ity_I64);
12136 IRTemp dV = newTemp(Ity_I64);
12140 assign( dV, getMMXReg(gregOfRM(modrm)) );
12142 if (epartIsReg(modrm)) {
12143 assign( sV, getMMXReg(eregOfRM(modrm)) );
12145 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12146 nameMMXReg(gregOfRM(modrm)));
12148 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12149 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12151 DIP("pmulhrsw %s,%s\n", dis_buf,
12152 nameMMXReg(gregOfRM(modrm)));
12157 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
12159 goto decode_success;
12162 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
12165 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12166 IRTemp sV = newTemp(Ity_V128);
12167 IRTemp dV = newTemp(Ity_V128);
12168 IRTemp sHi = newTemp(Ity_I64);
12169 IRTemp sLo = newTemp(Ity_I64);
12170 IRTemp dHi = newTemp(Ity_I64);
12171 IRTemp dLo = newTemp(Ity_I64);
12174 assign( dV, getXMMReg(gregOfRM(modrm)) );
12176 if (epartIsReg(modrm)) {
12177 assign( sV, getXMMReg(eregOfRM(modrm)) );
12179 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12180 nameXMMReg(gregOfRM(modrm)));
12182 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12183 gen_SEGV_if_not_16_aligned( addr );
12184 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12186 DIP("pmulhrsw %s,%s\n", dis_buf,
12187 nameXMMReg(gregOfRM(modrm)));
12190 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12191 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12192 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12193 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12197 binop(Iop_64HLtoV128,
12198 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
12199 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
12202 goto decode_success;
12205 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
12206 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
12207 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
12209 && insn[0] == 0x0F && insn[1] == 0x38
12210 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12211 IRTemp sV = newTemp(Ity_I64);
12212 IRTemp dV = newTemp(Ity_I64);
12213 HChar* str = "???";
12217 case 0x08: laneszB = 1; str = "b"; break;
12218 case 0x09: laneszB = 2; str = "w"; break;
12219 case 0x0A: laneszB = 4; str = "d"; break;
12220 default: vassert(0);
12225 assign( dV, getMMXReg(gregOfRM(modrm)) );
12227 if (epartIsReg(modrm)) {
12228 assign( sV, getMMXReg(eregOfRM(modrm)) );
12230 DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12231 nameMMXReg(gregOfRM(modrm)));
12233 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12234 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12236 DIP("psign%s %s,%s\n", str, dis_buf,
12237 nameMMXReg(gregOfRM(modrm)));
12242 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
12244 goto decode_success;
12247 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
12248 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
12249 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
12251 && insn[0] == 0x0F && insn[1] == 0x38
12252 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12253 IRTemp sV = newTemp(Ity_V128);
12254 IRTemp dV = newTemp(Ity_V128);
12255 IRTemp sHi = newTemp(Ity_I64);
12256 IRTemp sLo = newTemp(Ity_I64);
12257 IRTemp dHi = newTemp(Ity_I64);
12258 IRTemp dLo = newTemp(Ity_I64);
12259 HChar* str = "???";
12263 case 0x08: laneszB = 1; str = "b"; break;
12264 case 0x09: laneszB = 2; str = "w"; break;
12265 case 0x0A: laneszB = 4; str = "d"; break;
12266 default: vassert(0);
12270 assign( dV, getXMMReg(gregOfRM(modrm)) );
12272 if (epartIsReg(modrm)) {
12273 assign( sV, getXMMReg(eregOfRM(modrm)) );
12275 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12276 nameXMMReg(gregOfRM(modrm)));
12278 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12279 gen_SEGV_if_not_16_aligned( addr );
12280 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12282 DIP("psign%s %s,%s\n", str, dis_buf,
12283 nameXMMReg(gregOfRM(modrm)));
12286 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12287 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12288 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12289 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12293 binop(Iop_64HLtoV128,
12294 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
12295 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
12298 goto decode_success;
12301 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
12302 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
12303 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
12305 && insn[0] == 0x0F && insn[1] == 0x38
12306 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12307 IRTemp sV = newTemp(Ity_I64);
12308 HChar* str = "???";
12312 case 0x1C: laneszB = 1; str = "b"; break;
12313 case 0x1D: laneszB = 2; str = "w"; break;
12314 case 0x1E: laneszB = 4; str = "d"; break;
12315 default: vassert(0);
12321 if (epartIsReg(modrm)) {
12322 assign( sV, getMMXReg(eregOfRM(modrm)) );
12324 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12325 nameMMXReg(gregOfRM(modrm)));
12327 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12328 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12330 DIP("pabs%s %s,%s\n", str, dis_buf,
12331 nameMMXReg(gregOfRM(modrm)));
12336 dis_PABS_helper( mkexpr(sV), laneszB )
12338 goto decode_success;
12341 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
12342 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
12343 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
12345 && insn[0] == 0x0F && insn[1] == 0x38
12346 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12347 IRTemp sV = newTemp(Ity_V128);
12348 IRTemp sHi = newTemp(Ity_I64);
12349 IRTemp sLo = newTemp(Ity_I64);
12350 HChar* str = "???";
12354 case 0x1C: laneszB = 1; str = "b"; break;
12355 case 0x1D: laneszB = 2; str = "w"; break;
12356 case 0x1E: laneszB = 4; str = "d"; break;
12357 default: vassert(0);
12362 if (epartIsReg(modrm)) {
12363 assign( sV, getXMMReg(eregOfRM(modrm)) );
12365 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12366 nameXMMReg(gregOfRM(modrm)));
12368 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12369 gen_SEGV_if_not_16_aligned( addr );
12370 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12372 DIP("pabs%s %s,%s\n", str, dis_buf,
12373 nameXMMReg(gregOfRM(modrm)));
12376 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12377 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12381 binop(Iop_64HLtoV128,
12382 dis_PABS_helper( mkexpr(sHi), laneszB ),
12383 dis_PABS_helper( mkexpr(sLo), laneszB )
12386 goto decode_success;
12389 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
12391 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
12392 IRTemp sV = newTemp(Ity_I64);
12393 IRTemp dV = newTemp(Ity_I64);
12394 IRTemp res = newTemp(Ity_I64);
12398 assign( dV, getMMXReg(gregOfRM(modrm)) );
12400 if (epartIsReg(modrm)) {
12401 assign( sV, getMMXReg(eregOfRM(modrm)) );
12402 d32 = (UInt)insn[3+1];
12404 DIP("palignr $%d,%s,%s\n", (Int)d32,
12405 nameMMXReg(eregOfRM(modrm)),
12406 nameMMXReg(gregOfRM(modrm)));
12408 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12409 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12410 d32 = (UInt)insn[3+alen];
12412 DIP("palignr $%d%s,%s\n", (Int)d32,
12414 nameMMXReg(gregOfRM(modrm)));
12418 assign( res, mkexpr(sV) );
12420 else if (d32 >= 1 && d32 <= 7) {
12423 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
12424 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
12427 else if (d32 == 8) {
12428 assign( res, mkexpr(dV) );
12430 else if (d32 >= 9 && d32 <= 15) {
12431 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
12433 else if (d32 >= 16 && d32 <= 255) {
12434 assign( res, mkU64(0) );
12439 putMMXReg( gregOfRM(modrm), mkexpr(res) );
12440 goto decode_success;
12443 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
12445 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
/* PALIGNR xmm: treat dst:src as a 256-bit value, shift right by
   d32 bytes, keep the low 128 bits.  Implemented on 64-bit halves
   (sHi:sLo = source, dHi:dLo = dest, rHi:rLo = result). */
12446 IRTemp sV = newTemp(Ity_V128);
12447 IRTemp dV = newTemp(Ity_V128);
12448 IRTemp sHi = newTemp(Ity_I64);
12449 IRTemp sLo = newTemp(Ity_I64);
12450 IRTemp dHi = newTemp(Ity_I64);
12451 IRTemp dLo = newTemp(Ity_I64);
12452 IRTemp rHi = newTemp(Ity_I64);
12453 IRTemp rLo = newTemp(Ity_I64);
12456 assign( dV, getXMMReg(gregOfRM(modrm)) );
12458 if (epartIsReg(modrm)) {
/* register source: immediate byte follows the modrm byte */
12459 assign( sV, getXMMReg(eregOfRM(modrm)) );
12460 d32 = (UInt)insn[3+1];
12462 DIP("palignr $%d,%s,%s\n", (Int)d32,
12463 nameXMMReg(eregOfRM(modrm)),
12464 nameXMMReg(gregOfRM(modrm)));
/* memory source: must be 16-aligned, per SSE semantics */
12466 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12467 gen_SEGV_if_not_16_aligned( addr );
12468 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12469 d32 = (UInt)insn[3+alen];
12471 DIP("palignr $%d,%s,%s\n", (Int)d32,
12473 nameXMMReg(gregOfRM(modrm)));
12476 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12477 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12478 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12479 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
/* d32 == 0 (case elided above?): result is the source unchanged */
12482 assign( rHi, mkexpr(sHi) );
12483 assign( rLo, mkexpr(sLo) );
/* 1..7: each result half straddles two adjacent 64-bit lanes;
   dis_PALIGNR_XMM_helper(hi,lo,n) presumably forms
   (hi << (64-8n)) | (lo >> 8n) -- TODO confirm against the helper */
12485 else if (d32 >= 1 && d32 <= 7) {
12486 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
12487 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
/* 8: exact 64-bit lane boundary */
12489 else if (d32 == 8) {
12490 assign( rHi, mkexpr(dLo) );
12491 assign( rLo, mkexpr(sHi) );
12493 else if (d32 >= 9 && d32 <= 15) {
12494 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
12495 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
/* 16: the entire source has been shifted out; result is the dest */
12497 else if (d32 == 16) {
12498 assign( rHi, mkexpr(dHi) );
12499 assign( rLo, mkexpr(dLo) );
12501 else if (d32 >= 17 && d32 <= 23) {
12502 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
12503 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
12505 else if (d32 == 24) {
12506 assign( rHi, mkU64(0) );
12507 assign( rLo, mkexpr(dHi) );
12509 else if (d32 >= 25 && d32 <= 31) {
12510 assign( rHi, mkU64(0) );
12511 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
/* >= 32: everything shifted out; result is zero */
12513 else if (d32 >= 32 && d32 <= 255) {
12514 assign( rHi, mkU64(0) );
12515 assign( rLo, mkU64(0) );
12522 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
12524 goto decode_success;
12527 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
12529 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
12530 IRTemp sV = newTemp(Ity_I64);
12531 IRTemp dV = newTemp(Ity_I64);
12535 assign( dV, getMMXReg(gregOfRM(modrm)) );
12537 if (epartIsReg(modrm)) {
12538 assign( sV, getMMXReg(eregOfRM(modrm)) );
12540 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12541 nameMMXReg(gregOfRM(modrm)));
12543 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12544 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12546 DIP("pshufb %s,%s\n", dis_buf,
12547 nameMMXReg(gregOfRM(modrm)));
12554 /* permute the lanes */
/* only the low 3 bits of each index byte select a lane (8 lanes) */
12558 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
12560 /* mask off lanes which have (index & 0x80) == 0x80 */
/* SarN8x8 by 7 replicates each index's top bit across its byte;
   Not() turns that into a keep-mask (0x00 where bit7 set) */
12561 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
12564 goto decode_success;
12567 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
12569 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
/* 128-bit PSHUFB built out of 64-bit Perm8x8 pieces: each result
   half needs bytes from BOTH dest halves, selected by index bit 3. */
12570 IRTemp sV = newTemp(Ity_V128);
12571 IRTemp dV = newTemp(Ity_V128);
12572 IRTemp sHi = newTemp(Ity_I64);
12573 IRTemp sLo = newTemp(Ity_I64);
12574 IRTemp dHi = newTemp(Ity_I64);
12575 IRTemp dLo = newTemp(Ity_I64);
12576 IRTemp rHi = newTemp(Ity_I64);
12577 IRTemp rLo = newTemp(Ity_I64);
12578 IRTemp sevens = newTemp(Ity_I64);
12579 IRTemp mask0x80hi = newTemp(Ity_I64);
12580 IRTemp mask0x80lo = newTemp(Ity_I64);
12581 IRTemp maskBit3hi = newTemp(Ity_I64);
12582 IRTemp maskBit3lo = newTemp(Ity_I64);
12583 IRTemp sAnd7hi = newTemp(Ity_I64);
12584 IRTemp sAnd7lo = newTemp(Ity_I64);
12585 IRTemp permdHi = newTemp(Ity_I64);
12586 IRTemp permdLo = newTemp(Ity_I64);
12589 assign( dV, getXMMReg(gregOfRM(modrm)) );
12591 if (epartIsReg(modrm)) {
12592 assign( sV, getXMMReg(eregOfRM(modrm)) );
12594 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12595 nameXMMReg(gregOfRM(modrm)));
12597 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12598 gen_SEGV_if_not_16_aligned( addr );
12599 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12601 DIP("pshufb %s,%s\n", dis_buf,
12602 nameXMMReg(gregOfRM(modrm)));
12605 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12606 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12607 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12608 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12610 assign( sevens, mkU64(0x0707070707070707ULL) );
/* The comment below is the plan for the upper half; the code that
   follows implements it literally, then repeats for the lower half. */
12613 mask0x80hi = Not(SarN8x8(sHi,7))
12614 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
12615 sAnd7hi = And(sHi,sevens)
12616 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
12617 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
12618 rHi = And(permdHi,mask0x80hi)
12622 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
/* Shl 4 then Sar 7 replicates index bit 3 (which 64-bit dest half
   to read) across the whole byte */
12627 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
12630 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
12637 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
12638 mkexpr(maskBit3hi)),
12640 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
12641 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
12643 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
12645 /* And the same for the lower half of the result. What fun. */
12649 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
12654 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
12657 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
12664 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
12665 mkexpr(maskBit3lo)),
12667 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
12668 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
12670 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
12674 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
12676 goto decode_success;
12679 /* ---------------------------------------------------- */
12680 /* --- end of the SSSE3 decoder. --- */
12681 /* ---------------------------------------------------- */
12683 /* ---------------------------------------------------- */
12684 /* --- start of the SSE4 decoder --- */
12685 /* ---------------------------------------------------- */
12687 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
12688 (Partial implementation only -- only deal with cases where
12689 the rounding mode is specified directly by the immediate byte.)
12690 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
12691 (Limitations ditto)
12694 && insn[0] == 0x0F && insn[1] == 0x3A
/* NOTE(review): the 0x0B (ROUNDSD) alternative is commented out, so
   only ROUNDSS (0x0A) can reach this case -- which makes the isD
   test below always False.  Deliberate (ROUNDSD disabled) or a
   leftover?  Verify against upstream valgrind history. */
12695 && (/*insn[2] == 0x0B || */insn[2] == 0x0A)) {
12697 Bool isD = insn[2] == 0x0B;
12698 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
12699 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
12704 if (epartIsReg(modrm)) {
12706 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
12707 : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
/* imm bits 2..7 select modes we don't implement (e.g. MXCSR-based
   rounding); bail out of the decode for those */
12709 if (imm & ~3) goto decode_failure;
12711 DIP( "rounds%c $%d,%s,%s\n",
12713 imm, nameXMMReg( eregOfRM(modrm) ),
12714 nameXMMReg( gregOfRM(modrm) ) );
12716 addr = disAMode( &alen, sorb, delta+3, dis_buf );
12717 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
12718 imm = insn[3+alen];
12719 if (imm & ~3) goto decode_failure;
/* NOTE(review): prints "roundsd" regardless of isD, unlike the
   register form above which uses rounds%c */
12721 DIP( "roundsd $%d,%s,%s\n",
12722 imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
12725 /* (imm & 3) contains an Intel-encoded rounding mode. Because
12726 that encoding is the same as the encoding for IRRoundingMode,
12727 we can use that value directly in the IR as a rounding
12729 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
12730 mkU32(imm & 3), mkexpr(src)) );
12733 putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
12735 putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );
12737 goto decode_success;
12740 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension,
12741 which we can only decode if we're sure this is an AMD cpu that
12742 supports LZCNT, since otherwise it's BSR, which behaves
12744 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
12745 && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
12746 vassert(sz == 2 || sz == 4);
12747 /*IRType*/ ty = szToITy(sz);
12748 IRTemp src = newTemp(ty);
12750 if (epartIsReg(modrm)) {
12751 assign(src, getIReg(sz, eregOfRM(modrm)));
12753 DIP("lzcnt%c %s, %s\n", nameISize(sz),
12754 nameIReg(sz, eregOfRM(modrm)),
12755 nameIReg(sz, gregOfRM(modrm)));
12757 addr = disAMode( &alen, sorb, delta+3, dis_buf );
12758 assign(src, loadLE(ty, mkexpr(addr)));
12760 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
12761 nameIReg(sz, gregOfRM(modrm)));
12764 IRTemp res = gen_LZCNT(ty, src);
12765 putIReg(sz, gregOfRM(modrm), mkexpr(res));
12767 // Update flags. This is pretty lame .. perhaps can do better
12768 // if this turns out to be performance critical.
12769 // O S A P are cleared. Z is set if RESULT == 0.
12770 // C is set if SRC is zero.
12771 IRTemp src32 = newTemp(Ity_I32);
12772 IRTemp res32 = newTemp(Ity_I32);
12773 assign(src32, widenUto32(mkexpr(src)));
12774 assign(res32, widenUto32(mkexpr(res)));
12776 IRTemp oszacp = newTemp(Ity_I32);
12782 binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))),
12783 mkU8(X86G_CC_SHIFT_Z)),
12786 binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))),
12787 mkU8(X86G_CC_SHIFT_C))
/* materialise the flags via the COPY thunk: DEP1 holds the
   ready-made OSZACP bits, DEP2/NDEP are unused */
12791 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
12792 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
12793 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
12794 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
12796 goto decode_success;
12799 /* ---------------------------------------------------- */
12800 /* --- end of the SSE4 decoder --- */
12801 /* ---------------------------------------------------- */
12803 after_sse_decoders:
12805 /* ---------------------------------------------------- */
12806 /* --- deal with misc 0x67 pfxs (addr size override) -- */
12807 /* ---------------------------------------------------- */
12809 /* 67 E3 = JCXZ (for JECXZ see below) */
/* With a 0x67 prefix the count register is CX (16 bits), hence the
   CmpEQ16 against the 2-byte read of ECX */
12810 if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
12812 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
12815 binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
12819 DIP("jcxz 0x%x\n", d32);
12820 goto decode_success;
12823 /* ---------------------------------------------------- */
12824 /* --- start of the baseline insn decoder -- */
12825 /* ---------------------------------------------------- */
12827 /* Get the primary opcode. */
12828 opc = getIByte(delta); delta++;
12830 /* We get here if the current insn isn't SSE, or this CPU doesn't
12835 /* ------------------------ Control flow --------------- */
12837 case 0xC2: /* RET imm16 */
12838 d32 = getUDisp16(delta);
12841 dres.whatNext = Dis_StopHere;
12842 DIP("ret %d\n", (Int)d32);
12844 case 0xC3: /* RET */
12846 dres.whatNext = Dis_StopHere;
12850 case 0xCF: /* IRET */
12851 /* Note, this is an extremely kludgey and limited implementation
12852 of iret. All it really does is:
12853 popl %EIP; popl %CS; popl %EFLAGS.
12854 %CS is set but ignored (as it is in (eg) popw %cs)". */
12855 t1 = newTemp(Ity_I32); /* ESP */
12856 t2 = newTemp(Ity_I32); /* new EIP */
12857 t3 = newTemp(Ity_I32); /* new CS */
12858 t4 = newTemp(Ity_I32); /* new EFLAGS */
12859 assign(t1, getIReg(4,R_ESP));
/* three dword pops, read at ESP+0 / +4 / +8 */
12860 assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) )));
12861 assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) )));
12862 assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) )));
12863 /* Get stuff off stack */
12864 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12)));
12865 /* set %CS (which is ignored anyway) */
12866 putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) );
12868 set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
12869 /* goto new EIP value */
12870 jmp_treg(Ijk_Ret,t2);
12871 dres.whatNext = Dis_StopHere;
12872 DIP("iret (very kludgey)\n");
12875 case 0xE8: /* CALL J4 */
12876 d32 = getUDisp32(delta); delta += 4;
12877 d32 += (guest_EIP_bbstart+delta);
12878 /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
/* call-to-next-insn followed by pop reg: the get-EIP fPIC idiom */
12879 if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58
12880 && getIByte(delta) <= 0x5F) {
12881 /* Specially treat the position-independent-code idiom
12886 since this generates better code, but for no other reason. */
12887 Int archReg = getIByte(delta) - 0x58;
12888 /* vex_printf("-- fPIC thingy\n"); */
12889 putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta));
12890 delta++; /* Step over the POP */
12891 DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg));
12893 /* The normal sequence for a call. */
12894 t1 = newTemp(Ity_I32);
/* push the return address */
12895 assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
12896 putIReg(4, R_ESP, mkexpr(t1));
12897 storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta));
12898 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32 )) {
12899 /* follow into the call target. */
12900 dres.whatNext = Dis_ResteerU;
12901 dres.continueAt = (Addr64)(Addr32)d32;
12903 jmp_lit(Ijk_Call,d32);
12904 dres.whatNext = Dis_StopHere;
12906 DIP("call 0x%x\n",d32);
12910 //-- case 0xC8: /* ENTER */
12911 //-- d32 = getUDisp16(eip); eip += 2;
12912 //-- abyte = getIByte(delta); delta++;
12914 //-- vg_assert(sz == 4);
12915 //-- vg_assert(abyte == 0);
12917 //-- t1 = newTemp(cb); t2 = newTemp(cb);
12918 //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
12919 //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
12920 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
12921 //-- uLiteral(cb, sz);
12922 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
12923 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
12924 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
12926 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
12927 //-- uLiteral(cb, d32);
12928 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
12930 //-- DIP("enter 0x%x, 0x%x", d32, abyte);
12933 case 0xC9: /* LEAVE */
/* movl %ebp,%esp ; popl %ebp */
12935 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
12936 assign(t1, getIReg(4,R_EBP));
12937 /* First PUT ESP looks redundant, but need it because ESP must
12938 always be up-to-date for Memcheck to work... */
12939 putIReg(4, R_ESP, mkexpr(t1));
12940 assign(t2, loadLE(Ity_I32,mkexpr(t1)));
12941 putIReg(4, R_EBP, mkexpr(t2));
12942 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) );
12946 /* ---------------- Misc weird-ass insns --------------- */
12948 case 0x27: /* DAA */
12949 case 0x2F: /* DAS */
12950 case 0x37: /* AAA */
12951 case 0x3F: /* AAS */
12952 /* An ugly implementation for some ugly instructions. Oh
12954 if (sz != 4) goto decode_failure;
12955 t1 = newTemp(Ity_I32);
12956 t2 = newTemp(Ity_I32);
12957 /* Make up a 32-bit value (t1), with the old value of AX in the
12958 bottom 16 bits, and the old OSZACP bitmask in the upper 16
12961 binop(Iop_16HLto32,
12963 mk_x86g_calculate_eflags_all()),
12966 /* Call the helper fn, to get a new AX and OSZACP value, and
12967 poke both back into the guest state. Also pass the helper
12968 the actual opcode so it knows which of the 4 instructions it
12969 is doing the computation for. */
12970 vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F);
12973 Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
12974 &x86g_calculate_daa_das_aaa_aas,
12975 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
/* helper result: new AX in the low 16 bits, new OSZACP above */
12977 putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));
12979 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
12980 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
12981 stmt( IRStmt_Put( OFFB_CC_DEP1,
12983 binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
12984 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
12985 | X86G_CC_MASK_A | X86G_CC_MASK_Z
12986 | X86G_CC_MASK_S| X86G_CC_MASK_O )
12990 /* Set NDEP even though it isn't used. This makes redundant-PUT
12991 elimination of previous stores to this field work better. */
12992 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
12994 case 0x27: DIP("daa\n"); break;
12995 case 0x2F: DIP("das\n"); break;
12996 case 0x37: DIP("aaa\n"); break;
12997 case 0x3F: DIP("aas\n"); break;
12998 default: vassert(0);
13002 case 0xD4: /* AAM */
13003 case 0xD5: /* AAD */
/* only the canonical base-10 immediate form is supported */
13004 d32 = getIByte(delta); delta++;
13005 if (sz != 4 || d32 != 10) goto decode_failure;
13006 t1 = newTemp(Ity_I32);
13007 t2 = newTemp(Ity_I32);
13008 /* Make up a 32-bit value (t1), with the old value of AX in the
13009 bottom 16 bits, and the old OSZACP bitmask in the upper 16
13012 binop(Iop_16HLto32,
13014 mk_x86g_calculate_eflags_all()),
13017 /* Call the helper fn, to get a new AX and OSZACP value, and
13018 poke both back into the guest state. Also pass the helper
13019 the actual opcode so it knows which of the 2 instructions it
13020 is doing the computation for. */
13023 Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
13024 &x86g_calculate_aad_aam,
13025 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
13027 putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));
13029 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
13030 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
13031 stmt( IRStmt_Put( OFFB_CC_DEP1,
13033 binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
13034 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
13035 | X86G_CC_MASK_A | X86G_CC_MASK_Z
13036 | X86G_CC_MASK_S| X86G_CC_MASK_O )
13040 /* Set NDEP even though it isn't used. This makes
13041 redundant-PUT elimination of previous stores to this field
13043 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13045 DIP(opc == 0xD4 ? "aam\n" : "aad\n");
13048 /* ------------------------ CWD/CDQ -------------------- */
13050 case 0x98: /* CBW */
/* sz == 4 means CWDE (sign-extend AX into EAX); the elided branch
   below handles the 16-bit CBW (AL -> AX) */
13052 putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
13056 putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
13061 case 0x99: /* CWD/CDQ */
/* EDX (or DX) is filled with the sign bit of EAX (or AX) */
13064 binop(mkSizedOp(ty,Iop_Sar8),
13065 getIReg(sz, R_EAX),
13066 mkU8(sz == 2 ? 15 : 31)) );
/* NOTE(review): "cwdq"/"cdqq" are nonstandard spellings of the x86
   mnemonics cwd/cdq -- debug-print text only, no behavioral effect */
13067 DIP(sz == 2 ? "cwdq\n" : "cdqq\n");
13070 /* ------------------------ FPU ops -------------------- */
13072 case 0x9E: /* SAHF */
13077 case 0x9F: /* LAHF */
13082 case 0x9B: /* FWAIT */
/* x87 escape opcodes (0xD8..0xDF, elided cases): hand off to the
   FPU sub-decoder; failure there aborts the whole decode */
13095 Int delta0 = delta;
13096 Bool decode_OK = False;
13097 delta = dis_FPU ( &decode_OK, sorb, delta );
13100 goto decode_failure;
13105 /* ------------------------ INC & DEC ------------------ */
13107 case 0x40: /* INC eAX */
13108 case 0x41: /* INC eCX */
13109 case 0x42: /* INC eDX */
13110 case 0x43: /* INC eBX */
13111 case 0x44: /* INC eSP */
13112 case 0x45: /* INC eBP */
13113 case 0x46: /* INC eSI */
13114 case 0x47: /* INC eDI */
13115 vassert(sz == 2 || sz == 4);
/* register number is encoded in the opcode itself (opc - 0x40) */
13118 assign( t1, binop(mkSizedOp(ty,Iop_Add8),
13119 getIReg(sz, (UInt)(opc - 0x40)),
13121 setFlags_INC_DEC( True, t1, ty );
13122 putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
13123 DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
13126 case 0x48: /* DEC eAX */
13127 case 0x49: /* DEC eCX */
13128 case 0x4A: /* DEC eDX */
13129 case 0x4B: /* DEC eBX */
13130 case 0x4C: /* DEC eSP */
13131 case 0x4D: /* DEC eBP */
13132 case 0x4E: /* DEC eSI */
13133 case 0x4F: /* DEC eDI */
13134 vassert(sz == 2 || sz == 4);
13137 assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
13138 getIReg(sz, (UInt)(opc - 0x48)),
13140 setFlags_INC_DEC( False, t1, ty );
13141 putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
13142 DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
13145 /* ------------------------ INT ------------------------ */
13147 case 0xCC: /* INT 3 */
/* deliver SIGTRAP with the resume point after the int3 */
13148 jmp_lit(Ijk_SigTRAP,((Addr32)guest_EIP_bbstart)+delta);
13149 dres.whatNext = Dis_StopHere;
13153 case 0xCD: /* INT imm8 */
13154 d32 = getIByte(delta); delta++;
13156 /* For any of the cases where we emit a jump (that is, for all
13157 currently handled cases), it's important that all ArchRegs
13158 carry their up-to-date value at this point. So we declare an
13159 end-of-block here, which forces any TempRegs caching ArchRegs
13162 /* Handle int $0x30 (l4re syscall) */
13164 jmp_lit(Ijk_Sys_int48,((Addr32)guest_EIP_bbstart)+delta);
13165 dres.whatNext = Dis_StopHere;
13166 DIP("int $0x30\n");
13170 /* Handle int $0x32 (l4re syscall) */
13172 jmp_lit(Ijk_Sys_int50,((Addr32)guest_EIP_bbstart)+delta);
13173 dres.whatNext = Dis_StopHere;
13174 DIP("int $0x32\n");
13178 /* Handle int $0x40 .. $0x43 by synthesising a segfault and a
13179 restart of this instruction (hence the "-2" two lines below,
13180 to get the restart EIP to be this instruction. This is
13181 probably Linux-specific and it would be more correct to only
13182 do this if the VexAbiInfo says that is what we should do. */
13183 if (d32 >= 0x40 && d32 <= 0x43) {
13184 jmp_lit(Ijk_SigSEGV,((Addr32)guest_EIP_bbstart)+delta-2);
13185 dres.whatNext = Dis_StopHere;
13186 DIP("int $0x%x\n", (Int)d32);
13190 /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
13191 (darwin syscalls). As part of this, note where we are, so we
13192 can back up the guest to this point if the syscall needs to
13195 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
13196 mkU32(guest_EIP_curr_instr) ) );
13197 jmp_lit(Ijk_Sys_int128,((Addr32)guest_EIP_bbstart)+delta);
13198 dres.whatNext = Dis_StopHere;
13199 DIP("int $0x80\n");
13203 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
13204 mkU32(guest_EIP_curr_instr) ) );
13205 jmp_lit(Ijk_Sys_int129,((Addr32)guest_EIP_bbstart)+delta);
13206 dres.whatNext = Dis_StopHere;
13207 DIP("int $0x81\n");
13211 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
13212 mkU32(guest_EIP_curr_instr) ) );
13213 jmp_lit(Ijk_Sys_int130,((Addr32)guest_EIP_bbstart)+delta);
13214 dres.whatNext = Dis_StopHere;
13215 DIP("int $0x82\n");
13219 /* none of the above */
13220 goto decode_failure;
13222 /* ------------------------ Jcond, byte offset --------- */
13224 case 0xEB: /* Jb (jump, byte offset) */
13225 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13227 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
/* unconditional jump: keep disassembling at the target if allowed */
13228 dres.whatNext = Dis_ResteerU;
13229 dres.continueAt = (Addr64)(Addr32)d32;
13231 jmp_lit(Ijk_Boring,d32);
13232 dres.whatNext = Dis_StopHere;
13234 DIP("jmp-8 0x%x\n", d32);
13237 case 0xE9: /* Jv (jump, 16/32 offset) */
13238 vassert(sz == 4); /* JRS added 2004 July 11 */
13239 d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
13241 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
13242 dres.whatNext = Dis_ResteerU;
13243 dres.continueAt = (Addr64)(Addr32)d32;
13245 jmp_lit(Ijk_Boring,d32);
13246 dres.whatNext = Dis_StopHere;
13248 DIP("jmp 0x%x\n", d32);
13253 case 0x72: /* JBb/JNAEb (jump below) */
13254 case 0x73: /* JNBb/JAEb (jump not below) */
13255 case 0x74: /* JZb/JEb (jump zero) */
13256 case 0x75: /* JNZb/JNEb (jump not zero) */
13257 case 0x76: /* JBEb/JNAb (jump below or equal) */
13258 case 0x77: /* JNBEb/JAb (jump not below or equal) */
13259 case 0x78: /* JSb (jump negative) */
13260 case 0x79: /* JSb (jump not negative) */
13261 case 0x7A: /* JP (jump parity even) */
13262 case 0x7B: /* JNP/JPO (jump parity odd) */
13263 case 0x7C: /* JLb/JNGEb (jump less) */
13264 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
13265 case 0x7E: /* JLEb/JNGb (jump less or equal) */
13266 case 0x7F: /* JGb/JNLEb (jump greater) */
13268 HChar* comment = "";
/* (opc - 0x70) is the X86Condcode; xoring with 1 negates it */
13269 jmpDelta = (Int)getSDisp8(delta);
13270 vassert(-128 <= jmpDelta && jmpDelta < 128);
13271 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
13274 && vex_control.guest_chase_cond
13275 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
13277 && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
13278 /* Speculation: assume this backward branch is taken. So we
13279 need to emit a side-exit to the insn following this one,
13280 on the negation of the condition, and continue at the
13281 branch target address (d32). If we wind up back at the
13282 first instruction of the trace, just stop; it's better to
13283 let the IR loop unroller handle that case. */
13285 mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
13287 IRConst_U32(guest_EIP_bbstart+delta) ) );
13288 dres.whatNext = Dis_ResteerC;
13289 dres.continueAt = (Addr64)(Addr32)d32;
13290 comment = "(assumed taken)";
13294 && vex_control.guest_chase_cond
13295 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
13297 && resteerOkFn( callback_opaque,
13298 (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
13299 /* Speculation: assume this forward branch is not taken. So
13300 we need to emit a side-exit to d32 (the dest) and continue
13301 disassembling at the insn immediately following this
13304 mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
13306 IRConst_U32(d32) ) );
13307 dres.whatNext = Dis_ResteerC;
13308 dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
13309 comment = "(assumed not taken)";
13312 /* Conservative default translation - end the block at this
13314 jcc_01( (X86Condcode)(opc - 0x70),
13315 (Addr32)(guest_EIP_bbstart+delta), d32);
13316 dres.whatNext = Dis_StopHere;
13318 DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
13322 case 0xE3: /* JECXZ (for JCXZ see above) */
13323 if (sz != 4) goto decode_failure;
13324 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13327 binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
13331 DIP("jecxz 0x%x\n", d32);
13334 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
13335 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
13336 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
13337 { /* Again, the docs say this uses ECX/CX as a count depending on
13338 the address size override, not the operand one. Since we
13339 don't handle address size overrides, I guess that means
13341 IRExpr* zbit = NULL;
13342 IRExpr* count = NULL;
13343 IRExpr* cond = NULL;
13344 HChar* xtra = NULL;
13346 if (sz != 4) goto decode_failure;
13347 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
/* ECX is decremented unconditionally, then tested */
13349 putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));
13351 count = getIReg(4,R_ECX);
13352 cond = binop(Iop_CmpNE32, count, mkU32(0));
/* LOOPE additionally requires ZF==1; LOOPNE requires ZF==0
   (the opcode dispatch lines are elided here) */
13359 zbit = mk_x86g_calculate_condition( X86CondZ );
13360 cond = mkAnd1(cond, zbit);
13364 zbit = mk_x86g_calculate_condition( X86CondNZ );
13365 cond = mkAnd1(cond, zbit);
13370 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32)) );
13372 DIP("loop%s 0x%x\n", xtra, d32);
13376 /* ------------------------ IMUL ----------------------- */
13378 case 0x69: /* IMUL Iv, Ev, Gv */
13379 delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
13381 case 0x6B: /* IMUL Ib, Ev, Gv */
/* final arg is the immediate width: 1 byte here, sz bytes above */
13382 delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
13385 /* ------------------------ MOV ------------------------ */
13387 case 0x88: /* MOV Gb,Eb */
13388 delta = dis_mov_G_E(sorb, 1, delta);
13391 case 0x89: /* MOV Gv,Ev */
13392 delta = dis_mov_G_E(sorb, sz, delta);
13395 case 0x8A: /* MOV Eb,Gb */
13396 delta = dis_mov_E_G(sorb, 1, delta);
13399 case 0x8B: /* MOV Ev,Gv */
13400 delta = dis_mov_E_G(sorb, sz, delta);
13403 case 0x8D: /* LEA M,Gv */
13405 goto decode_failure;
13406 modrm = getIByte(delta);
/* LEA requires a memory operand; a register E-part is invalid */
13407 if (epartIsReg(modrm))
13408 goto decode_failure;
13409 /* NOTE! this is the one place where a segment override prefix
13410 has no effect on the address calculation. Therefore we pass
13411 zero instead of sorb here. */
13412 addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
13414 putIReg(sz, gregOfRM(modrm), mkexpr(addr));
13415 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
13416 nameIReg(sz,gregOfRM(modrm)));
13419 case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
13420 delta = dis_mov_Sw_Ew(sorb, sz, delta);
13423 case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
13424 delta = dis_mov_Ew_Sw(sorb, delta);
13427 case 0xA0: /* MOV Ob,AL */
13429 /* Fall through ... */
13430 case 0xA1: /* MOV Ov,eAX */
/* moffs form: 32-bit absolute address follows the opcode */
13431 d32 = getUDisp32(delta); delta += 4;
13433 addr = newTemp(Ity_I32);
13434 assign( addr, handleSegOverride(sorb, mkU32(d32)) );
13435 putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
13436 DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
13437 d32, nameIReg(sz,R_EAX));
13440 case 0xA2: /* MOV Ob,AL */
13442 /* Fall through ... */
13443 case 0xA3: /* MOV eAX,Ov */
13444 d32 = getUDisp32(delta); delta += 4;
13446 addr = newTemp(Ity_I32);
13447 assign( addr, handleSegOverride(sorb, mkU32(d32)) );
13448 storeLE( mkexpr(addr), getIReg(sz,R_EAX) );
13449 DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX),
13450 sorbTxt(sorb), d32);
13453 case 0xB0: /* MOV imm,AL */
13454 case 0xB1: /* MOV imm,CL */
13455 case 0xB2: /* MOV imm,DL */
13456 case 0xB3: /* MOV imm,BL */
13457 case 0xB4: /* MOV imm,AH */
13458 case 0xB5: /* MOV imm,CH */
13459 case 0xB6: /* MOV imm,DH */
13460 case 0xB7: /* MOV imm,BH */
/* destination byte register encoded in the opcode (opc - 0xB0) */
13461 d32 = getIByte(delta); delta += 1;
13462 putIReg(1, opc-0xB0, mkU8(d32));
13463 DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0));
13466 case 0xB8: /* MOV imm,eAX */
13467 case 0xB9: /* MOV imm,eCX */
13468 case 0xBA: /* MOV imm,eDX */
13469 case 0xBB: /* MOV imm,eBX */
13470 case 0xBC: /* MOV imm,eSP */
13471 case 0xBD: /* MOV imm,eBP */
13472 case 0xBE: /* MOV imm,eSI */
13473 case 0xBF: /* MOV imm,eDI */
13474 d32 = getUDisp(sz,delta); delta += sz;
13475 putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32));
13476 DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8));
13479 case 0xC6: /* MOV Ib,Eb */
13482 case 0xC7: /* MOV Iv,Ev */
13486 modrm = getIByte(delta);
13487 if (epartIsReg(modrm)) {
13488 delta++; /* mod/rm byte */
13489 d32 = getUDisp(sz,delta); delta += sz;
13490 putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32));
13491 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32,
13492 nameIReg(sz,eregOfRM(modrm)));
13494 addr = disAMode ( &alen, sorb, delta, dis_buf );
13496 d32 = getUDisp(sz,delta); delta += sz;
13497 storeLE(mkexpr(addr), mkU(szToITy(sz), d32));
13498 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
13502 /* ------------------------ opl imm, A ----------------- */
/* ALU op with immediate and the accumulator (AL/eAX).
   dis_op_imm_A args: size, carry-in?, base 8-bit IROp (widened by
   the helper per operand size), keep-result? (False => CMP/TEST
   only set flags), delta, mnemonic text. */
13504 case 0x04: /* ADD Ib, AL */
13505 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
13507 case 0x05: /* ADD Iv, eAX */
13508 delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" );
13511 case 0x0C: /* OR Ib, AL */
13512 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
13514 case 0x0D: /* OR Iv, eAX */
13515 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
13518 case 0x14: /* ADC Ib, AL */
13519 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
13521 case 0x15: /* ADC Iv, eAX */
13522 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
13525 case 0x1C: /* SBB Ib, AL */
13526 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
13528 case 0x1D: /* SBB Iv, eAX */
13529 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
13532 case 0x24: /* AND Ib, AL */
13533 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
13535 case 0x25: /* AND Iv, eAX */
13536 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
13539 case 0x2C: /* SUB Ib, AL */
13540 delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" );
13542 case 0x2D: /* SUB Iv, eAX */
13543 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
13546 case 0x34: /* XOR Ib, AL */
13547 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
13549 case 0x35: /* XOR Iv, eAX */
13550 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
/* CMP/TEST: same machinery, but discard the arithmetic result */
13553 case 0x3C: /* CMP Ib, AL */
13554 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
13556 case 0x3D: /* CMP Iv, eAX */
13557 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
13560 case 0xA8: /* TEST Ib, AL */
13561 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
13563 case 0xA9: /* TEST Iv, eAX */
13564 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
13567 /* ------------------------ opl Ev, Gv ----------------- */
/* ALU op, E (reg/mem) source, G (reg) destination */
13569 case 0x02: /* ADD Eb,Gb */
13570 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" );
13572 case 0x03: /* ADD Ev,Gv */
13573 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" );
13576 case 0x0A: /* OR Eb,Gb */
13577 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" );
13579 case 0x0B: /* OR Ev,Gv */
13580 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" );
13583 case 0x12: /* ADC Eb,Gb */
13584 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
13586 case 0x13: /* ADC Ev,Gv */
13587 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
13590 case 0x1A: /* SBB Eb,Gb */
13591 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
13593 case 0x1B: /* SBB Ev,Gv */
13594 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
13597 case 0x22: /* AND Eb,Gb */
13598 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" );
13600 case 0x23: /* AND Ev,Gv */
13601 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" );
13604 case 0x2A: /* SUB Eb,Gb */
13605 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
13607 case 0x2B: /* SUB Ev,Gv */
13608 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
13611 case 0x32: /* XOR Eb,Gb */
13612 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
13614 case 0x33: /* XOR Ev,Gv */
13615 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
13618 case 0x3A: /* CMP Eb,Gb */
13619 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
13621 case 0x3B: /* CMP Ev,Gv */
13622 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
13625 case 0x84: /* TEST Eb,Gb */
13626 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" );
13628 case 0x85: /* TEST Ev,Gv */
13629 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" );
13632 /* ------------------------ opl Gv, Ev ----------------- */
/* ALU op, G (reg) source, E (reg/mem) destination; these take
   pfx_lock since a memory destination may carry a LOCK prefix */
13634 case 0x00: /* ADD Gb,Eb */
13635 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13636 Iop_Add8, True, 1, delta, "add" );
13638 case 0x01: /* ADD Gv,Ev */
13639 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13640 Iop_Add8, True, sz, delta, "add" );
13643 case 0x08: /* OR Gb,Eb */
13644 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13645 Iop_Or8, True, 1, delta, "or" );
13647 case 0x09: /* OR Gv,Ev */
13648 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13649 Iop_Or8, True, sz, delta, "or" );
13652 case 0x10: /* ADC Gb,Eb */
13653 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13654 Iop_Add8, True, 1, delta, "adc" );
13656 case 0x11: /* ADC Gv,Ev */
13657 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13658 Iop_Add8, True, sz, delta, "adc" );
13661 case 0x18: /* SBB Gb,Eb */
13662 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13663 Iop_Sub8, True, 1, delta, "sbb" );
13665 case 0x19: /* SBB Gv,Ev */
13666 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13667 Iop_Sub8, True, sz, delta, "sbb" );
13670 case 0x20: /* AND Gb,Eb */
13671 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13672 Iop_And8, True, 1, delta, "and" );
13674 case 0x21: /* AND Gv,Ev */
13675 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13676 Iop_And8, True, sz, delta, "and" );
13679 case 0x28: /* SUB Gb,Eb */
13680 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13681 Iop_Sub8, True, 1, delta, "sub" );
13683 case 0x29: /* SUB Gv,Ev */
13684 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13685 Iop_Sub8, True, sz, delta, "sub" );
13688 case 0x30: /* XOR Gb,Eb */
13689 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13690 Iop_Xor8, True, 1, delta, "xor" );
13692 case 0x31: /* XOR Gv,Ev */
13693 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13694 Iop_Xor8, True, sz, delta, "xor" );
13697 case 0x38: /* CMP Gb,Eb */
13698 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13699 Iop_Sub8, False, 1, delta, "cmp" );
13701 case 0x39: /* CMP Gv,Ev */
13702 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13703 Iop_Sub8, False, sz, delta, "cmp" );
13706 /* ------------------------ POP ------------------------ */
13708 case 0x58: /* POP eAX */
13709 case 0x59: /* POP eCX */
13710 case 0x5A: /* POP eDX */
13711 case 0x5B: /* POP eBX */
13712 case 0x5D: /* POP eBP */
13713 case 0x5E: /* POP eSI */
13714 case 0x5F: /* POP eDI */
13715 case 0x5C: /* POP eSP */
13716 vassert(sz == 2 || sz == 4);
13717 t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32);
13718 assign(t2, getIReg(4, R_ESP));
13719 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
13720 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
13721 putIReg(sz, opc-0x58, mkexpr(t1));
13722 DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58));
13725 case 0x9D: /* POPF */
13726 vassert(sz == 2 || sz == 4);
13727 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
13728 assign(t2, getIReg(4, R_ESP));
13729 assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2))));
13730 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
13732 /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
13734 set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/,
13735 ((Addr32)guest_EIP_bbstart)+delta );
13737 DIP("popf%c\n", nameISize(sz));
13740 case 0x61: /* POPA */
13741 /* This is almost certainly wrong for sz==2. So ... */
13742 if (sz != 4) goto decode_failure;
13744 /* t5 is the old %ESP value. */
13745 t5 = newTemp(Ity_I32);
13746 assign( t5, getIReg(4, R_ESP) );
13748 /* Reload all the registers, except %esp. */
13749 putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
13750 putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
13751 putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
13752 putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
13753 /* ignore saved %ESP */
13754 putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
13755 putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
13756 putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));
13758 /* and move %ESP back up */
13759 putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );
13761 DIP("popa%c\n", nameISize(sz));
13764 case 0x8F: /* POPL/POPW m32 */
13766 UChar rm = getIByte(delta);
13768 /* make sure this instruction is correct POP */
13769 if (epartIsReg(rm) || gregOfRM(rm) != 0)
13770 goto decode_failure;
13771 /* and has correct size */
13772 if (sz != 4 && sz != 2)
13773 goto decode_failure;
13776 t1 = newTemp(Ity_I32); /* stack address */
13777 t3 = newTemp(ty); /* data */
13778 /* set t1 to ESP: t1 = ESP */
13779 assign( t1, getIReg(4, R_ESP) );
13780 /* load M[ESP] to virtual register t3: t3 = M[t1] */
13781 assign( t3, loadLE(ty, mkexpr(t1)) );
13783 /* increase ESP; must be done before the STORE. Intel manual says:
13784 If the ESP register is used as a base register for addressing
13785 a destination operand in memory, the POP instruction computes
13786 the effective address of the operand after it increments the
13789 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) );
13791 /* resolve MODR/M */
13792 addr = disAMode ( &len, sorb, delta, dis_buf);
13793 storeLE( mkexpr(addr), mkexpr(t3) );
13795 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf);
13801 case 0x1F: /* POP %DS */
13802 dis_pop_segreg( R_DS, sz ); break;
13803 case 0x07: /* POP %ES */
13804 dis_pop_segreg( R_ES, sz ); break;
13805 case 0x17: /* POP %SS */
13806 dis_pop_segreg( R_SS, sz ); break;
13808 /* ------------------------ PUSH ----------------------- */
13810 case 0x50: /* PUSH eAX */
13811 case 0x51: /* PUSH eCX */
13812 case 0x52: /* PUSH eDX */
13813 case 0x53: /* PUSH eBX */
13814 case 0x55: /* PUSH eBP */
13815 case 0x56: /* PUSH eSI */
13816 case 0x57: /* PUSH eDI */
13817 case 0x54: /* PUSH eSP */
13818 /* This is the Right Way, in that the value to be pushed is
13819 established before %esp is changed, so that pushl %esp
13820 correctly pushes the old value. */
13821 vassert(sz == 2 || sz == 4);
13822 ty = sz==2 ? Ity_I16 : Ity_I32;
13823 t1 = newTemp(ty); t2 = newTemp(Ity_I32);
13824 assign(t1, getIReg(sz, opc-0x50));
13825 assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)));
13826 putIReg(4, R_ESP, mkexpr(t2) );
13827 storeLE(mkexpr(t2),mkexpr(t1));
13828 DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
13832 case 0x68: /* PUSH Iv */
13833 d32 = getUDisp(sz,delta); delta += sz;
13835 case 0x6A: /* PUSH Ib, sign-extended to sz */
13836 d32 = getSDisp8(delta); delta += 1;
13840 t1 = newTemp(Ity_I32); t2 = newTemp(ty);
13841 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
13842 putIReg(4, R_ESP, mkexpr(t1) );
13843 /* stop mkU16 asserting if d32 is a negative 16-bit number
13847 storeLE( mkexpr(t1), mkU(ty,d32) );
13848 DIP("push%c $0x%x\n", nameISize(sz), d32);
13851 case 0x9C: /* PUSHF */ {
13852 vassert(sz == 2 || sz == 4);
13854 t1 = newTemp(Ity_I32);
13855 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
13856 putIReg(4, R_ESP, mkexpr(t1) );
13858 /* Calculate OSZACP, and patch in fixed fields as per
13860 - bit 1 is always 1
13861 - bit 9 is Interrupt Enable (should always be 1 in user mode?)
13863 t2 = newTemp(Ity_I32);
13864 assign( t2, binop(Iop_Or32,
13865 mk_x86g_calculate_eflags_all(),
13866 mkU32( (1<<1)|(1<<9) ) ));
13868 /* Patch in the D flag. This can simply be a copy of bit 10 of
13869 baseBlock[OFFB_DFLAG]. */
13870 t3 = newTemp(Ity_I32);
13871 assign( t3, binop(Iop_Or32,
13874 IRExpr_Get(OFFB_DFLAG,Ity_I32),
13878 /* And patch in the ID flag. */
13879 t4 = newTemp(Ity_I32);
13880 assign( t4, binop(Iop_Or32,
13883 binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32),
13888 /* And patch in the AC flag. */
13889 t5 = newTemp(Ity_I32);
13890 assign( t5, binop(Iop_Or32,
13893 binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32),
13898 /* if sz==2, the stored value needs to be narrowed. */
13900 storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) );
13902 storeLE( mkexpr(t1), mkexpr(t5) );
13904 DIP("pushf%c\n", nameISize(sz));
13908 case 0x60: /* PUSHA */
13909 /* This is almost certainly wrong for sz==2. So ... */
13910 if (sz != 4) goto decode_failure;
13912 /* This is the Right Way, in that the value to be pushed is
13913 established before %esp is changed, so that pusha
13914 correctly pushes the old %esp value. New value of %esp is
13915 pushed at start. */
13916 /* t0 is the %ESP value we're going to push. */
13917 t0 = newTemp(Ity_I32);
13918 assign( t0, getIReg(4, R_ESP) );
13920 /* t5 will be the new %ESP value. */
13921 t5 = newTemp(Ity_I32);
13922 assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );
13924 /* Update guest state before prodding memory. */
13925 putIReg(4, R_ESP, mkexpr(t5));
13927 /* Dump all the registers. */
13928 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
13929 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
13930 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
13931 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
13932 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
13933 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
13934 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
13935 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );
13937 DIP("pusha%c\n", nameISize(sz));
13940 case 0x0E: /* PUSH %CS */
13941 dis_push_segreg( R_CS, sz ); break;
13942 case 0x1E: /* PUSH %DS */
13943 dis_push_segreg( R_DS, sz ); break;
13944 case 0x06: /* PUSH %ES */
13945 dis_push_segreg( R_ES, sz ); break;
13946 case 0x16: /* PUSH %SS */
13947 dis_push_segreg( R_SS, sz ); break;
13949 /* ------------------------ SCAS et al ----------------- */
13951 case 0xA4: /* MOVS, no REP prefix */
13954 goto decode_failure; /* else dis_string_op asserts */
13955 dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
13958 case 0xA6: /* CMPSb, no REP prefix */
13961 goto decode_failure; /* else dis_string_op asserts */
13962 dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
13965 case 0xAA: /* STOS, no REP prefix */
13968 goto decode_failure; /* else dis_string_op asserts */
13969 dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
13972 case 0xAC: /* LODS, no REP prefix */
13975 goto decode_failure; /* else dis_string_op asserts */
13976 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
13979 case 0xAE: /* SCAS, no REP prefix */
13982 goto decode_failure; /* else dis_string_op asserts */
13983 dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
13987 case 0xFC: /* CLD */
13988 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) );
13992 case 0xFD: /* STD */
13993 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) );
13997 case 0xF8: /* CLC */
13998 case 0xF9: /* STC */
13999 case 0xF5: /* CMC */
14000 t0 = newTemp(Ity_I32);
14001 t1 = newTemp(Ity_I32);
14002 assign( t0, mk_x86g_calculate_eflags_all() );
14005 assign( t1, binop(Iop_And32, mkexpr(t0),
14006 mkU32(~X86G_CC_MASK_C)));
14010 assign( t1, binop(Iop_Or32, mkexpr(t0),
14011 mkU32(X86G_CC_MASK_C)));
14015 assign( t1, binop(Iop_Xor32, mkexpr(t0),
14016 mkU32(X86G_CC_MASK_C)));
14020 vpanic("disInstr(x86)(clc/stc/cmc)");
14022 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
14023 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
14024 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
14025 /* Set NDEP even though it isn't used. This makes redundant-PUT
14026 elimination of previous stores to this field work better. */
14027 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
14030 case 0xD6: /* SALC */
14031 t0 = newTemp(Ity_I32);
14032 t1 = newTemp(Ity_I32);
14033 assign( t0, binop(Iop_And32,
14034 mk_x86g_calculate_eflags_c(),
14036 assign( t1, binop(Iop_Sar32,
14037 binop(Iop_Shl32, mkexpr(t0), mkU8(31)),
14039 putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) );
14043 /* REPNE prefix insn */
14045 Addr32 eip_orig = guest_EIP_bbstart + delta_start;
14046 if (sorb != 0) goto decode_failure;
14047 abyte = getIByte(delta); delta++;
14049 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
14050 dres.whatNext = Dis_StopHere;
14053 /* According to the Intel manual, "repne movs" should never occur, but
14054 * in practice it has happened, so allow for it here... */
14055 case 0xA4: sz = 1; /* REPNE MOVS<sz> */
14057 dis_REP_op ( X86CondNZ, dis_MOVS, sz, eip_orig,
14058 guest_EIP_bbstart+delta, "repne movs" );
14061 case 0xA6: sz = 1; /* REPNE CMP<sz> */
14063 dis_REP_op ( X86CondNZ, dis_CMPS, sz, eip_orig,
14064 guest_EIP_bbstart+delta, "repne cmps" );
14067 case 0xAA: sz = 1; /* REPNE STOS<sz> */
14069 dis_REP_op ( X86CondNZ, dis_STOS, sz, eip_orig,
14070 guest_EIP_bbstart+delta, "repne stos" );
14073 case 0xAE: sz = 1; /* REPNE SCAS<sz> */
14075 dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig,
14076 guest_EIP_bbstart+delta, "repne scas" );
14080 goto decode_failure;
14085 /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
14086 for the rest, it means REP) */
14088 Addr32 eip_orig = guest_EIP_bbstart + delta_start;
14089 if (sorb != 0) goto decode_failure;
14090 abyte = getIByte(delta); delta++;
14092 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
14093 dres.whatNext = Dis_StopHere;
14096 case 0xA4: sz = 1; /* REP MOVS<sz> */
14098 dis_REP_op ( X86CondAlways, dis_MOVS, sz, eip_orig,
14099 guest_EIP_bbstart+delta, "rep movs" );
14102 case 0xA6: sz = 1; /* REPE CMP<sz> */
14104 dis_REP_op ( X86CondZ, dis_CMPS, sz, eip_orig,
14105 guest_EIP_bbstart+delta, "repe cmps" );
14108 case 0xAA: sz = 1; /* REP STOS<sz> */
14110 dis_REP_op ( X86CondAlways, dis_STOS, sz, eip_orig,
14111 guest_EIP_bbstart+delta, "rep stos" );
14114 case 0xAC: sz = 1; /* REP LODS<sz> */
14116 dis_REP_op ( X86CondAlways, dis_LODS, sz, eip_orig,
14117 guest_EIP_bbstart+delta, "rep lods" );
14120 case 0xAE: sz = 1; /* REPE SCAS<sz> */
14122 dis_REP_op ( X86CondZ, dis_SCAS, sz, eip_orig,
14123 guest_EIP_bbstart+delta, "repe scas" );
14126 case 0x90: /* REP NOP (PAUSE) */
14127 /* a hint to the P4 re spin-wait loop */
14128 DIP("rep nop (P4 pause)\n");
14129 /* "observe" the hint. The Vex client needs to be careful not
14130 to cause very long delays as a result, though. */
14131 jmp_lit(Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
14132 dres.whatNext = Dis_StopHere;
14135 case 0xC3: /* REP RET -- same as normal ret? */
14137 dres.whatNext = Dis_StopHere;
14142 goto decode_failure;
14147 /* ------------------------ XCHG ----------------------- */
14149 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
14150 prefix; hence it must be translated with an IRCAS (at least, the
14151 memory variant). */
14152 case 0x86: /* XCHG Gb,Eb */
14154 /* Fall through ... */
14155 case 0x87: /* XCHG Gv,Ev */
14156 modrm = getIByte(delta);
14158 t1 = newTemp(ty); t2 = newTemp(ty);
14159 if (epartIsReg(modrm)) {
14160 assign(t1, getIReg(sz, eregOfRM(modrm)));
14161 assign(t2, getIReg(sz, gregOfRM(modrm)));
14162 putIReg(sz, gregOfRM(modrm), mkexpr(t1));
14163 putIReg(sz, eregOfRM(modrm), mkexpr(t2));
14165 DIP("xchg%c %s, %s\n",
14166 nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
14167 nameIReg(sz,eregOfRM(modrm)));
14169 *expect_CAS = True;
14170 addr = disAMode ( &alen, sorb, delta, dis_buf );
14171 assign( t1, loadLE(ty,mkexpr(addr)) );
14172 assign( t2, getIReg(sz,gregOfRM(modrm)) );
14173 casLE( mkexpr(addr),
14174 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
14175 putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
14177 DIP("xchg%c %s, %s\n", nameISize(sz),
14178 nameIReg(sz,gregOfRM(modrm)), dis_buf);
14182 case 0x90: /* XCHG eAX,eAX */
14185 case 0x91: /* XCHG eAX,eCX */
14186 case 0x92: /* XCHG eAX,eDX */
14187 case 0x93: /* XCHG eAX,eBX */
14188 case 0x94: /* XCHG eAX,eSP */
14189 case 0x95: /* XCHG eAX,eBP */
14190 case 0x96: /* XCHG eAX,eSI */
14191 case 0x97: /* XCHG eAX,eDI */
14192 codegen_xchg_eAX_Reg ( sz, opc - 0x90 );
14195 /* ------------------------ XLAT ----------------------- */
14197 case 0xD7: /* XLAT */
14198 if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */
14207 unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/))))));
14209 DIP("xlat%c [ebx]\n", nameISize(sz));
14212 /* ------------------------ IN / OUT ----------------------- */
14214 case 0xE4: /* IN imm8, AL */
14216 t1 = newTemp(Ity_I32);
14217 abyte = getIByte(delta); delta++;
14218 assign(t1, mkU32( abyte & 0xFF ));
14219 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
14221 case 0xE5: /* IN imm8, eAX */
14222 vassert(sz == 2 || sz == 4);
14223 t1 = newTemp(Ity_I32);
14224 abyte = getIByte(delta); delta++;
14225 assign(t1, mkU32( abyte & 0xFF ));
14226 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
14228 case 0xEC: /* IN %DX, AL */
14230 t1 = newTemp(Ity_I32);
14231 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
14232 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
14233 nameIReg(sz,R_EAX));
14235 case 0xED: /* IN %DX, eAX */
14236 vassert(sz == 2 || sz == 4);
14237 t1 = newTemp(Ity_I32);
14238 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
14239 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
14240 nameIReg(sz,R_EAX));
14243 /* At this point, sz indicates the width, and t1 is a 32-bit
14244 value giving port number. */
14246 vassert(sz == 1 || sz == 2 || sz == 4);
14248 t2 = newTemp(Ity_I32);
14249 d = unsafeIRDirty_1_N(
14252 "x86g_dirtyhelper_IN",
14253 &x86g_dirtyhelper_IN,
14254 mkIRExprVec_2( mkexpr(t1), mkU32(sz) )
14256 /* do the call, dumping the result in t2. */
14257 stmt( IRStmt_Dirty(d) );
14258 putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) );
14262 case 0xE6: /* OUT AL, imm8 */
14264 t1 = newTemp(Ity_I32);
14265 abyte = getIByte(delta); delta++;
14266 assign( t1, mkU32( abyte & 0xFF ) );
14267 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
14269 case 0xE7: /* OUT eAX, imm8 */
14270 vassert(sz == 2 || sz == 4);
14271 t1 = newTemp(Ity_I32);
14272 abyte = getIByte(delta); delta++;
14273 assign( t1, mkU32( abyte & 0xFF ) );
14274 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
14276 case 0xEE: /* OUT AL, %DX */
14278 t1 = newTemp(Ity_I32);
14279 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
14280 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
14281 nameIReg(2,R_EDX));
14283 case 0xEF: /* OUT eAX, %DX */
14284 vassert(sz == 2 || sz == 4);
14285 t1 = newTemp(Ity_I32);
14286 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
14287 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
14288 nameIReg(2,R_EDX));
14291 /* At this point, sz indicates the width, and t1 is a 32-bit
14292 value giving port number. */
14294 vassert(sz == 1 || sz == 2 || sz == 4);
14296 d = unsafeIRDirty_0_N(
14298 "x86g_dirtyhelper_OUT",
14299 &x86g_dirtyhelper_OUT,
14300 mkIRExprVec_3( mkexpr(t1),
14301 widenUto32( getIReg(sz, R_EAX) ),
14304 stmt( IRStmt_Dirty(d) );
14308 /* ------------------------ (Grp1 extensions) ---------- */
14310 case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as
14311 case 0x80, but only in 32-bit mode. */
14313 case 0x80: /* Grp1 Ib,Eb */
14314 modrm = getIByte(delta);
14315 am_sz = lengthAMode(delta);
14318 d32 = getUChar(delta + am_sz);
14319 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
14322 case 0x81: /* Grp1 Iv,Ev */
14323 modrm = getIByte(delta);
14324 am_sz = lengthAMode(delta);
14326 d32 = getUDisp(d_sz, delta + am_sz);
14327 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
14330 case 0x83: /* Grp1 Ib,Ev */
14331 modrm = getIByte(delta);
14332 am_sz = lengthAMode(delta);
14334 d32 = getSDisp8(delta + am_sz);
14335 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
14338 /* ------------------------ (Grp2 extensions) ---------- */
14340 case 0xC0: { /* Grp2 Ib,Eb */
14341 Bool decode_OK = True;
14342 modrm = getIByte(delta);
14343 am_sz = lengthAMode(delta);
14345 d32 = getUChar(delta + am_sz);
14347 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14348 mkU8(d32 & 0xFF), NULL, &decode_OK );
14350 goto decode_failure;
14353 case 0xC1: { /* Grp2 Ib,Ev */
14354 Bool decode_OK = True;
14355 modrm = getIByte(delta);
14356 am_sz = lengthAMode(delta);
14358 d32 = getUChar(delta + am_sz);
14359 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14360 mkU8(d32 & 0xFF), NULL, &decode_OK );
14362 goto decode_failure;
14365 case 0xD0: { /* Grp2 1,Eb */
14366 Bool decode_OK = True;
14367 modrm = getIByte(delta);
14368 am_sz = lengthAMode(delta);
14372 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14373 mkU8(d32), NULL, &decode_OK );
14375 goto decode_failure;
14378 case 0xD1: { /* Grp2 1,Ev */
14379 Bool decode_OK = True;
14380 modrm = getUChar(delta);
14381 am_sz = lengthAMode(delta);
14384 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14385 mkU8(d32), NULL, &decode_OK );
14387 goto decode_failure;
14390 case 0xD2: { /* Grp2 CL,Eb */
14391 Bool decode_OK = True;
14392 modrm = getUChar(delta);
14393 am_sz = lengthAMode(delta);
14396 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14397 getIReg(1,R_ECX), "%cl", &decode_OK );
14399 goto decode_failure;
14402 case 0xD3: { /* Grp2 CL,Ev */
14403 Bool decode_OK = True;
14404 modrm = getIByte(delta);
14405 am_sz = lengthAMode(delta);
14407 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14408 getIReg(1,R_ECX), "%cl", &decode_OK );
14410 goto decode_failure;
14414 /* ------------------------ (Grp3 extensions) ---------- */
14416 case 0xF6: { /* Grp3 Eb */
14417 Bool decode_OK = True;
14418 delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
14420 goto decode_failure;
14423 case 0xF7: { /* Grp3 Ev */
14424 Bool decode_OK = True;
14425 delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
14427 goto decode_failure;
14431 /* ------------------------ (Grp4 extensions) ---------- */
14433 case 0xFE: { /* Grp4 Eb */
14434 Bool decode_OK = True;
14435 delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
14437 goto decode_failure;
14441 /* ------------------------ (Grp5 extensions) ---------- */
14443 case 0xFF: { /* Grp5 Ev */
14444 Bool decode_OK = True;
14445 delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
14447 goto decode_failure;
14451 /* ------------------------ Escapes to 2-byte opcodes -- */
14454 opc = getIByte(delta); delta++;
14457 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
14459 case 0xBA: { /* Grp8 Ib,Ev */
14460 Bool decode_OK = False;
14461 modrm = getUChar(delta);
14462 am_sz = lengthAMode(delta);
14463 d32 = getSDisp8(delta + am_sz);
14464 delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
14465 am_sz, sz, d32, &decode_OK );
14467 goto decode_failure;
14471 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
14473 case 0xBC: /* BSF Gv,Ev */
14474 delta = dis_bs_E_G ( sorb, sz, delta, True );
14476 case 0xBD: /* BSR Gv,Ev */
14477 delta = dis_bs_E_G ( sorb, sz, delta, False );
14480 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
14482 case 0xC8: /* BSWAP %eax */
14489 case 0xCF: /* BSWAP %edi */
14490 /* AFAICS from the Intel docs, this only exists at size 4. */
14492 t1 = newTemp(Ity_I32);
14493 t2 = newTemp(Ity_I32);
14494 assign( t1, getIReg(4, opc-0xC8) );
14498 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
14500 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
14501 mkU32(0x00FF0000)),
14503 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
14504 mkU32(0x0000FF00)),
14505 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
14506 mkU32(0x000000FF) )
14510 putIReg(4, opc-0xC8, mkexpr(t2));
14511 DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
14514 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
14516 case 0xA3: /* BT Gv,Ev */
14517 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
14519 case 0xB3: /* BTR Gv,Ev */
14520 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
14522 case 0xAB: /* BTS Gv,Ev */
14523 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
14525 case 0xBB: /* BTC Gv,Ev */
14526 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
14529 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
14533 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
14534 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
14535 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
14536 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
14537 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
14538 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
14539 case 0x48: /* CMOVSb (cmov negative) */
14540 case 0x49: /* CMOVSb (cmov not negative) */
14541 case 0x4A: /* CMOVP (cmov parity even) */
14542 case 0x4B: /* CMOVNP (cmov parity odd) */
14543 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
14544 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
14545 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
14546 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
14547 delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
14550 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
14552 case 0xB0: /* CMPXCHG Gb,Eb */
14553 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
14555 case 0xB1: /* CMPXCHG Gv,Ev */
14556 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
14559 case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
14560 IRTemp expdHi = newTemp(Ity_I32);
14561 IRTemp expdLo = newTemp(Ity_I32);
14562 IRTemp dataHi = newTemp(Ity_I32);
14563 IRTemp dataLo = newTemp(Ity_I32);
14564 IRTemp oldHi = newTemp(Ity_I32);
14565 IRTemp oldLo = newTemp(Ity_I32);
14566 IRTemp flags_old = newTemp(Ity_I32);
14567 IRTemp flags_new = newTemp(Ity_I32);
14568 IRTemp success = newTemp(Ity_I1);
14570 /* Translate this using a DCAS, even if there is no LOCK
14571 prefix. Life is too short to bother with generating two
14572 different translations for the with/without-LOCK-prefix
14574 *expect_CAS = True;
14576 /* Decode, and generate address. */
14577 if (sz != 4) goto decode_failure;
14578 modrm = getIByte(delta);
14579 if (epartIsReg(modrm)) goto decode_failure;
14580 if (gregOfRM(modrm) != 1) goto decode_failure;
14581 addr = disAMode ( &alen, sorb, delta, dis_buf );
14584 /* Get the expected and new values. */
14585 assign( expdHi, getIReg(4,R_EDX) );
14586 assign( expdLo, getIReg(4,R_EAX) );
14587 assign( dataHi, getIReg(4,R_ECX) );
14588 assign( dataLo, getIReg(4,R_EBX) );
14592 mkIRCAS( oldHi, oldLo,
14593 Iend_LE, mkexpr(addr),
14594 mkexpr(expdHi), mkexpr(expdLo),
14595 mkexpr(dataHi), mkexpr(dataLo)
14598 /* success when oldHi:oldLo == expdHi:expdLo */
14600 binop(Iop_CasCmpEQ32,
14602 binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
14603 binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
14608 /* If the DCAS is successful, that is to say oldHi:oldLo ==
14609 expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
14610 which is where they came from originally. Both the actual
14611 contents of these two regs, and any shadow values, are
14612 unchanged. If the DCAS fails then we're putting into
14613 EDX:EAX the value seen in memory. */
14615 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
14620 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
14625 /* Copy the success bit into the Z flag and leave the others
14627 assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
14631 binop(Iop_And32, mkexpr(flags_old),
14632 mkU32(~X86G_CC_MASK_Z)),
14635 unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
14636 mkU8(X86G_CC_SHIFT_Z)) ));
14638 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
14639 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
14640 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
14641 /* Set NDEP even though it isn't used. This makes
14642 redundant-PUT elimination of previous stores to this field
14644 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
14646 /* Sheesh. Aren't you glad it was me and not you that had to
14647 write and validate all this grunge? */
14649 DIP("cmpxchg8b %s\n", dis_buf);
14653 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
14655 case 0xA2: { /* CPUID */
14656 /* Uses dirty helper:
14657 void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
14658 declared to mod eax, wr ebx, ecx, edx
14661 HChar* fName = NULL;
14662 void* fAddr = NULL;
14663 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
14664 fName = "x86g_dirtyhelper_CPUID_sse2";
14665 fAddr = &x86g_dirtyhelper_CPUID_sse2;
14668 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
14669 fName = "x86g_dirtyhelper_CPUID_sse1";
14670 fAddr = &x86g_dirtyhelper_CPUID_sse1;
14673 if (archinfo->hwcaps == 0/*no SSE*/) {
14674 fName = "x86g_dirtyhelper_CPUID_sse0";
14675 fAddr = &x86g_dirtyhelper_CPUID_sse0;
14677 vpanic("disInstr(x86)(cpuid)");
14679 vassert(fName); vassert(fAddr);
14680 d = unsafeIRDirty_0_N ( 0/*regparms*/,
14681 fName, fAddr, mkIRExprVec_0() );
14682 /* declare guest state effects */
14683 d->needsBBP = True;
14685 d->fxState[0].fx = Ifx_Modify;
14686 d->fxState[0].offset = OFFB_EAX;
14687 d->fxState[0].size = 4;
14688 d->fxState[1].fx = Ifx_Write;
14689 d->fxState[1].offset = OFFB_EBX;
14690 d->fxState[1].size = 4;
14691 d->fxState[2].fx = Ifx_Modify;
14692 d->fxState[2].offset = OFFB_ECX;
14693 d->fxState[2].size = 4;
14694 d->fxState[3].fx = Ifx_Write;
14695 d->fxState[3].offset = OFFB_EDX;
14696 d->fxState[3].size = 4;
14697 /* execute the dirty call, side-effecting guest state */
14698 stmt( IRStmt_Dirty(d) );
14699 /* CPUID is a serialising insn. So, just in case someone is
14700 using it as a memory fence ... */
14701 stmt( IRStmt_MBE(Imbe_Fence) );
14706 //-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
14707 //-- goto decode_failure;
14709 //-- t1 = newTemp(cb);
14710 //-- t2 = newTemp(cb);
14711 //-- t3 = newTemp(cb);
14712 //-- t4 = newTemp(cb);
14713 //-- uInstr0(cb, CALLM_S, 0);
14715 //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1);
14716 //-- uInstr1(cb, PUSH, 4, TempReg, t1);
14718 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2);
14719 //-- uLiteral(cb, 0);
14720 //-- uInstr1(cb, PUSH, 4, TempReg, t2);
14722 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3);
14723 //-- uLiteral(cb, 0);
14724 //-- uInstr1(cb, PUSH, 4, TempReg, t3);
14726 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
14727 //-- uLiteral(cb, 0);
14728 //-- uInstr1(cb, PUSH, 4, TempReg, t4);
14730 //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID));
14731 //-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
14733 //-- uInstr1(cb, POP, 4, TempReg, t4);
14734 //-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX);
14736 //-- uInstr1(cb, POP, 4, TempReg, t3);
14737 //-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX);
14739 //-- uInstr1(cb, POP, 4, TempReg, t2);
14740 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX);
14742 //-- uInstr1(cb, POP, 4, TempReg, t1);
14743 //-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX);
14745 //-- uInstr0(cb, CALLM_E, 0);
14746 //-- DIP("cpuid\n");
14749 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
14751 case 0xB6: /* MOVZXb Eb,Gv */
14752 if (sz != 2 && sz != 4)
14753 goto decode_failure;
14754 delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
14757 case 0xB7: /* MOVZXw Ew,Gv */
14759 goto decode_failure;
14760 delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
14763 case 0xBE: /* MOVSXb Eb,Gv */
14764 if (sz != 2 && sz != 4)
14765 goto decode_failure;
14766 delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
14769 case 0xBF: /* MOVSXw Ew,Gv */
14770 if (sz != 4 && /* accept movsww, sigh, see #250799 */sz != 2)
14771 goto decode_failure;
14772 delta = dis_movx_E_G ( sorb, delta, 2, sz, True );
14775 //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
14777 //-- case 0xC3: /* MOVNTI Gv,Ev */
14778 //-- vg_assert(sz == 4);
14779 //-- modrm = getUChar(eip);
14780 //-- vg_assert(!epartIsReg(modrm));
14781 //-- t1 = newTemp(cb);
14782 //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
14783 //-- pair = disAMode ( cb, sorb, eip, dis_buf );
14784 //-- t2 = LOW24(pair);
14785 //-- eip += HI8(pair);
14786 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
14787 //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
14790 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
14792 case 0xAF: /* IMUL Ev, Gv */
14793 delta = dis_mul_E_G ( sorb, sz, delta );
14796 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
14799 modrm = getUChar(delta);
14800 if (epartIsReg(modrm)) goto decode_failure;
14801 addr = disAMode ( &alen, sorb, delta, dis_buf );
14803 DIP("nop%c %s\n", nameISize(sz), dis_buf);
14806 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
14809 case 0x82: /* JBb/JNAEb (jump below) */
14810 case 0x83: /* JNBb/JAEb (jump not below) */
14811 case 0x84: /* JZb/JEb (jump zero) */
14812 case 0x85: /* JNZb/JNEb (jump not zero) */
14813 case 0x86: /* JBEb/JNAb (jump below or equal) */
14814 case 0x87: /* JNBEb/JAb (jump not below or equal) */
14815 case 0x88: /* JSb (jump negative) */
14816 case 0x89: /* JNSb (jump not negative) */
14817 case 0x8A: /* JP (jump parity even) */
14818 case 0x8B: /* JNP/JPO (jump parity odd) */
14819 case 0x8C: /* JLb/JNGEb (jump less) */
14820 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
14821 case 0x8E: /* JLEb/JNGb (jump less or equal) */
14822 case 0x8F: /* JGb/JNLEb (jump greater) */
14824 HChar* comment = "";
14825 jmpDelta = (Int)getUDisp32(delta);
14826 d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
14829 && vex_control.guest_chase_cond
14830 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
14832 && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
14833 /* Speculation: assume this backward branch is taken. So
14834 we need to emit a side-exit to the insn following this
14835 one, on the negation of the condition, and continue at
14836 the branch target address (d32). If we wind up back at
14837 the first instruction of the trace, just stop; it's
14838 better to let the IR loop unroller handle that case.*/
14840 mk_x86g_calculate_condition((X86Condcode)
14841 (1 ^ (opc - 0x80))),
14843 IRConst_U32(guest_EIP_bbstart+delta) ) );
14844 dres.whatNext = Dis_ResteerC;
14845 dres.continueAt = (Addr64)(Addr32)d32;
14846 comment = "(assumed taken)";
14850 && vex_control.guest_chase_cond
14851 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
14853 && resteerOkFn( callback_opaque,
14854 (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
14855 /* Speculation: assume this forward branch is not taken.
14856 So we need to emit a side-exit to d32 (the dest) and
14857 continue disassembling at the insn immediately
14858 following this one. */
14860 mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
14862 IRConst_U32(d32) ) );
14863 dres.whatNext = Dis_ResteerC;
14864 dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
14865 comment = "(assumed not taken)";
14868 /* Conservative default translation - end the block at
14870 jcc_01( (X86Condcode)(opc - 0x80),
14871 (Addr32)(guest_EIP_bbstart+delta), d32);
14872 dres.whatNext = Dis_StopHere;
14874 DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
14878 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
14879 case 0x31: { /* RDTSC */
14880 IRTemp val = newTemp(Ity_I64);
14881 IRExpr** args = mkIRExprVec_0();
14882 IRDirty* d = unsafeIRDirty_1_N (
14885 "x86g_dirtyhelper_RDTSC",
14886 &x86g_dirtyhelper_RDTSC,
14889 /* execute the dirty call, dumping the result in val. */
14890 stmt( IRStmt_Dirty(d) );
14891 putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
14892 putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
14897 /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
14899 case 0xA1: /* POP %FS */
14900 dis_pop_segreg( R_FS, sz ); break;
14901 case 0xA9: /* POP %GS */
14902 dis_pop_segreg( R_GS, sz ); break;
14904 case 0xA0: /* PUSH %FS */
14905 dis_push_segreg( R_FS, sz ); break;
14906 case 0xA8: /* PUSH %GS */
14907 dis_push_segreg( R_GS, sz ); break;
14909 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
14912 case 0x92: /* set-Bb/set-NAEb (jump below) */
14913 case 0x93: /* set-NBb/set-AEb (jump not below) */
14914 case 0x94: /* set-Zb/set-Eb (jump zero) */
14915 case 0x95: /* set-NZb/set-NEb (jump not zero) */
14916 case 0x96: /* set-BEb/set-NAb (jump below or equal) */
14917 case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */
14918 case 0x98: /* set-Sb (jump negative) */
14919 case 0x99: /* set-NSb (set if not negative) */
14920 case 0x9A: /* set-P (jump parity even) */
14921 case 0x9B: /* set-NP (jump parity odd) */
14922 case 0x9C: /* set-Lb/set-NGEb (jump less) */
14923 case 0x9D: /* set-GEb/set-NLb (jump greater or equal) */
14924 case 0x9E: /* set-LEb/set-NGb (jump less or equal) */
14925 case 0x9F: /* set-Gb/set-NLEb (jump greater) */
14926 t1 = newTemp(Ity_I8);
14927 assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
14928 modrm = getIByte(delta);
14929 if (epartIsReg(modrm)) {
14931 putIReg(1, eregOfRM(modrm), mkexpr(t1));
14932 DIP("set%s %s\n", name_X86Condcode(opc-0x90),
14933 nameIReg(1,eregOfRM(modrm)));
14935 addr = disAMode ( &alen, sorb, delta, dis_buf );
14937 storeLE( mkexpr(addr), mkexpr(t1) );
14938 DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
14942 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
14944 case 0xA4: /* SHLDv imm8,Gv,Ev */
14945 modrm = getIByte(delta);
14946 d32 = delta + lengthAMode(delta);
14947 vex_sprintf(dis_buf, "$%d", getIByte(d32));
14948 delta = dis_SHLRD_Gv_Ev (
14949 sorb, delta, modrm, sz,
14950 mkU8(getIByte(d32)), True, /* literal */
14953 case 0xA5: /* SHLDv %cl,Gv,Ev */
14954 modrm = getIByte(delta);
14955 delta = dis_SHLRD_Gv_Ev (
14956 sorb, delta, modrm, sz,
14957 getIReg(1,R_ECX), False, /* not literal */
14961 case 0xAC: /* SHRDv imm8,Gv,Ev */
14962 modrm = getIByte(delta);
14963 d32 = delta + lengthAMode(delta);
14964 vex_sprintf(dis_buf, "$%d", getIByte(d32));
14965 delta = dis_SHLRD_Gv_Ev (
14966 sorb, delta, modrm, sz,
14967 mkU8(getIByte(d32)), True, /* literal */
14970 case 0xAD: /* SHRDv %cl,Gv,Ev */
14971 modrm = getIByte(delta);
14972 delta = dis_SHLRD_Gv_Ev (
14973 sorb, delta, modrm, sz,
14974 getIReg(1,R_ECX), False, /* not literal */
14978 /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */
14981 /* Simple implementation needing a long explanation.
14983 sysenter is a kind of syscall entry. The key thing here
14984 is that the return address is not known -- that is
14985 something that is beyond Vex's knowledge. So this IR
14986 forces a return to the scheduler, which can do what it
14987 likes to simulate the sysenter, but it MUST set this
14988 thread's guest_EIP field with the continuation address
14989 before resuming execution. If that doesn't happen, the
14990 thread will jump to address zero, which is probably
14994 /* Note where we are, so we can back up the guest to this
14995 point if the syscall needs to be restarted. */
14996 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
14997 mkU32(guest_EIP_curr_instr) ) );
14998 jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
14999 dres.whatNext = Dis_StopHere;
15003 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
15005 case 0xC0: { /* XADD Gb,Eb */
15007 delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
15008 if (!decodeOK) goto decode_failure;
15011 case 0xC1: { /* XADD Gv,Ev */
15013 delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
15014 if (!decodeOK) goto decode_failure;
15018 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
15022 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
15024 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
15025 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
15026 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
15027 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
15031 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
15034 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
15037 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15041 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
15044 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
15047 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15049 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
15050 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
15052 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
15056 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
15060 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
15062 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
15063 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
15064 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
15068 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
15072 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
15074 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
15075 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
15076 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
15077 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
15079 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
15083 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
15087 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
15090 Int delta0 = delta-1;
15091 Bool decode_OK = False;
15093 /* If sz==2 this is SSE, and we assume sse idec has
15094 already spotted those cases by now. */
15096 goto decode_failure;
15098 delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
15101 goto decode_failure;
15106 case 0x77: /* EMMS */
15108 goto decode_failure;
15109 do_EMMS_preamble();
15113 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
15114 case 0x01: /* 0F 01 /0 -- SGDT */
15115 /* 0F 01 /1 -- SIDT */
15117 /* This is really revolting, but ... since each processor
15118 (core) only has one IDT and one GDT, just let the guest
15119 see it (pass-through semantics). I can't see any way to
15120 construct a faked-up value, so don't bother to try. */
15121 modrm = getUChar(delta);
15122 addr = disAMode ( &alen, sorb, delta, dis_buf );
15124 if (epartIsReg(modrm)) goto decode_failure;
15125 if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)
15126 goto decode_failure;
15127 switch (gregOfRM(modrm)) {
15128 case 0: DIP("sgdt %s\n", dis_buf); break;
15129 case 1: DIP("sidt %s\n", dis_buf); break;
15130 default: vassert(0); /*NOTREACHED*/
15133 IRDirty* d = unsafeIRDirty_0_N (
15135 "x86g_dirtyhelper_SxDT",
15136 &x86g_dirtyhelper_SxDT,
15137 mkIRExprVec_2( mkexpr(addr),
15138 mkU32(gregOfRM(modrm)) )
15140 /* declare we're writing memory */
15141 d->mFx = Ifx_Write;
15142 d->mAddr = mkexpr(addr);
15144 stmt( IRStmt_Dirty(d) );
15148 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
15151 goto decode_failure;
15152 } /* switch (opc) for the 2-byte opcodes */
15153 goto decode_success;
15154 } /* case 0x0F: of primary opcode */
15156 /* ------------------------ ??? ------------------------ */
15160 /* All decode failures end up here. */
15161 vex_printf("vex x86->IR: unhandled instruction bytes: "
15162 "0x%x 0x%x 0x%x 0x%x\n",
15163 (Int)getIByte(delta_start+0),
15164 (Int)getIByte(delta_start+1),
15165 (Int)getIByte(delta_start+2),
15166 (Int)getIByte(delta_start+3) );
15168 /* Tell the dispatcher that this insn cannot be decoded, and so has
15169 not been executed, and (is currently) the next to be executed.
15170 EIP should be up-to-date since it made so at the start of each
15171 insn, but nevertheless be paranoid and update it again right
15173 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
15174 jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
15175 dres.whatNext = Dis_StopHere;
15177 /* We also need to say that a CAS is not expected now, regardless
15178 of what it might have been set to at the start of the function,
15179 since the IR that we've emitted just above (to synthesis a
15180 SIGILL) does not involve any CAS, and presumably no other IR has
15181 been emitted for this (non-decoded) insn. */
15182 *expect_CAS = False;
15185 } /* switch (opc) for the main (primary) opcode switch. */
15188 /* All decode successes end up here. */
15190 dres.len = delta - delta_start;
15198 /*------------------------------------------------------------*/
15199 /*--- Top-level fn ---*/
15200 /*------------------------------------------------------------*/
15202 /* Disassemble a single instruction into IR. The instruction
15203 is located in host memory at &guest_code[delta]. */
15205 DisResult disInstr_X86 ( IRSB* irsb_IN,
15207 Bool (*resteerOkFn) ( void*, Addr64 ),
15209 void* callback_opaque,
15210 UChar* guest_code_IN,
15213 VexArch guest_arch,
15214 VexArchInfo* archinfo,
15215 VexAbiInfo* abiinfo,
15216 Bool host_bigendian_IN )
15219 Bool expect_CAS, has_CAS;
15222 /* Set globals (see top of this file) */
15223 vassert(guest_arch == VexArchX86);
15224 guest_code = guest_code_IN;
15226 host_is_bigendian = host_bigendian_IN;
15227 guest_EIP_curr_instr = (Addr32)guest_IP;
15228 guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta);
15230 x1 = irsb_IN->stmts_used;
15231 expect_CAS = False;
15232 dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
15235 delta, archinfo, abiinfo );
15236 x2 = irsb_IN->stmts_used;
15239 /* See comment at the top of disInstr_X86_WRK for meaning of
15240 expect_CAS. Here, we (sanity-)check for the presence/absence of
15241 IRCAS as directed by the returned expect_CAS value. */
15243 for (i = x1; i < x2; i++) {
15244 if (irsb_IN->stmts[i]->tag == Ist_CAS)
15248 if (expect_CAS != has_CAS) {
15249 /* inconsistency detected. re-disassemble the instruction so as
15250 to generate a useful error message; then assert. */
15251 vex_traceflags |= VEX_TRACE_FE;
15252 dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
15255 delta, archinfo, abiinfo );
15256 for (i = x1; i < x2; i++) {
15257 vex_printf("\t\t");
15258 ppIRStmt(irsb_IN->stmts[i]);
15261 /* Failure of this assertion is serious and denotes a bug in
15263 vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
15270 /*--------------------------------------------------------------------*/
15271 /*--- end guest_x86_toIR.c ---*/
15272 /*--------------------------------------------------------------------*/