2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_amd64_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 /* Translates AMD64 code to IR. */
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 64-bit value is being written.
45 * all arithmetic done at 64 bits
47 * no FP exceptions, except for handling stack over/underflow
49 * FP rounding mode observed only for float->int conversions and
50 int->float conversions which could lose accuracy, and for
51 float-to-float rounding. For all other operations,
52 round-to-nearest is used, regardless.
54 * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
55 simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
58 * some of the FCOM cases could do with testing -- not convinced
59 that the args are the right way round.
61 * FSAVE does not re-initialise the FPU; it should do
63 * FINIT not only initialises the FPU environment, it also zeroes
64 all the FP registers. It should leave the registers unchanged.
66 RDTSC returns zero, always.
68 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
69 per Intel docs this bit has no meaning anyway. Since PUSHF is the
70 only way to observe eflags[1], a proper fix would be to make that
73 This module uses global variables and so is not MT-safe (if that
74 should ever become relevant).
77 /* Notes re address size overrides (0x67).
79 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
80 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
81 and System Instructions"), Section 1.2.3 ("Address-Size Override
84 0x67 applies to all explicit memory references, causing the top
85 32 bits of the effective address to become zero.
87 0x67 has no effect on stack references (push/pop); these always
90 0x67 changes the interpretation of instructions which implicitly
91 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
106 /* "Special" instructions.
108 This instruction decoder can decode three special instructions
109 which mean nothing natively (are no-ops as far as regs/mem are
110 concerned) but have meaning for supporting Valgrind. A special
111 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
112 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
113 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
114 Following that, one of the following 3 are allowed (standard
115 interpretation in parentheses):
117 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
118 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
119 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
121 Any other bytes following the 16-byte preamble are illegal and
122 constitute a failure in instruction decoding. This all assumes
123 that the preamble will never occur except in specific code
124 fragments designed for Valgrind to catch.
126 No prefixes may precede a "Special" instruction.
129 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
130 insns: the side-exit back to the start of the insn is done with
131 Ijk_Boring. This is quite wrong, it should be done with
132 Ijk_NoRedir, since otherwise the side exit, which is intended to
133 restart the instruction for whatever reason, could go somewhere
134 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
135 no-redir jumps performance critical, at least for rep-prefixed
136 instructions, since all iterations thereof would involve such a
137 jump. It's not such a big deal with casLE since the side exit is
138 only taken if the CAS fails, that is, the location is contended,
139 which is relatively unlikely.
141 Note also, the test for CAS success vs failure is done using
142 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
143 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
144 shouldn't definedness-check these comparisons. See
145 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
146 background/rationale.
149 /* LOCK prefixed instructions. These are translated using IR-level
150 CAS statements (IRCAS) and are believed to preserve atomicity, even
151 from the point of view of some other process racing against a
152 simulated one (presumably they communicate via a shared memory
155 Handlers which are aware of LOCK prefixes are:
156 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
157 dis_cmpxchg_G_E (cmpxchg)
158 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
162 dis_Grp8_Imm (bts, btc, btr)
163 dis_bt_G_E (bts, btc, btr)
168 #include "libvex_basictypes.h"
169 #include "libvex_ir.h"
171 #include "libvex_guest_amd64.h"
173 #include "main_util.h"
174 #include "main_globals.h"
175 #include "guest_generic_bb_to_IR.h"
176 #include "guest_generic_x87.h"
177 #include "guest_amd64_defs.h"
180 /*------------------------------------------------------------*/
182 /*------------------------------------------------------------*/
184 /* These are set at the start of the translation of an insn, right
185 down in disInstr_AMD64, so that we don't have to pass them around
186 endlessly. They are all constant during the translation of any
189 /* These are set at the start of the translation of a BB, so
190 that we don't have to pass them around endlessly. */
192 /* We need to know this to do sub-register accesses correctly. */
193 static Bool host_is_bigendian;
195 /* Pointer to the guest code area (points to start of BB, not to the
196 insn being processed). */
197 static UChar* guest_code;
199 /* The guest address corresponding to guest_code[0]. */
200 static Addr64 guest_RIP_bbstart;
202 /* The guest address for the instruction currently being
204 static Addr64 guest_RIP_curr_instr;
206 /* The IRSB* into which we're generating code. */
209 /* For ensuring that %rip-relative addressing is done right. A read
210 of %rip generates the address of the next instruction. It may be
211 that we don't conveniently know that inside disAMode(). For sanity
212 checking, if the next insn %rip is needed, we make a guess at what
213 it is, record that guess here, and set the accompanying Bool to
214 indicate that -- after this insn's decode is finished -- that guess
215 needs to be checked. */
217 /* At the start of each insn decode, is set to (0, False).
218 After the decode, if _mustcheck is now True, _assumed is
221 static Addr64 guest_RIP_next_assumed;
222 static Bool guest_RIP_next_mustcheck;
225 /*------------------------------------------------------------*/
226 /*--- Helpers for constructing IR. ---*/
227 /*------------------------------------------------------------*/
229 /* Generate a new temporary of the given type. */
230 static IRTemp newTemp ( IRType ty )
232 vassert(isPlausibleIRType(ty));
233 return newIRTemp( irsb->tyenv, ty );
236 /* Add a statement to the list held by "irsb". */
237 static void stmt ( IRStmt* st )
239 addStmtToIRSB( irsb, st );
242 /* Generate a statement "dst := e". */
243 static void assign ( IRTemp dst, IRExpr* e )
245 stmt( IRStmt_WrTmp(dst, e) );
248 static IRExpr* unop ( IROp op, IRExpr* a )
250 return IRExpr_Unop(op, a);
253 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
255 return IRExpr_Binop(op, a1, a2);
258 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
260 return IRExpr_Triop(op, a1, a2, a3);
263 static IRExpr* mkexpr ( IRTemp tmp )
265 return IRExpr_RdTmp(tmp);
268 static IRExpr* mkU8 ( ULong i )
271 return IRExpr_Const(IRConst_U8( (UChar)i ));
274 static IRExpr* mkU16 ( ULong i )
276 vassert(i < 0x10000ULL);
277 return IRExpr_Const(IRConst_U16( (UShort)i ));
280 static IRExpr* mkU32 ( ULong i )
282 vassert(i < 0x100000000ULL);
283 return IRExpr_Const(IRConst_U32( (UInt)i ));
286 static IRExpr* mkU64 ( ULong i )
288 return IRExpr_Const(IRConst_U64(i));
291 static IRExpr* mkU ( IRType ty, ULong i )
294 case Ity_I8: return mkU8(i);
295 case Ity_I16: return mkU16(i);
296 case Ity_I32: return mkU32(i);
297 case Ity_I64: return mkU64(i);
298 default: vpanic("mkU(amd64)");
302 static void storeLE ( IRExpr* addr, IRExpr* data )
304 stmt( IRStmt_Store(Iend_LE, addr, data) );
307 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
309 return IRExpr_Load(Iend_LE, ty, addr);
312 static IROp mkSizedOp ( IRType ty, IROp op8 )
314 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
316 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
317 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
318 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
319 || op8 == Iop_CasCmpNE8
320 || op8 == Iop_Not8 );
322 case Ity_I8: return 0 +op8;
323 case Ity_I16: return 1 +op8;
324 case Ity_I32: return 2 +op8;
325 case Ity_I64: return 3 +op8;
326 default: vpanic("mkSizedOp(amd64)");
331 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
333 if (szSmall == 1 && szBig == 4) {
334 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
336 if (szSmall == 1 && szBig == 2) {
337 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
339 if (szSmall == 2 && szBig == 4) {
340 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
342 if (szSmall == 1 && szBig == 8 && !signd) {
343 return unop(Iop_8Uto64, src);
345 if (szSmall == 1 && szBig == 8 && signd) {
346 return unop(Iop_8Sto64, src);
348 if (szSmall == 2 && szBig == 8 && !signd) {
349 return unop(Iop_16Uto64, src);
351 if (szSmall == 2 && szBig == 8 && signd) {
352 return unop(Iop_16Sto64, src);
354 vpanic("doScalarWidening(amd64)");
359 /*------------------------------------------------------------*/
360 /*--- Debugging output ---*/
361 /*------------------------------------------------------------*/
363 /* Bomb out if we can't handle something. */
364 __attribute__ ((noreturn))
365 static void unimplemented ( HChar* str )
367 vex_printf("amd64toIR: unimplemented feature\n");
/* Trace the disassembly of the current instruction when front-end
   tracing (VEX_TRACE_FE) is enabled.  NB: deliberately a bare `if`
   with no do{}while(0) wrapper, per existing VEX style -- use only as
   a full statement. */
#define DIP(format, args...) \
   if (vex_traceflags & VEX_TRACE_FE) \
      vex_printf(format, ## args)

/* As DIP, but formats into `buf` instead of printing. */
#define DIS(buf, format, args...) \
   if (vex_traceflags & VEX_TRACE_FE) \
      vex_sprintf(buf, format, ## args)
/*------------------------------------------------------------*/
/*--- Offsets of various parts of the amd64 guest state.  ---*/
/*------------------------------------------------------------*/

/* Byte offsets into VexGuestAMD64State, used for IRStmt_Put /
   IRExpr_Get of guest registers. */

#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

/* Segment-base pseudo-registers. */
#define OFFB_FS_ZERO   offsetof(VexGuestAMD64State,guest_FS_ZERO)
#define OFFB_GS_0x60   offsetof(VexGuestAMD64State,guest_GS_0x60)

/* Lazy condition-code thunk. */
#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

/* x87 FPU state. */
#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

//.. #define OFFB_CS offsetof(VexGuestX86State,guest_CS)
//.. #define OFFB_DS offsetof(VexGuestX86State,guest_DS)
//.. #define OFFB_ES offsetof(VexGuestX86State,guest_ES)
//.. #define OFFB_FS offsetof(VexGuestX86State,guest_FS)
//.. #define OFFB_GS offsetof(VexGuestX86State,guest_GS)
//.. #define OFFB_SS offsetof(VexGuestX86State,guest_SS)
//.. #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT)
//.. #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT)

/* SSE state. */
#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestAMD64State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestAMD64State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestAMD64State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestAMD64State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestAMD64State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestAMD64State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestAMD64State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestAMD64State,guest_XMM7)
#define OFFB_XMM8      offsetof(VexGuestAMD64State,guest_XMM8)
#define OFFB_XMM9      offsetof(VexGuestAMD64State,guest_XMM9)
#define OFFB_XMM10     offsetof(VexGuestAMD64State,guest_XMM10)
#define OFFB_XMM11     offsetof(VexGuestAMD64State,guest_XMM11)
#define OFFB_XMM12     offsetof(VexGuestAMD64State,guest_XMM12)
#define OFFB_XMM13     offsetof(VexGuestAMD64State,guest_XMM13)
#define OFFB_XMM14     offsetof(VexGuestAMD64State,guest_XMM14)
#define OFFB_XMM15     offsetof(VexGuestAMD64State,guest_XMM15)

/* Emulation-warning / translation-invalidation / no-redir support. */
#define OFFB_EMWARN    offsetof(VexGuestAMD64State,guest_EMWARN)
#define OFFB_TISTART   offsetof(VexGuestAMD64State,guest_TISTART)
#define OFFB_TILEN     offsetof(VexGuestAMD64State,guest_TILEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)
453 /*------------------------------------------------------------*/
454 /*--- Helper bits and pieces for deconstructing the ---*/
455 /*--- amd64 insn stream. ---*/
456 /*------------------------------------------------------------*/
458 /* This is the AMD64 register encoding -- integer regs. */
476 //.. #define R_AL (0+R_EAX)
477 //.. #define R_AH (4+R_EAX)
479 /* This is the Intel register encoding -- segment regs. */
488 /* Various simple conversions */
490 static ULong extend_s_8to64 ( UChar x )
492 return (ULong)((((Long)x) << 56) >> 56);
495 static ULong extend_s_16to64 ( UShort x )
497 return (ULong)((((Long)x) << 48) >> 48);
500 static ULong extend_s_32to64 ( UInt x )
502 return (ULong)((((Long)x) << 32) >> 32);
505 /* Figure out whether the mod and rm parts of a modRM byte refer to a
506 register or memory. If so, the byte will have the form 11XXXYYY,
507 where YYY is the register number. */
509 static Bool epartIsReg ( UChar mod_reg_rm )
511 return toBool(0xC0 == (mod_reg_rm & 0xC0));
514 /* Extract the 'g' field from a modRM byte. This only produces 3
515 bits, which is not a complete register number. You should avoid
516 this function if at all possible. */
518 static Int gregLO3ofRM ( UChar mod_reg_rm )
520 return (Int)( (mod_reg_rm >> 3) & 7 );
523 /* Ditto the 'e' field of a modRM byte. */
525 static Int eregLO3ofRM ( UChar mod_reg_rm )
527 return (Int)(mod_reg_rm & 0x7);
530 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
532 static UChar getUChar ( Long delta )
534 UChar v = guest_code[delta+0];
538 static UInt getUDisp16 ( Long delta )
540 UInt v = guest_code[delta+1]; v <<= 8;
541 v |= guest_code[delta+0];
545 //.. static UInt getUDisp ( Int size, Long delta )
548 //.. case 4: return getUDisp32(delta);
549 //.. case 2: return getUDisp16(delta);
550 //.. case 1: return getUChar(delta);
551 //.. default: vpanic("getUDisp(x86)");
553 //.. return 0; /*notreached*/
557 /* Get a byte value out of the insn stream and sign-extend to 64
559 static Long getSDisp8 ( Long delta )
561 return extend_s_8to64( guest_code[delta] );
564 /* Get a 16-bit value out of the insn stream and sign-extend to 64
566 static Long getSDisp16 ( Long delta )
568 UInt v = guest_code[delta+1]; v <<= 8;
569 v |= guest_code[delta+0];
570 return extend_s_16to64( (UShort)v );
573 /* Get a 32-bit value out of the insn stream and sign-extend to 64
575 static Long getSDisp32 ( Long delta )
577 UInt v = guest_code[delta+3]; v <<= 8;
578 v |= guest_code[delta+2]; v <<= 8;
579 v |= guest_code[delta+1]; v <<= 8;
580 v |= guest_code[delta+0];
581 return extend_s_32to64( v );
584 /* Get a 64-bit value out of the insn stream. */
585 static Long getDisp64 ( Long delta )
588 v |= guest_code[delta+7]; v <<= 8;
589 v |= guest_code[delta+6]; v <<= 8;
590 v |= guest_code[delta+5]; v <<= 8;
591 v |= guest_code[delta+4]; v <<= 8;
592 v |= guest_code[delta+3]; v <<= 8;
593 v |= guest_code[delta+2]; v <<= 8;
594 v |= guest_code[delta+1]; v <<= 8;
595 v |= guest_code[delta+0];
599 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
600 if this is called with size==8. Should not happen. */
601 static Long getSDisp ( Int size, Long delta )
604 case 4: return getSDisp32(delta);
605 case 2: return getSDisp16(delta);
606 case 1: return getSDisp8(delta);
607 default: vpanic("getSDisp(amd64)");
611 static ULong mkSizeMask ( Int sz )
614 case 1: return 0x00000000000000FFULL;
615 case 2: return 0x000000000000FFFFULL;
616 case 4: return 0x00000000FFFFFFFFULL;
617 case 8: return 0xFFFFFFFFFFFFFFFFULL;
618 default: vpanic("mkSzMask(amd64)");
622 static Int imin ( Int a, Int b )
624 return (a < b) ? a : b;
627 static IRType szToITy ( Int n )
630 case 1: return Ity_I8;
631 case 2: return Ity_I16;
632 case 4: return Ity_I32;
633 case 8: return Ity_I64;
634 default: vex_printf("\nszToITy(%d)\n", n);
635 vpanic("szToITy(amd64)");
640 /*------------------------------------------------------------*/
641 /*--- For dealing with prefixes. ---*/
642 /*------------------------------------------------------------*/
644 /* The idea is to pass around an int holding a bitmask summarising
645 info from the prefixes seen on the current instruction, including
646 info from the REX byte. This info is used in various places, but
647 most especially when making sense of register fields in
650 The top 16 bits of the prefix are 0x3141, just as a hacky way
651 to ensure it really is a valid prefix.
653 Things you can safely assume about a well-formed prefix:
654 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
655 * if REX is not present then REXW,REXR,REXX,REXB will read
657 * F2 and F3 will not both be 1.
/* One bit per prefix/REX-field of interest; OR-ed into a Prefix
   value whose top 16 bits must be the PFX_EMPTY magic. */
#define PFX_ASO   (1<<0)    /* address-size override present (0x67) */
#define PFX_66    (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX   (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW  (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR  (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX  (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB  (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK  (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2    (1<<8)    /* REP/REPE/REPZ prefix present (0xF2) */
#define PFX_F3    (1<<9)    /* REPNE/REPNZ prefix present (0xF3) */
#define PFX_CS    (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS    (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES    (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS    (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS    (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS    (1<<15)   /* SS segment prefix present (0x36) */

/* Magic value in the top 16 bits marking a valid Prefix. */
#define PFX_EMPTY 0x31410000
681 static Bool IS_VALID_PFX ( Prefix pfx ) {
682 return toBool((pfx & 0xFFFF0000) == PFX_EMPTY);
685 static Bool haveREX ( Prefix pfx ) {
686 return toBool(pfx & PFX_REX);
689 static Int getRexW ( Prefix pfx ) {
690 return (pfx & PFX_REXW) ? 1 : 0;
692 /* Apparently unused.
693 static Int getRexR ( Prefix pfx ) {
694 return (pfx & PFX_REXR) ? 1 : 0;
697 static Int getRexX ( Prefix pfx ) {
698 return (pfx & PFX_REXX) ? 1 : 0;
700 static Int getRexB ( Prefix pfx ) {
701 return (pfx & PFX_REXB) ? 1 : 0;
704 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
705 completely changes what instruction it really is. */
706 static Bool haveF2orF3 ( Prefix pfx ) {
707 return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
709 static Bool haveF2 ( Prefix pfx ) {
710 return toBool((pfx & PFX_F2) > 0);
712 static Bool haveF3 ( Prefix pfx ) {
713 return toBool((pfx & PFX_F3) > 0);
716 static Bool have66 ( Prefix pfx ) {
717 return toBool((pfx & PFX_66) > 0);
719 static Bool haveASO ( Prefix pfx ) {
720 return toBool((pfx & PFX_ASO) > 0);
723 /* Return True iff pfx has 66 set and F2 and F3 clear */
724 static Bool have66noF2noF3 ( Prefix pfx )
727 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
730 /* Return True iff pfx has F2 set and 66 and F3 clear */
731 static Bool haveF2no66noF3 ( Prefix pfx )
734 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
737 /* Return True iff pfx has F3 set and 66 and F2 clear */
738 static Bool haveF3no66noF2 ( Prefix pfx )
741 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
744 /* Return True iff pfx has 66, F2 and F3 clear */
745 static Bool haveNo66noF2noF3 ( Prefix pfx )
748 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
751 /* Return True iff pfx has any of 66, F2 and F3 set */
752 static Bool have66orF2orF3 ( Prefix pfx )
754 return toBool( ! haveNo66noF2noF3(pfx) );
757 /* Return True iff pfx has 66 or F2 set */
758 static Bool have66orF2 ( Prefix pfx )
760 return toBool((pfx & (PFX_66|PFX_F2)) > 0);
763 /* Clear all the segment-override bits in a prefix. */
764 static Prefix clearSegBits ( Prefix p )
767 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
771 /*------------------------------------------------------------*/
772 /*--- For dealing with integer registers ---*/
773 /*------------------------------------------------------------*/
775 /* This is somewhat complex. The rules are:
777 For 64, 32 and 16 bit register references, the e or g fields in the
778 modrm bytes supply the low 3 bits of the register number. The
779 fourth (most-significant) bit of the register number is supplied by
780 the REX byte, if it is present; else that bit is taken to be zero.
782 The REX.R bit supplies the high bit corresponding to the g register
783 field, and the REX.B bit supplies the high bit corresponding to the
784 e register field (when the mod part of modrm indicates that modrm's
785 e component refers to a register and not to memory).
787 The REX.X bit supplies a high register bit for certain registers
788 in SIB address modes, and is generally rarely used.
790 For 8 bit register references, the presence of the REX byte itself
791 has significance. If there is no REX present, then the 3-bit
792 number extracted from the modrm e or g field is treated as an index
793 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
794 old x86 encoding scheme.
796 But if there is a REX present, the register reference is
797 interpreted in the same way as for 64/32/16-bit references: a high
798 bit is extracted from REX, giving a 4-bit number, and the denoted
799 register is the lowest 8 bits of the 16 integer registers denoted
800 by the number. In particular, values 3 through 7 of this sequence
801 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
804 The REX.W bit has no bearing at all on register numbers. Instead
805 its presence indicates that the operand size is to be overridden
806 from its default value (32 bits) to 64 bits instead. This is in
807 the same fashion that an 0x66 prefix indicates the operand size is
808 to be overridden from 32 bits down to 16 bits. When both REX.W and
809 0x66 are present there is a conflict, and REX.W takes precedence.
811 Rather than try to handle this complexity using a single huge
812 function, several smaller ones are provided. The aim is to make it
813 as difficult as possible to screw up register decoding in a subtle
814 and hard-to-track-down way.
816 Because these routines fish around in the host's memory (that is,
817 in the guest state area) for sub-parts of guest registers, their
818 correctness depends on the host's endianness. So far these
819 routines only work for little-endian hosts. Those for which
820 endianness is important have assertions to ensure sanity.
824 /* About the simplest question you can ask: where do the 64-bit
825 integer registers live (in the guest state) ? */
827 static Int integerGuestReg64Offset ( UInt reg )
830 case R_RAX: return OFFB_RAX;
831 case R_RCX: return OFFB_RCX;
832 case R_RDX: return OFFB_RDX;
833 case R_RBX: return OFFB_RBX;
834 case R_RSP: return OFFB_RSP;
835 case R_RBP: return OFFB_RBP;
836 case R_RSI: return OFFB_RSI;
837 case R_RDI: return OFFB_RDI;
838 case R_R8: return OFFB_R8;
839 case R_R9: return OFFB_R9;
840 case R_R10: return OFFB_R10;
841 case R_R11: return OFFB_R11;
842 case R_R12: return OFFB_R12;
843 case R_R13: return OFFB_R13;
844 case R_R14: return OFFB_R14;
845 case R_R15: return OFFB_R15;
846 default: vpanic("integerGuestReg64Offset(amd64)");
/* Produce the name of an integer register, for printing purposes.
   reg is a number in the range 0 .. 15 that has been generated from a
   3-bit reg-field number and a REX extension bit.  irregular denotes
   the case where sz==1 and no REX byte is present. */
HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
   static HChar* ireg64_names[16]
   = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
   "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static HChar* ireg32_names[16]
   = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
   "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static HChar* ireg16_names[16]
   = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
   "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static HChar* ireg8_names[16]
   = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
   "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   /* Names used when sz==1 and no REX byte was seen: the legacy
      %ah/%ch/%dh/%bh encodings for numbers 4..7. */
   static HChar* ireg8_irregular[8]
   = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };
   /* NOTE(review): the sanity checks on `reg`/`sz` that normally
      precede this assertion appear to be elided in this chunk. */
   vassert(irregular == False);
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
      return ireg8_irregular[reg];
      return ireg8_names[reg];
      default: vpanic("nameIReg(amd64)");
/* Using the same argument conventions as nameIReg, produce the
   guest state offset of an integer register. */
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
   /* NOTE(review): the size/reg-range assertions that normally
      precede this line appear to be elided in this chunk. */
   vassert(irregular == False);
   /* Deal with irregular case -- sz==1 and no REX present */
   if (sz == 1 && irregular) {
      /* The legacy high-byte registers live one byte above the base
         of their 64-bit parent (little-endian host assumed). */
      case R_RSP: return 1+ OFFB_RAX;
      case R_RBP: return 1+ OFFB_RCX;
      case R_RSI: return 1+ OFFB_RDX;
      case R_RDI: return 1+ OFFB_RBX;
      default: break; /* use the normal case */
   /* Normal case: low bytes of the 64-bit register, so the 64-bit
      offset serves for all widths on a little-endian host. */
   return integerGuestReg64Offset(reg);
925 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
927 static IRExpr* getIRegCL ( void )
929 vassert(!host_is_bigendian);
930 return IRExpr_Get( OFFB_RCX, Ity_I8 );
934 /* Write to the %AH register. */
936 static void putIRegAH ( IRExpr* e )
938 vassert(!host_is_bigendian);
939 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
940 stmt( IRStmt_Put( OFFB_RAX+1, e ) );
944 /* Read/write various widths of %RAX, as it has various
945 special-purpose uses. */
947 static HChar* nameIRegRAX ( Int sz )
950 case 1: return "%al";
951 case 2: return "%ax";
952 case 4: return "%eax";
953 case 8: return "%rax";
954 default: vpanic("nameIRegRAX(amd64)");
958 static IRExpr* getIRegRAX ( Int sz )
960 vassert(!host_is_bigendian);
962 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
963 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
964 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
965 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
966 default: vpanic("getIRegRAX(amd64)");
970 static void putIRegRAX ( Int sz, IRExpr* e )
972 IRType ty = typeOfIRExpr(irsb->tyenv, e);
973 vassert(!host_is_bigendian);
975 case 8: vassert(ty == Ity_I64);
976 stmt( IRStmt_Put( OFFB_RAX, e ));
978 case 4: vassert(ty == Ity_I32);
979 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
981 case 2: vassert(ty == Ity_I16);
982 stmt( IRStmt_Put( OFFB_RAX, e ));
984 case 1: vassert(ty == Ity_I8);
985 stmt( IRStmt_Put( OFFB_RAX, e ));
987 default: vpanic("putIRegRAX(amd64)");
992 /* Read/write various widths of %RDX, as it has various
993 special-purpose uses. */
995 static HChar* nameIRegRDX ( Int sz )
998 case 1: return "%dl";
999 case 2: return "%dx";
1000 case 4: return "%edx";
1001 case 8: return "%rdx";
1002 default: vpanic("nameIRegRDX(amd64)");
1006 static IRExpr* getIRegRDX ( Int sz )
1008 vassert(!host_is_bigendian);
1010 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
1011 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
1012 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1013 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
1014 default: vpanic("getIRegRDX(amd64)");
1018 static void putIRegRDX ( Int sz, IRExpr* e )
1020 vassert(!host_is_bigendian);
1021 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1023 case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
1025 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
1027 case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
1029 case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
1031 default: vpanic("putIRegRDX(amd64)");
1036 /* Simplistic functions to deal with the integer registers as a
1037 straightforward bank of 16 64-bit regs. */
1039 static IRExpr* getIReg64 ( UInt regno )
1041 return IRExpr_Get( integerGuestReg64Offset(regno),
1045 static void putIReg64 ( UInt regno, IRExpr* e )
1047 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1048 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
1051 static HChar* nameIReg64 ( UInt regno )
1053 return nameIReg( 8, regno, False );
1057 /* Simplistic functions to deal with the lower halves of integer
1058 registers as a straightforward bank of 16 32-bit regs. */
1060 static IRExpr* getIReg32 ( UInt regno )
1062 vassert(!host_is_bigendian);
1063 return unop(Iop_64to32,
1064 IRExpr_Get( integerGuestReg64Offset(regno),
1068 static void putIReg32 ( UInt regno, IRExpr* e )
1070 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1071 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1072 unop(Iop_32Uto64,e) ) );
1075 static HChar* nameIReg32 ( UInt regno )
1077 return nameIReg( 4, regno, False );
1081 /* Simplistic functions to deal with the lower quarters of integer
1082 registers as a straightforward bank of 16 16-bit regs. */
1084 static IRExpr* getIReg16 ( UInt regno )
1086 vassert(!host_is_bigendian);
1087 return IRExpr_Get( integerGuestReg64Offset(regno),
1091 static HChar* nameIReg16 ( UInt regno )
1093 return nameIReg( 2, regno, False );
1097 /* Sometimes what we know is a 3-bit register number, a REX byte, and
1098 which field of the REX byte is to be used to extend to a 4-bit
1099 number. These functions cater for that situation.
1101 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
1103 vassert(lo3bits < 8);
1104 vassert(IS_VALID_PFX(pfx));
1105 return getIReg64( lo3bits | (getRexX(pfx) << 3) );
1108 static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
1110 vassert(lo3bits < 8);
1111 vassert(IS_VALID_PFX(pfx));
1112 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
1115 static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1117 vassert(lo3bits < 8);
1118 vassert(IS_VALID_PFX(pfx));
1119 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1120 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
1121 toBool(sz==1 && !haveREX(pfx)) );
1124 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1126 vassert(lo3bits < 8);
1127 vassert(IS_VALID_PFX(pfx));
1128 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1131 return unop(Iop_64to32,
1133 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1134 toBool(sz==1 && !haveREX(pfx)) ),
1140 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1141 toBool(sz==1 && !haveREX(pfx)) ),
1147 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
1149 vassert(lo3bits < 8);
1150 vassert(IS_VALID_PFX(pfx));
1151 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1152 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1154 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1155 toBool(sz==1 && !haveREX(pfx)) ),
1156 sz==4 ? unop(Iop_32Uto64,e) : e
1161 /* Functions for getting register numbers from modrm bytes and REX
1162 when we don't have to consider the complexities of integer subreg
1165 /* Extract the g reg field from a modRM byte, and augment it using the
1166 REX.R bit from the supplied REX byte. The R bit usually is
1167 associated with the g register field.
1169 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1171 Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
1172 reg += (pfx & PFX_REXR) ? 8 : 0;
1176 /* Extract the e reg field from a modRM byte, and augment it using the
1177 REX.B bit from the supplied REX byte. The B bit usually is
1178 associated with the e register field (when modrm indicates e is a
1181 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1184 vassert(epartIsReg(mod_reg_rm));
1185 rm = (Int)(mod_reg_rm & 0x7);
1186 rm += (pfx & PFX_REXB) ? 8 : 0;
1191 /* General functions for dealing with integer register access. */
1193 /* Produce the guest state offset for a reference to the 'g' register
1194 field in a modrm byte, taking into account REX (or its absence),
1195 and the size of the access.
1197 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1200 vassert(!host_is_bigendian);
1201 vassert(IS_VALID_PFX(pfx));
1202 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1203 reg = gregOfRexRM( pfx, mod_reg_rm );
1204 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1208 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1212 return unop(Iop_64to32,
1213 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1216 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1222 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1224 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1226 e = unop(Iop_32Uto64,e);
1228 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
1232 HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1234 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
1235 toBool(sz==1 && !haveREX(pfx)) );
1239 /* Produce the guest state offset for a reference to the 'e' register
1240 field in a modrm byte, taking into account REX (or its absence),
1241 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1242 denotes a memory access rather than a register access.
1244 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1247 vassert(!host_is_bigendian);
1248 vassert(IS_VALID_PFX(pfx));
1249 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1250 reg = eregOfRexRM( pfx, mod_reg_rm );
1251 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1255 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1259 return unop(Iop_64to32,
1260 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1263 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1269 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1271 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1273 e = unop(Iop_32Uto64,e);
1275 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
1279 HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1281 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
1282 toBool(sz==1 && !haveREX(pfx)) );
1286 /*------------------------------------------------------------*/
1287 /*--- For dealing with XMM registers ---*/
1288 /*------------------------------------------------------------*/
1290 //.. static Int segmentGuestRegOffset ( UInt sreg )
1292 //.. switch (sreg) {
1293 //.. case R_ES: return OFFB_ES;
1294 //.. case R_CS: return OFFB_CS;
1295 //.. case R_SS: return OFFB_SS;
1296 //.. case R_DS: return OFFB_DS;
1297 //.. case R_FS: return OFFB_FS;
1298 //.. case R_GS: return OFFB_GS;
1299 //.. default: vpanic("segmentGuestRegOffset(x86)");
1303 static Int xmmGuestRegOffset ( UInt xmmreg )
1306 case 0: return OFFB_XMM0;
1307 case 1: return OFFB_XMM1;
1308 case 2: return OFFB_XMM2;
1309 case 3: return OFFB_XMM3;
1310 case 4: return OFFB_XMM4;
1311 case 5: return OFFB_XMM5;
1312 case 6: return OFFB_XMM6;
1313 case 7: return OFFB_XMM7;
1314 case 8: return OFFB_XMM8;
1315 case 9: return OFFB_XMM9;
1316 case 10: return OFFB_XMM10;
1317 case 11: return OFFB_XMM11;
1318 case 12: return OFFB_XMM12;
1319 case 13: return OFFB_XMM13;
1320 case 14: return OFFB_XMM14;
1321 case 15: return OFFB_XMM15;
1322 default: vpanic("xmmGuestRegOffset(amd64)");
1326 /* Lanes of vector registers are always numbered from zero being the
1327 least significant lane (rightmost in the register). */
1329 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
1331 /* Correct for little-endian host only. */
1332 vassert(!host_is_bigendian);
1333 vassert(laneno >= 0 && laneno < 8);
1334 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
1337 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
1339 /* Correct for little-endian host only. */
1340 vassert(!host_is_bigendian);
1341 vassert(laneno >= 0 && laneno < 4);
1342 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
1345 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
1347 /* Correct for little-endian host only. */
1348 vassert(!host_is_bigendian);
1349 vassert(laneno >= 0 && laneno < 2);
1350 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
1353 //.. static IRExpr* getSReg ( UInt sreg )
1355 //.. return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
1358 //.. static void putSReg ( UInt sreg, IRExpr* e )
1360 //.. vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1361 //.. stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
1364 static IRExpr* getXMMReg ( UInt xmmreg )
1366 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1369 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1371 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1374 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1376 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1379 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1381 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1384 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1386 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1389 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1391 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1394 static void putXMMReg ( UInt xmmreg, IRExpr* e )
1396 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1397 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1400 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1402 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1403 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1406 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1408 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1409 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1412 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1414 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1415 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1418 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1420 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1421 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1424 static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
1426 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1427 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
1430 static IRExpr* mkV128 ( UShort mask )
1432 return IRExpr_Const(IRConst_V128(mask));
1435 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1437 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1438 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
1439 return unop(Iop_64to1,
1442 unop(Iop_1Uto64,y)));
1445 /* Generate a compare-and-swap operation, operating on memory at
1446 'addr'. The expected value is 'expVal' and the new value is
1447 'newVal'. If the operation fails, then transfer control (with a
1448 no-redir jump (XXX no -- see comment at top of this file)) to
1449 'restart_point', which is presumably the address of the guest
1450 instruction again -- retrying, essentially. */
1451 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1452 Addr64 restart_point )
1455 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1456 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1457 IRTemp oldTmp = newTemp(tyE);
1458 IRTemp expTmp = newTemp(tyE);
1459 vassert(tyE == tyN);
1460 vassert(tyE == Ity_I64 || tyE == Ity_I32
1461 || tyE == Ity_I16 || tyE == Ity_I8);
1462 assign(expTmp, expVal);
1463 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1464 NULL, mkexpr(expTmp), NULL, newVal );
1465 stmt( IRStmt_CAS(cas) );
1467 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1468 mkexpr(oldTmp), mkexpr(expTmp) ),
1469 Ijk_Boring, /*Ijk_NoRedir*/
1470 IRConst_U64( restart_point )
1475 /*------------------------------------------------------------*/
1476 /*--- Helpers for %rflags. ---*/
1477 /*------------------------------------------------------------*/
1479 /* -------------- Evaluating the flags-thunk. -------------- */
1481 /* Build IR to calculate all the eflags from stored
1482 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1484 static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1487 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1488 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1489 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1490 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1495 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1498 /* Exclude OP and NDEP from definedness checking. We're only
1499 interested in DEP1 and DEP2. */
1500 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1504 /* Build IR to calculate some particular condition from stored
1505 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1507 static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1510 = mkIRExprVec_5( mkU64(cond),
1511 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1512 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1513 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1514 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1519 "amd64g_calculate_condition", &amd64g_calculate_condition,
1522 /* Exclude the requested condition, OP and NDEP from definedness
1523 checking. We're only interested in DEP1 and DEP2. */
1524 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
1525 return unop(Iop_64to1, call);
1528 /* Build IR to calculate just the carry flag from stored
1529 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1530 static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1533 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1534 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1535 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1536 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1541 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1544 /* Exclude OP and NDEP from definedness checking. We're only
1545 interested in DEP1 and DEP2. */
1546 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1551 /* -------------- Building the flags-thunk. -------------- */
1553 /* The machinery in this section builds the flag-thunk following a
1554 flag-setting operation. Hence the various setFlags_* functions.
1557 static Bool isAddSub ( IROp op8 )
1559 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
1562 static Bool isLogic ( IROp op8 )
1564 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
1567 /* U-widen 8/16/32/64 bit int expr to 64. */
1568 static IRExpr* widenUto64 ( IRExpr* e )
1570 switch (typeOfIRExpr(irsb->tyenv,e)) {
1571 case Ity_I64: return e;
1572 case Ity_I32: return unop(Iop_32Uto64, e);
1573 case Ity_I16: return unop(Iop_16Uto64, e);
1574 case Ity_I8: return unop(Iop_8Uto64, e);
1575 default: vpanic("widenUto64");
/* S-widen 8/16/32/64 bit int expr to 64. */
1580 static IRExpr* widenSto64 ( IRExpr* e )
1582 switch (typeOfIRExpr(irsb->tyenv,e)) {
1583 case Ity_I64: return e;
1584 case Ity_I32: return unop(Iop_32Sto64, e);
1585 case Ity_I16: return unop(Iop_16Sto64, e);
1586 case Ity_I8: return unop(Iop_8Sto64, e);
1587 default: vpanic("widenSto64");
1591 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1592 of these combinations make sense. */
1593 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1595 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
1596 if (src_ty == dst_ty)
1598 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1599 return unop(Iop_32to16, e);
1600 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1601 return unop(Iop_32to8, e);
1602 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1603 return unop(Iop_64to32, e);
1604 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
1605 return unop(Iop_64to16, e);
1606 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
1607 return unop(Iop_64to8, e);
1609 vex_printf("\nsrc, dst tys are: ");
1614 vpanic("narrowTo(amd64)");
1618 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1619 auto-sized up to the real op. */
1622 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1626 case Ity_I8: ccOp = 0; break;
1627 case Ity_I16: ccOp = 1; break;
1628 case Ity_I32: ccOp = 2; break;
1629 case Ity_I64: ccOp = 3; break;
1630 default: vassert(0);
1633 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1634 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1635 default: ppIROp(op8);
1636 vpanic("setFlags_DEP1_DEP2(amd64)");
1638 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1639 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1640 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
1644 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
1647 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1651 case Ity_I8: ccOp = 0; break;
1652 case Ity_I16: ccOp = 1; break;
1653 case Ity_I32: ccOp = 2; break;
1654 case Ity_I64: ccOp = 3; break;
1655 default: vassert(0);
1660 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1661 default: ppIROp(op8);
1662 vpanic("setFlags_DEP1(amd64)");
1664 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1665 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1666 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1670 /* For shift operations, we put in the result and the undershifted
1671 result. Except if the shift amount is zero, the thunk is left
1674 static void setFlags_DEP1_DEP2_shift ( IROp op64,
1682 case Ity_I8: ccOp = 0; break;
1683 case Ity_I16: ccOp = 1; break;
1684 case Ity_I32: ccOp = 2; break;
1685 case Ity_I64: ccOp = 3; break;
1686 default: vassert(0);
1691 /* Both kinds of right shifts are handled by the same thunk
1695 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1696 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1697 default: ppIROp(op64);
1698 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1701 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1702 stmt( IRStmt_Put( OFFB_CC_OP,
1703 IRExpr_Mux0X( mkexpr(guard),
1704 IRExpr_Get(OFFB_CC_OP,Ity_I64),
1706 stmt( IRStmt_Put( OFFB_CC_DEP1,
1707 IRExpr_Mux0X( mkexpr(guard),
1708 IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
1709 widenUto64(mkexpr(res)))) );
1710 stmt( IRStmt_Put( OFFB_CC_DEP2,
1711 IRExpr_Mux0X( mkexpr(guard),
1712 IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
1713 widenUto64(mkexpr(resUS)))) );
1717 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1718 the former value of the carry flag, which unfortunately we have to
1721 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1723 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1726 case Ity_I8: ccOp += 0; break;
1727 case Ity_I16: ccOp += 1; break;
1728 case Ity_I32: ccOp += 2; break;
1729 case Ity_I64: ccOp += 3; break;
1730 default: vassert(0);
1733 /* This has to come first, because calculating the C flag
1734 may require reading all four thunk fields. */
1735 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1736 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1737 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
1738 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1742 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1746 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
1750 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
1753 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
1756 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
1759 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
1762 vpanic("setFlags_MUL(amd64)");
1764 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
1765 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
1769 /* -------------- Condition codes. -------------- */
1771 /* Condition codes, using the AMD encoding. */
1773 static HChar* name_AMD64Condcode ( AMD64Condcode cond )
1776 case AMD64CondO: return "o";
1777 case AMD64CondNO: return "no";
1778 case AMD64CondB: return "b";
1779 case AMD64CondNB: return "ae"; /*"nb";*/
1780 case AMD64CondZ: return "e"; /*"z";*/
1781 case AMD64CondNZ: return "ne"; /*"nz";*/
1782 case AMD64CondBE: return "be";
1783 case AMD64CondNBE: return "a"; /*"nbe";*/
1784 case AMD64CondS: return "s";
1785 case AMD64CondNS: return "ns";
1786 case AMD64CondP: return "p";
1787 case AMD64CondNP: return "np";
1788 case AMD64CondL: return "l";
1789 case AMD64CondNL: return "ge"; /*"nl";*/
1790 case AMD64CondLE: return "le";
1791 case AMD64CondNLE: return "g"; /*"nle";*/
1792 case AMD64CondAlways: return "ALWAYS";
1793 default: vpanic("name_AMD64Condcode");
1798 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
1799 /*OUT*/Bool* needInvert )
1801 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
1806 *needInvert = False;
1812 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1814 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1817 Optionally, generate a store for the 'tres' value. This can either
1818 be a normal store, or it can be a cas-with-possible-failure style
1821 if taddr is IRTemp_INVALID, then no store is generated.
1823 if taddr is not IRTemp_INVALID, then a store (using taddr as
1824 the address) is generated:
1826 if texpVal is IRTemp_INVALID then a normal store is
1827 generated, and restart_point must be zero (it is irrelevant).
1829 if texpVal is not IRTemp_INVALID then a cas-style store is
1830 generated. texpVal is the expected value, restart_point
1831 is the restart point if the store fails, and texpVal must
1832 have the same type as tres.
1835 static void helper_ADC ( Int sz,
1836 IRTemp tres, IRTemp ta1, IRTemp ta2,
1837 /* info about optional store: */
1838 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1841 IRType ty = szToITy(sz);
1842 IRTemp oldc = newTemp(Ity_I64);
1843 IRTemp oldcn = newTemp(ty);
1844 IROp plus = mkSizedOp(ty, Iop_Add8);
1845 IROp xor = mkSizedOp(ty, Iop_Xor8);
1847 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1850 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
1851 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
1852 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
1853 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
1854 default: vassert(0);
1857 /* oldc = old carry flag, 0 or 1 */
1858 assign( oldc, binop(Iop_And64,
1859 mk_amd64g_calculate_rflags_c(),
1862 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
1864 assign( tres, binop(plus,
1865 binop(plus,mkexpr(ta1),mkexpr(ta2)),
1868 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1869 start of this function. */
1870 if (taddr != IRTemp_INVALID) {
1871 if (texpVal == IRTemp_INVALID) {
1872 vassert(restart_point == 0);
1873 storeLE( mkexpr(taddr), mkexpr(tres) );
1875 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1876 /* .. and hence 'texpVal' has the same type as 'tres'. */
1877 casLE( mkexpr(taddr),
1878 mkexpr(texpVal), mkexpr(tres), restart_point );
1882 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
1883 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
1884 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
1885 mkexpr(oldcn)) )) );
1886 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1890 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
1891 appropriately. As with helper_ADC, possibly generate a store of
1892 the result -- see comments on helper_ADC for details.
1894 static void helper_SBB ( Int sz,
1895 IRTemp tres, IRTemp ta1, IRTemp ta2,
1896 /* info about optional store: */
1897 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1900 IRType ty = szToITy(sz);
1901 IRTemp oldc = newTemp(Ity_I64);
1902 IRTemp oldcn = newTemp(ty);
1903 IROp minus = mkSizedOp(ty, Iop_Sub8);
1904 IROp xor = mkSizedOp(ty, Iop_Xor8);
1906 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1909 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
1910 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
1911 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
1912 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
1913 default: vassert(0);
1916 /* oldc = old carry flag, 0 or 1 */
1917 assign( oldc, binop(Iop_And64,
1918 mk_amd64g_calculate_rflags_c(),
1921 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
1923 assign( tres, binop(minus,
1924 binop(minus,mkexpr(ta1),mkexpr(ta2)),
1927 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1928 start of this function. */
1929 if (taddr != IRTemp_INVALID) {
1930 if (texpVal == IRTemp_INVALID) {
1931 vassert(restart_point == 0);
1932 storeLE( mkexpr(taddr), mkexpr(tres) );
1934 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1935 /* .. and hence 'texpVal' has the same type as 'tres'. */
1936 casLE( mkexpr(taddr),
1937 mkexpr(texpVal), mkexpr(tres), restart_point );
1941 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
1942 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
1943 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
1944 mkexpr(oldcn)) )) );
1945 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1949 /* -------------- Helpers for disassembly printing. -------------- */
1951 static HChar* nameGrp1 ( Int opc_aux )
1953 static HChar* grp1_names[8]
1954 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1955 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
1956 return grp1_names[opc_aux];
1959 static HChar* nameGrp2 ( Int opc_aux )
1961 static HChar* grp2_names[8]
1962 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1963 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
1964 return grp2_names[opc_aux];
1967 static HChar* nameGrp4 ( Int opc_aux )
1969 static HChar* grp4_names[8]
1970 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1971 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
1972 return grp4_names[opc_aux];
1975 static HChar* nameGrp5 ( Int opc_aux )
1977 static HChar* grp5_names[8]
1978 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1979 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
1980 return grp5_names[opc_aux];
1983 static HChar* nameGrp8 ( Int opc_aux )
1985 static HChar* grp8_names[8]
1986 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
1987 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
1988 return grp8_names[opc_aux];
1991 //.. static HChar* nameSReg ( UInt sreg )
1993 //.. switch (sreg) {
1994 //.. case R_ES: return "%es";
1995 //.. case R_CS: return "%cs";
1996 //.. case R_SS: return "%ss";
1997 //.. case R_DS: return "%ds";
1998 //.. case R_FS: return "%fs";
1999 //.. case R_GS: return "%gs";
2000 //.. default: vpanic("nameSReg(x86)");
2004 static HChar* nameMMXReg ( Int mmxreg )
2006 static HChar* mmx_names[8]
2007 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2008 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2009 return mmx_names[mmxreg];
2012 static HChar* nameXMMReg ( Int xmmreg )
2014 static HChar* xmm_names[16]
2015 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2016 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2017 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2018 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2019 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2020 return xmm_names[xmmreg];
2023 static HChar* nameMMXGran ( Int gran )
2030 default: vpanic("nameMMXGran(amd64,guest)");
2034 static HChar nameISize ( Int size )
2041 default: vpanic("nameISize(amd64)");
2046 /*------------------------------------------------------------*/
2047 /*--- JMP helpers ---*/
2048 /*------------------------------------------------------------*/
2050 static void jmp_lit( IRJumpKind kind, Addr64 d64 )
2052 irsb->next = mkU64(d64);
2053 irsb->jumpkind = kind;
2056 static void jmp_treg( IRJumpKind kind, IRTemp t )
2058 irsb->next = mkexpr(t);
2059 irsb->jumpkind = kind;
2063 void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
2066 AMD64Condcode condPos;
2067 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2069 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2071 IRConst_U64(d64_false) ) );
2072 irsb->next = mkU64(d64_true);
2073 irsb->jumpkind = Ijk_Boring;
2075 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2077 IRConst_U64(d64_true) ) );
2078 irsb->next = mkU64(d64_false);
2079 irsb->jumpkind = Ijk_Boring;
2083 /* Let new_rsp be the %rsp value after a call/return. Let nia be the
2084 guest address of the next instruction to be executed.
2086 This function generates an AbiHint to say that -128(%rsp)
2087 .. -1(%rsp) should now be regarded as uninitialised.
2090 void make_redzone_AbiHint ( VexAbiInfo* vbi,
2091 IRTemp new_rsp, IRTemp nia, HChar* who )
2093 Int szB = vbi->guest_stack_redzone_size;
2096 /* A bit of a kludge. Currently the only AbI we've guested AMD64
2097 for is ELF. So just check it's the expected 128 value
2099 vassert(szB == 128);
2101 if (0) vex_printf("AbiHint: %s\n", who);
2102 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
2103 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
2105 stmt( IRStmt_AbiHint(
2106 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
2113 /*------------------------------------------------------------*/
2114 /*--- Disassembling addressing modes ---*/
2115 /*------------------------------------------------------------*/
2118 HChar* segRegTxt ( Prefix pfx )
2120 if (pfx & PFX_CS) return "%cs:";
2121 if (pfx & PFX_DS) return "%ds:";
2122 if (pfx & PFX_ES) return "%es:";
2123 if (pfx & PFX_FS) return "%fs:";
2124 if (pfx & PFX_GS) return "%gs:";
2125 if (pfx & PFX_SS) return "%ss:";
2126 return ""; /* no override */
2130 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2131 linear address by adding any required segment override as indicated
2132 by sorb, and also dealing with any address size override
2135 IRExpr* handleAddrOverrides ( VexAbiInfo* vbi,
2136 Prefix pfx, IRExpr* virtual )
2138 /* --- segment overrides --- */
2140 if (vbi->guest_amd64_assume_fs_is_zero) {
2141 /* Note that this is a linux-kernel specific hack that relies
2142 on the assumption that %fs is always zero. */
2143 /* return virtual + guest_FS_ZERO. */
2144 virtual = binop(Iop_Add64, virtual,
2145 IRExpr_Get(OFFB_FS_ZERO, Ity_I64));
2147 unimplemented("amd64 %fs segment override");
2152 if (vbi->guest_amd64_assume_gs_is_0x60) {
2153 /* Note that this is a darwin-kernel specific hack that relies
2154 on the assumption that %gs is always 0x60. */
2155 /* return virtual + guest_GS_0x60. */
2156 virtual = binop(Iop_Add64, virtual,
2157 IRExpr_Get(OFFB_GS_0x60, Ity_I64));
2159 unimplemented("amd64 %gs segment override");
2163 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2165 /* --- address size override --- */
2167 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2174 //.. IRType hWordTy;
2175 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2178 //.. /* the common case - no override */
2179 //.. return virtual;
2181 //.. switch (sorb) {
2182 //.. case 0x3E: sreg = R_DS; break;
2183 //.. case 0x26: sreg = R_ES; break;
2184 //.. case 0x64: sreg = R_FS; break;
2185 //.. case 0x65: sreg = R_GS; break;
2186 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2189 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2191 //.. seg_selector = newTemp(Ity_I32);
2192 //.. ldt_ptr = newTemp(hWordTy);
2193 //.. gdt_ptr = newTemp(hWordTy);
2194 //.. r64 = newTemp(Ity_I64);
2196 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2197 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2198 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2201 //.. Call this to do the translation and limit checks:
2202 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2203 //.. UInt seg_selector, UInt virtual_addr )
2210 //.. "x86g_use_seg_selector",
2211 //.. &x86g_use_seg_selector,
2212 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2213 //.. mkexpr(seg_selector), virtual)
2217 //.. /* If the high 32 of the result are non-zero, there was a
2218 //.. failure in address translation. In which case, make a
2223 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2225 //.. IRConst_U32( guest_eip_curr_instr )
2229 //.. /* otherwise, here's the translated result. */
2230 //.. return unop(Iop_64to32, mkexpr(r64));
2234 /* Generate IR to calculate an address indicated by a ModRM and
2235 following SIB bytes. The expression, and the number of bytes in
2236 the address mode, are returned (the latter in *len). Note that
2237 this fn should not be called if the R/M part of the address denotes
2238 a register instead of memory. If print_codegen is true, text of
2239 the addressing mode is placed in buf.
2241 The computed address is stored in a new tempreg, and the
2242 identity of the tempreg is returned.
2244 extra_bytes holds the number of bytes after the amode, as supplied
2245 by the caller. This is needed to make sense of %rip-relative
2246 addresses. Note that the value that *len is set to is only the
2247 length of the amode itself and does not include the value supplied
2251 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2253 IRTemp tmp = newTemp(Ity_I64);
2254 assign( tmp, addr64 );
/* Decode the ModRM (and optional SIB / displacement) bytes starting
   at guest offset 'delta' and return an Ity_I64 temp holding the
   effective address.  *len receives the number of amode bytes
   consumed; 'buf' receives the disassembly text of the amode.
   'extra_bytes' is the number of instruction bytes following the
   amode, needed to resolve %rip-relative addresses (see the header
   comment preceding this function).  Must not be called when the R/M
   field denotes a register. */
2259 IRTemp disAMode ( /*OUT*/Int* len,
2260 VexAbiInfo* vbi, Prefix pfx, Long delta,
2261 /*OUT*/HChar* buf, Int extra_bytes )
2263 UChar mod_reg_rm = getUChar(delta);
2267 vassert(extra_bytes >= 0 && extra_bytes < 10);
2269 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2270 jump table seems a bit excessive.
2272 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2273 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2274 /* is now XX0XXYYY */
2275 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2276 switch (mod_reg_rm) {
2278 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2279 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2281 case 0x00: case 0x01: case 0x02: case 0x03:
2282 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2283 { UChar rm = toUChar(mod_reg_rm & 7);
2284 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2286 return disAMode_copy2tmp(
2287 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
2290 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2291 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2293 case 0x08: case 0x09: case 0x0A: case 0x0B:
2294 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2295 { UChar rm = toUChar(mod_reg_rm & 7);
2296 Long d = getSDisp8(delta);
2298 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2300 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2303 return disAMode_copy2tmp(
2304 handleAddrOverrides(vbi, pfx,
2305 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2308 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2309 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2311 case 0x10: case 0x11: case 0x12: case 0x13:
2312 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2313 { UChar rm = toUChar(mod_reg_rm & 7);
2314 Long d = getSDisp32(delta);
2315 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2317 return disAMode_copy2tmp(
2318 handleAddrOverrides(vbi, pfx,
2319 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2322 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2323 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2324 case 0x18: case 0x19: case 0x1A: case 0x1B:
2325 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2326 vpanic("disAMode(amd64): not an addr!");
2328 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2329 correctly at the start of handling each instruction. */
2331 { Long d = getSDisp32(delta);
2333 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
2334 /* We need to know the next instruction's start address.
2335 Try and figure out what it is, record the guess, and ask
2336 the top-level driver logic (bbToIR_AMD64) to check we
2337 guessed right, after the instruction is completely
2339 guest_RIP_next_mustcheck = True;
2340 guest_RIP_next_assumed = guest_RIP_bbstart
2341 + delta+4 + extra_bytes;
2342 return disAMode_copy2tmp(
2343 handleAddrOverrides(vbi, pfx,
2344 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
2349 /* SIB, with no displacement. Special cases:
2350 -- %rsp cannot act as an index value.
2351 If index_r indicates %rsp, zero is used for the index.
2352 -- when mod is zero and base indicates RBP or R13, base is
2353 instead a 32-bit sign-extended literal.
2354 It's all madness, I tell you. Extract %index, %base and
2355 scale from the SIB byte. The value denoted is then:
2356 | %index == %RSP && (%base == %RBP || %base == %R13)
2357 = d32 following SIB byte
2358 | %index == %RSP && !(%base == %RBP || %base == %R13)
2360 | %index != %RSP && (%base == %RBP || %base == %R13)
2361 = d32 following SIB byte + (%index << scale)
2362 | %index != %RSP && !(%base == %RBP || %base == %R13)
2363 = %base + (%index << scale)
2365 UChar sib = getUChar(delta);
2366 UChar scale = toUChar((sib >> 6) & 3);
2367 UChar index_r = toUChar((sib >> 3) & 7);
2368 UChar base_r = toUChar(sib & 7);
2369 /* correct since #(R13) == 8 + #(RBP) */
2370 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2371 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
2374 if ((!index_is_SP) && (!base_is_BPor13)) {
2376 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2377 nameIRegRexB(8,pfx,base_r),
2378 nameIReg64rexX(pfx,index_r));
2380 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2381 nameIRegRexB(8,pfx,base_r),
2382 nameIReg64rexX(pfx,index_r), 1<<scale);
2387 handleAddrOverrides(vbi, pfx,
2389 getIRegRexB(8,pfx,base_r),
2390 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2394 if ((!index_is_SP) && base_is_BPor13) {
2395 Long d = getSDisp32(delta);
2396 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
2397 nameIReg64rexX(pfx,index_r), 1<<scale);
2401 handleAddrOverrides(vbi, pfx,
2403 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2408 if (index_is_SP && (!base_is_BPor13)) {
2409 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
2411 return disAMode_copy2tmp(
2412 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
2415 if (index_is_SP && base_is_BPor13) {
2416 Long d = getSDisp32(delta);
2417 DIS(buf, "%s%lld", segRegTxt(pfx), d);
2419 return disAMode_copy2tmp(
2420 handleAddrOverrides(vbi, pfx, mkU64(d)));
2426 /* SIB, with 8-bit displacement. Special cases:
2427 -- %rsp cannot act as an index value.
2428 If index_r indicates %rsp, zero is used for the index.
2433 = d8 + %base + (%index << scale)
2436 UChar sib = getUChar(delta);
2437 UChar scale = toUChar((sib >> 6) & 3);
2438 UChar index_r = toUChar((sib >> 3) & 7);
2439 UChar base_r = toUChar(sib & 7);
2440 Long d = getSDisp8(delta+1);
2442 if (index_r == R_RSP && 0==getRexX(pfx)) {
2443 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2444 d, nameIRegRexB(8,pfx,base_r));
2446 return disAMode_copy2tmp(
2447 handleAddrOverrides(vbi, pfx,
2448 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2451 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2452 nameIRegRexB(8,pfx,base_r),
2453 nameIReg64rexX(pfx,index_r));
2455 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2456 nameIRegRexB(8,pfx,base_r),
2457 nameIReg64rexX(pfx,index_r), 1<<scale);
2462 handleAddrOverrides(vbi, pfx,
2465 getIRegRexB(8,pfx,base_r),
2467 getIReg64rexX(pfx,index_r), mkU8(scale))),
2470 vassert(0); /*NOTREACHED*/
2473 /* SIB, with 32-bit displacement. Special cases:
2474 -- %rsp cannot act as an index value.
2475 If index_r indicates %rsp, zero is used for the index.
2480 = d32 + %base + (%index << scale)
2483 UChar sib = getUChar(delta);
2484 UChar scale = toUChar((sib >> 6) & 3);
2485 UChar index_r = toUChar((sib >> 3) & 7);
2486 UChar base_r = toUChar(sib & 7);
2487 Long d = getSDisp32(delta+1);
2489 if (index_r == R_RSP && 0==getRexX(pfx)) {
2490 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2491 d, nameIRegRexB(8,pfx,base_r));
2493 return disAMode_copy2tmp(
2494 handleAddrOverrides(vbi, pfx,
2495 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2498 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2499 nameIRegRexB(8,pfx,base_r),
2500 nameIReg64rexX(pfx,index_r));
2502 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2503 nameIRegRexB(8,pfx,base_r),
2504 nameIReg64rexX(pfx,index_r), 1<<scale);
2509 handleAddrOverrides(vbi, pfx,
2512 getIRegRexB(8,pfx,base_r),
2514 getIReg64rexX(pfx,index_r), mkU8(scale))),
2517 vassert(0); /*NOTREACHED*/
2521 vpanic("disAMode(amd64)");
2522 return 0; /*notreached*/
2527 /* Figure out the number of (insn-stream) bytes constituting the amode
2528 beginning at delta. Is useful for getting hold of literals beyond
2529 the end of the amode before it has been disassembled. */
/* Compute the length, in instruction-stream bytes, of the amode
   beginning at 'delta' -- without building any IR.  Mirrors the case
   analysis in disAMode above; useful for locating literals that
   follow the amode before it has been fully disassembled. */
2531 static UInt lengthAMode ( Prefix pfx, Long delta )
2533 UChar mod_reg_rm = getUChar(delta);
2536 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2537 jump table seems a bit excessive.
2539 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2540 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2541 /* is now XX0XXYYY */
2542 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2543 switch (mod_reg_rm) {
2545 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2546 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2548 case 0x00: case 0x01: case 0x02: case 0x03:
2549 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2552 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2553 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2555 case 0x08: case 0x09: case 0x0A: case 0x0B:
2556 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2559 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2560 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2562 case 0x10: case 0x11: case 0x12: case 0x13:
2563 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2566 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2567 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2568 /* Not an address, but still handled. */
2569 case 0x18: case 0x19: case 0x1A: case 0x1B:
2570 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2578 /* SIB, with no displacement. */
2579 UChar sib = getUChar(delta);
2580 UChar base_r = toUChar(sib & 7);
2581 /* correct since #(R13) == 8 + #(RBP) */
2582 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2584 if (base_is_BPor13) {
2591 /* SIB, with 8-bit displacement. */
2595 /* SIB, with 32-bit displacement. */
2600 vpanic("lengthAMode(amd64)");
2601 return 0; /*notreached*/
2606 /*------------------------------------------------------------*/
2607 /*--- Disassembling common idioms ---*/
2608 /*------------------------------------------------------------*/
2610 /* Handle binary integer instructions of the form
2613 Is passed a pointer to the modRM byte, the actual operation, and the
2614 data size. Returns the address advanced completely over this
2617 E(src) is reg-or-mem
2620 If E is reg, --> GET %G, tmp
2624 If E is mem and OP is not reversible,
2625 --> (getAddr E) -> tmpa
2631 If E is mem and OP is reversible
2632 --> (getAddr E) -> tmpa
/* Disassemble a binary integer op of the form "OP E,G" (source is
   reg-or-mem E, destination is register G).  Returns delta advanced
   over the instruction.  ADC/SBB are routed to the helper_ADC /
   helper_SBB helpers; XOR r,r and SBB r,r are special-cased to avoid
   a bogus dependency on the (identical) source register. */
2638 ULong dis_op2_E_G ( VexAbiInfo* vbi,
2649 IRType ty = szToITy(size);
2650 IRTemp dst1 = newTemp(ty);
2651 IRTemp src = newTemp(ty);
2652 IRTemp dst0 = newTemp(ty);
2653 UChar rm = getUChar(delta0);
2654 IRTemp addr = IRTemp_INVALID;
2656 /* addSubCarry == True indicates the intended operation is
2657 add-with-carry or subtract-with-borrow. */
2659 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
2663 if (epartIsReg(rm)) {
2664 /* Specially handle XOR reg,reg, because that doesn't really
2665 depend on reg, and doing the obvious thing potentially
2666 generates a spurious value check failure due to the bogus
2668 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
2669 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
2670 if (False && op8 == Iop_Sub8)
2671 vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
2672 putIRegG(size,pfx,rm, mkU(ty,0));
2675 assign( dst0, getIRegG(size,pfx,rm) );
2676 assign( src, getIRegE(size,pfx,rm) );
2678 if (addSubCarry && op8 == Iop_Add8) {
2679 helper_ADC( size, dst1, dst0, src,
2680 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2681 putIRegG(size, pfx, rm, mkexpr(dst1));
2683 if (addSubCarry && op8 == Iop_Sub8) {
2684 helper_SBB( size, dst1, dst0, src,
2685 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2686 putIRegG(size, pfx, rm, mkexpr(dst1));
2688 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2690 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2692 setFlags_DEP1(op8, dst1, ty);
2694 putIRegG(size, pfx, rm, mkexpr(dst1));
2697 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
2698 nameIRegE(size,pfx,rm),
2699 nameIRegG(size,pfx,rm));
2702 /* E refers to memory */
2703 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
2704 assign( dst0, getIRegG(size,pfx,rm) );
2705 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
2707 if (addSubCarry && op8 == Iop_Add8) {
2708 helper_ADC( size, dst1, dst0, src,
2709 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2710 putIRegG(size, pfx, rm, mkexpr(dst1));
2712 if (addSubCarry && op8 == Iop_Sub8) {
2713 helper_SBB( size, dst1, dst0, src,
2714 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2715 putIRegG(size, pfx, rm, mkexpr(dst1));
2717 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2719 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2721 setFlags_DEP1(op8, dst1, ty);
2723 putIRegG(size, pfx, rm, mkexpr(dst1));
2726 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
2727 dis_buf, nameIRegG(size, pfx, rm));
2734 /* Handle binary integer instructions of the form
2737 Is passed a pointer to the modRM byte, the actual operation, and the
2738 data size. Returns the address advanced completely over this
2742 E(dst) is reg-or-mem
2744 If E is reg, --> GET %E, tmp
2748 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble a binary integer op of the form "OP G,E" (source is
   register G, destination is reg-or-mem E).  Returns delta advanced
   over the instruction.  Unlike dis_op2_E_G, the destination may be
   memory, so a LOCK prefix is honoured here via CAS-style stores. */
2754 ULong dis_op2_G_E ( VexAbiInfo* vbi,
2765 IRType ty = szToITy(size);
2766 IRTemp dst1 = newTemp(ty);
2767 IRTemp src = newTemp(ty);
2768 IRTemp dst0 = newTemp(ty);
2769 UChar rm = getUChar(delta0);
2770 IRTemp addr = IRTemp_INVALID;
2772 /* addSubCarry == True indicates the intended operation is
2773 add-with-carry or subtract-with-borrow. */
2775 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
2779 if (epartIsReg(rm)) {
2780 /* Specially handle XOR reg,reg, because that doesn't really
2781 depend on reg, and doing the obvious thing potentially
2782 generates a spurious value check failure due to the bogus
2783 dependency. Ditto SBB reg,reg. */
2784 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
2785 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
2786 putIRegE(size,pfx,rm, mkU(ty,0));
2789 assign(dst0, getIRegE(size,pfx,rm));
2790 assign(src, getIRegG(size,pfx,rm));
2792 if (addSubCarry && op8 == Iop_Add8) {
2793 helper_ADC( size, dst1, dst0, src,
2794 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2795 putIRegE(size, pfx, rm, mkexpr(dst1));
2797 if (addSubCarry && op8 == Iop_Sub8) {
2798 helper_SBB( size, dst1, dst0, src,
2799 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2800 putIRegE(size, pfx, rm, mkexpr(dst1));
2802 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2804 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2806 setFlags_DEP1(op8, dst1, ty);
2808 putIRegE(size, pfx, rm, mkexpr(dst1));
2811 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
2812 nameIRegG(size,pfx,rm),
2813 nameIRegE(size,pfx,rm));
2817 /* E refers to memory */
2819 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
2820 assign(dst0, loadLE(ty,mkexpr(addr)));
2821 assign(src, getIRegG(size,pfx,rm));
2823 if (addSubCarry && op8 == Iop_Add8) {
2824 if (pfx & PFX_LOCK) {
2825 /* cas-style store */
2826 helper_ADC( size, dst1, dst0, src,
2827 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
2830 helper_ADC( size, dst1, dst0, src,
2831 /*store*/addr, IRTemp_INVALID, 0 );
2834 if (addSubCarry && op8 == Iop_Sub8) {
2835 if (pfx & PFX_LOCK) {
2836 /* cas-style store */
2837 helper_SBB( size, dst1, dst0, src,
2838 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
2841 helper_SBB( size, dst1, dst0, src,
2842 /*store*/addr, IRTemp_INVALID, 0 );
2845 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2847 if (pfx & PFX_LOCK) {
2848 if (0) vex_printf("locked case\n" );
2849 casLE( mkexpr(addr),
2850 mkexpr(dst0)/*expval*/,
2851 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
2853 if (0) vex_printf("nonlocked case\n");
2854 storeLE(mkexpr(addr), mkexpr(dst1));
2858 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2860 setFlags_DEP1(op8, dst1, ty);
2863 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
2864 nameIRegG(size,pfx,rm), dis_buf);
2870 /* Handle move instructions of the form
2873 Is passed a pointer to the modRM byte, and the data size. Returns
2874 the address advanced completely over this instruction.
2876 E(src) is reg-or-mem
2879 If E is reg, --> GET %E, tmpv
2882 If E is mem --> (getAddr E) -> tmpa
/* Disassemble "MOV E,G": copy reg-or-mem E into register G.
   Returns delta advanced over the instruction. */
2887 ULong dis_mov_E_G ( VexAbiInfo* vbi,
2893 UChar rm = getUChar(delta0);
2896 if (epartIsReg(rm)) {
2897 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
2898 DIP("mov%c %s,%s\n", nameISize(size),
2899 nameIRegE(size,pfx,rm),
2900 nameIRegG(size,pfx,rm));
2904 /* E refers to memory */
2906 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
2907 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
2908 DIP("mov%c %s,%s\n", nameISize(size),
2910 nameIRegG(size,pfx,rm));
2916 /* Handle move instructions of the form
2919 Is passed a pointer to the modRM byte, and the data size. Returns
2920 the address advanced completely over this instruction.
2923 E(dst) is reg-or-mem
2925 If E is reg, --> GET %G, tmp
2928 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble "MOV G,E": copy register G into reg-or-mem E.
   Returns delta advanced over the instruction. */
2933 ULong dis_mov_G_E ( VexAbiInfo* vbi,
2939 UChar rm = getUChar(delta0);
2942 if (epartIsReg(rm)) {
2943 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
2944 DIP("mov%c %s,%s\n", nameISize(size),
2945 nameIRegG(size,pfx,rm),
2946 nameIRegE(size,pfx,rm));
2950 /* E refers to memory */
2952 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
2953 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
2954 DIP("mov%c %s,%s\n", nameISize(size),
2955 nameIRegG(size,pfx,rm),
2962 /* op $immediate, AL/AX/EAX/RAX. */
/* Disassemble "OP $imm, AL/AX/EAX/RAX".  The immediate is at most
   32 bits (sign-extended to the operation size); ADC/SBB go through
   the flag-aware helpers, plain add/sub/logic ops build the flags
   thunk directly. */
2964 ULong dis_op_imm_A ( Int size,
2971 Int size4 = imin(size,4);
2972 IRType ty = szToITy(size);
2973 IRTemp dst0 = newTemp(ty);
2974 IRTemp src = newTemp(ty);
2975 IRTemp dst1 = newTemp(ty);
2976 Long lit = getSDisp(size4,delta);
2977 assign(dst0, getIRegRAX(size));
2978 assign(src, mkU(ty,lit & mkSizeMask(size)));
2980 if (isAddSub(op8) && !carrying) {
2981 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2982 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2987 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2988 setFlags_DEP1(op8, dst1, ty);
2991 if (op8 == Iop_Add8 && carrying) {
2992 helper_ADC( size, dst1, dst0, src,
2993 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2996 if (op8 == Iop_Sub8 && carrying) {
2997 helper_SBB( size, dst1, dst0, src,
2998 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3001 vpanic("dis_op_imm_A(amd64,guest)");
3004 putIRegRAX(size, mkexpr(dst1));
3006 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
3007 lit, nameIRegRAX(size));
3012 /* Sign- and Zero-extending moves. */
/* Disassemble MOVSX/MOVZX: widen reg-or-mem E of size 'szs' to size
   'szd' (sign- or zero-extending per 'sign_extend') and write it to
   register G.  Returns delta advanced over the instruction. */
3014 ULong dis_movx_E_G ( VexAbiInfo* vbi,
3016 Long delta, Int szs, Int szd, Bool sign_extend )
3018 UChar rm = getUChar(delta);
3019 if (epartIsReg(rm)) {
3020 putIRegG(szd, pfx, rm,
3022 szs,szd,sign_extend,
3023 getIRegE(szs,pfx,rm)));
3024 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3027 nameIRegE(szs,pfx,rm),
3028 nameIRegG(szd,pfx,rm));
3032 /* E refers to memory */
3036 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
3037 putIRegG(szd, pfx, rm,
3039 szs,szd,sign_extend,
3040 loadLE(szToITy(szs),mkexpr(addr))));
3041 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3045 nameIRegG(szd,pfx,rm));
3051 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3052 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
/* Generate IR for DIV/IDIV: divide RDX:RAX / EDX:EAX / DX:AX / AX by
   the sz-byte quantity in t, leaving quotient and remainder in the
   architecturally-defined register pair.  The 64-bit case uses a
   128/64 divide; narrower sizes widen operands and do a 64/32 divide,
   then narrow the two halves of the result back down. */
3054 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3056 /* special-case the 64-bit case */
3058 IROp op = signed_divide ? Iop_DivModS128to64
3059 : Iop_DivModU128to64;
3060 IRTemp src128 = newTemp(Ity_I128);
3061 IRTemp dst128 = newTemp(Ity_I128);
3062 assign( src128, binop(Iop_64HLto128,
3064 getIReg64(R_RAX)) );
3065 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
3066 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3067 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
3069 IROp op = signed_divide ? Iop_DivModS64to32
3070 : Iop_DivModU64to32;
3071 IRTemp src64 = newTemp(Ity_I64);
3072 IRTemp dst64 = newTemp(Ity_I64);
3076 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3078 binop(op, mkexpr(src64), mkexpr(t)) );
3079 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3080 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
3083 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3084 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3085 assign( src64, unop(widen3264,
3089 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
3090 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3091 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
3095 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3096 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3097 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3098 assign( src64, unop(widen3264,
3099 unop(widen1632, getIRegRAX(2))) );
3101 binop(op, mkexpr(src64),
3102 unop(widen1632, unop(widen816, mkexpr(t)))) );
3103 putIRegRAX( 1, unop(Iop_16to8,
3105 unop(Iop_64to32,mkexpr(dst64)))) );
3106 putIRegAH( unop(Iop_16to8,
3108 unop(Iop_64HIto32,mkexpr(dst64)))) );
3112 vpanic("codegen_div(amd64)");
/* Disassemble Group 1 ops: "OP $imm, E" where the sub-opcode in the
   reg field of modrm selects ADD/OR/ADC/SBB/AND/SUB/XOR/CMP.
   am_sz/d_sz are amode and displacement sizes; d64 is the immediate.
   Sub-opcode 7 (CMP) computes flags only and writes no result back.
   A LOCK prefix on the memory form is honoured via casLE. */
3118 ULong dis_Grp1 ( VexAbiInfo* vbi,
3120 Long delta, UChar modrm,
3121 Int am_sz, Int d_sz, Int sz, Long d64 )
3125 IRType ty = szToITy(sz);
3126 IRTemp dst1 = newTemp(ty);
3127 IRTemp src = newTemp(ty);
3128 IRTemp dst0 = newTemp(ty);
3129 IRTemp addr = IRTemp_INVALID;
3130 IROp op8 = Iop_INVALID;
3131 ULong mask = mkSizeMask(sz);
3133 switch (gregLO3ofRM(modrm)) {
3134 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3135 case 2: break; // ADC
3136 case 3: break; // SBB
3137 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
3138 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
3140 default: vpanic("dis_Grp1(amd64): unhandled case");
3143 if (epartIsReg(modrm)) {
3144 vassert(am_sz == 1);
3146 assign(dst0, getIRegE(sz,pfx,modrm));
3147 assign(src, mkU(ty,d64 & mask));
3149 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3150 helper_ADC( sz, dst1, dst0, src,
3151 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3153 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3154 helper_SBB( sz, dst1, dst0, src,
3155 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3157 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3159 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3161 setFlags_DEP1(op8, dst1, ty);
3164 if (gregLO3ofRM(modrm) < 7)
3165 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3167 delta += (am_sz + d_sz);
3168 DIP("%s%c $%lld, %s\n",
3169 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
3170 nameIRegE(sz,pfx,modrm));
3172 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3174 assign(dst0, loadLE(ty,mkexpr(addr)));
3175 assign(src, mkU(ty,d64 & mask));
3177 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3178 if (pfx & PFX_LOCK) {
3179 /* cas-style store */
3180 helper_ADC( sz, dst1, dst0, src,
3181 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3184 helper_ADC( sz, dst1, dst0, src,
3185 /*store*/addr, IRTemp_INVALID, 0 );
3188 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3189 if (pfx & PFX_LOCK) {
3190 /* cas-style store */
3191 helper_SBB( sz, dst1, dst0, src,
3192 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3195 helper_SBB( sz, dst1, dst0, src,
3196 /*store*/addr, IRTemp_INVALID, 0 );
3199 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3200 if (gregLO3ofRM(modrm) < 7) {
3201 if (pfx & PFX_LOCK) {
3202 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3203 mkexpr(dst1)/*newVal*/,
3204 guest_RIP_curr_instr );
3206 storeLE(mkexpr(addr), mkexpr(dst1));
3210 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3212 setFlags_DEP1(op8, dst1, ty);
3215 delta += (len+d_sz);
3216 DIP("%s%c $%lld, %s\n",
3217 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
3224 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
/* Disassemble Group 2 ops: shifts (SHL/SHR/SAR) and rotates
   (ROL/ROR/RCL/RCR) of reg-or-mem E by shift_expr (an 8-bit typed
   expression).  *decode_OK is cleared on an undecodable sub-opcode.
   RCL/RCR are done via a clean helper called twice (once for the
   value, once for the flags); plain shifts are widened to 64 bits so
   all IR shifts stay within IR semantics; rotates build the result
   from a Shl/Shr/Or combination and update flags conditionally on a
   non-zero rotate count. */
3228 ULong dis_Grp2 ( VexAbiInfo* vbi,
3230 Long delta, UChar modrm,
3231 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
3232 HChar* shift_expr_txt, Bool* decode_OK )
3234 /* delta on entry points at the modrm byte. */
3237 Bool isShift, isRotate, isRotateC;
3238 IRType ty = szToITy(sz);
3239 IRTemp dst0 = newTemp(ty);
3240 IRTemp dst1 = newTemp(ty);
3241 IRTemp addr = IRTemp_INVALID;
3245 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3247 /* Put value to shift/rotate in dst0. */
3248 if (epartIsReg(modrm)) {
3249 assign(dst0, getIRegE(sz, pfx, modrm));
3250 delta += (am_sz + d_sz);
3252 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3253 assign(dst0, loadLE(ty,mkexpr(addr)));
3254 delta += len + d_sz;
3258 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 7: isShift = True; }
3261 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
3264 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
3266 if (gregLO3ofRM(modrm) == 6) {
3271 if (!isShift && !isRotate && !isRotateC) {
3273 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3277 /* Call a helper; this insn is so ridiculous it does not deserve
3278 better. One problem is, the helper has to calculate both the
3279 new value and the new flags. This is more than 64 bits, and
3280 there is no way to return more than 64 bits from the helper.
3281 Hence the crude and obvious solution is to call it twice,
3282 using the sign of the sz field to indicate whether it is the
3283 value or rflags result we want.
3285 Bool left = toBool(gregLO3ofRM(modrm) == 2);
3287 IRExpr** argsRFLAGS;
3289 IRTemp new_value = newTemp(Ity_I64);
3290 IRTemp new_rflags = newTemp(Ity_I64);
3291 IRTemp old_rflags = newTemp(Ity_I64);
3293 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
3296 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3297 widenUto64(shift_expr), /* rotate amount */
3304 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3305 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3311 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3312 widenUto64(shift_expr), /* rotate amount */
3319 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3320 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3325 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
3326 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3327 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3328 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3329 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3335 IRTemp pre64 = newTemp(Ity_I64);
3336 IRTemp res64 = newTemp(Ity_I64);
3337 IRTemp res64ss = newTemp(Ity_I64);
3338 IRTemp shift_amt = newTemp(Ity_I8);
3339 UChar mask = toUChar(sz==8 ? 63 : 31);
3342 switch (gregLO3ofRM(modrm)) {
3343 case 4: op64 = Iop_Shl64; break;
3344 case 5: op64 = Iop_Shr64; break;
3345 case 7: op64 = Iop_Sar64; break;
3347 default: vpanic("dis_Grp2:shift"); break;
3350 /* Widen the value to be shifted to 64 bits, do the shift, and
3351 narrow back down. This seems surprisingly long-winded, but
3352 unfortunately the AMD semantics requires that 8/16/32-bit
3353 shifts give defined results for shift values all the way up
3354 to 32, and this seems the simplest way to do it. It has the
3355 advantage that the only IR level shifts generated are of 64
3356 bit values, and the shift amount is guaranteed to be in the
3357 range 0 .. 63, thereby observing the IR semantics requiring
3358 all shift values to be in the range 0 .. 2^word_size-1.
3360 Therefore the shift amount is masked with 63 for 64-bit shifts
3361 and 31 for all others.
3363 /* shift_amt = shift_expr & MASK, regardless of operation size */
3364 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
3366 /* suitably widen the value to be shifted to 64 bits. */
3367 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3368 : widenUto64(mkexpr(dst0)) );
3370 /* res64 = pre64 `shift` shift_amt */
3371 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3373 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3379 mkexpr(shift_amt), mkU8(1)),
3382 /* Build the flags thunk. */
3383 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3385 /* Narrow the result back down. */
3386 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3388 } /* if (isShift) */
3392 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3393 : (ty==Ity_I32 ? 2 : 3));
3394 Bool left = toBool(gregLO3ofRM(modrm) == 0);
3395 IRTemp rot_amt = newTemp(Ity_I8);
3396 IRTemp rot_amt64 = newTemp(Ity_I8);
3397 IRTemp oldFlags = newTemp(Ity_I64);
3398 UChar mask = toUChar(sz==8 ? 63 : 31);
3400 /* rot_amt = shift_expr & mask */
3401 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3402 expressions never shift beyond the word size and thus remain
3404 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
3407 assign(rot_amt, mkexpr(rot_amt64));
3409 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
3413 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3415 binop( mkSizedOp(ty,Iop_Or8),
3416 binop( mkSizedOp(ty,Iop_Shl8),
3420 binop( mkSizedOp(ty,Iop_Shr8),
3422 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3426 ccOp += AMD64G_CC_OP_ROLB;
3428 } else { /* right */
3430 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3432 binop( mkSizedOp(ty,Iop_Or8),
3433 binop( mkSizedOp(ty,Iop_Shr8),
3437 binop( mkSizedOp(ty,Iop_Shl8),
3439 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3443 ccOp += AMD64G_CC_OP_RORB;
3447 /* dst1 now holds the rotated value. Build flag thunk. We
3448 need the resulting value for this, and the previous flags.
3449 Except don't set it if the rotate count is zero. */
3451 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3453 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3454 stmt( IRStmt_Put( OFFB_CC_OP,
3455 IRExpr_Mux0X( mkexpr(rot_amt64),
3456 IRExpr_Get(OFFB_CC_OP,Ity_I64),
3458 stmt( IRStmt_Put( OFFB_CC_DEP1,
3459 IRExpr_Mux0X( mkexpr(rot_amt64),
3460 IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
3461 widenUto64(mkexpr(dst1)))) );
3462 stmt( IRStmt_Put( OFFB_CC_DEP2,
3463 IRExpr_Mux0X( mkexpr(rot_amt64),
3464 IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
3466 stmt( IRStmt_Put( OFFB_CC_NDEP,
3467 IRExpr_Mux0X( mkexpr(rot_amt64),
3468 IRExpr_Get(OFFB_CC_NDEP,Ity_I64),
3469 mkexpr(oldFlags))) );
3470 } /* if (isRotate) */
3472 /* Save result, and finish up. */
3473 if (epartIsReg(modrm)) {
3474 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3475 if (vex_traceflags & VEX_TRACE_FE) {
3477 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3479 vex_printf("%s", shift_expr_txt);
3481 ppIRExpr(shift_expr);
3482 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
3485 storeLE(mkexpr(addr), mkexpr(dst1));
3486 if (vex_traceflags & VEX_TRACE_FE) {
3488 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3490 vex_printf("%s", shift_expr_txt);
3492 ppIRExpr(shift_expr);
3493 vex_printf(", %s\n", dis_buf);
3500 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
/* Disassemble Group 8 ops with an immediate bit offset: BT/BTS/BTR/BTC
   $imm8, E.  src_val (the bit index) is masked to the operand width
   per the Intel docs.  The tested value is widened to 64 bits in t2;
   non-BT forms compute the modified value in t2m and write it back
   (honouring LOCK via casLE for the memory form).  The selected bit
   is copied into the carry flag; O/S/Z/A/P are set to zero. */
3502 ULong dis_Grp8_Imm ( VexAbiInfo* vbi,
3504 Long delta, UChar modrm,
3505 Int am_sz, Int sz, ULong src_val,
3508 /* src_val denotes a d8.
3509 And delta on entry points at the modrm byte. */
3511 IRType ty = szToITy(sz);
3512 IRTemp t2 = newTemp(Ity_I64);
3513 IRTemp t2m = newTemp(Ity_I64);
3514 IRTemp t_addr = IRTemp_INVALID;
3518 /* we're optimists :-) */
3521 /* Limit src_val -- the bit offset -- to something within a word.
3522 The Intel docs say that literal offsets larger than a word are
3523 masked in this way. */
3525 case 2: src_val &= 15; break;
3526 case 4: src_val &= 31; break;
3527 case 8: src_val &= 63; break;
3528 default: *decode_OK = False; return delta;
3531 /* Invent a mask suitable for the operation. */
3532 switch (gregLO3ofRM(modrm)) {
3533 case 4: /* BT */ mask = 0; break;
3534 case 5: /* BTS */ mask = 1ULL << src_val; break;
3535 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
3536 case 7: /* BTC */ mask = 1ULL << src_val; break;
3537 /* If this needs to be extended, probably simplest to make a
3538 new function to handle the other cases (0 .. 3). The
3539 Intel docs do however not indicate any use for 0 .. 3, so
3540 we don't expect this to happen. */
3541 default: *decode_OK = False; return delta;
3544 /* Fetch the value to be tested and modified into t2, which is
3545 64-bits wide regardless of sz. */
3546 if (epartIsReg(modrm)) {
3547 vassert(am_sz == 1);
3548 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
3549 delta += (am_sz + 1);
3550 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3552 src_val, nameIRegE(sz,pfx,modrm));
3555 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
3557 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
3558 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3563 /* Compute the new value into t2m, if non-BT. */
3564 switch (gregLO3ofRM(modrm)) {
3568 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
3571 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
3574 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
3577 /*NOTREACHED*/ /*the previous switch guards this*/
3581 /* Write the result back, if non-BT. */
3582 if (gregLO3ofRM(modrm) != 4 /* BT */) {
3583 if (epartIsReg(modrm)) {
3584 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
3586 if (pfx & PFX_LOCK) {
3587 casLE( mkexpr(t_addr),
3588 narrowTo(ty, mkexpr(t2))/*expd*/,
3589 narrowTo(ty, mkexpr(t2m))/*new*/,
3590 guest_RIP_curr_instr );
3592 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
3597 /* Copy relevant bit from t2 into the carry flag. */
3598 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
3599 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3600 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3604 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
3607 /* Set NDEP even though it isn't used. This makes redundant-PUT
3608 elimination of previous stores to this field work better. */
3609 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3615 /* Signed/unsigned widening multiply. Generate IR to multiply the
3616 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
3617 RDX:RAX/EDX:EAX/DX:AX/AX.
/* One arm per operand size (8/4/2/1 bytes).  Each arm computes the
   widening product via the size-appropriate MullS/MullU op, splits the
   double-width result into hi/lo halves, and writes them to the
   RDX:RAX register pair (AX only, for the 1-byte case).  The flag
   thunk is set first via setFlags_MUL with the signed/unsigned base
   op.  NOTE(review): interior lines (size dispatch, braces) are
   elided in this rendering. */
3619 static void codegen_mulL_A_D ( Int sz, Bool syned,
3620 IRTemp tmp, HChar* tmp_txt )
3622 IRType ty = szToITy(sz);
3623 IRTemp t1 = newTemp(ty);
3625 assign( t1, getIRegRAX(sz) );
3629 IRTemp res128 = newTemp(Ity_I128);
3630 IRTemp resHi = newTemp(Ity_I64);
3631 IRTemp resLo = newTemp(Ity_I64);
3632 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
3633 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3634 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
3635 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3636 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
3637 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
3638 putIReg64(R_RDX, mkexpr(resHi));
3639 putIReg64(R_RAX, mkexpr(resLo));
3643 IRTemp res64 = newTemp(Ity_I64);
3644 IRTemp resHi = newTemp(Ity_I32);
3645 IRTemp resLo = newTemp(Ity_I32);
3646 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
3647 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3648 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
3649 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3650 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
3651 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
3652 putIRegRDX(4, mkexpr(resHi));
3653 putIRegRAX(4, mkexpr(resLo));
3657 IRTemp res32 = newTemp(Ity_I32);
3658 IRTemp resHi = newTemp(Ity_I16);
3659 IRTemp resLo = newTemp(Ity_I16);
3660 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
3661 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3662 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
3663 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3664 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
3665 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
3666 putIRegRDX(2, mkexpr(resHi));
3667 putIRegRAX(2, mkexpr(resLo));
/* 1-byte case: the full 16-bit product goes to AX as a unit (no DX
   write).  resHi/resLo are assigned here but only res16 is visibly
   used -- presumably dead temps kept for symmetry with the other
   arms; confirm against upstream before tidying. */
3671 IRTemp res16 = newTemp(Ity_I16);
3672 IRTemp resHi = newTemp(Ity_I8);
3673 IRTemp resLo = newTemp(Ity_I8);
3674 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
3675 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3676 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
3677 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3678 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
3679 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
3680 putIRegRAX(2, mkexpr(res16));
3685 vpanic("codegen_mulL_A_D(amd64)");
3687 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
3691 /* Group 3 extended opcodes. */
/* Grp3 (F6/F7): TEST imm / NOT / NEG / MUL / IMUL / DIV / IDIV on Ev,
   dispatched on the reg field of the modrm byte.  Handles both the
   register form and the memory form (second half).  Returns the
   updated delta; *decode_OK reports failure.  NOTE(review): interior
   lines (case labels, braces, break statements) are elided in this
   rendering; comments cover visible code only. */
3693 ULong dis_Grp3 ( VexAbiInfo* vbi,
3694 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
3701 IRType ty = szToITy(sz);
3702 IRTemp t1 = newTemp(ty);
3703 IRTemp dst1, src, dst0;
3705 modrm = getUChar(delta);
3706 if (epartIsReg(modrm)) {
3707 switch (gregLO3ofRM(modrm)) {
3708 case 0: { /* TEST */
/* TEST Ev, imm: AND operand with (size-masked) immediate, set
   flags from the result, discard the result. */
3710 d64 = getSDisp(imin(4,sz), delta);
3711 delta += imin(4,sz);
3713 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
3714 getIRegE(sz,pfx,modrm),
3715 mkU(ty, d64 & mkSizeMask(sz))));
3716 setFlags_DEP1( Iop_And8, dst1, ty );
3717 DIP("test%c $%lld, %s\n",
3719 nameIRegE(sz, pfx, modrm));
3727 putIRegE(sz, pfx, modrm,
3728 unop(mkSizedOp(ty,Iop_Not8),
3729 getIRegE(sz, pfx, modrm)));
3730 DIP("not%c %s\n", nameISize(sz),
3731 nameIRegE(sz, pfx, modrm));
/* NEG: computed as 0 - operand, with SUB-style flag thunk. */
3738 assign(dst0, mkU(ty,0));
3739 assign(src, getIRegE(sz, pfx, modrm));
3740 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
3742 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
3743 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3744 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
3746 case 4: /* MUL (unsigned widening) */
3749 assign(src, getIRegE(sz,pfx,modrm));
3750 codegen_mulL_A_D ( sz, False, src,
3751 nameIRegE(sz,pfx,modrm) );
3753 case 5: /* IMUL (signed widening) */
3756 assign(src, getIRegE(sz,pfx,modrm));
3757 codegen_mulL_A_D ( sz, True, src,
3758 nameIRegE(sz,pfx,modrm) );
3762 assign( t1, getIRegE(sz, pfx, modrm) );
3763 codegen_div ( sz, t1, False );
3764 DIP("div%c %s\n", nameISize(sz),
3765 nameIRegE(sz, pfx, modrm));
3769 assign( t1, getIRegE(sz, pfx, modrm) );
3770 codegen_div ( sz, t1, True );
3771 DIP("idiv%c %s\n", nameISize(sz),
3772 nameIRegE(sz, pfx, modrm));
3776 vpanic("Grp3(amd64,R)");
/* Memory form.  disAMode must be told about a trailing immediate
   (TEST only) so RIP-relative addressing is computed correctly. */
3779 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
3780 /* we have to inform disAMode of any immediate
3782 gregLO3ofRM(modrm)==0/*TEST*/
3788 assign(t1, loadLE(ty,mkexpr(addr)));
3789 switch (gregLO3ofRM(modrm)) {
3790 case 0: { /* TEST */
3791 d64 = getSDisp(imin(4,sz), delta);
3792 delta += imin(4,sz);
3794 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
3796 mkU(ty, d64 & mkSizeMask(sz))));
3797 setFlags_DEP1( Iop_And8, dst1, ty );
3798 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
/* NOT/NEG on memory honour a LOCK prefix via casLE (atomic
   read-modify-write); otherwise a plain store. */
3806 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
3807 if (pfx & PFX_LOCK) {
3808 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
3809 guest_RIP_curr_instr );
3811 storeLE( mkexpr(addr), mkexpr(dst1) );
3813 DIP("not%c %s\n", nameISize(sz), dis_buf);
3819 assign(dst0, mkU(ty,0));
3820 assign(src, mkexpr(t1));
3821 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
3823 if (pfx & PFX_LOCK) {
3824 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
3825 guest_RIP_curr_instr );
3827 storeLE( mkexpr(addr), mkexpr(dst1) );
3829 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
3830 DIP("neg%c %s\n", nameISize(sz), dis_buf);
3832 case 4: /* MUL (unsigned widening) */
3833 codegen_mulL_A_D ( sz, False, t1, dis_buf );
3836 codegen_mulL_A_D ( sz, True, t1, dis_buf );
3839 codegen_div ( sz, t1, False );
3840 DIP("div%c %s\n", nameISize(sz), dis_buf);
3843 codegen_div ( sz, t1, True );
3844 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
3848 vpanic("Grp3(amd64,M)");
3855 /* Group 4 extended opcodes. */
/* Grp4 (FE): INC/DEC Eb (byte-sized only), register and memory forms.
   Memory form honours LOCK via casLE.  Returns updated delta;
   *decode_OK reports failure.  NOTE(review): interior lines are
   elided in this rendering. */
3857 ULong dis_Grp4 ( VexAbiInfo* vbi,
3858 Prefix pfx, Long delta, Bool* decode_OK )
3864 IRTemp t1 = newTemp(ty);
3865 IRTemp t2 = newTemp(ty);
3869 modrm = getUChar(delta);
3870 if (epartIsReg(modrm)) {
3871 assign(t1, getIRegE(1, pfx, modrm));
3872 switch (gregLO3ofRM(modrm)) {
3874 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
3875 putIRegE(1, pfx, modrm, mkexpr(t2));
3876 setFlags_INC_DEC( True, t2, ty );
3879 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
3880 putIRegE(1, pfx, modrm, mkexpr(t2));
3881 setFlags_INC_DEC( False, t2, ty );
3888 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
3889 nameIRegE(1, pfx, modrm));
/* Memory form: load, modify, then CAS (LOCK) or plain store. */
3891 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
3892 assign( t1, loadLE(ty, mkexpr(addr)) );
3893 switch (gregLO3ofRM(modrm)) {
3895 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
3896 if (pfx & PFX_LOCK) {
3897 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
3898 guest_RIP_curr_instr );
3900 storeLE( mkexpr(addr), mkexpr(t2) );
3902 setFlags_INC_DEC( True, t2, ty );
3905 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
3906 if (pfx & PFX_LOCK) {
3907 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
3908 guest_RIP_curr_instr );
3910 storeLE( mkexpr(addr), mkexpr(t2) );
3912 setFlags_INC_DEC( False, t2, ty );
3919 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
3925 /* Group 5 extended opcodes. */
/* Grp5 (FF): INC/DEC/CALL/JMP/PUSH Ev, dispatched on the reg field.
   CALL and JMP end the superblock (dres->whatNext = Dis_StopHere).
   Returns updated delta; *decode_OK reports failure.  NOTE(review):
   interior lines (case labels, braces) are elided in this
   rendering. */
3927 ULong dis_Grp5 ( VexAbiInfo* vbi,
3928 Prefix pfx, Int sz, Long delta,
3929 DisResult* dres, Bool* decode_OK )
3934 IRTemp addr = IRTemp_INVALID;
3935 IRType ty = szToITy(sz);
3936 IRTemp t1 = newTemp(ty);
3937 IRTemp t2 = IRTemp_INVALID;
3938 IRTemp t3 = IRTemp_INVALID;
3943 modrm = getUChar(delta);
3944 if (epartIsReg(modrm)) {
3945 assign(t1, getIRegE(sz,pfx,modrm));
3946 switch (gregLO3ofRM(modrm)) {
3949 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3950 mkexpr(t1), mkU(ty,1)));
3951 setFlags_INC_DEC( True, t2, ty );
3952 putIRegE(sz,pfx,modrm, mkexpr(t2));
3956 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3957 mkexpr(t1), mkU(ty,1)));
3958 setFlags_INC_DEC( False, t2, ty );
3959 putIRegE(sz,pfx,modrm, mkexpr(t2));
3961 case 2: /* call Ev */
3962 /* Ignore any sz value and operate as if sz==8. */
3963 if (!(sz == 4 || sz == 8)) goto unhandled;
/* Push the return address (next insn) on the stack, hint the
   AbiHint machinery about the red zone, then jump. */
3965 t3 = newTemp(Ity_I64);
3966 assign(t3, getIRegE(sz,pfx,modrm));
3967 t2 = newTemp(Ity_I64);
3968 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
3969 putIReg64(R_RSP, mkexpr(t2));
3970 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
3971 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
3972 jmp_treg(Ijk_Call,t3);
3973 dres->whatNext = Dis_StopHere;
3976 case 4: /* jmp Ev */
3977 /* Ignore any sz value and operate as if sz==8. */
3978 if (!(sz == 4 || sz == 8)) goto unhandled;
3980 t3 = newTemp(Ity_I64);
3981 assign(t3, getIRegE(sz,pfx,modrm));
3982 jmp_treg(Ijk_Boring,t3);
3983 dres->whatNext = Dis_StopHere;
3991 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
3992 showSz ? nameISize(sz) : ' ',
3993 nameIRegE(sz, pfx, modrm));
/* Memory form.  For CALL/JMP/PUSH (cases 2/4/6) the operand is
   loaded as 64 bits below, so the generic ty-sized preload is
   skipped for them. */
3995 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
3996 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
3997 && gregLO3ofRM(modrm) != 6) {
3998 assign(t1, loadLE(ty,mkexpr(addr)));
4000 switch (gregLO3ofRM(modrm)) {
4003 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4004 mkexpr(t1), mkU(ty,1)));
4005 if (pfx & PFX_LOCK) {
4006 casLE( mkexpr(addr),
4007 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4009 storeLE(mkexpr(addr),mkexpr(t2));
4011 setFlags_INC_DEC( True, t2, ty );
4015 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4016 mkexpr(t1), mkU(ty,1)));
4017 if (pfx & PFX_LOCK) {
4018 casLE( mkexpr(addr),
4019 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4021 storeLE(mkexpr(addr),mkexpr(t2));
4023 setFlags_INC_DEC( False, t2, ty );
4025 case 2: /* call Ev */
4026 /* Ignore any sz value and operate as if sz==8. */
4027 if (!(sz == 4 || sz == 8)) goto unhandled;
4029 t3 = newTemp(Ity_I64);
4030 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4031 t2 = newTemp(Ity_I64);
4032 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4033 putIReg64(R_RSP, mkexpr(t2));
4034 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
4035 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
4036 jmp_treg(Ijk_Call,t3);
4037 dres->whatNext = Dis_StopHere;
4040 case 4: /* JMP Ev */
4041 /* Ignore any sz value and operate as if sz==8. */
4042 if (!(sz == 4 || sz == 8)) goto unhandled;
4044 t3 = newTemp(Ity_I64);
4045 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4046 jmp_treg(Ijk_Boring,t3);
4047 dres->whatNext = Dis_StopHere;
4050 case 6: /* PUSH Ev */
4051 /* There is no encoding for 32-bit operand size; hence ... */
4052 if (sz == 4) sz = 8;
4053 if (!(sz == 8 || sz == 2)) goto unhandled;
/* NOTE(review): for sz==2 this path still loads and stores a full
   64-bit value while adjusting RSP by only sz bytes -- looks
   inconsistent; some lines are elided here, so confirm against the
   upstream source before concluding it is a bug. */
4055 t3 = newTemp(Ity_I64);
4056 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4057 t2 = newTemp(Ity_I64);
4058 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4059 putIReg64(R_RSP, mkexpr(t2) );
4060 storeLE( mkexpr(t2), mkexpr(t3) );
4063 goto unhandled; /* awaiting test case */
4071 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4072 showSz ? nameISize(sz) : ' ',
4079 /*------------------------------------------------------------*/
4080 /*--- Disassembling string ops (including REP prefixes) ---*/
4081 /*------------------------------------------------------------*/
4083 /* Code shared by all the string ops */
/* Set t_inc to the per-iteration pointer increment for a string op:
   +/- sz bytes, with the sign taken from the guest DFLAG state slot
   (held as a 64-bit +/-1 value and shifted left by log2(sz) for the
   wider sizes).  NOTE(review): some lines elided in this rendering. */
4085 void dis_string_op_increment ( Int sz, IRTemp t_inc )
4088 if (sz == 8 || sz == 4 || sz == 2) {
4090 if (sz == 4) logSz = 2;
4091 if (sz == 8) logSz = 3;
4093 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4097 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
/* Dispatch a single (non-REP) string op: compute the increment, then
   invoke the per-op translator (dis_MOVS/dis_LODS/etc.). */
4102 void dis_string_op( void (*dis_OP)( Int, IRTemp ),
4103 Int sz, HChar* name, Prefix pfx )
4105 IRTemp t_inc = newTemp(Ity_I64);
4106 /* Really we ought to inspect the override prefixes, but we don't.
4107 The following assertion catches any resulting sillyness. */
4108 vassert(pfx == clearSegBits(pfx));
4109 dis_string_op_increment(sz, t_inc);
4110 dis_OP( sz, t_inc );
4111 DIP("%s%c\n", name, nameISize(sz));
/* MOVS: copy one element from [RSI] to [RDI], then advance both
   pointers by t_inc (sign encodes the direction flag). */
4115 void dis_MOVS ( Int sz, IRTemp t_inc )
4117 IRType ty = szToITy(sz);
4118 IRTemp td = newTemp(Ity_I64); /* RDI */
4119 IRTemp ts = newTemp(Ity_I64); /* RSI */
4121 assign( td, getIReg64(R_RDI) );
4122 assign( ts, getIReg64(R_RSI) );
4124 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4126 putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
4127 putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
/* LODS: load one element from [RSI] into rAX, then advance RSI. */
4131 void dis_LODS ( Int sz, IRTemp t_inc )
4133 IRType ty = szToITy(sz);
4134 IRTemp ts = newTemp(Ity_I64); /* RSI */
4136 assign( ts, getIReg64(R_RSI) );
4138 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4140 putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
/* STOS: store rAX to [RDI], then advance RDI. */
4144 void dis_STOS ( Int sz, IRTemp t_inc )
4146 IRType ty = szToITy(sz);
4147 IRTemp ta = newTemp(ty); /* rAX */
4148 IRTemp td = newTemp(Ity_I64); /* RDI */
4150 assign( ta, getIRegRAX(sz) );
4152 assign( td, getIReg64(R_RDI) );
4154 storeLE( mkexpr(td), mkexpr(ta) );
4156 putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
/* CMPS: compare [RSI] with [RDI] (flags as for SUB of [RSI]-[RDI]),
   then advance both pointers.  No register result is written. */
4160 void dis_CMPS ( Int sz, IRTemp t_inc )
4162 IRType ty = szToITy(sz);
4163 IRTemp tdv = newTemp(ty); /* (RDI) */
4164 IRTemp tsv = newTemp(ty); /* (RSI) */
4165 IRTemp td = newTemp(Ity_I64); /* RDI */
4166 IRTemp ts = newTemp(Ity_I64); /* RSI */
4168 assign( td, getIReg64(R_RDI) );
4170 assign( ts, getIReg64(R_RSI) );
4172 assign( tdv, loadLE(ty,mkexpr(td)) );
4174 assign( tsv, loadLE(ty,mkexpr(ts)) );
4176 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4178 putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
4180 putIReg64(R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
/* SCAS: compare rAX with [RDI] (flags as for rAX - [RDI]), then
   advance RDI.  No register result is written. */
4184 void dis_SCAS ( Int sz, IRTemp t_inc )
4186 IRType ty = szToITy(sz);
4187 IRTemp ta = newTemp(ty); /* rAX */
4188 IRTemp td = newTemp(Ity_I64); /* RDI */
4189 IRTemp tdv = newTemp(ty); /* (RDI) */
4191 assign( ta, getIRegRAX(sz) );
4193 assign( td, getIReg64(R_RDI) );
4195 assign( tdv, loadLE(ty,mkexpr(td)) );
4197 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4199 putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
4203 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4204 the insn is the last one in the basic block, and so emit a jump to
4205 the next insn, rather than just falling through. */
/* Structure per iteration: side-exit to rip_next if RCX==0; decrement
   RCX; perform one element of the op; then either loop back to rip
   unconditionally (plain REP) or side-exit on the REPE/REPNE
   condition and loop otherwise. */
4207 void dis_REP_op ( AMD64Condcode cond,
4208 void (*dis_OP)(Int, IRTemp),
4209 Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
4212 IRTemp t_inc = newTemp(Ity_I64);
4213 IRTemp tc = newTemp(Ity_I64); /* RCX */
4215 /* Really we ought to inspect the override prefixes, but we don't.
4216 The following assertion catches any resulting sillyness. */
4217 vassert(pfx == clearSegBits(pfx));
4219 assign( tc, getIReg64(R_RCX) );
4221 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,mkexpr(tc),mkU64(0)),
4223 IRConst_U64(rip_next) ) );
4225 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
4227 dis_string_op_increment(sz, t_inc);
4230 if (cond == AMD64CondAlways) {
4231 jmp_lit(Ijk_Boring,rip);
4233 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4235 IRConst_U64(rip) ) );
4236 jmp_lit(Ijk_Boring,rip_next);
4238 DIP("%s%c\n", name, nameISize(sz));
4242 /*------------------------------------------------------------*/
4243 /*--- Arithmetic, etc. ---*/
4244 /*------------------------------------------------------------*/
4246 /* IMUL E, G. Supplied eip points to the modR/M byte. */
/* Two-operand IMUL: G := G * E (truncating, same-size result).  Flag
   thunk set via setFlags_MUL with the signed base op.  Returns the
   updated delta.  NOTE(review): interior lines elided in this
   rendering. */
4248 ULong dis_mul_E_G ( VexAbiInfo* vbi,
4255 UChar rm = getUChar(delta0);
4256 IRType ty = szToITy(size);
4257 IRTemp te = newTemp(ty);
4258 IRTemp tg = newTemp(ty);
4259 IRTemp resLo = newTemp(ty);
4261 assign( tg, getIRegG(size, pfx, rm) );
4262 if (epartIsReg(rm)) {
4263 assign( te, getIRegE(size, pfx, rm) );
4265 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
4266 assign( te, loadLE(ty,mkexpr(addr)) );
4269 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4271 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4273 putIRegG(size, pfx, rm, mkexpr(resLo) );
4275 if (epartIsReg(rm)) {
4276 DIP("imul%c %s, %s\n", nameISize(size),
4277 nameIRegE(size,pfx,rm),
4278 nameIRegG(size,pfx,rm));
4281 DIP("imul%c %s, %s\n", nameISize(size),
4283 nameIRegG(size,pfx,rm));
4289 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
/* Three-operand IMUL: G := E * imm (truncating).  The immediate is
   litsize bytes (capped at 4, sign-extended) and masked down to the
   operand size.  Returns the updated delta.  NOTE(review): interior
   lines elided in this rendering. */
4291 ULong dis_imul_I_E_G ( VexAbiInfo* vbi,
4300 UChar rm = getUChar(delta);
4301 IRType ty = szToITy(size);
4302 IRTemp te = newTemp(ty);
4303 IRTemp tl = newTemp(ty);
4304 IRTemp resLo = newTemp(ty);
4306 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
4308 if (epartIsReg(rm)) {
4309 assign(te, getIRegE(size, pfx, rm));
4312 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
4314 assign(te, loadLE(ty, mkexpr(addr)));
4317 d64 = getSDisp(imin(4,litsize),delta);
4318 delta += imin(4,litsize);
4320 d64 &= mkSizeMask(size);
4321 assign(tl, mkU(ty,d64));
4323 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4325 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4327 putIRegG(size, pfx, rm, mkexpr(resLo));
4329 DIP("imul%c $%lld, %s, %s\n",
4330 nameISize(size), d64,
4331 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4332 nameIRegG(size,pfx,rm) );
4337 /*------------------------------------------------------------*/
4339 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
4341 /*------------------------------------------------------------*/
4343 /* --- Helper functions for dealing with the register stack. --- */
4345 /* --- Set the emulation-warning pseudo-register. --- */
4347 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
4349 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
4350 stmt( IRStmt_Put( OFFB_EMWARN, e ) );
4353 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
4355 static IRExpr* mkQNaN64 ( void )
4357 /* QNaN is 0 2047 1 0(51times)
4358 == 0b 11111111111b 1 0(51times)
4359 == 0x7FF8 0000 0000 0000
4361 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
4364 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
4366 static IRExpr* get_ftop ( void )
4368 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
4371 static void put_ftop ( IRExpr* e )
4373 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
4374 stmt( IRStmt_Put( OFFB_FTOP, e ) );
4377 /* --------- Get/put the C3210 bits. --------- */
4379 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
4381 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
4384 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
4386 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
4387 stmt( IRStmt_Put( OFFB_FC3210, e ) );
4390 /* --------- Get/put the FPU rounding mode. --------- */
/* guest_FPROUND is held as 64 bits in the state; these helpers narrow
   to / widen from the Ity_I32 value used by the translator. */
4391 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
4393 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
4396 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
4398 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
4399 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
4403 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
4404 /* Produces a value in 0 .. 3, which is encoded as per the type
4405 IRRoundingMode. Since the guest_FPROUND value is also encoded as
4406 per IRRoundingMode, we merely need to get it and mask it for
4409 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
4411 return binop( Iop_And32, get_fpround(), mkU32(3) );
/* Hard-wired round-to-nearest, used where this front end deliberately
   ignores the guest rounding mode (see the policy note in the file
   header).  Call sites are tagged XXXROUNDINGFIXME. */
4414 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
4416 return mkU32(Irrm_NEAREST);
4420 /* --------- Get/set FP register tag bytes. --------- */
4422 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
/* PutI/GetI index the 8-entry tag array relative to the current FTOP,
   giving the x87 rotating-stack view of the flat FPTAGS array. */
4424 static void put_ST_TAG ( Int i, IRExpr* value )
4427 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
4428 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
4429 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
4432 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
4433 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
4435 static IRExpr* get_ST_TAG ( Int i )
4437 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
4438 return IRExpr_GetI( descr, get_ftop(), i );
4442 /* --------- Get/set FP registers. --------- */
4444 /* Given i, and some expression e, emit 'ST(i) = e' and set the
4445 register's tag to indicate the register is full. The previous
4446 state of the register is not checked. */
4448 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
4451 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
4452 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
4453 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
4454 /* Mark the register as in-use. */
4455 put_ST_TAG(i, mkU8(1));
4458 /* Given i, and some expression e, emit
4459 ST(i) = is_full(i) ? NaN : e
4460 and set the tag accordingly.
/* Writing to an already-full register models an x87 stack fault by
   storing a QNaN instead of the value.  Mux0X selects on the tag
   (0 = empty); the Mux arms are elided in this rendering -- confirm
   arm order against upstream. */
4463 static void put_ST ( Int i, IRExpr* value )
4465 put_ST_UNCHECKED( i,
4466 IRExpr_Mux0X( get_ST_TAG(i),
4469 /* non-0 means full */
4476 /* Given i, generate an expression yielding 'ST(i)'. */
4478 static IRExpr* get_ST_UNCHECKED ( Int i )
4480 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
4481 return IRExpr_GetI( descr, get_ftop(), i );
4485 /* Given i, generate an expression yielding
4486 is_full(i) ? ST(i) : NaN
/* Reading an empty register likewise yields a QNaN rather than
   whatever stale value the slot holds. */
4489 static IRExpr* get_ST ( Int i )
4492 IRExpr_Mux0X( get_ST_TAG(i),
4495 /* non-0 means full */
4496 get_ST_UNCHECKED(i));
4500 /* Adjust FTOP downwards by one register. */
/* x87 push: decrement FTOP so ST(0) names the next-lower slot.  The
   slot's tag/value are set by the subsequent put_ST. */
4502 static void fp_push ( void )
4504 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
4507 /* Adjust FTOP upwards by one register, and mark the vacated register
/* x87 pop: mark current ST(0) empty, then increment FTOP. */
4510 static void fp_pop ( void )
4512 put_ST_TAG(0, mkU8(0));
4513 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4516 /* Clear the C2 bit of the FPU status register, for
4517 sin/cos/tan/sincos. */
4519 static void clear_C2 ( void )
4521 put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) );
4524 /* Invent a plausible-looking FPU status word value:
4525 ((ftop & 7) << 11) | (c3210 & 0x4700)
4527 static IRExpr* get_FPU_sw ( void )
4533 binop(Iop_And32, get_ftop(), mkU32(7)),
4535 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
4541 /* ------------------------------------------------------- */
4542 /* Given all that stack-mangling junk, we can now go ahead
4543 and describe FP instructions.
4546 /* ST(0) = ST(0) `op` mem64/32(addr)
4547 Need to check ST(0)'s tag on read, but not on write.
/* dbl selects the memory operand width: True = F64 load, False = F32
   load widened to F64.  Rounding mode is deliberately faked to
   nearest (XXXROUNDINGFIXME).  Interior lines elided. */
4550 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
4553 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
4557 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4559 loadLE(Ity_F64,mkexpr(addr))
4564 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4566 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
4572 /* ST(0) = mem64/32(addr) `op` ST(0)
4573 Need to check ST(0)'s tag on read, but not on write.
/* Same as fp_do_op_mem_ST_0 but with the operand order reversed
   (memory value on the left) -- used for FSUBR/FDIVR. */
4576 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
4579 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
4583 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4584 loadLE(Ity_F64,mkexpr(addr)),
4590 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4591 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
4598 /* ST(dst) = ST(dst) `op` ST(src).
4599 Check dst and src tags when reading but not on write.
4602 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
4605 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
4609 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4617 /* ST(dst) = ST(src) `op` ST(dst).
4618 Check dst and src tags when reading but not on write.
/* Reversed-operand register-register form; pop_after additionally
   pops the stack (FSUBRP/FDIVRP etc.). */
4621 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
4624 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
4628 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4636 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
/* Emits a COPY flag thunk whose DEP1 is derived from the IR CmpF64
   result of st(0) vs st(i); pop_after handles FUCOMIP/FCOMIP. */
4637 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
4639 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
4640 /* This is a bit of a hack (and isn't really right). It sets
4641 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
4642 documentation implies A and S are unchanged.
4644 /* It's also fishy in that it is used both for COMIP and
4645 UCOMIP, and they aren't the same (although similar). */
4646 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
4647 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
4652 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
4661 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
/* Narrow an I32 to I16 with x87-style saturation: out-of-range values
   collapse to -32768 (the x87 "integer indefinite" for 16 bits).  The
   visible Add32 of 32768 suggests the range test is one unsigned
   compare of (e32 + 32768) -- the comparison itself is elided in this
   rendering; confirm against upstream. */
4663 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
4665 IRTemp t32 = newTemp(Ity_I32);
4672 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
4675 unop(Iop_32to16, mkexpr(t32)));
4680 ULong dis_FPU ( /*OUT*/Bool* decode_ok,
4681 VexAbiInfo* vbi, Prefix pfx, Long delta )
4688 /* On entry, delta points at the second byte of the insn (the modrm
4690 UChar first_opcode = getUChar(delta-1);
4691 UChar modrm = getUChar(delta+0);
4693 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
4695 if (first_opcode == 0xD8) {
4698 /* bits 5,4,3 are an opcode extension, and the modRM also
4699 specifies an address. */
4700 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
4703 switch (gregLO3ofRM(modrm)) {
4705 case 0: /* FADD single-real */
4706 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
4709 case 1: /* FMUL single-real */
4710 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
4713 //.. case 2: /* FCOM single-real */
4714 //.. DIP("fcoms %s\n", dis_buf);
4715 //.. /* This forces C1 to zero, which isn't right. */
4717 //.. binop( Iop_And32,
4718 //.. binop(Iop_Shl32,
4719 //.. binop(Iop_CmpF64,
4721 //.. unop(Iop_F32toF64,
4722 //.. loadLE(Ity_F32,mkexpr(addr)))),
4728 //.. case 3: /* FCOMP single-real */
4729 //.. DIP("fcomps %s\n", dis_buf);
4730 //.. /* This forces C1 to zero, which isn't right. */
4732 //.. binop( Iop_And32,
4733 //.. binop(Iop_Shl32,
4734 //.. binop(Iop_CmpF64,
4736 //.. unop(Iop_F32toF64,
4737 //.. loadLE(Ity_F32,mkexpr(addr)))),
4744 case 4: /* FSUB single-real */
4745 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
4748 case 5: /* FSUBR single-real */
4749 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
4752 case 6: /* FDIV single-real */
4753 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
4756 case 7: /* FDIVR single-real */
4757 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
4761 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
4762 vex_printf("first_opcode == 0xD8\n");
4769 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
4770 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
4773 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
4774 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
4777 /* Dunno if this is right */
4778 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
4779 r_dst = (UInt)modrm - 0xD0;
4780 DIP("fcom %%st(0),%%st(%d)\n", r_dst);
4781 /* This forces C1 to zero, which isn't right. */
4786 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
4792 /* Dunno if this is right */
4793 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
4794 r_dst = (UInt)modrm - 0xD8;
4795 DIP("fcomp %%st(0),%%st(%d)\n", r_dst);
4796 /* This forces C1 to zero, which isn't right. */
4801 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
4808 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
4809 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
4812 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
4813 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
4816 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
4817 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
4820 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
4821 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
4830 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
4832 if (first_opcode == 0xD9) {
4835 /* bits 5,4,3 are an opcode extension, and the modRM also
4836 specifies an address. */
4837 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
4840 switch (gregLO3ofRM(modrm)) {
4842 case 0: /* FLD single-real */
4843 DIP("flds %s\n", dis_buf);
4845 put_ST(0, unop(Iop_F32toF64,
4846 loadLE(Ity_F32, mkexpr(addr))));
4849 case 2: /* FST single-real */
4850 DIP("fsts %s\n", dis_buf);
4851 storeLE(mkexpr(addr),
4852 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
4855 case 3: /* FSTP single-real */
4856 DIP("fstps %s\n", dis_buf);
4857 storeLE(mkexpr(addr),
4858 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
4862 case 4: { /* FLDENV m28 */
4863 /* Uses dirty helper:
4864 VexEmWarn amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */
4865 IRTemp ew = newTemp(Ity_I32);
4866 IRTemp w64 = newTemp(Ity_I64);
4867 IRDirty* d = unsafeIRDirty_0_N (
4869 "amd64g_dirtyhelper_FLDENV",
4870 &amd64g_dirtyhelper_FLDENV,
4871 mkIRExprVec_1( mkexpr(addr) )
4875 /* declare we're reading memory */
4877 d->mAddr = mkexpr(addr);
4880 /* declare we're writing guest state */
4883 d->fxState[0].fx = Ifx_Write;
4884 d->fxState[0].offset = OFFB_FTOP;
4885 d->fxState[0].size = sizeof(UInt);
4887 d->fxState[1].fx = Ifx_Write;
4888 d->fxState[1].offset = OFFB_FPTAGS;
4889 d->fxState[1].size = 8 * sizeof(UChar);
4891 d->fxState[2].fx = Ifx_Write;
4892 d->fxState[2].offset = OFFB_FPROUND;
4893 d->fxState[2].size = sizeof(ULong);
4895 d->fxState[3].fx = Ifx_Write;
4896 d->fxState[3].offset = OFFB_FC3210;
4897 d->fxState[3].size = sizeof(ULong);
4899 stmt( IRStmt_Dirty(d) );
4901 /* ew contains any emulation warning we may need to
4902 issue. If needed, side-exit to the next insn,
4903 reporting the warning, so that Valgrind's dispatcher
4904 sees the warning. */
4905 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
4906 put_emwarn( mkexpr(ew) );
4909 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4911 IRConst_U64( guest_RIP_bbstart+delta )
4915 DIP("fldenv %s\n", dis_buf);
4919 case 5: {/* FLDCW */
4920 /* The only thing we observe in the control word is the
4921 rounding mode. Therefore, pass the 16-bit value
4922 (x87 native-format control word) to a clean helper,
4923 getting back a 64-bit value, the lower half of which
4924 is the FPROUND value to store, and the upper half of
4925 which is the emulation-warning token which may be
4928 /* ULong amd64h_check_fldcw ( ULong ); */
4929 IRTemp t64 = newTemp(Ity_I64);
4930 IRTemp ew = newTemp(Ity_I32);
4931 DIP("fldcw %s\n", dis_buf);
4932 assign( t64, mkIRExprCCall(
4933 Ity_I64, 0/*regparms*/,
4934 "amd64g_check_fldcw",
4935 &amd64g_check_fldcw,
4938 loadLE(Ity_I16, mkexpr(addr)))
4943 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
4944 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
4945 put_emwarn( mkexpr(ew) );
4946 /* Finally, if an emulation warning was reported,
4947 side-exit to the next insn, reporting the warning,
4948 so that Valgrind's dispatcher sees the warning. */
4951 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4953 IRConst_U64( guest_RIP_bbstart+delta )
4959 case 6: { /* FNSTENV m28 */
4960 /* Uses dirty helper:
4961 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
4962 IRDirty* d = unsafeIRDirty_0_N (
4964 "amd64g_dirtyhelper_FSTENV",
4965 &amd64g_dirtyhelper_FSTENV,
4966 mkIRExprVec_1( mkexpr(addr) )
4969 /* declare we're writing memory */
4971 d->mAddr = mkexpr(addr);
4974 /* declare we're reading guest state */
4977 d->fxState[0].fx = Ifx_Read;
4978 d->fxState[0].offset = OFFB_FTOP;
4979 d->fxState[0].size = sizeof(UInt);
4981 d->fxState[1].fx = Ifx_Read;
4982 d->fxState[1].offset = OFFB_FPTAGS;
4983 d->fxState[1].size = 8 * sizeof(UChar);
4985 d->fxState[2].fx = Ifx_Read;
4986 d->fxState[2].offset = OFFB_FPROUND;
4987 d->fxState[2].size = sizeof(ULong);
4989 d->fxState[3].fx = Ifx_Read;
4990 d->fxState[3].offset = OFFB_FC3210;
4991 d->fxState[3].size = sizeof(ULong);
4993 stmt( IRStmt_Dirty(d) );
4995 DIP("fnstenv %s\n", dis_buf);
4999 case 7: /* FNSTCW */
5000 /* Fake up a native x87 FPU control word. The only
5001 thing it depends on is FPROUND[1:0], so call a clean
5002 helper to cook it up. */
5003 /* ULong amd64g_create_fpucw ( ULong fpround ) */
5004 DIP("fnstcw %s\n", dis_buf);
5010 "amd64g_create_fpucw", &amd64g_create_fpucw,
5011 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5018 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5019 vex_printf("first_opcode == 0xD9\n");
5027 case 0xC0 ... 0xC7: /* FLD %st(?) */
5028 r_src = (UInt)modrm - 0xC0;
5029 DIP("fld %%st(%u)\n", r_src);
5030 t1 = newTemp(Ity_F64);
5031 assign(t1, get_ST(r_src));
5033 put_ST(0, mkexpr(t1));
5036 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5037 r_src = (UInt)modrm - 0xC8;
5038 DIP("fxch %%st(%u)\n", r_src);
5039 t1 = newTemp(Ity_F64);
5040 t2 = newTemp(Ity_F64);
5041 assign(t1, get_ST(0));
5042 assign(t2, get_ST(r_src));
5043 put_ST_UNCHECKED(0, mkexpr(t2));
5044 put_ST_UNCHECKED(r_src, mkexpr(t1));
5047 case 0xE0: /* FCHS */
5049 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5052 case 0xE1: /* FABS */
5054 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5057 case 0xE5: { /* FXAM */
5058 /* This is an interesting one. It examines %st(0),
5059 regardless of whether the tag says it's empty or not.
5060 Here, just pass both the tag (in our format) and the
5061 value (as a double, actually a ULong) to a helper
5064 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5065 unop(Iop_ReinterpF64asI64,
5066 get_ST_UNCHECKED(0)) );
5067 put_C3210(mkIRExprCCall(
5070 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5077 case 0xE8: /* FLD1 */
5080 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5081 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
5084 case 0xE9: /* FLDL2T */
5087 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5088 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
5091 case 0xEA: /* FLDL2E */
5094 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5095 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
5098 case 0xEB: /* FLDPI */
5101 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5102 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
5105 case 0xEC: /* FLDLG2 */
5108 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5109 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
5112 case 0xED: /* FLDLN2 */
5115 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5116 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
5119 case 0xEE: /* FLDZ */
5122 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5123 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
5126 case 0xF0: /* F2XM1 */
5130 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5134 case 0xF1: /* FYL2X */
5138 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5144 case 0xF2: /* FPTAN */
5148 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5151 put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
5152 clear_C2(); /* HACK */
5155 case 0xF3: /* FPATAN */
5159 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5165 case 0xF4: { /* FXTRACT */
5166 IRTemp argF = newTemp(Ity_F64);
5167 IRTemp sigF = newTemp(Ity_F64);
5168 IRTemp expF = newTemp(Ity_F64);
5169 IRTemp argI = newTemp(Ity_I64);
5170 IRTemp sigI = newTemp(Ity_I64);
5171 IRTemp expI = newTemp(Ity_I64);
5173 assign( argF, get_ST(0) );
5174 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
5177 Ity_I64, 0/*regparms*/,
5178 "x86amd64g_calculate_FXTRACT",
5179 &x86amd64g_calculate_FXTRACT,
5180 mkIRExprVec_2( mkexpr(argI),
5181 mkIRExpr_HWord(0)/*sig*/ ))
5185 Ity_I64, 0/*regparms*/,
5186 "x86amd64g_calculate_FXTRACT",
5187 &x86amd64g_calculate_FXTRACT,
5188 mkIRExprVec_2( mkexpr(argI),
5189 mkIRExpr_HWord(1)/*exp*/ ))
5191 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
5192 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
5194 put_ST_UNCHECKED(0, mkexpr(expF) );
5197 put_ST(0, mkexpr(sigF) );
5201 case 0xF5: { /* FPREM1 -- IEEE compliant */
5202 IRTemp a1 = newTemp(Ity_F64);
5203 IRTemp a2 = newTemp(Ity_F64);
5205 /* Do FPREM1 twice, once to get the remainder, and once
5206 to get the C3210 flag values. */
5207 assign( a1, get_ST(0) );
5208 assign( a2, get_ST(1) );
5211 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5216 triop(Iop_PRem1C3210F64,
5217 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5223 case 0xF7: /* FINCSTP */
5225 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5228 case 0xF8: { /* FPREM -- not IEEE compliant */
5229 IRTemp a1 = newTemp(Ity_F64);
5230 IRTemp a2 = newTemp(Ity_F64);
5232 /* Do FPREM twice, once to get the remainder, and once
5233 to get the C3210 flag values. */
5234 assign( a1, get_ST(0) );
5235 assign( a2, get_ST(1) );
5238 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5243 triop(Iop_PRemC3210F64,
5244 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5250 case 0xF9: /* FYL2XP1 */
5253 triop(Iop_Yl2xp1F64,
5254 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5260 case 0xFA: /* FSQRT */
5264 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5268 case 0xFB: { /* FSINCOS */
5269 IRTemp a1 = newTemp(Ity_F64);
5270 assign( a1, get_ST(0) );
5274 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5279 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5281 clear_C2(); /* HACK */
5285 case 0xFC: /* FRNDINT */
5288 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
5291 case 0xFD: /* FSCALE */
5295 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5300 case 0xFE: /* FSIN */
5304 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5306 clear_C2(); /* HACK */
5309 case 0xFF: /* FCOS */
5313 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5315 clear_C2(); /* HACK */
5324 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
5326 if (first_opcode == 0xDA) {
5330 /* bits 5,4,3 are an opcode extension, and the modRM also
5331 specifies an address. */
5333 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5335 switch (gregLO3ofRM(modrm)) {
5337 case 0: /* FIADD m32int */ /* ST(0) += m32int */
5338 DIP("fiaddl %s\n", dis_buf);
5342 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
5343 DIP("fimull %s\n", dis_buf);
5347 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
5348 DIP("fisubl %s\n", dis_buf);
5352 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
5353 DIP("fisubrl %s\n", dis_buf);
5357 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
5358 DIP("fisubl %s\n", dis_buf);
5362 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
5363 DIP("fidivrl %s\n", dis_buf);
5370 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5373 loadLE(Ity_I32, mkexpr(addr)))));
5379 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5381 loadLE(Ity_I32, mkexpr(addr))),
5386 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5387 vex_printf("first_opcode == 0xDA\n");
5396 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
5397 r_src = (UInt)modrm - 0xC0;
5398 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
5402 mk_amd64g_calculate_condition(AMD64CondB)),
5403 get_ST(0), get_ST(r_src)) );
5406 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
5407 r_src = (UInt)modrm - 0xC8;
5408 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
5412 mk_amd64g_calculate_condition(AMD64CondZ)),
5413 get_ST(0), get_ST(r_src)) );
5416 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
5417 r_src = (UInt)modrm - 0xD0;
5418 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
5422 mk_amd64g_calculate_condition(AMD64CondBE)),
5423 get_ST(0), get_ST(r_src)) );
5426 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
5427 r_src = (UInt)modrm - 0xD8;
5428 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
5432 mk_amd64g_calculate_condition(AMD64CondP)),
5433 get_ST(0), get_ST(r_src)) );
5436 case 0xE9: /* FUCOMPP %st(0),%st(1) */
5437 DIP("fucompp %%st(0),%%st(1)\n");
5438 /* This forces C1 to zero, which isn't right. */
5443 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5458 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
5460 if (first_opcode == 0xDB) {
5463 /* bits 5,4,3 are an opcode extension, and the modRM also
5464 specifies an address. */
5465 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5468 switch (gregLO3ofRM(modrm)) {
5470 case 0: /* FILD m32int */
5471 DIP("fildl %s\n", dis_buf);
5473 put_ST(0, unop(Iop_I32StoF64,
5474 loadLE(Ity_I32, mkexpr(addr))));
5477 case 1: /* FISTTPL m32 (SSE3) */
5478 DIP("fisttpl %s\n", dis_buf);
5479 storeLE( mkexpr(addr),
5480 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
5484 case 2: /* FIST m32 */
5485 DIP("fistl %s\n", dis_buf);
5486 storeLE( mkexpr(addr),
5487 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
5490 case 3: /* FISTP m32 */
5491 DIP("fistpl %s\n", dis_buf);
5492 storeLE( mkexpr(addr),
5493 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
5497 case 5: { /* FLD extended-real */
5498 /* Uses dirty helper:
5499 ULong amd64g_loadF80le ( ULong )
5500 addr holds the address. First, do a dirty call to
5501 get hold of the data. */
5502 IRTemp val = newTemp(Ity_I64);
5503 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
5505 IRDirty* d = unsafeIRDirty_1_N (
5508 "amd64g_dirtyhelper_loadF80le",
5509 &amd64g_dirtyhelper_loadF80le,
5512 /* declare that we're reading memory */
5514 d->mAddr = mkexpr(addr);
5517 /* execute the dirty call, dumping the result in val. */
5518 stmt( IRStmt_Dirty(d) );
5520 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
5522 DIP("fldt %s\n", dis_buf);
5526 case 7: { /* FSTP extended-real */
5527 /* Uses dirty helper:
5528 void amd64g_storeF80le ( ULong addr, ULong data )
5531 = mkIRExprVec_2( mkexpr(addr),
5532 unop(Iop_ReinterpF64asI64, get_ST(0)) );
5534 IRDirty* d = unsafeIRDirty_0_N (
5536 "amd64g_dirtyhelper_storeF80le",
5537 &amd64g_dirtyhelper_storeF80le,
5540 /* declare we're writing memory */
5542 d->mAddr = mkexpr(addr);
5545 /* execute the dirty call. */
5546 stmt( IRStmt_Dirty(d) );
5549 DIP("fstpt\n %s", dis_buf);
5554 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5555 vex_printf("first_opcode == 0xDB\n");
5564 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
5565 r_src = (UInt)modrm - 0xC0;
5566 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
5570 mk_amd64g_calculate_condition(AMD64CondNB)),
5571 get_ST(0), get_ST(r_src)) );
5574 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
5575 r_src = (UInt)modrm - 0xC8;
5576 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
5581 mk_amd64g_calculate_condition(AMD64CondNZ)),
5588 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
5589 r_src = (UInt)modrm - 0xD0;
5590 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
5595 mk_amd64g_calculate_condition(AMD64CondNBE)),
5602 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
5603 r_src = (UInt)modrm - 0xD8;
5604 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
5609 mk_amd64g_calculate_condition(AMD64CondNP)),
5621 /* Uses dirty helper:
5622 void amd64g_do_FINIT ( VexGuestAMD64State* ) */
5623 IRDirty* d = unsafeIRDirty_0_N (
5625 "amd64g_dirtyhelper_FINIT",
5626 &amd64g_dirtyhelper_FINIT,
5631 /* declare we're writing guest state */
5634 d->fxState[0].fx = Ifx_Write;
5635 d->fxState[0].offset = OFFB_FTOP;
5636 d->fxState[0].size = sizeof(UInt);
5638 d->fxState[1].fx = Ifx_Write;
5639 d->fxState[1].offset = OFFB_FPREGS;
5640 d->fxState[1].size = 8 * sizeof(ULong);
5642 d->fxState[2].fx = Ifx_Write;
5643 d->fxState[2].offset = OFFB_FPTAGS;
5644 d->fxState[2].size = 8 * sizeof(UChar);
5646 d->fxState[3].fx = Ifx_Write;
5647 d->fxState[3].offset = OFFB_FPROUND;
5648 d->fxState[3].size = sizeof(ULong);
5650 d->fxState[4].fx = Ifx_Write;
5651 d->fxState[4].offset = OFFB_FC3210;
5652 d->fxState[4].size = sizeof(ULong);
5654 stmt( IRStmt_Dirty(d) );
5660 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
5661 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
5664 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
5665 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
5674 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
5676 if (first_opcode == 0xDC) {
5679 /* bits 5,4,3 are an opcode extension, and the modRM also
5680 specifies an address. */
5681 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5684 switch (gregLO3ofRM(modrm)) {
5686 case 0: /* FADD double-real */
5687 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
5690 case 1: /* FMUL double-real */
5691 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
5694 //.. case 2: /* FCOM double-real */
5695 //.. DIP("fcoml %s\n", dis_buf);
5696 //.. /* This forces C1 to zero, which isn't right. */
5698 //.. binop( Iop_And32,
5699 //.. binop(Iop_Shl32,
5700 //.. binop(Iop_CmpF64,
5702 //.. loadLE(Ity_F64,mkexpr(addr))),
5708 case 3: /* FCOMP double-real */
5709 DIP("fcompl %s\n", dis_buf);
5710 /* This forces C1 to zero, which isn't right. */
5717 loadLE(Ity_F64,mkexpr(addr))),
5724 case 4: /* FSUB double-real */
5725 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
5728 case 5: /* FSUBR double-real */
5729 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
5732 case 6: /* FDIV double-real */
5733 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
5736 case 7: /* FDIVR double-real */
5737 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
5741 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5742 vex_printf("first_opcode == 0xDC\n");
5751 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
5752 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
5755 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
5756 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
5759 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
5760 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
5763 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
5764 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
5767 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
5768 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
5771 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
5772 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
5782 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
5784 if (first_opcode == 0xDD) {
5788 /* bits 5,4,3 are an opcode extension, and the modRM also
5789 specifies an address. */
5790 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5793 switch (gregLO3ofRM(modrm)) {
5795 case 0: /* FLD double-real */
5796 DIP("fldl %s\n", dis_buf);
5798 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
5801 case 1: /* FISTTPQ m64 (SSE3) */
5802 DIP("fistppll %s\n", dis_buf);
5803 storeLE( mkexpr(addr),
5804 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
5808 case 2: /* FST double-real */
5809 DIP("fstl %s\n", dis_buf);
5810 storeLE(mkexpr(addr), get_ST(0));
5813 case 3: /* FSTP double-real */
5814 DIP("fstpl %s\n", dis_buf);
5815 storeLE(mkexpr(addr), get_ST(0));
5819 //.. case 4: { /* FRSTOR m108 */
5820 //.. /* Uses dirty helper:
5821 //.. VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
5822 //.. IRTemp ew = newTemp(Ity_I32);
5823 //.. IRDirty* d = unsafeIRDirty_0_N (
5825 //.. "x86g_dirtyhelper_FRSTOR",
5826 //.. &x86g_dirtyhelper_FRSTOR,
5827 //.. mkIRExprVec_1( mkexpr(addr) )
5829 //.. d->needsBBP = True;
5831 //.. /* declare we're reading memory */
5832 //.. d->mFx = Ifx_Read;
5833 //.. d->mAddr = mkexpr(addr);
5834 //.. d->mSize = 108;
5836 //.. /* declare we're writing guest state */
5837 //.. d->nFxState = 5;
5839 //.. d->fxState[0].fx = Ifx_Write;
5840 //.. d->fxState[0].offset = OFFB_FTOP;
5841 //.. d->fxState[0].size = sizeof(UInt);
5843 //.. d->fxState[1].fx = Ifx_Write;
5844 //.. d->fxState[1].offset = OFFB_FPREGS;
5845 //.. d->fxState[1].size = 8 * sizeof(ULong);
5847 //.. d->fxState[2].fx = Ifx_Write;
5848 //.. d->fxState[2].offset = OFFB_FPTAGS;
5849 //.. d->fxState[2].size = 8 * sizeof(UChar);
5851 //.. d->fxState[3].fx = Ifx_Write;
5852 //.. d->fxState[3].offset = OFFB_FPROUND;
5853 //.. d->fxState[3].size = sizeof(UInt);
5855 //.. d->fxState[4].fx = Ifx_Write;
5856 //.. d->fxState[4].offset = OFFB_FC3210;
5857 //.. d->fxState[4].size = sizeof(UInt);
5859 //.. stmt( IRStmt_Dirty(d) );
5861 //.. /* ew contains any emulation warning we may need to
5862 //.. issue. If needed, side-exit to the next insn,
5863 //.. reporting the warning, so that Valgrind's dispatcher
5864 //.. sees the warning. */
5865 //.. put_emwarn( mkexpr(ew) );
5868 //.. binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5870 //.. IRConst_U32( ((Addr32)guest_eip_bbstart)+delta)
5874 //.. DIP("frstor %s\n", dis_buf);
5878 //.. case 6: { /* FNSAVE m108 */
5879 //.. /* Uses dirty helper:
5880 //.. void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
5881 //.. IRDirty* d = unsafeIRDirty_0_N (
5883 //.. "x86g_dirtyhelper_FSAVE",
5884 //.. &x86g_dirtyhelper_FSAVE,
5885 //.. mkIRExprVec_1( mkexpr(addr) )
5887 //.. d->needsBBP = True;
5888 //.. /* declare we're writing memory */
5889 //.. d->mFx = Ifx_Write;
5890 //.. d->mAddr = mkexpr(addr);
5891 //.. d->mSize = 108;
5893 //.. /* declare we're reading guest state */
5894 //.. d->nFxState = 5;
5896 //.. d->fxState[0].fx = Ifx_Read;
5897 //.. d->fxState[0].offset = OFFB_FTOP;
5898 //.. d->fxState[0].size = sizeof(UInt);
5900 //.. d->fxState[1].fx = Ifx_Read;
5901 //.. d->fxState[1].offset = OFFB_FPREGS;
5902 //.. d->fxState[1].size = 8 * sizeof(ULong);
5904 //.. d->fxState[2].fx = Ifx_Read;
5905 //.. d->fxState[2].offset = OFFB_FPTAGS;
5906 //.. d->fxState[2].size = 8 * sizeof(UChar);
5908 //.. d->fxState[3].fx = Ifx_Read;
5909 //.. d->fxState[3].offset = OFFB_FPROUND;
5910 //.. d->fxState[3].size = sizeof(UInt);
5912 //.. d->fxState[4].fx = Ifx_Read;
5913 //.. d->fxState[4].offset = OFFB_FC3210;
5914 //.. d->fxState[4].size = sizeof(UInt);
5916 //.. stmt( IRStmt_Dirty(d) );
5918 //.. DIP("fnsave %s\n", dis_buf);
5922 case 7: { /* FNSTSW m16 */
5923 IRExpr* sw = get_FPU_sw();
5924 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
5925 storeLE( mkexpr(addr), sw );
5926 DIP("fnstsw %s\n", dis_buf);
5931 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5932 vex_printf("first_opcode == 0xDD\n");
5939 case 0xC0 ... 0xC7: /* FFREE %st(?) */
5940 r_dst = (UInt)modrm - 0xC0;
5941 DIP("ffree %%st(%u)\n", r_dst);
5942 put_ST_TAG ( r_dst, mkU8(0) );
5945 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
5946 r_dst = (UInt)modrm - 0xD0;
5947 DIP("fst %%st(0),%%st(%u)\n", r_dst);
5948 /* P4 manual says: "If the destination operand is a
5949 non-empty register, the invalid-operation exception
5950 is not generated. Hence put_ST_UNCHECKED. */
5951 put_ST_UNCHECKED(r_dst, get_ST(0));
5954 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
5955 r_dst = (UInt)modrm - 0xD8;
5956 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
5957 /* P4 manual says: "If the destination operand is a
5958 non-empty register, the invalid-operation exception
5959 is not generated. Hence put_ST_UNCHECKED. */
5960 put_ST_UNCHECKED(r_dst, get_ST(0));
5964 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
5965 r_dst = (UInt)modrm - 0xE0;
5966 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
5967 /* This forces C1 to zero, which isn't right. */
5972 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5978 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
5979 r_dst = (UInt)modrm - 0xE8;
5980 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
5981 /* This forces C1 to zero, which isn't right. */
5986 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5999 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6001 if (first_opcode == 0xDE) {
6005 /* bits 5,4,3 are an opcode extension, and the modRM also
6006 specifies an address. */
6008 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6011 switch (gregLO3ofRM(modrm)) {
6013 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6014 DIP("fiaddw %s\n", dis_buf);
6018 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6019 DIP("fimulw %s\n", dis_buf);
6023 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6024 DIP("fisubw %s\n", dis_buf);
6028 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6029 DIP("fisubrw %s\n", dis_buf);
6033 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6034 DIP("fisubw %s\n", dis_buf);
6038 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6039 DIP("fidivrw %s\n", dis_buf);
6046 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6050 loadLE(Ity_I16, mkexpr(addr))))));
6056 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6059 loadLE(Ity_I16, mkexpr(addr)))),
6064 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6065 vex_printf("first_opcode == 0xDE\n");
6074 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6075 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
6078 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6079 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
6082 case 0xD9: /* FCOMPP %st(0),%st(1) */
6083 DIP("fcompp %%st(0),%%st(1)\n");
6084 /* This forces C1 to zero, which isn't right. */
6089 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6097 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6098 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
6101 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6102 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
6105 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6106 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
6109 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6110 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
6120 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
6122 if (first_opcode == 0xDF) {
6126 /* bits 5,4,3 are an opcode extension, and the modRM also
6127 specifies an address. */
6128 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6131 switch (gregLO3ofRM(modrm)) {
6133 case 0: /* FILD m16int */
6134 DIP("fildw %s\n", dis_buf);
6136 put_ST(0, unop(Iop_I32StoF64,
6138 loadLE(Ity_I16, mkexpr(addr)))));
6141 case 1: /* FISTTPS m16 (SSE3) */
6142 DIP("fisttps %s\n", dis_buf);
6143 storeLE( mkexpr(addr),
6144 x87ishly_qnarrow_32_to_16(
6145 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
6149 //.. case 2: /* FIST m16 */
6150 //.. DIP("fistp %s\n", dis_buf);
6151 //.. storeLE( mkexpr(addr),
6152 //.. binop(Iop_F64toI16, get_roundingmode(), get_ST(0)) );
6155 case 3: /* FISTP m16 */
6156 DIP("fistps %s\n", dis_buf);
6157 storeLE( mkexpr(addr),
6158 x87ishly_qnarrow_32_to_16(
6159 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
6163 case 5: /* FILD m64 */
6164 DIP("fildll %s\n", dis_buf);
6166 put_ST(0, binop(Iop_I64StoF64,
6168 loadLE(Ity_I64, mkexpr(addr))));
6171 case 7: /* FISTP m64 */
6172 DIP("fistpll %s\n", dis_buf);
6173 storeLE( mkexpr(addr),
6174 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
6179 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6180 vex_printf("first_opcode == 0xDF\n");
6189 case 0xC0: /* FFREEP %st(0) */
6190 DIP("ffreep %%st(%d)\n", 0);
6191 put_ST_TAG ( 0, mkU8(0) );
6195 case 0xE0: /* FNSTSW %ax */
6196 DIP("fnstsw %%ax\n");
6197 /* Invent a plausible-looking FPU status word value and
6199 ((ftop & 7) << 11) | (c3210 & 0x4700)
6206 binop(Iop_And32, get_ftop(), mkU32(7)),
6209 unop(Iop_64to32, get_C3210()),
6214 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
6215 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
6218 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
6219 /* not really right since COMIP != UCOMIP */
6220 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
6242 /*------------------------------------------------------------*/
6244 /*--- MMX INSTRUCTIONS ---*/
6246 /*------------------------------------------------------------*/
6248 /* Effect of MMX insns on x87 FPU state (table 11-2 of
6249 IA32 arch manual, volume 3):
6251 Read from, or write to MMX register (viz, any insn except EMMS):
6252 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
6253 * FP stack pointer set to zero
6255    Exit from MMX state (EMMS or FEMMS):
6256       * All tags set to Invalid (empty) -- FPTAGS[i] := zero
6257       * FP stack pointer set to zero
6260 static void do_MMX_preamble ( void )
6263 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
6264 IRExpr* zero = mkU32(0);
6265 IRExpr* tag1 = mkU8(1);
6267 for (i = 0; i < 8; i++)
6268 stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
6271 static void do_EMMS_preamble ( void )
6274 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
6275 IRExpr* zero = mkU32(0);
6276 IRExpr* tag0 = mkU8(0);
6278 for (i = 0; i < 8; i++)
6279 stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
6283 static IRExpr* getMMXReg ( UInt archreg )
6285 vassert(archreg < 8);
6286 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
6290 static void putMMXReg ( UInt archreg, IRExpr* e )
6292 vassert(archreg < 8);
6293 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
6294 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
6298 /* Helper for non-shift MMX insns. Note this is incomplete in the
6299 sense that it does not first call do_MMX_preamble() -- that is the
6300 responsibility of its caller. */
6303 ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi,
6308 Bool show_granularity )
6311 UChar modrm = getUChar(delta);
6312 Bool isReg = epartIsReg(modrm);
6313 IRExpr* argL = NULL;
6314 IRExpr* argR = NULL;
6315 IRExpr* argG = NULL;
6316 IRExpr* argE = NULL;
6317 IRTemp res = newTemp(Ity_I64);
6320 IROp op = Iop_INVALID;
6322 HChar* hName = NULL;
6325 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
6328 /* Original MMX ones */
6329 case 0xFC: op = Iop_Add8x8; break;
6330 case 0xFD: op = Iop_Add16x4; break;
6331 case 0xFE: op = Iop_Add32x2; break;
6333 case 0xEC: op = Iop_QAdd8Sx8; break;
6334 case 0xED: op = Iop_QAdd16Sx4; break;
6336 case 0xDC: op = Iop_QAdd8Ux8; break;
6337 case 0xDD: op = Iop_QAdd16Ux4; break;
6339 case 0xF8: op = Iop_Sub8x8; break;
6340 case 0xF9: op = Iop_Sub16x4; break;
6341 case 0xFA: op = Iop_Sub32x2; break;
6343 case 0xE8: op = Iop_QSub8Sx8; break;
6344 case 0xE9: op = Iop_QSub16Sx4; break;
6346 case 0xD8: op = Iop_QSub8Ux8; break;
6347 case 0xD9: op = Iop_QSub16Ux4; break;
6349 case 0xE5: op = Iop_MulHi16Sx4; break;
6350 case 0xD5: op = Iop_Mul16x4; break;
6351 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
6353 case 0x74: op = Iop_CmpEQ8x8; break;
6354 case 0x75: op = Iop_CmpEQ16x4; break;
6355 case 0x76: op = Iop_CmpEQ32x2; break;
6357 case 0x64: op = Iop_CmpGT8Sx8; break;
6358 case 0x65: op = Iop_CmpGT16Sx4; break;
6359 case 0x66: op = Iop_CmpGT32Sx2; break;
6361 case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
6362 case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
6363 case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
6365 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
6366 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
6367 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
6369 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
6370 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
6371 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
6373 case 0xDB: op = Iop_And64; break;
6374 case 0xDF: op = Iop_And64; invG = True; break;
6375 case 0xEB: op = Iop_Or64; break;
6376 case 0xEF: /* Possibly do better here if argL and argR are the
6378 op = Iop_Xor64; break;
6380 /* Introduced in SSE1 */
6381 case 0xE0: op = Iop_Avg8Ux8; break;
6382 case 0xE3: op = Iop_Avg16Ux4; break;
6383 case 0xEE: op = Iop_Max16Sx4; break;
6384 case 0xDE: op = Iop_Max8Ux8; break;
6385 case 0xEA: op = Iop_Min16Sx4; break;
6386 case 0xDA: op = Iop_Min8Ux8; break;
6387 case 0xE4: op = Iop_MulHi16Ux4; break;
6388 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
6390 /* Introduced in SSE2 */
6391 case 0xD4: op = Iop_Add64; break;
6392 case 0xFB: op = Iop_Sub64; break;
6395 vex_printf("\n0x%x\n", (Int)opc);
6396 vpanic("dis_MMXop_regmem_to_reg");
6401 argG = getMMXReg(gregLO3ofRM(modrm));
6403 argG = unop(Iop_Not64, argG);
6407 argE = getMMXReg(eregLO3ofRM(modrm));
6410 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6412 argE = loadLE(Ity_I64, mkexpr(addr));
6423 if (op != Iop_INVALID) {
6424 vassert(hName == NULL);
6425 vassert(hAddr == NULL);
6426 assign(res, binop(op, argL, argR));
6428 vassert(hName != NULL);
6429 vassert(hAddr != NULL);
6433 0/*regparms*/, hName, hAddr,
6434 mkIRExprVec_2( argL, argR )
6439 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
6441 DIP("%s%s %s, %s\n",
6442 name, show_granularity ? nameMMXGran(opc & 3) : "",
6443 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
6444 nameMMXReg(gregLO3ofRM(modrm)) );
6450 /* Vector by scalar shift of G by the amount specified at the bottom
6451 of E. This is a straight copy of dis_SSE_shiftG_byE. */
6453 static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi,
6454 Prefix pfx, Long delta,
6455 HChar* opname, IROp op )
6461 UChar rm = getUChar(delta);
6462 IRTemp g0 = newTemp(Ity_I64);
6463 IRTemp g1 = newTemp(Ity_I64);
6464 IRTemp amt = newTemp(Ity_I64);
6465 IRTemp amt8 = newTemp(Ity_I8);
6467 if (epartIsReg(rm)) {
6468 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
6469 DIP("%s %s,%s\n", opname,
6470 nameMMXReg(eregLO3ofRM(rm)),
6471 nameMMXReg(gregLO3ofRM(rm)) );
6474 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
6475 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
6476 DIP("%s %s,%s\n", opname,
6478 nameMMXReg(gregLO3ofRM(rm)) );
6481 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
6482 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
6484 shl = shr = sar = False;
6487 case Iop_ShlN16x4: shl = True; size = 32; break;
6488 case Iop_ShlN32x2: shl = True; size = 32; break;
6489 case Iop_Shl64: shl = True; size = 64; break;
6490 case Iop_ShrN16x4: shr = True; size = 16; break;
6491 case Iop_ShrN32x2: shr = True; size = 32; break;
6492 case Iop_Shr64: shr = True; size = 64; break;
6493 case Iop_SarN16x4: sar = True; size = 16; break;
6494 case Iop_SarN32x2: sar = True; size = 32; break;
6495 default: vassert(0);
6502 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
6504 binop(op, mkexpr(g0), mkexpr(amt8))
6512 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
6513 binop(op, mkexpr(g0), mkU8(size-1)),
6514 binop(op, mkexpr(g0), mkexpr(amt8))
6521 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
6526 /* Vector by scalar shift of E by an immediate byte. This is a
6527 straight copy of dis_SSE_shiftE_imm. */
6530 ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op )
6533 UChar rm = getUChar(delta);
6534 IRTemp e0 = newTemp(Ity_I64);
6535 IRTemp e1 = newTemp(Ity_I64);
6537 vassert(epartIsReg(rm));
6538 vassert(gregLO3ofRM(rm) == 2
6539 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
6540 amt = getUChar(delta+1);
6542 DIP("%s $%d,%s\n", opname,
6544 nameMMXReg(eregLO3ofRM(rm)) );
6546 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
6548 shl = shr = sar = False;
6551 case Iop_ShlN16x4: shl = True; size = 16; break;
6552 case Iop_ShlN32x2: shl = True; size = 32; break;
6553 case Iop_Shl64: shl = True; size = 64; break;
6554 case Iop_SarN16x4: sar = True; size = 16; break;
6555 case Iop_SarN32x2: sar = True; size = 32; break;
6556 case Iop_ShrN16x4: shr = True; size = 16; break;
6557 case Iop_ShrN32x2: shr = True; size = 32; break;
6558 case Iop_Shr64: shr = True; size = 64; break;
6559 default: vassert(0);
6563 assign( e1, amt >= size
6565 : binop(op, mkexpr(e0), mkU8(amt))
6569 assign( e1, amt >= size
6570 ? binop(op, mkexpr(e0), mkU8(size-1))
6571 : binop(op, mkexpr(e0), mkU8(amt))
6577 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
6582 /* Completely handle all MMX instructions except emms. */
/* Top-level MMX decoder.  'opc' is the second opcode byte (after 0F).
   On success returns the advanced delta and leaves *decode_ok set;
   on an unrecognised/malformed insn it jumps to mmx_decode_failure,
   which clears *decode_ok (the returned delta is then ignored --
   see the final return below). */
6585 ULong dis_MMX ( Bool* decode_ok,
6586 VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
6591 UChar opc = getUChar(delta);
6594 /* dis_MMX handles all insns except emms. */
6601 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
6602 modrm = getUChar(delta);
6603 if (epartIsReg(modrm)) {
/* 32-bit source is zero-extended into the 64-bit MMX reg via
   Iop_32HLto64 (elided high half is presumably a zero constant). */
6607 binop( Iop_32HLto64,
6609 getIReg32(eregOfRexRM(pfx,modrm)) ) );
6610 DIP("movd %s, %s\n",
6611 nameIReg32(eregOfRexRM(pfx,modrm)),
6612 nameMMXReg(gregLO3ofRM(modrm)));
6614 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6618 binop( Iop_32HLto64,
6620 loadLE(Ity_I32, mkexpr(addr)) ) );
6621 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
6626 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
6627 modrm = getUChar(delta);
6628 if (epartIsReg(modrm)) {
6630 putMMXReg( gregLO3ofRM(modrm),
6631 getIReg64(eregOfRexRM(pfx,modrm)) );
6632 DIP("movd %s, %s\n",
6633 nameIReg64(eregOfRexRM(pfx,modrm)),
6634 nameMMXReg(gregLO3ofRM(modrm)));
6636 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6638 putMMXReg( gregLO3ofRM(modrm),
6639 loadLE(Ity_I64, mkexpr(addr)) );
6640 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
6644 goto mmx_decode_failure;
6650 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
6651 modrm = getUChar(delta);
6652 if (epartIsReg(modrm)) {
/* Only the low 32 bits of the MMX reg are moved out. */
6654 putIReg32( eregOfRexRM(pfx,modrm),
6655 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
6656 DIP("movd %s, %s\n",
6657 nameMMXReg(gregLO3ofRM(modrm)),
6658 nameIReg32(eregOfRexRM(pfx,modrm)));
6660 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6662 storeLE( mkexpr(addr),
6663 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
6664 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
6669 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
6670 modrm = getUChar(delta);
6671 if (epartIsReg(modrm)) {
6673 putIReg64( eregOfRexRM(pfx,modrm),
6674 getMMXReg(gregLO3ofRM(modrm)) );
6675 DIP("movd %s, %s\n",
6676 nameMMXReg(gregLO3ofRM(modrm)),
6677 nameIReg64(eregOfRexRM(pfx,modrm)));
6679 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6681 storeLE( mkexpr(addr),
6682 getMMXReg(gregLO3ofRM(modrm)) );
6683 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
6686 goto mmx_decode_failure;
6691 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
/* A REX.W prefix is tolerated but meaningless here, hence the
   "redundant" exemption in the size check. */
6693 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6694 goto mmx_decode_failure;
6695 modrm = getUChar(delta);
6696 if (epartIsReg(modrm)) {
6698 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
6699 DIP("movq %s, %s\n",
6700 nameMMXReg(eregLO3ofRM(modrm)),
6701 nameMMXReg(gregLO3ofRM(modrm)));
6703 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6705 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
6706 DIP("movq %s, %s\n",
6707 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
6712 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
6714 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6715 goto mmx_decode_failure;
6716 modrm = getUChar(delta);
6717 if (epartIsReg(modrm)) {
6718 /* Fall through. The assembler doesn't appear to generate
6720 goto mmx_decode_failure;
6722 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
/* NOTE: movntq is treated as a plain store -- the non-temporal
   hint has no observable effect under simulation. */
6724 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
6725 DIP("mov(nt)q %s, %s\n",
6726 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
/* ------ Packed arithmetic: all delegate to
   dis_MMXop_regmem_to_reg, which uses the low opcode bits ("gg")
   to pick the lane width; the final Bool says whether the gg
   field is meaningful for that mnemonic. ------ */
6732 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
6734 goto mmx_decode_failure;
6735 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
6739 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
6741 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6742 goto mmx_decode_failure;
6743 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
6747 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
6749 goto mmx_decode_failure;
6750 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
6755 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
6757 goto mmx_decode_failure;
6758 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
6762 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
6764 goto mmx_decode_failure;
6765 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
6769 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
6771 goto mmx_decode_failure;
6772 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
6775 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
6777 goto mmx_decode_failure;
6778 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
6781 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
6783 goto mmx_decode_failure;
6784 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
6787 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
6789 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
6794 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
6796 goto mmx_decode_failure;
6797 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
6802 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
6804 goto mmx_decode_failure;
6805 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
6808 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
6810 goto mmx_decode_failure;
6811 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
6814 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
6816 goto mmx_decode_failure;
6817 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
6820 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
6822 goto mmx_decode_failure;
6823 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
6828 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
6830 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6831 goto mmx_decode_failure;
6832 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
6837 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
6839 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6840 goto mmx_decode_failure;
6841 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
6844 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
6846 goto mmx_decode_failure;
6847 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
6850 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
6852 goto mmx_decode_failure;
6853 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
6856 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
6858 goto mmx_decode_failure;
6859 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
6862 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
6864 goto mmx_decode_failure;
6865 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
/* ------ Shift-by-E (register/memory count) forms ------ */
6868 # define SHIFT_BY_REG(_name,_op) \
6869 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
6872 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
6873 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
6874 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
6875 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
6877 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
6878 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
6879 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
6880 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
6882 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
6883 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
6884 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
6886 # undef SHIFT_BY_REG
/* ------ Shift-by-immediate forms (opcodes 71/72/73 with the
   sub-opcode in bits 5:3 of the byte after modrm) ------ */
6891 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
6892 UChar byte2, subopc;
6894 goto mmx_decode_failure;
6895 byte2 = getUChar(delta); /* amode / sub-opcode */
6896 subopc = toUChar( (byte2 >> 3) & 7 );
6898 # define SHIFT_BY_IMM(_name,_op) \
6899 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
6902 if (subopc == 2 /*SRL*/ && opc == 0x71)
6903 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
6904 else if (subopc == 2 /*SRL*/ && opc == 0x72)
6905 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
6906 else if (subopc == 2 /*SRL*/ && opc == 0x73)
6907 SHIFT_BY_IMM("psrlq", Iop_Shr64);
6909 else if (subopc == 4 /*SAR*/ && opc == 0x71)
6910 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
6911 else if (subopc == 4 /*SAR*/ && opc == 0x72)
6912 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
6914 else if (subopc == 6 /*SHL*/ && opc == 0x71)
6915 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
6916 else if (subopc == 6 /*SHL*/ && opc == 0x72)
6917 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
6918 else if (subopc == 6 /*SHL*/ && opc == 0x73)
6919 SHIFT_BY_IMM("psllq", Iop_Shl64);
6921 else goto mmx_decode_failure;
6923 # undef SHIFT_BY_IMM
/* ------ MASKMOVQ: byte-masked store of G to [RDI] ------ */
6928 IRTemp addr = newTemp(Ity_I64);
6929 IRTemp regD = newTemp(Ity_I64);
6930 IRTemp regM = newTemp(Ity_I64);
6931 IRTemp mask = newTemp(Ity_I64);
6932 IRTemp olddata = newTemp(Ity_I64);
6933 IRTemp newdata = newTemp(Ity_I64);
6935 modrm = getUChar(delta);
6936 if (sz != 4 || (!epartIsReg(modrm)))
6937 goto mmx_decode_failure;
/* Target address is RDI, subject to address-size/segment overrides. */
6940 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
6941 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
6942 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
/* Replicate each byte's sign bit across the byte: mask byte is 0xFF
   where the corresponding regM byte has its top bit set, else 0x00. */
6943 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
/* Implemented as read-modify-write of the full 8 bytes, merging
   regD and olddata under the mask -- not byte-granular stores. */
6944 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
6952 unop(Iop_Not64, mkexpr(mask)))) );
6953 storeLE( mkexpr(addr), mkexpr(newdata) );
6954 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
6955 nameMMXReg( gregLO3ofRM(modrm) ) );
6959 /* --- MMX decode failure --- */
6963 return delta; /* ignored */
6972 /*------------------------------------------------------------*/
6973 /*--- More misc arithmetic and other obscure insns. ---*/
6974 /*------------------------------------------------------------*/
6976 /* Generate base << amt with vacated places filled with stuff
6977 from xtra. amt guaranteed in 0 .. 63. */
/* Used by the SHLD implementation below: the low 'amt' bits of the
   result come from the top of 'xtra'.  Caller must guarantee
   0 <= amt <= 63; amt == 0 is the special case handled by the elided
   guard, since (64 - 0) would be an out-of-range Shr64 count. */
6979 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
6983 else (base << amt) | (xtra >>u (64-amt))
6990 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
6991 binop(Iop_Shr64, mkexpr(xtra),
6992 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
6997 /* Generate base >>u amt with vacated places filled with stuff
6998 from xtra. amt guaranteed in 0 .. 63. */
/* Mirror of shiftL64_with_extras, used by SHRD: the high 'amt' bits
   of the result come from the bottom of 'xtra'.  Same amt == 0
   caveat: (64 - 0) is an illegal Shl64 count, handled by the elided
   guard. */
7000 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7004 else (base >>u amt) | (xtra << (64-amt))
7011 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7012 binop(Iop_Shl64, mkexpr(xtra),
7013 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7018 /* Double length left and right shifts. Apparently only required in
7019 v-size (no b- variant). */
/* Decode SHLD/SHRD Gv,Ev,{imm8|CL}.  E is the destination (reg or
   mem); G supplies the bits shifted in.  'amt_is_literal' tells us
   whether one imm8 byte follows the amode (affects delta and the
   disAMode lookahead).  Returns the updated delta. */
7021 ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi,
7023 Long delta, UChar modrm,
7026 Bool amt_is_literal,
7027 HChar* shift_amt_txt,
7030 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7031 for printing it. And eip on entry points at the modrm byte. */
7035 IRType ty = szToITy(sz);
7036 IRTemp gsrc = newTemp(ty);
7037 IRTemp esrc = newTemp(ty);
7038 IRTemp addr = IRTemp_INVALID;
7039 IRTemp tmpSH = newTemp(Ity_I8);
7040 IRTemp tmpSS = newTemp(Ity_I8);
7041 IRTemp tmp64 = IRTemp_INVALID;
7042 IRTemp res64 = IRTemp_INVALID;
7043 IRTemp rss64 = IRTemp_INVALID;
7044 IRTemp resTy = IRTemp_INVALID;
7045 IRTemp rssTy = IRTemp_INVALID;
/* Architectural shift-count mask: 6 bits for 64-bit operands,
   5 bits otherwise. */
7046 Int mask = sz==8 ? 63 : 31;
7048 vassert(sz == 2 || sz == 4 || sz == 8);
7050 /* The E-part is the destination; this is shifted. The G-part
7051 supplies bits to be shifted into the E-part, but is not
7054 If shifting left, form a double-length word with E at the top
7055 and G at the bottom, and shift this left. The result is then in
7058 If shifting right, form a double-length word with G at the top
7059 and E at the bottom, and shift this right. The result is then
7062 /* Fetch the operands. */
7064 assign( gsrc, getIRegG(sz, pfx, modrm) );
7066 if (epartIsReg(modrm)) {
7068 assign( esrc, getIRegE(sz, pfx, modrm) );
7069 DIP("sh%cd%c %s, %s, %s\n",
7070 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7072 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
7074 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
7075 /* # bytes following amode */
7076 amt_is_literal ? 1 : 0 );
7078 assign( esrc, loadLE(ty, mkexpr(addr)) );
7079 DIP("sh%cd%c %s, %s, %s\n",
7080 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7082 nameIRegG(sz, pfx, modrm), dis_buf);
7085 /* Calculate the masked shift amount (tmpSH), the masked subshift
7086 amount (tmpSS), the shifted value (res64) and the subshifted
7089 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
/* tmpSS = (tmpSH - 1) & mask: the "one less" shift, used by the
   flags thunk to recover the last bit shifted out (the C flag). */
7090 assign( tmpSS, binop(Iop_And8,
7091 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
7094 tmp64 = newTemp(Ity_I64);
7095 res64 = newTemp(Ity_I64);
7096 rss64 = newTemp(Ity_I64);
7098 if (sz == 2 || sz == 4) {
7100 /* G is xtra; E is data */
7101 /* what a freaking nightmare: */
7102 if (sz == 4 && left_shift) {
7103 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
7106 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7110 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
7114 if (sz == 4 && !left_shift) {
7115 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
7116 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7117 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
/* 16-bit cases: replicate gsrc so counts up to 31 (the masked
   maximum) still pull in defined bits. */
7120 if (sz == 2 && left_shift) {
7123 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
7124 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
7126 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
7129 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7131 /* subshift formed by shifting [esrc'0000'0000'0000] */
7135 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
7141 if (sz == 2 && !left_shift) {
7144 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
7145 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
7147 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
7148 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7149 /* subshift formed by shifting [0000'0000'0000'esrc] */
7150 assign( rss64, binop(Iop_Shr64,
7151 unop(Iop_16Uto64, mkexpr(esrc)),
/* sz == 8: no double-length container exists, so use the helper
   that merges the shifted-in bits explicitly. */
7159 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
7160 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
7162 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
7163 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
7168 resTy = newTemp(ty);
7169 rssTy = newTemp(ty);
7170 assign( resTy, narrowTo(ty, mkexpr(res64)) );
7171 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
7173 /* Put result back and write the flags thunk. */
7174 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
7175 resTy, rssTy, ty, tmpSH );
7177 if (epartIsReg(modrm)) {
7178 putIRegE(sz, pfx, modrm, mkexpr(resTy));
7180 storeLE( mkexpr(addr), mkexpr(resTy) );
/* Account for the trailing imm8, if the count was literal. */
7183 if (amt_is_literal) delta++;
7188 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
/* Which bit-test variant: plain test, set, reset, or complement. */
7191 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
/* Mnemonic suffix for a BtOp, used when printing the disassembly
   ("bt", "bts", "btr", "btc"). */
7193 static HChar* nameBtOp ( BtOp op )
7196 case BtOpNone: return "";
7197 case BtOpSet: return "s";
7198 case BtOpReset: return "r";
7199 case BtOpComp: return "c";
7200 default: vpanic("nameBtOp(amd64)");
/* Decode BT/BTS/BTR/BTC Gv,Ev.  The register-destination form is
   implemented by spilling E to the guest stack (below RSP) and
   operating on it byte-wise in memory, then reloading -- this keeps
   one code path for both reg and mem destinations.  Returns the
   updated delta. */
7206 ULong dis_bt_G_E ( VexAbiInfo* vbi,
7207 Prefix pfx, Int sz, Long delta, BtOp op )
7212 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
7213 t_addr1, t_rsp, t_mask, t_new;
7215 vassert(sz == 2 || sz == 4 || sz == 8);
7217 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
7218 = t_addr0 = t_addr1 = t_rsp
7219 = t_mask = t_new = IRTemp_INVALID;
7221 t_fetched = newTemp(Ity_I8);
7222 t_new = newTemp(Ity_I8);
7223 t_bitno0 = newTemp(Ity_I64);
7224 t_bitno1 = newTemp(Ity_I64);
7225 t_bitno2 = newTemp(Ity_I8);
7226 t_addr1 = newTemp(Ity_I64);
7227 modrm = getUChar(delta);
/* Bit index is signed-widened: for the memory form it may address
   bits far outside the operand, positive or negative. */
7229 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
7231 if (epartIsReg(modrm)) {
7233 /* Get it onto the client's stack. */
7234 t_rsp = newTemp(Ity_I64);
7235 t_addr0 = newTemp(Ity_I64);
/* NOTE(review): this writes below the guest RSP temporarily;
   RSP is restored near the end of the function. */
7237 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)) );
7238 putIReg64(R_RSP, mkexpr(t_rsp));
7240 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
7242 /* Make t_addr0 point at it. */
7243 assign( t_addr0, mkexpr(t_rsp) );
7245 /* Mask out upper bits of the shift amount, since we're doing a
7247 assign( t_bitno1, binop(Iop_And64,
7249 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
7252 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
/* Memory form: bit index is NOT masked (architecturally it selects
   a byte at offset bitno>>3 from the amode). */
7254 assign( t_bitno1, mkexpr(t_bitno0) );
7257 /* At this point: t_addr0 is the address being operated on. If it
7258 was a reg, we will have pushed it onto the client's stack.
7259 t_bitno1 is the bit number, suitably masked in the case of a
7262 /* Now the main sequence. */
/* t_addr1 = t_addr0 + (t_bitno1 >>s 3): arithmetic shift so a
   negative bit index walks backwards in memory. */
7266 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
7268 /* t_addr1 now holds effective address */
7272 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
7274 /* t_bitno2 contains offset of bit within byte */
7276 if (op != BtOpNone) {
7277 t_mask = newTemp(Ity_I8);
7278 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
7281 /* t_mask is now a suitable byte mask */
7283 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
7285 if (op != BtOpNone) {
7289 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
7293 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
7297 binop(Iop_And8, mkexpr(t_fetched),
7298 unop(Iop_Not8, mkexpr(t_mask))) );
7301 vpanic("dis_bt_G_E(amd64)");
/* LOCK prefix on a memory destination: do the update atomically
   via compare-and-swap against the previously fetched byte. */
7303 if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) {
7304 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
7305 mkexpr(t_new)/*new*/,
7306 guest_RIP_curr_instr );
7308 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
7312 /* Side effect done; now get selected bit into Carry flag */
7313 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
7314 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7315 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
7320 unop(Iop_8Uto64, mkexpr(t_fetched)),
7324 /* Set NDEP even though it isn't used. This makes redundant-PUT
7325 elimination of previous stores to this field work better. */
7326 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7328 /* Move reg operand from stack back to reg */
7329 if (epartIsReg(modrm)) {
7330 /* t_rsp still points at it. */
7331 /* only write the reg if actually modifying it; doing otherwise
7332 zeroes the top half erroneously when doing btl due to
7333 standard zero-extend rule */
7335 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
7336 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(sz)) );
7339 DIP("bt%s%c %s, %s\n",
7340 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
7341 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
7348 /* Handle BSF/BSR. Only v-size seems necessary. */
/* Decode BSF (fwds=True) / BSR (fwds=False) Ev,Gv.  Sets Z iff the
   source is zero; when the source is zero, G is left unchanged
   (AMD semantics -- Intel leaves the result undefined).  Returns the
   updated delta. */
7350 ULong dis_bs_E_G ( VexAbiInfo* vbi,
7351 Prefix pfx, Int sz, Long delta, Bool fwds )
7357 IRType ty = szToITy(sz);
7358 IRTemp src = newTemp(ty);
7359 IRTemp dst = newTemp(ty);
7360 IRTemp src64 = newTemp(Ity_I64);
7361 IRTemp dst64 = newTemp(Ity_I64);
7362 IRTemp src8 = newTemp(Ity_I8);
7364 vassert(sz == 8 || sz == 4 || sz == 2);
7366 modrm = getUChar(delta);
7367 isReg = epartIsReg(modrm);
7370 assign( src, getIRegE(sz, pfx, modrm) );
7373 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7375 assign( src, loadLE(ty, mkexpr(addr)) );
7378 DIP("bs%c%c %s, %s\n",
7379 fwds ? 'f' : 'r', nameISize(sz),
7380 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
7381 nameIRegG(sz, pfx, modrm));
7383 /* First, widen src to 64 bits if it is not already. */
7384 assign( src64, widenUto64(mkexpr(src)) );
7386 /* Generate an 8-bit expression which is zero iff the
7387 original is zero, and nonzero otherwise */
7391 mkexpr(src64), mkU64(0))) );
7393 /* Flags: Z is 1 iff source value is zero. All others
7394 are undefined -- we force them to zero. */
7395 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7396 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
/* Mux0X: selector src8 == 0 picks the "zero source" arm (Z set). */
7399 IRExpr_Mux0X( mkexpr(src8),
7401 mkU64(AMD64G_CC_MASK_Z),
7406 /* Set NDEP even though it isn't used. This makes redundant-PUT
7407 elimination of previous stores to this field work better. */
7408 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7410 /* Result: iff source value is zero, we can't use
7411 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
7412 But anyway, amd64 semantics say the result is undefined in
7413 such situations. Hence handle the zero case specially. */
7415 /* Bleh. What we compute:
7417 bsf64: if src == 0 then {dst is unchanged}
7420 bsr64: if src == 0 then {dst is unchanged}
7421 else 63 - Clz64(src)
7423 bsf32: if src == 0 then {dst is unchanged}
7424 else Ctz64(32Uto64(src))
7426 bsr32: if src == 0 then {dst is unchanged}
7427 else 63 - Clz64(32Uto64(src))
7429 bsf16: if src == 0 then {dst is unchanged}
7430 else Ctz64(32Uto64(16Uto32(src)))
7432 bsr16: if src == 0 then {dst is unchanged}
7433 else 63 - Clz64(32Uto64(16Uto32(src)))
7436 /* The main computation, guarding against zero. */
7440 /* src == 0 -- leave dst unchanged */
7441 widenUto64( getIRegG( sz, pfx, modrm ) ),
7443 fwds ? unop(Iop_Ctz64, mkexpr(src64))
7446 unop(Iop_Clz64, mkexpr(src64)))
/* Narrow the 64-bit result back to the operand size. */
7451 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
7454 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
7456 assign( dst, mkexpr(dst64) );
7458 /* dump result back */
7459 putIRegG( sz, pfx, modrm, mkexpr(dst) );
7465 /* swap rAX with the reg specified by reg and REX.B */
/* XCHG rAX, reg (one-byte 0x90+r forms).  Only 4- and 8-byte
   operand sizes are handled; the 32-bit case nonetheless allocates
   the temps at the full operand type 'ty'.  NOTE(review): the
   getIReg64/getIReg32 split presumably sits inside an elided
   if (sz == 8) / else -- confirm against the full source. */
7467 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
7469 IRType ty = szToITy(sz);
7470 IRTemp t1 = newTemp(ty);
7471 IRTemp t2 = newTemp(ty);
7472 vassert(sz == 4 || sz == 8);
7473 vassert(regLo3 < 8);
7475 assign( t1, getIReg64(R_RAX) );
7476 assign( t2, getIRegRexB(8, pfx, regLo3) );
7477 putIReg64( R_RAX, mkexpr(t2) );
7478 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
7480 assign( t1, getIReg32(R_RAX) );
7481 assign( t2, getIRegRexB(4, pfx, regLo3) );
7482 putIReg32( R_RAX, mkexpr(t2) );
7483 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
7485 DIP("xchg%c %s, %s\n",
7486 nameISize(sz), nameIRegRAX(sz),
7487 nameIRegRexB(sz,pfx, regLo3));
/* SAHF: load S,Z,A,P,C from AH into the flags thunk, preserving the
   current O flag.  The thunk is forced to OP_COPY with DEP1 holding
   the literal flag bits. */
7492 void codegen_SAHF ( void )
7494 /* Set the flags to:
7495 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
7496 -- retain the old O flag
7497 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
7498 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
7500 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
7501 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
/* Snapshot the fully-evaluated flags first, before overwriting the
   thunk fields below. */
7502 IRTemp oldflags = newTemp(Ity_I64);
7503 assign( oldflags, mk_amd64g_calculate_rflags_all() );
7504 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7505 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7506 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
7507 stmt( IRStmt_Put( OFFB_CC_DEP1,
7509 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
/* AH is bits 15:8 of RAX, hence the shift by 8. */
7511 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
/* LAHF: materialise SF:ZF:0:AF:0:PF:1:CF into AH, leaving the rest
   of RAX untouched.  Bit 1 of the formed byte is forced to 1, as
   the architecture requires. */
7519 void codegen_LAHF ( void )
7521 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
7522 IRExpr* rax_with_hole;
7525 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
7526 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
7528 IRTemp flags = newTemp(Ity_I64);
7529 assign( flags, mk_amd64g_calculate_rflags_all() );
/* Clear bits 15:8 of RAX so the new AH byte can be OR'd in. */
7532 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
7534 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
7537 = binop(Iop_Or64, rax_with_hole,
7538 binop(Iop_Shl64, new_byte, mkU8(8)));
7539 putIReg64(R_RAX, new_rax);
/* Decode CMPXCHG G,E.  Semantics: compare rAX with E; if equal, ZF=1
   and E := G, else ZF=0 and rAX := E.  *ok is cleared on a decode
   failure (the reg-reg-with-LOCK combination, handled in elided
   code).  Returns the updated delta. */
7544 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
7553 IRType ty = szToITy(size);
7554 IRTemp acc = newTemp(ty);
7555 IRTemp src = newTemp(ty);
7556 IRTemp dest = newTemp(ty);
7557 IRTemp dest2 = newTemp(ty);
7558 IRTemp acc2 = newTemp(ty);
7559 IRTemp cond8 = newTemp(Ity_I8);
7560 IRTemp addr = IRTemp_INVALID;
7561 UChar rm = getUChar(delta0);
7563 /* There are 3 cases to consider:
7565 reg-reg: ignore any lock prefix, generate sequence based
7568 reg-mem, not locked: ignore any lock prefix, generate sequence
7571 reg-mem, locked: use IRCAS
7574 if (epartIsReg(rm)) {
7578 /* awaiting test case */
7579 assign( dest, getIRegE(size, pfx, rm) );
7581 assign( src, getIRegG(size, pfx, rm) );
7582 assign( acc, getIRegRAX(size) );
/* Flags come from the comparison acc - dest, as for CMP. */
7583 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
7584 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
/* Mux0X: cond8==0 (not equal) keeps old values; nonzero (equal)
   steers src into dest and leaves acc alone via the dest arm. */
7585 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
7586 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
7587 putIRegRAX(size, mkexpr(acc2));
7588 putIRegE(size, pfx, rm, mkexpr(dest2));
7589 DIP("cmpxchg%c %s,%s\n", nameISize(size),
7590 nameIRegG(size,pfx,rm),
7591 nameIRegE(size,pfx,rm) );
7593 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
/* Unlocked memory form: plain load / compare / conditional store. */
7595 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7596 assign( dest, loadLE(ty, mkexpr(addr)) );
7598 assign( src, getIRegG(size, pfx, rm) );
7599 assign( acc, getIRegRAX(size) );
7600 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
7601 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
7602 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
7603 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
7604 putIRegRAX(size, mkexpr(acc2));
7605 storeLE( mkexpr(addr), mkexpr(dest2) );
7606 DIP("cmpxchg%c %s,%s\n", nameISize(size),
7607 nameIRegG(size,pfx,rm), dis_buf);
7609 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
7611 /* src is new value. acc is expected value. dest is old value.
7612 Compute success from the output of the IRCAS, and steer the
7613 new value for RAX accordingly: in case of success, RAX is
7615 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7617 assign( src, getIRegG(size, pfx, rm) );
7618 assign( acc, getIRegRAX(size) );
/* Atomic compare-and-swap; 'dest' receives the old memory value. */
7620 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
7621 NULL, mkexpr(acc), NULL, mkexpr(src) )
7623 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
7624 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
7625 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
7626 putIRegRAX(size, mkexpr(acc2));
7627 DIP("cmpxchg%c %s,%s\n", nameISize(size),
7628 nameIRegG(size,pfx,rm), dis_buf);
7637 /* Handle conditional move instructions of the form
7638 cmovcc E(reg-or-mem), G(reg)
7640 E(src) is reg-or-mem
7643 If E is reg, --> GET %E, tmps
7648 If E is mem --> (getAddr E) -> tmpa
/* Decode CMOVcc Ev,Gv.  Note the memory operand is always loaded,
   even when the condition is false -- only the register write-back
   is conditional.  Returns the updated delta. */
7655 ULong dis_cmov_E_G ( VexAbiInfo* vbi,
7661 UChar rm = getUChar(delta0);
7665 IRType ty = szToITy(sz);
7666 IRTemp tmps = newTemp(ty);
7667 IRTemp tmpd = newTemp(ty);
7669 if (epartIsReg(rm)) {
7670 assign( tmps, getIRegE(sz, pfx, rm) );
7671 assign( tmpd, getIRegG(sz, pfx, rm) );
/* Mux0X: selector 0 keeps the old G value, nonzero takes E. */
7673 putIRegG( sz, pfx, rm,
7674 IRExpr_Mux0X( unop(Iop_1Uto8,
7675 mk_amd64g_calculate_condition(cond)),
7679 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
7680 nameIRegE(sz,pfx,rm),
7681 nameIRegG(sz,pfx,rm));
7685 /* E refers to memory */
7687 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7688 assign( tmps, loadLE(ty, mkexpr(addr)) );
7689 assign( tmpd, getIRegG(sz, pfx, rm) );
7691 putIRegG( sz, pfx, rm,
7692 IRExpr_Mux0X( unop(Iop_1Uto8,
7693 mk_amd64g_calculate_condition(cond)),
7698 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
7700 nameIRegG(sz,pfx,rm));
/* Decode XADD G,E: tmp := E; E := E + G; G := tmp, with ADD flags.
   The reg-reg form is not handled (clears *decode_ok in elided
   code).  Returns the updated delta. */
7707 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
7709 Prefix pfx, Int sz, Long delta0 )
7712 UChar rm = getUChar(delta0);
7715 IRType ty = szToITy(sz);
7716 IRTemp tmpd = newTemp(ty);
7717 IRTemp tmpt0 = newTemp(ty);
7718 IRTemp tmpt1 = newTemp(ty);
7720 /* There are 3 cases to consider:
7722 reg-reg: currently unhandled
7724 reg-mem, not locked: ignore any lock prefix, generate 'naive'
7725 (non-atomic) sequence
7727 reg-mem, locked: use IRCAS
7730 if (epartIsReg(rm)) {
7734 /* Currently we don't handle xadd_G_E with register operand. */
7736 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
7738 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7739 assign( tmpd, loadLE(ty, mkexpr(addr)) );
7740 assign( tmpt0, getIRegG(sz, pfx, rm) );
7741 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
7742 mkexpr(tmpd), mkexpr(tmpt0)) );
7743 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
7744 storeLE( mkexpr(addr), mkexpr(tmpt1) );
/* G receives the ORIGINAL memory value. */
7745 putIRegG(sz, pfx, rm, mkexpr(tmpd));
7746 DIP("xadd%c %s, %s\n",
7747 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
7751 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
7753 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7754 assign( tmpd, loadLE(ty, mkexpr(addr)) );
7755 assign( tmpt0, getIRegG(sz, pfx, rm) );
7756 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
7757 mkexpr(tmpd), mkexpr(tmpt0)) );
/* Atomicity via CAS against the value just loaded; on contention
   the guest re-executes from guest_RIP_curr_instr. */
7758 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
7759 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
7760 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
7761 putIRegG(sz, pfx, rm, mkexpr(tmpd));
7762 DIP("xadd%c %s, %s\n",
7763 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
7771 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
7774 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
7778 //.. UChar rm = getUChar(delta0);
7779 //.. HChar dis_buf[50];
7781 //.. if (epartIsReg(rm)) {
7782 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
7783 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
7784 //.. return 1+delta0;
7786 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
7787 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
7788 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
7789 //.. return len+delta0;
7793 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
7794 //.. dst is ireg and sz==4, zero out top half of it. */
7797 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
7803 //.. UChar rm = getUChar(delta0);
7804 //.. HChar dis_buf[50];
7806 //.. vassert(sz == 2 || sz == 4);
7808 //.. if (epartIsReg(rm)) {
7810 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
7812 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
7814 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
7815 //.. return 1+delta0;
7817 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
7818 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
7819 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
7820 //.. return len+delta0;
7826 //.. void dis_push_segreg ( UInt sreg, Int sz )
7828 //.. IRTemp t1 = newTemp(Ity_I16);
7829 //.. IRTemp ta = newTemp(Ity_I32);
7830 //.. vassert(sz == 2 || sz == 4);
7832 //.. assign( t1, getSReg(sreg) );
7833 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
7834 //.. putIReg(4, R_ESP, mkexpr(ta));
7835 //.. storeLE( mkexpr(ta), mkexpr(t1) );
7837 //.. DIP("pushw %s\n", nameSReg(sreg));
7841 //.. void dis_pop_segreg ( UInt sreg, Int sz )
7843 //.. IRTemp t1 = newTemp(Ity_I16);
7844 //.. IRTemp ta = newTemp(Ity_I32);
7845 //.. vassert(sz == 2 || sz == 4);
7847 //.. assign( ta, getIReg(4, R_ESP) );
7848 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
7850 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
7851 //.. putSReg( sreg, mkexpr(t1) );
7852 //.. DIP("pop %s\n", nameSReg(sreg));
/* RET d64: pop the return address from [RSP], bump RSP past it plus
   d64 extra bytes (RET imm16 form; d64 == 0 for plain RET), emit the
   redzone ABI hint, and jump to the popped address. */
7856 void dis_ret ( VexAbiInfo* vbi, ULong d64 )
7858 IRTemp t1 = newTemp(Ity_I64);
7859 IRTemp t2 = newTemp(Ity_I64);
7860 IRTemp t3 = newTemp(Ity_I64);
7861 assign(t1, getIReg64(R_RSP));
7862 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
7863 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
7864 putIReg64(R_RSP, mkexpr(t3));
7865 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
7866 jmp_treg(Ijk_Ret,t2);
7870 /*------------------------------------------------------------*/
7871 /*--- SSE/SSE2/SSE3 helpers ---*/
7872 /*------------------------------------------------------------*/
7874 /* Worker function; do not call directly.
7875 Handles full width G = G `op` E and G = (not G) `op` E.
/* Shared body for the two full-width SSE G-op-E wrappers below.
   'invertG' selects the (not G) variant, used for ANDNPS-style ops.
   Returns the updated delta. */
7878 static ULong dis_SSE_E_to_G_all_wrk (
7880 Prefix pfx, Long delta,
7881 HChar* opname, IROp op,
7888 UChar rm = getUChar(delta);
7890 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
7891 : getXMMReg(gregOfRexRM(pfx,rm));
7892 if (epartIsReg(rm)) {
7893 putXMMReg( gregOfRexRM(pfx,rm),
7895 getXMMReg(eregOfRexRM(pfx,rm))) );
7896 DIP("%s %s,%s\n", opname,
7897 nameXMMReg(eregOfRexRM(pfx,rm)),
7898 nameXMMReg(gregOfRexRM(pfx,rm)) );
7901 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
/* Memory form loads the full 128 bits of E. */
7902 putXMMReg( gregOfRexRM(pfx,rm),
7904 loadLE(Ity_V128, mkexpr(addr))) );
7905 DIP("%s %s,%s\n", opname,
7907 nameXMMReg(gregOfRexRM(pfx,rm)) );
7913 /* All lanes SSE binary operation, G = G `op` E. */
/* Thin wrapper over dis_SSE_E_to_G_all_wrk: G = G `op` E
   (invertG = False). */
7916 ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi,
7917 Prefix pfx, Long delta,
7918 HChar* opname, IROp op )
7920 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
7923 /* All lanes SSE binary operation, G = (not G) `op` E. */
/* Thin wrapper over dis_SSE_E_to_G_all_wrk: G = (not G) `op` E
   (invertG = True), as needed by ANDNPS/ANDNPD. */
7926 ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi,
7927 Prefix pfx, Long delta,
7928 HChar* opname, IROp op )
7930 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
7934 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
/* Lowest-32-bit-lane-only SSE binop: G = G `op` E, where op is a
   *F0x4-style IROp that only modifies lane 0.  For a memory E, only
   32 bits are loaded and zero-extended into a V128 (the upper bits
   are irrelevant since op only touches the low lane). */
7936 static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi,
7937 Prefix pfx, Long delta,
7938 HChar* opname, IROp op )
7943 UChar rm = getUChar(delta);
7944 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
7945 if (epartIsReg(rm)) {
7946 putXMMReg( gregOfRexRM(pfx,rm),
7948 getXMMReg(eregOfRexRM(pfx,rm))) );
7949 DIP("%s %s,%s\n", opname,
7950 nameXMMReg(eregOfRexRM(pfx,rm)),
7951 nameXMMReg(gregOfRexRM(pfx,rm)) );
7954 /* We can only do a 32-bit memory read, so the upper 3/4 of the
7955 E operand needs to be made simply of zeroes. */
7956 IRTemp epart = newTemp(Ity_V128);
7957 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
7958 assign( epart, unop( Iop_32UtoV128,
7959 loadLE(Ity_I32, mkexpr(addr))) );
7960 putXMMReg( gregOfRexRM(pfx,rm),
7961 binop(op, gpart, mkexpr(epart)) );
7962 DIP("%s %s,%s\n", opname,
7964 nameXMMReg(gregOfRexRM(pfx,rm)) );
7970 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
/* Lowest-64-bit-lane-only SSE binop: G = G `op` E, where op is a
   *F0x2-style IROp that only modifies lane 0.  Mirrors
   dis_SSE_E_to_G_lo32 but with a 64-bit memory load zero-extended
   into the V128 E operand. */
7972 static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi,
7973 Prefix pfx, Long delta,
7974 HChar* opname, IROp op )
7979 UChar rm = getUChar(delta);
7980 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
7981 if (epartIsReg(rm)) {
7982 putXMMReg( gregOfRexRM(pfx,rm),
7984 getXMMReg(eregOfRexRM(pfx,rm))) );
7985 DIP("%s %s,%s\n", opname,
7986 nameXMMReg(eregOfRexRM(pfx,rm)),
7987 nameXMMReg(gregOfRexRM(pfx,rm)) );
7990 /* We can only do a 64-bit memory read, so the upper half of the
7991 E operand needs to be made simply of zeroes. */
7992 IRTemp epart = newTemp(Ity_V128);
7993 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
7994 assign( epart, unop( Iop_64UtoV128,
7995 loadLE(Ity_I64, mkexpr(addr))) );
7996 putXMMReg( gregOfRexRM(pfx,rm),
7997 binop(op, gpart, mkexpr(epart)) );
7998 DIP("%s %s,%s\n", opname,
8000 nameXMMReg(gregOfRexRM(pfx,rm)) );
8006 /* All lanes unary SSE operation, G = op(E). */
/* Full-width unary SSE op: G = op(E), where E is an xmm register or
   a 128-bit memory operand.  Returns the updated delta. */
8008 static ULong dis_SSE_E_to_G_unary_all (
8010 Prefix pfx, Long delta,
8011 HChar* opname, IROp op
8017 UChar rm = getUChar(delta);
8018 if (epartIsReg(rm)) {
8019 putXMMReg( gregOfRexRM(pfx,rm),
8020 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) );
8021 DIP("%s %s,%s\n", opname,
8022 nameXMMReg(eregOfRexRM(pfx,rm)),
8023 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* memory-operand case */
8026 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8027 putXMMReg( gregOfRexRM(pfx,rm),
8028 unop(op, loadLE(Ity_V128, mkexpr(addr))) );
8029 DIP("%s %s,%s\n", opname,
8031 nameXMMReg(gregOfRexRM(pfx,rm)) );
8037 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
/* Lowest-32-bit-lane-only unary SSE op: G = op(E) where op only
   affects lane 0.  oldG0 holds the current G; oldG1 is oldG0 with
   E's low 32 bits spliced into lane 0 via Iop_SetV128lo32, so the
   upper lanes of G are preserved across the operation. */
8039 static ULong dis_SSE_E_to_G_unary_lo32 (
8041 Prefix pfx, Long delta,
8042 HChar* opname, IROp op
8045 /* First we need to get the old G value and patch the low 32 bits
8046 of the E operand into it. Then apply op and write back to G. */
8050 UChar rm = getUChar(delta);
8051 IRTemp oldG0 = newTemp(Ity_V128);
8052 IRTemp oldG1 = newTemp(Ity_V128);
8054 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8056 if (epartIsReg(rm)) {
8058 binop( Iop_SetV128lo32,
8060 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
8061 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8062 DIP("%s %s,%s\n", opname,
8063 nameXMMReg(eregOfRexRM(pfx,rm)),
8064 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* memory-operand case: load just 32 bits */
8067 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8069 binop( Iop_SetV128lo32,
8071 loadLE(Ity_I32, mkexpr(addr)) ));
8072 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8073 DIP("%s %s,%s\n", opname,
8075 nameXMMReg(gregOfRexRM(pfx,rm)) );
8081 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
/* Lowest-64-bit-lane-only unary SSE op: G = op(E) where op only
   affects lane 0.  Same patch-then-apply scheme as the lo32 variant,
   using Iop_SetV128lo64 so G's upper 64 bits are preserved. */
8083 static ULong dis_SSE_E_to_G_unary_lo64 (
8085 Prefix pfx, Long delta,
8086 HChar* opname, IROp op
8089 /* First we need to get the old G value and patch the low 64 bits
8090 of the E operand into it. Then apply op and write back to G. */
8094 UChar rm = getUChar(delta);
8095 IRTemp oldG0 = newTemp(Ity_V128);
8096 IRTemp oldG1 = newTemp(Ity_V128);
8098 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8100 if (epartIsReg(rm)) {
8102 binop( Iop_SetV128lo64,
8104 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
8105 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8106 DIP("%s %s,%s\n", opname,
8107 nameXMMReg(eregOfRexRM(pfx,rm)),
8108 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* memory-operand case: load just 64 bits */
8111 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8113 binop( Iop_SetV128lo64,
8115 loadLE(Ity_I64, mkexpr(addr)) ));
8116 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8117 DIP("%s %s,%s\n", opname,
8119 nameXMMReg(gregOfRexRM(pfx,rm)) );
8125 /* SSE integer binary operation:
8126 G = G `op` E (eLeft == False)
8127 G = E `op` G (eLeft == True)
/* Full-width SSE integer binop.  eLeft selects the operand order:
   False -> G = G `op` E, True -> G = E `op` G (needed because some
   IROps only exist in one orientation, e.g. subtract/compare). */
8129 static ULong dis_SSEint_E_to_G(
8131 Prefix pfx, Long delta,
8132 HChar* opname, IROp op,
8139 UChar rm = getUChar(delta);
8140 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8141 IRExpr* epart = NULL;
8142 if (epartIsReg(rm)) {
8143 epart = getXMMReg(eregOfRexRM(pfx,rm));
8144 DIP("%s %s,%s\n", opname,
8145 nameXMMReg(eregOfRexRM(pfx,rm)),
8146 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* memory-operand case */
8149 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8150 epart = loadLE(Ity_V128, mkexpr(addr));
8151 DIP("%s %s,%s\n", opname,
8153 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* operand order chosen by eLeft */
8156 putXMMReg( gregOfRexRM(pfx,rm),
8157 eLeft ? binop(op, epart, gpart)
8158 : binop(op, gpart, epart) );
8163 /* Helper for doing SSE FP comparisons. */
/* Map an SSE compare immediate (imm8) plus operand size (sz: 4 for
   single, 8 for double precision) and lane mode (all_lanes: packed
   vs scalar-low-lane) onto a VEX compare IROp, writing it to *op.
   *needNot signals the caller to complement the result for the
   predicates that have no direct IROp (handled before these
   dispatch tables; those lines are not visible here).  Panics on an
   unhandled combination. */
8165 static void findSSECmpOp ( Bool* needNot, IROp* op,
8166 Int imm8, Bool all_lanes, Int sz )
/* packed single precision */
8176 if (sz == 4 && all_lanes) {
8178 case 0: *op = Iop_CmpEQ32Fx4; return;
8179 case 1: *op = Iop_CmpLT32Fx4; return;
8180 case 2: *op = Iop_CmpLE32Fx4; return;
8181 case 3: *op = Iop_CmpUN32Fx4; return;
/* scalar single precision (low lane only) */
8185 if (sz == 4 && !all_lanes) {
8187 case 0: *op = Iop_CmpEQ32F0x4; return;
8188 case 1: *op = Iop_CmpLT32F0x4; return;
8189 case 2: *op = Iop_CmpLE32F0x4; return;
8190 case 3: *op = Iop_CmpUN32F0x4; return;
/* packed double precision */
8194 if (sz == 8 && all_lanes) {
8196 case 0: *op = Iop_CmpEQ64Fx2; return;
8197 case 1: *op = Iop_CmpLT64Fx2; return;
8198 case 2: *op = Iop_CmpLE64Fx2; return;
8199 case 3: *op = Iop_CmpUN64Fx2; return;
/* scalar double precision (low lane only) */
8203 if (sz == 8 && !all_lanes) {
8205 case 0: *op = Iop_CmpEQ64F0x2; return;
8206 case 1: *op = Iop_CmpLT64F0x2; return;
8207 case 2: *op = Iop_CmpLE64F0x2; return;
8208 case 3: *op = Iop_CmpUN64F0x2; return;
8212 vpanic("findSSECmpOp(amd64,guest)");
8215 /* Handles SSE 32F/64F comparisons. */
/* Decode CMPPS/CMPSS/CMPPD/CMPSD (opcode, ModRM, imm8): compute the
   comparison selected by imm8 via findSSECmpOp, then write the
   (possibly complemented) all-ones/all-zeroes mask back to G.
   all_lanes selects packed vs scalar-low-lane; sz is 4 or 8. */
8217 static ULong dis_SSEcmp_E_to_G ( VexAbiInfo* vbi,
8218 Prefix pfx, Long delta,
8219 HChar* opname, Bool all_lanes, Int sz )
8224 Bool needNot = False;
8225 IROp op = Iop_INVALID;
8226 IRTemp plain = newTemp(Ity_V128);
8227 UChar rm = getUChar(delta);
8229 vassert(sz == 4 || sz == 8);
8230 if (epartIsReg(rm)) {
/* imm8 immediately follows the ModRM byte */
8231 imm8 = getUChar(delta+1);
8232 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
8233 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
8234 getXMMReg(eregOfRexRM(pfx,rm))) );
8236 DIP("%s $%d,%s,%s\n", opname,
8238 nameXMMReg(eregOfRexRM(pfx,rm)),
8239 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* memory E: imm8 follows the amode bytes */
8241 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
8242 imm8 = getUChar(delta+alen);
8243 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
/* for scalar forms, load only the low lane and zero-extend */
8247 getXMMReg(gregOfRexRM(pfx,rm)),
8248 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
8249 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
8250 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
8254 DIP("%s $%d,%s,%s\n", opname,
8257 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* complement the whole vector for packed not-predicates */
8260 if (needNot && all_lanes) {
8261 putXMMReg( gregOfRexRM(pfx,rm),
8262 unop(Iop_NotV128, mkexpr(plain)) );
/* scalar not-predicates: flip only the low lane (32 or 64 bits);
   mkV128's mask is one bit per byte, hence 0x000F / 0x00FF */
8265 if (needNot && !all_lanes) {
8266 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
8267 putXMMReg( gregOfRexRM(pfx,rm),
8268 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
8271 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
8278 /* Vector by scalar shift of G by the amount specified at the bottom
/* PSLL/PSRL/PSRA with the shift count taken from E (low 64 bits of
   an xmm register or memory).  Out-of-range counts (>= lane size)
   must produce 0 for logical shifts and a full sign-fill for
   arithmetic shifts, hence the CmpLT64U range guards below instead
   of shifting directly by amt8 (which would be undefined IR). */
8281 static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi,
8282 Prefix pfx, Long delta,
8283 HChar* opname, IROp op )
8289 UChar rm = getUChar(delta);
8290 IRTemp g0 = newTemp(Ity_V128);
8291 IRTemp g1 = newTemp(Ity_V128);
8292 IRTemp amt = newTemp(Ity_I32);
8293 IRTemp amt8 = newTemp(Ity_I8);
8294 if (epartIsReg(rm)) {
8295 assign( amt, getXMMRegLane32(eregOfRexRM(pfx,rm), 0) );
8296 DIP("%s %s,%s\n", opname,
8297 nameXMMReg(eregOfRexRM(pfx,rm)),
8298 nameXMMReg(gregOfRexRM(pfx,rm)) );
8301 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8302 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
8303 DIP("%s %s,%s\n", opname,
8305 nameXMMReg(gregOfRexRM(pfx,rm)) );
8308 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
8309 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
/* classify the op and record its lane width for the range check */
8311 shl = shr = sar = False;
/* NOTE(review): size = 32 for Iop_ShlN16x8 looks wrong -- the lane
   is 16 bits, and both the SarN16x8/ShrN16x8 cases here and the
   Iop_ShlN16x8 case in dis_SSE_shiftE_imm use 16.  With 32, PSLLW
   counts in 16..31 would not be clamped to zero.  Confirm against
   the PSLLW spec before changing. */
8314 case Iop_ShlN16x8: shl = True; size = 32; break;
8315 case Iop_ShlN32x4: shl = True; size = 32; break;
8316 case Iop_ShlN64x2: shl = True; size = 64; break;
8317 case Iop_SarN16x8: sar = True; size = 16; break;
8318 case Iop_SarN32x4: sar = True; size = 32; break;
8319 case Iop_ShrN16x8: shr = True; size = 16; break;
8320 case Iop_ShrN32x4: shr = True; size = 32; break;
8321 case Iop_ShrN64x2: shr = True; size = 64; break;
8322 default: vassert(0);
/* logical shifts: in-range -> shift, out-of-range -> zero */
8330 binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64(size))),
8332 binop(op, mkexpr(g0), mkexpr(amt8))
/* arithmetic shift: out-of-range clamps to size-1 (sign fill) */
8341 binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64(size))),
8342 binop(op, mkexpr(g0), mkU8(size-1)),
8343 binop(op, mkexpr(g0), mkexpr(amt8))
8350 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
8355 /* Vector by scalar shift of E by an immediate byte. */
/* PSLL/PSRL/PSRA of E (always an xmm register here) by an immediate
   byte.  The reg field of ModRM selects the shift family (2 = Shr,
   4 = Sar, 6 = Shl).  Since the count is a compile-time constant,
   out-of-range clamping is resolved statically rather than with
   IR-level Mux guards. */
8358 ULong dis_SSE_shiftE_imm ( Prefix pfx,
8359 Long delta, HChar* opname, IROp op )
8362 UChar rm = getUChar(delta);
8363 IRTemp e0 = newTemp(Ity_V128);
8364 IRTemp e1 = newTemp(Ity_V128);
8366 vassert(epartIsReg(rm));
8367 vassert(gregLO3ofRM(rm) == 2
8368 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
8369 amt = getUChar(delta+1);
8371 DIP("%s $%d,%s\n", opname,
8373 nameXMMReg(eregOfRexRM(pfx,rm)) );
8374 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
/* classify the op and record its lane width */
8376 shl = shr = sar = False;
8379 case Iop_ShlN16x8: shl = True; size = 16; break;
8380 case Iop_ShlN32x4: shl = True; size = 32; break;
8381 case Iop_ShlN64x2: shl = True; size = 64; break;
8382 case Iop_SarN16x8: sar = True; size = 16; break;
8383 case Iop_SarN32x4: sar = True; size = 32; break;
8384 case Iop_ShrN16x8: shr = True; size = 16; break;
8385 case Iop_ShrN32x4: shr = True; size = 32; break;
8386 case Iop_ShrN64x2: shr = True; size = 64; break;
8387 default: vassert(0);
/* logical: amt >= size gives zero (selected statically) */
8391 assign( e1, amt >= size
8393 : binop(op, mkexpr(e0), mkU8(amt))
/* arithmetic: amt >= size clamps to size-1 (sign fill) */
8397 assign( e1, amt >= size
8398 ? binop(op, mkexpr(e0), mkU8(size-1))
8399 : binop(op, mkexpr(e0), mkU8(amt))
8405 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
8410 /* Get the current SSE rounding mode. */
/* Read the guest's SSE rounding mode.  It is stored in the guest
   state as a 64-bit field (OFFB_SSEROUND); the result is narrowed
   to the Ity_I32 value the F64 IR ops expect. */
8412 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
8417 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
/* Write the guest's SSE rounding mode.  The incoming expression must
   be Ity_I32; it is widened to the 64-bit guest-state field. */
8421 static void put_sse_roundingmode ( IRExpr* sseround )
8423 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
8424 stmt( IRStmt_Put( OFFB_SSEROUND,
8425 unop(Iop_32Uto64,sseround) ) );
8428 /* Break a 128-bit value up into four 32-bit ints. */
/* Split a 128-bit temp into four new 32-bit temps, *t3 being the
   most significant lane and *t0 the least.  The output IRTemp slots
   must arrive as IRTemp_INVALID (enforced by the vasserts) so a
   stale temp is never silently overwritten. */
8430 static void breakup128to32s ( IRTemp t128,
8432 IRTemp* t3, IRTemp* t2,
8433 IRTemp* t1, IRTemp* t0 )
8435 IRTemp hi64 = newTemp(Ity_I64);
8436 IRTemp lo64 = newTemp(Ity_I64);
8437 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
8438 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
8440 vassert(t0 && *t0 == IRTemp_INVALID);
8441 vassert(t1 && *t1 == IRTemp_INVALID);
8442 vassert(t2 && *t2 == IRTemp_INVALID);
8443 vassert(t3 && *t3 == IRTemp_INVALID);
8445 *t0 = newTemp(Ity_I32);
8446 *t1 = newTemp(Ity_I32);
8447 *t2 = newTemp(Ity_I32);
8448 *t3 = newTemp(Ity_I32);
8449 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
8450 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
8451 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
8452 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
8455 /* Construct a 128-bit value from four 32-bit ints. */
/* Reassemble four 32-bit temps into a 128-bit expression; t3 is the
   most significant lane, t0 the least.  Inverse of breakup128to32s. */
8457 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
8458 IRTemp t1, IRTemp t0 )
8461 binop( Iop_64HLtoV128,
8462 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
8463 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
8467 /* Break a 64-bit value up into four 16-bit ints. */
/* Split a 64-bit temp into four new 16-bit temps, *t3 being the most
   significant lane and *t0 the least.  The output slots must arrive
   as IRTemp_INVALID, mirroring breakup128to32s. */
8469 static void breakup64to16s ( IRTemp t64,
8471 IRTemp* t3, IRTemp* t2,
8472 IRTemp* t1, IRTemp* t0 )
8474 IRTemp hi32 = newTemp(Ity_I32);
8475 IRTemp lo32 = newTemp(Ity_I32);
8476 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
8477 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
8479 vassert(t0 && *t0 == IRTemp_INVALID);
8480 vassert(t1 && *t1 == IRTemp_INVALID);
8481 vassert(t2 && *t2 == IRTemp_INVALID);
8482 vassert(t3 && *t3 == IRTemp_INVALID);
8484 *t0 = newTemp(Ity_I16);
8485 *t1 = newTemp(Ity_I16);
8486 *t2 = newTemp(Ity_I16);
8487 *t3 = newTemp(Ity_I16);
8488 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
8489 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
8490 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
8491 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
8494 /* Construct a 64-bit value from four 16-bit ints. */
/* Reassemble four 16-bit temps into a 64-bit expression; t3 is the
   most significant lane, t0 the least.  Inverse of breakup64to16s. */
8496 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
8497 IRTemp t1, IRTemp t0 )
8500 binop( Iop_32HLto64,
8501 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
8502 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
8507 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
8508 values (aa,bb), computes, for each of the 4 16-bit lanes:
8510 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
/* SSSE3 PMULHRSW on one 64-bit half: per 16-bit lane computes
   (((a *s b) >>u 14) + 1) >>u 1.  The 16-bit lanes are widened to
   two 2x32 vectors via self-interleave (each lane duplicated into
   the high and low half of a 32-bit slot), multiplied at 32 bits,
   rounded, and the surviving 16-bit results gathered back with
   CatEvenLanes16x4. */
8512 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
8514 IRTemp aa = newTemp(Ity_I64);
8515 IRTemp bb = newTemp(Ity_I64);
8516 IRTemp aahi32s = newTemp(Ity_I64);
8517 IRTemp aalo32s = newTemp(Ity_I64);
8518 IRTemp bbhi32s = newTemp(Ity_I64);
8519 IRTemp bblo32s = newTemp(Ity_I64);
8520 IRTemp rHi = newTemp(Ity_I64);
8521 IRTemp rLo = newTemp(Ity_I64);
8522 IRTemp one32x2 = newTemp(Ity_I64);
/* widen a's and b's 16-bit lanes by interleaving each with itself */
8527 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
8531 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
8535 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
8539 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
/* the "+1" rounding constant, one per 32-bit lane */
8541 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
8550 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
8566 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
8575 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
8578 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
8579 values (aa,bb), computes, for each lane:
8581 if aa_lane < 0 then - bb_lane
8582 else if aa_lane > 0 then bb_lane
/* SSSE3 PSIGN{B,W,D} on one 64-bit half: per lane of width laneszB
   bytes, yields -bb where aa < 0, bb where aa > 0, and (implicitly,
   since neither mask is set) 0 where aa == 0.  Implemented
   branch-free with two signed-compare masks OR'd together. */
8585 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
8587 IRTemp aa = newTemp(Ity_I64);
8588 IRTemp bb = newTemp(Ity_I64);
8589 IRTemp zero = newTemp(Ity_I64);
8590 IRTemp bbNeg = newTemp(Ity_I64);
8591 IRTemp negMask = newTemp(Ity_I64);
8592 IRTemp posMask = newTemp(Ity_I64);
8593 IROp opSub = Iop_INVALID;
8594 IROp opCmpGTS = Iop_INVALID;
/* pick lane-width-specific subtract and signed-greater-than ops */
8597 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
8598 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
8599 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
8600 default: vassert(0);
8605 assign( zero, mkU64(0) );
/* bbNeg = 0 - bb, per lane */
8606 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
/* negMask lanes are all-ones where aa < 0; posMask where aa > 0 */
8607 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
8608 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
8612 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
8613 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
8617 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
8618 value aa, computes, for each lane
8620 if aa < 0 then -aa else aa
8622 Note that the result is interpreted as unsigned, so that the
8623 absolute value of the most negative signed input can be
/* SSSE3 PABS{B,W,D} on one 64-bit half: per lane of width laneszB
   bytes, yields |aa| interpreted unsigned.  negMask is built by an
   arithmetic right shift of (lane width - 1) bits, giving all-ones
   for negative lanes; the result selects -aa or aa branch-free. */
8626 static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
8628 IRTemp aa = newTemp(Ity_I64);
8629 IRTemp zero = newTemp(Ity_I64);
8630 IRTemp aaNeg = newTemp(Ity_I64);
8631 IRTemp negMask = newTemp(Ity_I64);
8632 IRTemp posMask = newTemp(Ity_I64);
8633 IROp opSub = Iop_INVALID;
8634 IROp opSarN = Iop_INVALID;
/* pick lane-width-specific subtract and arithmetic-shift ops */
8637 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
8638 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
8639 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
8640 default: vassert(0);
/* negMask: sign bit smeared across each lane */
8644 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
8645 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
8646 assign( zero, mkU64(0) );
8647 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
8650 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
8651 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
/* PALIGNR building block: concatenate hi64:lo64 and extract the
   64 bits starting byteShift bytes up from the bottom, i.e.
   (hi64 << (64 - 8*byteShift)) | (lo64 >> (8*byteShift)).
   byteShift 0 and 8 are degenerate and excluded by the vassert. */
8654 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
8655 IRTemp lo64, Long byteShift )
8657 vassert(byteShift >= 1 && byteShift <= 7);
8660 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
8661 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
8665 /* Generate a SIGSEGV followed by a restart of the current instruction
8666 if effective_addr is not 16-aligned. This is required behaviour
8667 for some SSE3 instructions and all 128-bit SSSE3 instructions.
8668 This assumes that guest_RIP_curr_instr is set correctly! */
8669 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
/* Test the low 4 address bits; a nonzero result means misalignment,
   and the guarded exit re-delivers control at the current insn's RIP
   so it restarts after the signal. */
8674 binop(Iop_And64,mkexpr(effective_addr),mkU64(0xF)),
8677 IRConst_U64(guest_RIP_curr_instr)
8683 /* Helper for deciding whether a given insn (starting at the opcode
8684 byte) may validly be used with a LOCK prefix. The following insns
8685 may be used with LOCK when their destination operand is in memory.
8686 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
8688 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
8689 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
8690 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
8691 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
8692 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
8693 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
8694 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
8708 CMPXCHG 0F B0, 0F B1
8713 ------------------------------
8715 80 /0 = addb $imm8, rm8
8716 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
8717 82 /0 = addb $imm8, rm8
8718 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
8721 01 = addl r32, rm32 and addw r16, rm16
8723 Same for ADD OR ADC SBB AND SUB XOR
8726 FF /1 = dec rm32 and dec rm16
8729 FF /0 = inc rm32 and inc rm16
8732 F7 /3 = neg rm32 and neg rm16
8735 F7 /2 = not rm32 and not rm16
8737 0F BB = btcw r16, rm16 and btcl r32, rm32
8738 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
/* Decide whether the insn starting at opc may legally carry a LOCK
   prefix.  opc[0] is the primary opcode byte (opc[1] the ModRM, or
   for 0F-escaped insns opc[1] is the second opcode byte and opc[2]
   the ModRM).  LOCK is only valid when the destination is memory,
   hence the !epartIsReg checks on the ModRM byte; see the opcode
   table in the comment above. */
8742 static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
/* ADD/OR/ADC/SBB/AND/SUB/XOR, reg-to-rm forms */
8745 case 0x00: case 0x01: case 0x08: case 0x09:
8746 case 0x10: case 0x11: case 0x18: case 0x19:
8747 case 0x20: case 0x21: case 0x28: case 0x29:
8748 case 0x30: case 0x31:
8749 if (!epartIsReg(opc[1]))
/* Grp1 (imm-to-rm): /0../6 cover ADD..XOR; /7 is CMP, not lockable */
8753 case 0x80: case 0x81: case 0x82: case 0x83:
8754 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
8755 && !epartIsReg(opc[1]))
/* Grp4/5: /0 = INC, /1 = DEC */
8759 case 0xFE: case 0xFF:
8760 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
8761 && !epartIsReg(opc[1]))
/* Grp3: /2 = NOT, /3 = NEG */
8765 case 0xF6: case 0xF7:
8766 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
8767 && !epartIsReg(opc[1]))
/* XCHG with memory (implicitly locked anyway) */
8771 case 0x86: case 0x87:
8772 if (!epartIsReg(opc[1]))
/* 0F-escaped: BTC/BTR/BTS reg,rm */
8778 case 0xBB: case 0xB3: case 0xAB:
8779 if (!epartIsReg(opc[2]))
/* 0F BA: /5 = BTS, /6 = BTR, /7 = BTC with imm8 */
8783 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
8784 && !epartIsReg(opc[2]))
/* 0F B0/B1 = CMPXCHG */
8787 case 0xB0: case 0xB1:
8788 if (!epartIsReg(opc[2]))
/* 0F C7 /1 = CMPXCHG8B/16B */
8792 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
/* 0F C0/C1 = XADD */
8795 case 0xC0: case 0xC1:
8796 if (!epartIsReg(opc[2]))
8801 } /* switch (opc[1]) */
8807 } /* switch (opc[0]) */
8813 /*------------------------------------------------------------*/
8814 /*--- Disassemble a single instruction ---*/
8815 /*------------------------------------------------------------*/
8817 /* Disassemble a single instruction into IR. The instruction is
8818 located in host memory at &guest_code[delta]. */
8821 DisResult disInstr_AMD64_WRK (
8822 /*OUT*/Bool* expect_CAS,
8824 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
8826 void* callback_opaque,
8828 VexArchInfo* archinfo,
8833 IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
8835 UChar opc, modrm, abyte, pre;
8838 Int am_sz, d_sz, n, n_prefixes;
8840 UChar* insn; /* used in SSE decoders */
8842 /* The running delta */
8843 Long delta = delta64;
8845 /* Holds eip at the start of the insn, so that we can print
8846 consistent error messages for unimplemented insns. */
8847 Long delta_start = delta;
8849 /* sz denotes the nominal data-op size of the insn; we change it to
8850 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
8851 conflict REX.W takes precedence. */
8854 /* pfx holds the summary of prefixes. */
8855 Prefix pfx = PFX_EMPTY;
8857 /* Set result defaults. */
8858 dres.whatNext = Dis_Continue;
8860 dres.continueAt = 0;
8862 *expect_CAS = False;
8864 vassert(guest_RIP_next_assumed == 0);
8865 vassert(guest_RIP_next_mustcheck == False);
8867 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
8869 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
8871 /* We may be asked to update the guest RIP before going further. */
8873 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr)) );
8875 /* Spot "Special" instructions (see comment at top of file). */
8877 UChar* code = (UChar*)(guest_code + delta);
8878 /* Spot the 16-byte preamble:
8879 48C1C703 rolq $3, %rdi
8880 48C1C70D rolq $13, %rdi
8881 48C1C73D rolq $61, %rdi
8882 48C1C733 rolq $51, %rdi
8884 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
8885 && code[ 3] == 0x03 &&
8886 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
8887 && code[ 7] == 0x0D &&
8888 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
8889 && code[11] == 0x3D &&
8890 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
8891 && code[15] == 0x33) {
8892 /* Got a "Special" instruction preamble. Which one is it? */
8893 if (code[16] == 0x48 && code[17] == 0x87
8894 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
8895 /* %RDX = client_request ( %RAX ) */
8896 DIP("%%rdx = client_request ( %%rax )\n");
8898 jmp_lit(Ijk_ClientReq, guest_RIP_bbstart+delta);
8899 dres.whatNext = Dis_StopHere;
8900 goto decode_success;
8903 if (code[16] == 0x48 && code[17] == 0x87
8904 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
8905 /* %RAX = guest_NRADDR */
8906 DIP("%%rax = guest_NRADDR\n");
8908 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
8909 goto decode_success;
8912 if (code[16] == 0x48 && code[17] == 0x87
8913 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
8914 /* call-noredir *%RAX */
8915 DIP("call-noredir *%%rax\n");
8917 t1 = newTemp(Ity_I64);
8918 assign(t1, getIRegRAX(8));
8919 t2 = newTemp(Ity_I64);
8920 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
8921 putIReg64(R_RSP, mkexpr(t2));
8922 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
8923 jmp_treg(Ijk_NoRedir,t1);
8924 dres.whatNext = Dis_StopHere;
8925 goto decode_success;
8927 /* We don't know what it is. */
8928 goto decode_failure;
8933 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
8934 as many invalid combinations as possible. */
8937 if (n_prefixes > 7) goto decode_failure;
8938 pre = getUChar(delta);
8940 case 0x66: pfx |= PFX_66; break;
8941 case 0x67: pfx |= PFX_ASO; break;
8942 case 0xF2: pfx |= PFX_F2; break;
8943 case 0xF3: pfx |= PFX_F3; break;
8944 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
8945 case 0x2E: pfx |= PFX_CS; break;
8946 case 0x3E: pfx |= PFX_DS; break;
8947 case 0x26: pfx |= PFX_ES; break;
8948 case 0x64: pfx |= PFX_FS; break;
8949 case 0x65: pfx |= PFX_GS; break;
8950 case 0x36: pfx |= PFX_SS; break;
8953 if (pre & (1<<3)) pfx |= PFX_REXW;
8954 if (pre & (1<<2)) pfx |= PFX_REXR;
8955 if (pre & (1<<1)) pfx |= PFX_REXX;
8956 if (pre & (1<<0)) pfx |= PFX_REXB;
8967 /* Dump invalid combinations */
8969 if (pfx & PFX_F2) n++;
8970 if (pfx & PFX_F3) n++;
8972 goto decode_failure; /* can't have both */
8975 if (pfx & PFX_CS) n++;
8976 if (pfx & PFX_DS) n++;
8977 if (pfx & PFX_ES) n++;
8978 if (pfx & PFX_FS) n++;
8979 if (pfx & PFX_GS) n++;
8980 if (pfx & PFX_SS) n++;
8982 goto decode_failure; /* multiple seg overrides == illegal */
8984 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
8985 that we should accept it. */
8986 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero)
8987 goto decode_failure;
8989 /* Ditto for %gs prefixes. */
8990 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60)
8991 goto decode_failure;
8995 if (pfx & PFX_66) sz = 2;
8996 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
8998 /* Now we should be looking at the primary opcode byte or the
8999 leading F2 or F3. Check that any LOCK prefix is actually
9002 if (pfx & PFX_LOCK) {
9003 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
9006 *expect_CAS = False;
9007 goto decode_failure;
9012 /* ---------------------------------------------------- */
9013 /* --- The SSE/SSE2 decoder. --- */
9014 /* ---------------------------------------------------- */
9016 /* What did I do to deserve SSE ? Perhaps I was really bad in a
9019 /* Note, this doesn't handle SSE3 right now. All amd64s support
9020 SSE2 as a minimum so there is no point distinguishing SSE1 vs
9023 insn = (UChar*)&guest_code[delta];
9025 /* FXSAVE is spuriously at the start here only because it is
9026 thusly placed in guest-x86/toIR.c. */
9028 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory.
9029 Note that REX.W 0F AE /0 writes a slightly different format and
9030 we don't handle that here. */
9031 if (haveNo66noF2noF3(pfx) && sz == 4
9032 && insn[0] == 0x0F && insn[1] == 0xAE
9033 && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 0) {
9035 modrm = getUChar(delta+2);
9037 vassert(!epartIsReg(modrm));
9038 /* REX.W must not be set. That should be assured us by sz == 4
9040 vassert(!(pfx & PFX_REXW));
9042 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9045 DIP("fxsave %s\n", dis_buf);
9047 /* Uses dirty helper:
9048 void amd64g_do_FXSAVE ( VexGuestAMD64State*, UInt ) */
9049 d = unsafeIRDirty_0_N (
9051 "amd64g_dirtyhelper_FXSAVE",
9052 &amd64g_dirtyhelper_FXSAVE,
9053 mkIRExprVec_1( mkexpr(addr) )
9057 /* declare we're writing memory */
9059 d->mAddr = mkexpr(addr);
9062 /* declare we're reading guest state */
9065 d->fxState[0].fx = Ifx_Read;
9066 d->fxState[0].offset = OFFB_FTOP;
9067 d->fxState[0].size = sizeof(UInt);
9069 d->fxState[1].fx = Ifx_Read;
9070 d->fxState[1].offset = OFFB_FPREGS;
9071 d->fxState[1].size = 8 * sizeof(ULong);
9073 d->fxState[2].fx = Ifx_Read;
9074 d->fxState[2].offset = OFFB_FPTAGS;
9075 d->fxState[2].size = 8 * sizeof(UChar);
9077 d->fxState[3].fx = Ifx_Read;
9078 d->fxState[3].offset = OFFB_FPROUND;
9079 d->fxState[3].size = sizeof(ULong);
9081 d->fxState[4].fx = Ifx_Read;
9082 d->fxState[4].offset = OFFB_FC3210;
9083 d->fxState[4].size = sizeof(ULong);
9085 d->fxState[5].fx = Ifx_Read;
9086 d->fxState[5].offset = OFFB_XMM0;
9087 d->fxState[5].size = 16 * sizeof(U128);
9089 d->fxState[6].fx = Ifx_Read;
9090 d->fxState[6].offset = OFFB_SSEROUND;
9091 d->fxState[6].size = sizeof(ULong);
9093 /* Be paranoid ... this assertion tries to ensure the 16 %xmm
9094 images are packed back-to-back. If not, the value of
9095 d->fxState[5].size is wrong. */
9096 vassert(16 == sizeof(U128));
9097 vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16));
9099 stmt( IRStmt_Dirty(d) );
9101 goto decode_success;
9104 /* ------ SSE decoder main ------ */
9106 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
9107 if (haveNo66noF2noF3(pfx) && sz == 4
9108 && insn[0] == 0x0F && insn[1] == 0x58) {
9109 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addps", Iop_Add32Fx4 );
9110 goto decode_success;
9113 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
9114 if (haveF3no66noF2(pfx) && sz == 4
9115 && insn[0] == 0x0F && insn[1] == 0x58) {
9116 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "addss", Iop_Add32F0x4 );
9117 goto decode_success;
9120 /* 0F 55 = ANDNPS -- G = (not G) and E */
9121 if (haveNo66noF2noF3(pfx) && sz == 4
9122 && insn[0] == 0x0F && insn[1] == 0x55) {
9123 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnps", Iop_AndV128 );
9124 goto decode_success;
9127 /* 0F 54 = ANDPS -- G = G and E */
9128 if (haveNo66noF2noF3(pfx) && sz == 4
9129 && insn[0] == 0x0F && insn[1] == 0x54) {
9130 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andps", Iop_AndV128 );
9131 goto decode_success;
9134 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
9135 if (haveNo66noF2noF3(pfx) && sz == 4
9136 && insn[0] == 0x0F && insn[1] == 0xC2) {
9137 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpps", True, 4 );
9138 goto decode_success;
9141 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
9142 if (haveF3no66noF2(pfx) && sz == 4
9143 && insn[0] == 0x0F && insn[1] == 0xC2) {
9144 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpss", False, 4 );
9145 goto decode_success;
9148 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
9149 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
9150 if (haveNo66noF2noF3(pfx) && sz == 4
9151 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
9152 IRTemp argL = newTemp(Ity_F32);
9153 IRTemp argR = newTemp(Ity_F32);
9154 modrm = getUChar(delta+2);
9155 if (epartIsReg(modrm)) {
9156 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
9157 0/*lowest lane*/ ) );
9159 DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "",
9160 nameXMMReg(eregOfRexRM(pfx,modrm)),
9161 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9163 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9164 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
9166 DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "",
9168 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9170 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
9171 0/*lowest lane*/ ) );
9173 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
9174 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
9180 unop(Iop_F32toF64,mkexpr(argL)),
9181 unop(Iop_F32toF64,mkexpr(argR)))),
9185 goto decode_success;
9188 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
9190 if (haveNo66noF2noF3(pfx) && sz == 4
9191 && insn[0] == 0x0F && insn[1] == 0x2A) {
9192 IRTemp arg64 = newTemp(Ity_I64);
9193 IRTemp rmode = newTemp(Ity_I32);
9195 modrm = getUChar(delta+2);
9197 if (epartIsReg(modrm)) {
9198 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
9200 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
9201 nameXMMReg(gregOfRexRM(pfx,modrm)));
9203 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9204 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9206 DIP("cvtpi2ps %s,%s\n", dis_buf,
9207 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9210 assign( rmode, get_sse_roundingmode() );
9213 gregOfRexRM(pfx,modrm), 0,
9217 unop(Iop_64to32, mkexpr(arg64)) )) );
9220 gregOfRexRM(pfx,modrm), 1,
9224 unop(Iop_64HIto32, mkexpr(arg64)) )) );
9226 goto decode_success;
9229 /* F3 0F 2A = CVTSI2SS
9230 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
9231 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
9232 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)
9233 && insn[0] == 0x0F && insn[1] == 0x2A) {
9235 IRTemp rmode = newTemp(Ity_I32);
9236 assign( rmode, get_sse_roundingmode() );
9237 modrm = getUChar(delta+2);
9240 IRTemp arg32 = newTemp(Ity_I32);
9241 if (epartIsReg(modrm)) {
9242 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
9244 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
9245 nameXMMReg(gregOfRexRM(pfx,modrm)));
9247 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9248 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
9250 DIP("cvtsi2ss %s,%s\n", dis_buf,
9251 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9254 gregOfRexRM(pfx,modrm), 0,
9257 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
9260 IRTemp arg64 = newTemp(Ity_I64);
9261 if (epartIsReg(modrm)) {
9262 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
9264 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
9265 nameXMMReg(gregOfRexRM(pfx,modrm)));
9267 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9268 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9270 DIP("cvtsi2ssq %s,%s\n", dis_buf,
9271 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9274 gregOfRexRM(pfx,modrm), 0,
9277 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
9280 goto decode_success;
9283 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
9284 I32 in mmx, according to prevailing SSE rounding mode */
9285 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
9286 I32 in mmx, rounding towards zero */
9287 if (haveNo66noF2noF3(pfx) && sz == 4
9288 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9289 IRTemp dst64 = newTemp(Ity_I64);
9290 IRTemp rmode = newTemp(Ity_I32);
9291 IRTemp f32lo = newTemp(Ity_F32);
9292 IRTemp f32hi = newTemp(Ity_F32);
9293 Bool r2zero = toBool(insn[1] == 0x2C);
9296 modrm = getUChar(delta+2);
9298 if (epartIsReg(modrm)) {
9300 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
9301 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
9302 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
9303 nameXMMReg(eregOfRexRM(pfx,modrm)),
9304 nameMMXReg(gregLO3ofRM(modrm)));
9306 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9307 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
9308 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
9312 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
9314 nameMMXReg(gregLO3ofRM(modrm)));
9318 assign(rmode, mkU32((UInt)Irrm_ZERO) );
9320 assign( rmode, get_sse_roundingmode() );
9325 binop( Iop_32HLto64,
9326 binop( Iop_F64toI32S,
9328 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
9329 binop( Iop_F64toI32S,
9331 unop( Iop_F32toF64, mkexpr(f32lo) ) )
9335 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
9336 goto decode_success;
9339 /* F3 0F 2D = CVTSS2SI
9340 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
9341 according to prevailing SSE rounding mode
9342 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
9343 according to prevailing SSE rounding mode
9345 /* F3 0F 2C = CVTTSS2SI
9346 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
9347 truncating towards zero
9348 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
9349 truncating towards zero
9351 if (haveF3no66noF2(pfx)
9353 && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9354 IRTemp rmode = newTemp(Ity_I32);
9355 IRTemp f32lo = newTemp(Ity_F32);
9356 Bool r2zero = toBool(insn[1] == 0x2C);
9357 vassert(sz == 4 || sz == 8);
9359 modrm = getUChar(delta+2);
9360 if (epartIsReg(modrm)) {
9362 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
9363 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
9364 nameXMMReg(eregOfRexRM(pfx,modrm)),
9365 nameIReg(sz, gregOfRexRM(pfx,modrm), False));
9367 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9368 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
9370 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
9372 nameIReg(sz, gregOfRexRM(pfx,modrm), False));
9376 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9378 assign( rmode, get_sse_roundingmode() );
9382 putIReg32( gregOfRexRM(pfx,modrm),
9383 binop( Iop_F64toI32S,
9385 unop(Iop_F32toF64, mkexpr(f32lo))) );
9387 putIReg64( gregOfRexRM(pfx,modrm),
9388 binop( Iop_F64toI64S,
9390 unop(Iop_F32toF64, mkexpr(f32lo))) );
9393 goto decode_success;
9396 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
9397 if (haveNo66noF2noF3(pfx) && sz == 4
9398 && insn[0] == 0x0F && insn[1] == 0x5E) {
9399 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divps", Iop_Div32Fx4 );
9400 goto decode_success;
9403 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
9404 if (haveF3no66noF2(pfx) && sz == 4
9405 && insn[0] == 0x0F && insn[1] == 0x5E) {
9406 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "divss", Iop_Div32F0x4 );
9407 goto decode_success;
9410 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
9411 if (insn[0] == 0x0F && insn[1] == 0xAE
9412 && haveNo66noF2noF3(pfx)
9413 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 2) {
9415 IRTemp t64 = newTemp(Ity_I64);
9416 IRTemp ew = newTemp(Ity_I32);
9419 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9421 DIP("ldmxcsr %s\n", dis_buf);
9423 /* The only thing we observe in %mxcsr is the rounding mode.
9424 Therefore, pass the 32-bit value (SSE native-format control
9425 word) to a clean helper, getting back a 64-bit value, the
9426 lower half of which is the SSEROUND value to store, and the
9427 upper half of which is the emulation-warning token which may
9430 /* ULong amd64g_check_ldmxcsr ( ULong ); */
9431 assign( t64, mkIRExprCCall(
9432 Ity_I64, 0/*regparms*/,
9433 "amd64g_check_ldmxcsr",
9434 &amd64g_check_ldmxcsr,
9437 loadLE(Ity_I32, mkexpr(addr))
9443 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
9444 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
9445 put_emwarn( mkexpr(ew) );
9446 /* Finally, if an emulation warning was reported, side-exit to
9447 the next insn, reporting the warning, so that Valgrind's
9448 dispatcher sees the warning. */
9451 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
9453 IRConst_U64(guest_RIP_bbstart+delta)
9456 goto decode_success;
9459 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9460 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
9461 if (haveNo66noF2noF3(pfx) && sz == 4
9462 && insn[0] == 0x0F && insn[1] == 0xF7) {
9464 delta = dis_MMX( &ok, vbi, pfx, sz, delta+1 );
9466 goto decode_failure;
9467 goto decode_success;
9470 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
9471 if (haveNo66noF2noF3(pfx) && sz == 4
9472 && insn[0] == 0x0F && insn[1] == 0x5F) {
9473 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxps", Iop_Max32Fx4 );
9474 goto decode_success;
9477 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
9478 if (haveF3no66noF2(pfx) && sz == 4
9479 && insn[0] == 0x0F && insn[1] == 0x5F) {
9480 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "maxss", Iop_Max32F0x4 );
9481 goto decode_success;
9484 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
9485 if (haveNo66noF2noF3(pfx) && sz == 4
9486 && insn[0] == 0x0F && insn[1] == 0x5D) {
9487 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minps", Iop_Min32Fx4 );
9488 goto decode_success;
9491 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
9492 if (haveF3no66noF2(pfx) && sz == 4
9493 && insn[0] == 0x0F && insn[1] == 0x5D) {
9494 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "minss", Iop_Min32F0x4 );
9495 goto decode_success;
9498 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
9499 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
9500 if (haveNo66noF2noF3(pfx)
9501 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9502 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
9503 modrm = getUChar(delta+2);
9504 if (epartIsReg(modrm)) {
9505 putXMMReg( gregOfRexRM(pfx,modrm),
9506 getXMMReg( eregOfRexRM(pfx,modrm) ));
9507 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9508 nameXMMReg(gregOfRexRM(pfx,modrm)));
9511 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9512 putXMMReg( gregOfRexRM(pfx,modrm),
9513 loadLE(Ity_V128, mkexpr(addr)) );
9514 DIP("mov[ua]ps %s,%s\n", dis_buf,
9515 nameXMMReg(gregOfRexRM(pfx,modrm)));
9518 goto decode_success;
9521 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
9522 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
9523 if (haveNo66noF2noF3(pfx)
9524 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9525 && insn[0] == 0x0F && (insn[1] == 0x29 || insn[1] == 0x11)) {
9526 modrm = getUChar(delta+2);
9527 if (epartIsReg(modrm)) {
9528 /* fall through; awaiting test case */
9530 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9531 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
9532 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
9535 goto decode_success;
9539 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
9540 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
9541 if (haveNo66noF2noF3(pfx)
9542 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9543 && insn[0] == 0x0F && insn[1] == 0x16) {
9544 modrm = getUChar(delta+2);
9545 if (epartIsReg(modrm)) {
9547 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
9548 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
9549 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9550 nameXMMReg(gregOfRexRM(pfx,modrm)));
9552 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9554 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
9555 loadLE(Ity_I64, mkexpr(addr)) );
9556 DIP("movhps %s,%s\n", dis_buf,
9557 nameXMMReg( gregOfRexRM(pfx,modrm) ));
9559 goto decode_success;
9562 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
9563 if (haveNo66noF2noF3(pfx)
9564 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9565 && insn[0] == 0x0F && insn[1] == 0x17) {
9566 if (!epartIsReg(insn[2])) {
9568 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9570 storeLE( mkexpr(addr),
9571 getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
9572 1/*upper lane*/ ) );
9573 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
9575 goto decode_success;
9577 /* else fall through */
9580 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
9581 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
9582 if (haveNo66noF2noF3(pfx)
9583 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9584 && insn[0] == 0x0F && insn[1] == 0x12) {
9585 modrm = getUChar(delta+2);
9586 if (epartIsReg(modrm)) {
9588 putXMMRegLane64( gregOfRexRM(pfx,modrm),
9590 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
9591 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9592 nameXMMReg(gregOfRexRM(pfx,modrm)));
9594 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9596 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
9597 loadLE(Ity_I64, mkexpr(addr)) );
9598 DIP("movlps %s, %s\n",
9599 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
9601 goto decode_success;
9604 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
9605 if (haveNo66noF2noF3(pfx)
9606 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9607 && insn[0] == 0x0F && insn[1] == 0x13) {
9608 if (!epartIsReg(insn[2])) {
9610 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9612 storeLE( mkexpr(addr),
9613 getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
9614 0/*lower lane*/ ) );
9615 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
9617 goto decode_success;
9619 /* else fall through */
9622 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
9623 to 4 lowest bits of ireg(G) */
9624 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
9625 && insn[0] == 0x0F && insn[1] == 0x50) {
9626 /* sz == 8 is a kludge to handle insns with REX.W redundantly
9627 set to 1, which has been known to happen:
9629 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
9631 20071106: Intel docs say that REX.W isn't redundant: when
9632 present, a 64-bit register is written; when not present, only
9633 the 32-bit half is written. However, testing on a Core2
9634 machine suggests the entire 64 bit register is written
9635 irrespective of the status of REX.W. That could be because
9636 of the default rule that says "if the lower half of a 32-bit
9637 register is written, the upper half is zeroed". By using
9638 putIReg32 here we inadvertently produce the same behaviour as
9639 the Core2, for the same reason -- putIReg32 implements said
9642 AMD docs give no indication that REX.W is even valid for this
9644 modrm = getUChar(delta+2);
9645 if (epartIsReg(modrm)) {
9647 t0 = newTemp(Ity_I32);
9648 t1 = newTemp(Ity_I32);
9649 t2 = newTemp(Ity_I32);
9650 t3 = newTemp(Ity_I32);
9652 src = eregOfRexRM(pfx,modrm);
9653 assign( t0, binop( Iop_And32,
9654 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
9656 assign( t1, binop( Iop_And32,
9657 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
9659 assign( t2, binop( Iop_And32,
9660 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
9662 assign( t3, binop( Iop_And32,
9663 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
9665 putIReg32( gregOfRexRM(pfx,modrm),
9667 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
9668 binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
9671 DIP("movmskps %s,%s\n", nameXMMReg(src),
9672 nameIReg32(gregOfRexRM(pfx,modrm)));
9673 goto decode_success;
9675 /* else fall through */
9678 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
9679 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
9680 if ( ( (haveNo66noF2noF3(pfx) && sz == 4)
9681 || (have66noF2noF3(pfx) && sz == 2)
9683 && insn[0] == 0x0F && insn[1] == 0x2B) {
9684 modrm = getUChar(delta+2);
9685 if (!epartIsReg(modrm)) {
9686 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9687 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
9688 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
9690 nameXMMReg(gregOfRexRM(pfx,modrm)));
9692 goto decode_success;
9694 /* else fall through */
9697 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9698 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
9699 Intel manual does not say anything about the usual business of
9700 the FP reg tags getting trashed whenever an MMX insn happens.
9701 So we just leave them alone.
9703 if (haveNo66noF2noF3(pfx) && sz == 4
9704 && insn[0] == 0x0F && insn[1] == 0xE7) {
9705 modrm = getUChar(delta+2);
9706 if (!epartIsReg(modrm)) {
9707 /* do_MMX_preamble(); Intel docs don't specify this */
9708 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9709 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
9710 DIP("movntq %s,%s\n", dis_buf,
9711 nameMMXReg(gregLO3ofRM(modrm)));
9713 goto decode_success;
9715 /* else fall through */
9718 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
9719 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
9720 if (haveF3no66noF2(pfx)
9721 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9722 && insn[0] == 0x0F && insn[1] == 0x10) {
9723 modrm = getUChar(delta+2);
9724 if (epartIsReg(modrm)) {
9725 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
9726 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
9727 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9728 nameXMMReg(gregOfRexRM(pfx,modrm)));
9731 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9732 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
9733 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
9734 loadLE(Ity_I32, mkexpr(addr)) );
9735 DIP("movss %s,%s\n", dis_buf,
9736 nameXMMReg(gregOfRexRM(pfx,modrm)));
9739 goto decode_success;
9742 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
9744 if (haveF3no66noF2(pfx) && sz == 4
9745 && insn[0] == 0x0F && insn[1] == 0x11) {
9746 modrm = getUChar(delta+2);
9747 if (epartIsReg(modrm)) {
9748 /* fall through, we don't yet have a test case */
9750 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9751 storeLE( mkexpr(addr),
9752 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
9753 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
9756 goto decode_success;
9760 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
9761 if (haveNo66noF2noF3(pfx) && sz == 4
9762 && insn[0] == 0x0F && insn[1] == 0x59) {
9763 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulps", Iop_Mul32Fx4 );
9764 goto decode_success;
9767 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
9768 if (haveF3no66noF2(pfx) && sz == 4
9769 && insn[0] == 0x0F && insn[1] == 0x59) {
9770 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "mulss", Iop_Mul32F0x4 );
9771 goto decode_success;
9774 /* 0F 56 = ORPS -- G = G or E */
9775 if (haveNo66noF2noF3(pfx) && sz == 4
9776 && insn[0] == 0x0F && insn[1] == 0x56) {
9777 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orps", Iop_OrV128 );
9778 goto decode_success;
9781 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9782 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
9783 if (haveNo66noF2noF3(pfx) && sz == 4
9784 && insn[0] == 0x0F && insn[1] == 0xE0) {
9786 delta = dis_MMXop_regmem_to_reg (
9787 vbi, pfx, delta+2, insn[1], "pavgb", False );
9788 goto decode_success;
9791 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9792 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
9793 if (haveNo66noF2noF3(pfx) && sz == 4
9794 && insn[0] == 0x0F && insn[1] == 0xE3) {
9796 delta = dis_MMXop_regmem_to_reg (
9797 vbi, pfx, delta+2, insn[1], "pavgw", False );
9798 goto decode_success;
9801 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9802 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
9803 zero-extend of it in ireg(G). */
9804 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
9805 && insn[0] == 0x0F && insn[1] == 0xC5) {
9807 if (epartIsReg(modrm)) {
9808 IRTemp sV = newTemp(Ity_I64);
9809 t5 = newTemp(Ity_I16);
9811 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
9812 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
9813 switch (insn[3] & 3) {
9814 case 0: assign(t5, mkexpr(t0)); break;
9815 case 1: assign(t5, mkexpr(t1)); break;
9816 case 2: assign(t5, mkexpr(t2)); break;
9817 case 3: assign(t5, mkexpr(t3)); break;
9818 default: vassert(0);
9821 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
9823 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
9824 DIP("pextrw $%d,%s,%s\n",
9825 (Int)insn[3], nameMMXReg(eregLO3ofRM(modrm)),
9826 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
9827 : nameIReg32(gregOfRexRM(pfx,modrm))
9830 goto decode_success;
9832 /* else fall through */
9833 /* note, for anyone filling in the mem case: this insn has one
9834 byte after the amode and therefore you must pass 1 as the
9835 last arg to disAMode */
9838 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9839 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
9840 put it into the specified lane of mmx(G). */
9841 if (haveNo66noF2noF3(pfx) && sz == 4
9842 && insn[0] == 0x0F && insn[1] == 0xC4) {
9843 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
9844 mmx reg. t4 is the new lane value. t5 is the original
9845 mmx value. t6 is the new mmx value. */
9847 t4 = newTemp(Ity_I16);
9848 t5 = newTemp(Ity_I64);
9849 t6 = newTemp(Ity_I64);
9853 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
9854 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
9856 if (epartIsReg(modrm)) {
9857 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
9860 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
9861 nameIReg16(eregOfRexRM(pfx,modrm)),
9862 nameMMXReg(gregLO3ofRM(modrm)));
9864 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 );
9866 lane = insn[3+alen-1];
9867 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
9868 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
9870 nameMMXReg(gregLO3ofRM(modrm)));
9874 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
9875 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
9876 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
9877 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
9878 default: vassert(0);
9880 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
9881 goto decode_success;
9884 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9885 /* 0F EE = PMAXSW -- 16x4 signed max */
9886 if (haveNo66noF2noF3(pfx) && sz == 4
9887 && insn[0] == 0x0F && insn[1] == 0xEE) {
9889 delta = dis_MMXop_regmem_to_reg (
9890 vbi, pfx, delta+2, insn[1], "pmaxsw", False );
9891 goto decode_success;
9894 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9895 /* 0F DE = PMAXUB -- 8x8 unsigned max */
9896 if (haveNo66noF2noF3(pfx) && sz == 4
9897 && insn[0] == 0x0F && insn[1] == 0xDE) {
9899 delta = dis_MMXop_regmem_to_reg (
9900 vbi, pfx, delta+2, insn[1], "pmaxub", False );
9901 goto decode_success;
9904 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9905 /* 0F EA = PMINSW -- 16x4 signed min */
9906 if (haveNo66noF2noF3(pfx) && sz == 4
9907 && insn[0] == 0x0F && insn[1] == 0xEA) {
9909 delta = dis_MMXop_regmem_to_reg (
9910 vbi, pfx, delta+2, insn[1], "pminsw", False );
9911 goto decode_success;
9914 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9915 /* 0F DA = PMINUB -- 8x8 unsigned min */
9916 if (haveNo66noF2noF3(pfx) && sz == 4
9917 && insn[0] == 0x0F && insn[1] == 0xDA) {
9919 delta = dis_MMXop_regmem_to_reg (
9920 vbi, pfx, delta+2, insn[1], "pminub", False );
9921 goto decode_success;
9924 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9925 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
9926 mmx(G), turn them into a byte, and put zero-extend of it in
9928 if (haveNo66noF2noF3(pfx) && sz == 4
9929 && insn[0] == 0x0F && insn[1] == 0xD7) {
9931 if (epartIsReg(modrm)) {
9933 t0 = newTemp(Ity_I64);
9934 t1 = newTemp(Ity_I64);
9935 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
9936 assign(t1, mkIRExprCCall(
9937 Ity_I64, 0/*regparms*/,
9938 "amd64g_calculate_mmx_pmovmskb",
9939 &amd64g_calculate_mmx_pmovmskb,
9940 mkIRExprVec_1(mkexpr(t0))));
9941 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1)));
9942 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
9943 nameIReg32(gregOfRexRM(pfx,modrm)));
9945 goto decode_success;
9947 /* else fall through */
9950 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9951 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
9952 if (haveNo66noF2noF3(pfx) && sz == 4
9953 && insn[0] == 0x0F && insn[1] == 0xE4) {
9955 delta = dis_MMXop_regmem_to_reg (
9956 vbi, pfx, delta+2, insn[1], "pmuluh", False );
9957 goto decode_success;
9960 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
9961 /* 0F 18 /1 = PREFETCH0 -- with various different hints */
9962 /* 0F 18 /2 = PREFETCH1 */
9963 /* 0F 18 /3 = PREFETCH2 */
9964 if (insn[0] == 0x0F && insn[1] == 0x18
9965 && haveNo66noF2noF3(pfx)
9966 && !epartIsReg(insn[2])
9967 && gregLO3ofRM(insn[2]) >= 0 && gregLO3ofRM(insn[2]) <= 3) {
9968 HChar* hintstr = "??";
9970 modrm = getUChar(delta+2);
9971 vassert(!epartIsReg(modrm));
9973 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9976 switch (gregLO3ofRM(modrm)) {
9977 case 0: hintstr = "nta"; break;
9978 case 1: hintstr = "t0"; break;
9979 case 2: hintstr = "t1"; break;
9980 case 3: hintstr = "t2"; break;
9981 default: vassert(0);
9984 DIP("prefetch%s %s\n", hintstr, dis_buf);
9985 goto decode_success;
9988 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9989 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
9990 if (haveNo66noF2noF3(pfx) && sz == 4
9991 && insn[0] == 0x0F && insn[1] == 0xF6) {
9993 delta = dis_MMXop_regmem_to_reg (
9994 vbi, pfx, delta+2, insn[1], "psadbw", False );
9995 goto decode_success;
9998 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9999 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
10000 if (haveNo66noF2noF3(pfx) && sz == 4
10001 && insn[0] == 0x0F && insn[1] == 0x70) {
10003 IRTemp sV, dV, s3, s2, s1, s0;
10004 s3 = s2 = s1 = s0 = IRTemp_INVALID;
10005 sV = newTemp(Ity_I64);
10006 dV = newTemp(Ity_I64);
10009 if (epartIsReg(modrm)) {
10010 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
10011 order = (Int)insn[3];
10013 DIP("pshufw $%d,%s,%s\n", order,
10014 nameMMXReg(eregLO3ofRM(modrm)),
10015 nameMMXReg(gregLO3ofRM(modrm)));
10017 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
10018 1/*extra byte after amode*/ );
10019 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
10020 order = (Int)insn[2+alen];
10022 DIP("pshufw $%d,%s,%s\n", order,
10024 nameMMXReg(gregLO3ofRM(modrm)));
10026 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
10028 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10030 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
10031 SEL((order>>2)&3), SEL((order>>0)&3) )
10033 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
10035 goto decode_success;
10038 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
10039 if (haveNo66noF2noF3(pfx) && sz == 4
10040 && insn[0] == 0x0F && insn[1] == 0x53) {
10041 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
10042 "rcpps", Iop_Recip32Fx4 );
10043 goto decode_success;
10046 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
10047 if (haveF3no66noF2(pfx) && sz == 4
10048 && insn[0] == 0x0F && insn[1] == 0x53) {
10049 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
10050 "rcpss", Iop_Recip32F0x4 );
10051 goto decode_success;
10054 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
10055 if (haveNo66noF2noF3(pfx) && sz == 4
10056 && insn[0] == 0x0F && insn[1] == 0x52) {
10057 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
10058 "rsqrtps", Iop_RSqrt32Fx4 );
10059 goto decode_success;
10062 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
10063 if (haveF3no66noF2(pfx) && sz == 4
10064 && insn[0] == 0x0F && insn[1] == 0x52) {
10065 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
10066 "rsqrtss", Iop_RSqrt32F0x4 );
10067 goto decode_success;
10070 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
10071 if (haveNo66noF2noF3(pfx)
10072 && insn[0] == 0x0F && insn[1] == 0xAE
10073 && epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7
10076 /* Insert a memory fence. It's sometimes important that these
10077 are carried through to the generated code. */
10078 stmt( IRStmt_MBE(Imbe_Fence) );
10080 goto decode_success;
10083 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
10084 if (haveNo66noF2noF3(pfx) && sz == 4
10085 && insn[0] == 0x0F && insn[1] == 0xC6) {
10088 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10089 sV = newTemp(Ity_V128);
10090 dV = newTemp(Ity_V128);
10091 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10093 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
10095 if (epartIsReg(modrm)) {
10096 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10097 select = (Int)insn[3];
10099 DIP("shufps $%d,%s,%s\n", select,
10100 nameXMMReg(eregOfRexRM(pfx,modrm)),
10101 nameXMMReg(gregOfRexRM(pfx,modrm)));
10103 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
10104 1/*byte at end of insn*/ );
10105 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10106 select = (Int)insn[2+alen];
10108 DIP("shufps $%d,%s,%s\n", select,
10110 nameXMMReg(gregOfRexRM(pfx,modrm)));
10113 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
10114 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
10116 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
10117 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10120 gregOfRexRM(pfx,modrm),
10121 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
10122 SELD((select>>2)&3), SELD((select>>0)&3) )
10128 goto decode_success;
10131 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
10132 if (haveNo66noF2noF3(pfx) && sz == 4
10133 && insn[0] == 0x0F && insn[1] == 0x51) {
10134 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
10135 "sqrtps", Iop_Sqrt32Fx4 );
10136 goto decode_success;
10139 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
10140 if (haveF3no66noF2(pfx) && sz == 4
10141 && insn[0] == 0x0F && insn[1] == 0x51) {
10142 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
10143 "sqrtss", Iop_Sqrt32F0x4 );
10144 goto decode_success;
10147 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
10148 if (insn[0] == 0x0F && insn[1] == 0xAE
10149 && haveNo66noF2noF3(pfx)
10150 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 3) {
10153 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10156 /* Fake up a native SSE mxcsr word. The only thing it depends
10157 on is SSEROUND[1:0], so call a clean helper to cook it up.
10159 /* ULong amd64g_create_mxcsr ( ULong sseround ) */
10160 DIP("stmxcsr %s\n", dis_buf);
10165 Ity_I64, 0/*regp*/,
10166 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
10167 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
10171 goto decode_success;
10174 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
10175 if (haveNo66noF2noF3(pfx) && sz == 4
10176 && insn[0] == 0x0F && insn[1] == 0x5C) {
10177 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subps", Iop_Sub32Fx4 );
10178 goto decode_success;
10181 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
10182 if (haveF3no66noF2(pfx) && sz == 4
10183 && insn[0] == 0x0F && insn[1] == 0x5C) {
10184 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "subss", Iop_Sub32F0x4 );
10185 goto decode_success;
10188 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
10189 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
10190 /* These just appear to be special cases of SHUFPS */
10191 if (haveNo66noF2noF3(pfx) && sz == 4
10192 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
10194 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10195 Bool hi = toBool(insn[1] == 0x15);
10196 sV = newTemp(Ity_V128);
10197 dV = newTemp(Ity_V128);
10198 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10200 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
10202 if (epartIsReg(modrm)) {
10203 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10205 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10206 nameXMMReg(eregOfRexRM(pfx,modrm)),
10207 nameXMMReg(gregOfRexRM(pfx,modrm)));
10209 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10210 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10212 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10214 nameXMMReg(gregOfRexRM(pfx,modrm)));
10217 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
10218 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
10221 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s3, d3, s2, d2 ) );
10223 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s1, d1, s0, d0 ) );
10226 goto decode_success;
10229 /* 0F 57 = XORPS -- G = G xor E */
10230 if (haveNo66noF2noF3(pfx) && sz == 4
10231 && insn[0] == 0x0F && insn[1] == 0x57) {
10232 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorps", Iop_XorV128 );
10233 goto decode_success;
10236 /* ---------------------------------------------------- */
10237 /* --- end of the SSE decoder. --- */
10238 /* ---------------------------------------------------- */
10240 /* ---------------------------------------------------- */
10241 /* --- start of the SSE2 decoder. --- */
10242 /* ---------------------------------------------------- */
10244 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
10245 if (have66noF2noF3(pfx)
10246 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
10247 && insn[0] == 0x0F && insn[1] == 0x58) {
10248 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addpd", Iop_Add64Fx2 );
10249 goto decode_success;
10252 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
10253 if (haveF2no66noF3(pfx)
10254 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
10255 && insn[0] == 0x0F && insn[1] == 0x58) {
10256 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "addsd", Iop_Add64F0x2 );
10257 goto decode_success;
10260 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
10261 if (have66noF2noF3(pfx) && sz == 2
10262 && insn[0] == 0x0F && insn[1] == 0x55) {
10263 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnpd", Iop_AndV128 );
10264 goto decode_success;
10267 /* 66 0F 54 = ANDPD -- G = G and E */
10268 if (have66noF2noF3(pfx) && sz == 2
10269 && insn[0] == 0x0F && insn[1] == 0x54) {
10270 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andpd", Iop_AndV128 );
10271 goto decode_success;
10274 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
10275 if (have66noF2noF3(pfx) && sz == 2
10276 && insn[0] == 0x0F && insn[1] == 0xC2) {
10277 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmppd", True, 8 );
10278 goto decode_success;
10281 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
10282 if (haveF2no66noF3(pfx) && sz == 4
10283 && insn[0] == 0x0F && insn[1] == 0xC2) {
10284 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpsd", False, 8 );
10285 goto decode_success;
10288 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
10289 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
10290 if (have66noF2noF3(pfx) && sz == 2
10291 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
10292 IRTemp argL = newTemp(Ity_F64);
10293 IRTemp argR = newTemp(Ity_F64);
10294 modrm = getUChar(delta+2);
10295 if (epartIsReg(modrm)) {
10296 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
10297 0/*lowest lane*/ ) );
10299 DIP("%scomisd %s,%s\n", insn[1]==0x2E ? "u" : "",
10300 nameXMMReg(eregOfRexRM(pfx,modrm)),
10301 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10303 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10304 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10306 DIP("%scomisd %s,%s\n", insn[1]==0x2E ? "u" : "",
10308 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10310 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
10311 0/*lowest lane*/ ) );
10313 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10314 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10319 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
10323 goto decode_success;
10326 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
10328 if (haveF3no66noF2(pfx) && insn[0] == 0x0F && insn[1] == 0xE6) {
10329 IRTemp arg64 = newTemp(Ity_I64);
10330 if (sz != 4) goto decode_failure;
10332 modrm = getUChar(delta+2);
10333 if (epartIsReg(modrm)) {
10334 assign( arg64, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0) );
10336 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10337 nameXMMReg(gregOfRexRM(pfx,modrm)));
10339 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10340 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
10342 DIP("cvtdq2pd %s,%s\n", dis_buf,
10343 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10347 gregOfRexRM(pfx,modrm), 0,
10348 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
10352 gregOfRexRM(pfx,modrm), 1,
10353 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
10356 goto decode_success;
10359 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
10361 if (haveNo66noF2noF3(pfx) && sz == 4
10362 && insn[0] == 0x0F && insn[1] == 0x5B) {
10363 IRTemp argV = newTemp(Ity_V128);
10364 IRTemp rmode = newTemp(Ity_I32);
10366 modrm = getUChar(delta+2);
10367 if (epartIsReg(modrm)) {
10368 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10370 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10371 nameXMMReg(gregOfRexRM(pfx,modrm)));
10373 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10374 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10376 DIP("cvtdq2ps %s,%s\n", dis_buf,
10377 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10380 assign( rmode, get_sse_roundingmode() );
10381 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10383 # define CVT(_t) binop( Iop_F64toF32, \
10385 unop(Iop_I32StoF64,mkexpr(_t)))
10387 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 3, CVT(t3) );
10388 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 2, CVT(t2) );
10389 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10390 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10394 goto decode_success;
10397 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10398 lo half xmm(G), and zero upper half, rounding towards zero */
10399 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10400 lo half xmm(G), according to prevailing rounding mode, and zero
10402 if ( ( (haveF2no66noF3(pfx) && sz == 4)
10403 || (have66noF2noF3(pfx) && sz == 2)
10405 && insn[0] == 0x0F && insn[1] == 0xE6) {
10406 IRTemp argV = newTemp(Ity_V128);
10407 IRTemp rmode = newTemp(Ity_I32);
10408 Bool r2zero = toBool(sz == 2);
10410 modrm = getUChar(delta+2);
10411 if (epartIsReg(modrm)) {
10412 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10414 DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
10415 nameXMMReg(eregOfRexRM(pfx,modrm)),
10416 nameXMMReg(gregOfRexRM(pfx,modrm)));
10418 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10419 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10421 DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
10423 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10427 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10429 assign( rmode, get_sse_roundingmode() );
10432 t0 = newTemp(Ity_F64);
10433 t1 = newTemp(Ity_F64);
10434 assign( t0, unop(Iop_ReinterpI64asF64,
10435 unop(Iop_V128to64, mkexpr(argV))) );
10436 assign( t1, unop(Iop_ReinterpI64asF64,
10437 unop(Iop_V128HIto64, mkexpr(argV))) );
10439 # define CVT(_t) binop( Iop_F64toI32S, \
10443 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) );
10444 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) );
10445 putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10446 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10450 goto decode_success;
10453 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
10454 I32 in mmx, according to prevailing SSE rounding mode */
10455 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
10456 I32 in mmx, rounding towards zero */
10457 if (have66noF2noF3(pfx) && sz == 2
10458 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
10459 IRTemp dst64 = newTemp(Ity_I64);
10460 IRTemp rmode = newTemp(Ity_I32);
10461 IRTemp f64lo = newTemp(Ity_F64);
10462 IRTemp f64hi = newTemp(Ity_F64);
10463 Bool r2zero = toBool(insn[1] == 0x2C);
10466 modrm = getUChar(delta+2);
10468 if (epartIsReg(modrm)) {
10470 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10471 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
10472 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
10473 nameXMMReg(eregOfRexRM(pfx,modrm)),
10474 nameMMXReg(gregLO3ofRM(modrm)));
10476 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10477 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10478 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
/* Mnemonic fixed: previously printed as "cvt%spf2pi"; the instruction
   is CVT(T)PD2PI, matching the register-operand DIP above. */
10482 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
10484 nameMMXReg(gregLO3ofRM(modrm)));
10488 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10490 assign( rmode, get_sse_roundingmode() );
10495 binop( Iop_32HLto64,
10496 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
10497 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
10501 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
10502 goto decode_success;
10505 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
10506 lo half xmm(G), rounding according to prevailing SSE rounding
10507 mode, and zero upper half */
10508 /* Note, this is practically identical to CVTPD2DQ. It would have
10509 been nicer to merge them together, but the insn[] offsets differ
10511 if (have66noF2noF3(pfx) && sz == 2
10512 && insn[0] == 0x0F && insn[1] == 0x5A) {
10513 IRTemp argV = newTemp(Ity_V128);
10514 IRTemp rmode = newTemp(Ity_I32);
10516 modrm = getUChar(delta+2);
10517 if (epartIsReg(modrm)) {
10518 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10520 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10521 nameXMMReg(gregOfRexRM(pfx,modrm)));
10523 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10524 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10526 DIP("cvtpd2ps %s,%s\n", dis_buf,
10527 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10530 assign( rmode, get_sse_roundingmode() );
10531 t0 = newTemp(Ity_F64);
10532 t1 = newTemp(Ity_F64);
10533 assign( t0, unop(Iop_ReinterpI64asF64,
10534 unop(Iop_V128to64, mkexpr(argV))) );
10535 assign( t1, unop(Iop_ReinterpI64asF64,
10536 unop(Iop_V128HIto64, mkexpr(argV))) );
10538 # define CVT(_t) binop( Iop_F64toF32, \
10542 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) );
10543 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) );
10544 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10545 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10549 goto decode_success;
10552 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
10554 if (have66noF2noF3(pfx) && sz == 2
10555 && insn[0] == 0x0F && insn[1] == 0x2A) {
10556 IRTemp arg64 = newTemp(Ity_I64);
10558 modrm = getUChar(delta+2);
10559 if (epartIsReg(modrm)) {
10560 /* Only switch to MMX mode if the source is a MMX register.
10561 This is inconsistent with all other instructions which
10562 convert between XMM and (M64 or MMX), which always switch
10563 to MMX mode even if 64-bit operand is M64 and not MMX. At
10564 least, that's what the Intel docs seem to me to say.
10567 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
10569 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
10570 nameXMMReg(gregOfRexRM(pfx,modrm)));
10572 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10573 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
10575 DIP("cvtpi2pd %s,%s\n", dis_buf,
10576 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10580 gregOfRexRM(pfx,modrm), 0,
10581 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
10585 gregOfRexRM(pfx,modrm), 1,
10586 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
10589 goto decode_success;
10592 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10593 xmm(G), rounding towards zero */
10594 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10595 xmm(G), as per the prevailing rounding mode */
10596 if ( ( (have66noF2noF3(pfx) && sz == 2)
10597 || (haveF3no66noF2(pfx) && sz == 4)
10599 && insn[0] == 0x0F && insn[1] == 0x5B) {
10600 IRTemp argV = newTemp(Ity_V128);
10601 IRTemp rmode = newTemp(Ity_I32);
10602 Bool r2zero = toBool(sz == 4);
10604 modrm = getUChar(delta+2);
10605 if (epartIsReg(modrm)) {
10606 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10608 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10609 nameXMMReg(gregOfRexRM(pfx,modrm)));
10611 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10612 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10614 DIP("cvtps2dq %s,%s\n", dis_buf,
10615 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10619 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10621 assign( rmode, get_sse_roundingmode() );
10624 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10626 /* This is less than ideal. If it turns out to be a performance
10627 bottleneck it can be improved. */
10629 binop( Iop_F64toI32S, \
10631 unop( Iop_F32toF64, \
10632 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10634 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, CVT(t3) );
10635 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, CVT(t2) );
10636 putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10637 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10641 goto decode_success;
10644 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
10646 if (haveNo66noF2noF3(pfx) && sz == 4
10647 && insn[0] == 0x0F && insn[1] == 0x5A) {
10648 IRTemp f32lo = newTemp(Ity_F32);
10649 IRTemp f32hi = newTemp(Ity_F32);
10651 modrm = getUChar(delta+2);
10652 if (epartIsReg(modrm)) {
10653 assign( f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0) );
10654 assign( f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1) );
10656 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10657 nameXMMReg(gregOfRexRM(pfx,modrm)));
10659 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10660 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
10661 assign( f32hi, loadLE(Ity_F32,
10662 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10664 DIP("cvtps2pd %s,%s\n", dis_buf,
10665 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10668 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 1,
10669 unop(Iop_F32toF64, mkexpr(f32hi)) );
10670 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
10671 unop(Iop_F32toF64, mkexpr(f32lo)) );
10673 goto decode_success;
10676 /* F2 0F 2D = CVTSD2SI
10677 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
10678 according to prevailing SSE rounding mode
10679 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
10680 according to prevailing SSE rounding mode
10682 /* F2 0F 2C = CVTTSD2SI
10683 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
10684 truncating towards zero
10685 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
10686 truncating towards zero
10688 if (haveF2no66noF3(pfx)
10690 && (insn[1] == 0x2D || insn[1] == 0x2C)) {
10691 IRTemp rmode = newTemp(Ity_I32);
10692 IRTemp f64lo = newTemp(Ity_F64);
10693 Bool r2zero = toBool(insn[1] == 0x2C);
10694 vassert(sz == 4 || sz == 8);
10696 modrm = getUChar(delta+2);
10697 if (epartIsReg(modrm)) {
10699 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10700 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
10701 nameXMMReg(eregOfRexRM(pfx,modrm)),
10702 nameIReg(sz, gregOfRexRM(pfx,modrm), False));
10704 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10705 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10707 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
10709 nameIReg(sz, gregOfRexRM(pfx,modrm), False));
10713 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10715 assign( rmode, get_sse_roundingmode() );
10719 putIReg32( gregOfRexRM(pfx,modrm),
10720 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10722 putIReg64( gregOfRexRM(pfx,modrm),
10723 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
10726 goto decode_success;
10729 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
10730 low 1/4 xmm(G), according to prevailing SSE rounding mode */
10731 if (haveF2no66noF3(pfx) && sz == 4
10732 && insn[0] == 0x0F && insn[1] == 0x5A) {
10733 IRTemp rmode = newTemp(Ity_I32);
10734 IRTemp f64lo = newTemp(Ity_F64);
10737 modrm = getUChar(delta+2);
10738 if (epartIsReg(modrm)) {
10740 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10741 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10742 nameXMMReg(gregOfRexRM(pfx,modrm)));
10744 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10745 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10747 DIP("cvtsd2ss %s,%s\n", dis_buf,
10748 nameXMMReg(gregOfRexRM(pfx,modrm)));
10751 assign( rmode, get_sse_roundingmode() );
10753 gregOfRexRM(pfx,modrm), 0,
10754 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
10757 goto decode_success;
10760 /* F2 0F 2A = CVTSI2SD
10761 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
10762 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
10764 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)
10765 && insn[0] == 0x0F && insn[1] == 0x2A) {
10766 modrm = getUChar(delta+2);
10769 IRTemp arg32 = newTemp(Ity_I32);
10770 if (epartIsReg(modrm)) {
10771 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
10773 DIP("cvtsi2sd %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
10774 nameXMMReg(gregOfRexRM(pfx,modrm)));
10776 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10777 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
10779 DIP("cvtsi2sd %s,%s\n", dis_buf,
10780 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10782 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
10783 unop(Iop_I32StoF64, mkexpr(arg32))
10787 IRTemp arg64 = newTemp(Ity_I64);
10788 if (epartIsReg(modrm)) {
10789 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
10791 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
10792 nameXMMReg(gregOfRexRM(pfx,modrm)));
10794 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10795 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
10797 DIP("cvtsi2sdq %s,%s\n", dis_buf,
10798 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10801 gregOfRexRM(pfx,modrm),
10803 binop( Iop_I64StoF64,
10804 get_sse_roundingmode(),
10811 goto decode_success;
10814 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
10816 if (haveF3no66noF2(pfx) && sz == 4
10817 && insn[0] == 0x0F && insn[1] == 0x5A) {
10818 IRTemp f32lo = newTemp(Ity_F32);
10820 modrm = getUChar(delta+2);
10821 if (epartIsReg(modrm)) {
10823 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
10824 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10825 nameXMMReg(gregOfRexRM(pfx,modrm)));
10827 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10828 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10830 DIP("cvtss2sd %s,%s\n", dis_buf,
10831 nameXMMReg(gregOfRexRM(pfx,modrm)));
10834 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
10835 unop( Iop_F32toF64, mkexpr(f32lo) ) );
10837 goto decode_success;
10840 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
10841 if (have66noF2noF3(pfx) && sz == 2
10842 && insn[0] == 0x0F && insn[1] == 0x5E) {
10843 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divpd", Iop_Div64Fx2 );
10844 goto decode_success;
10847 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
10848 if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x5E) {
10850 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "divsd", Iop_Div64F0x2 );
10851 goto decode_success;
10854 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
10855 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
10856 if (haveNo66noF2noF3(pfx) && sz == 4
10857 && insn[0] == 0x0F && insn[1] == 0xAE
10858 && epartIsReg(insn[2])
10859 && (gregLO3ofRM(insn[2]) == 5 || gregLO3ofRM(insn[2]) == 6)) {
/* Both LFENCE (/5) and MFENCE (/6) are modelled identically here,
   as a single full IR memory fence; only the printed mnemonic differs. */
10861 /* Insert a memory fence. It's sometimes important that these
10862 are carried through to the generated code. */
10863 stmt( IRStmt_MBE(Imbe_Fence) );
10864 DIP("%sfence\n", gregLO3ofRM(insn[2])==5 ? "l" : "m");
10865 goto decode_success;
10868 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
10869 if (have66noF2noF3(pfx) && sz == 2
10870 && insn[0] == 0x0F && insn[1] == 0x5F) {
10871 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxpd", Iop_Max64Fx2 );
10872 goto decode_success;
10875 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
10876 if (haveF2no66noF3(pfx) && sz == 4
10877 && insn[0] == 0x0F && insn[1] == 0x5F) {
10878 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "maxsd", Iop_Max64F0x2 );
10879 goto decode_success;
10882 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
10883 if (have66noF2noF3(pfx) && sz == 2
10884 && insn[0] == 0x0F && insn[1] == 0x5D) {
10885 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minpd", Iop_Min64Fx2 );
10886 goto decode_success;
10889 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
10890 if (haveF2no66noF3(pfx) && sz == 4
10891 && insn[0] == 0x0F && insn[1] == 0x5D) {
10892 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "minsd", Iop_Min64F0x2 );
10893 goto decode_success;
10896 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
10897 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
10898 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
10899 if (have66noF2noF3(pfx)
10900 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
10902 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
10903 HChar* wot = insn[1]==0x28 ? "apd" :
10904 insn[1]==0x10 ? "upd" : "dqa";
10905 modrm = getUChar(delta+2);
10906 if (epartIsReg(modrm)) {
10907 putXMMReg( gregOfRexRM(pfx,modrm),
10908 getXMMReg( eregOfRexRM(pfx,modrm) ));
10909 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRexRM(pfx,modrm)),
10910 nameXMMReg(gregOfRexRM(pfx,modrm)));
10913 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10914 putXMMReg( gregOfRexRM(pfx,modrm),
10915 loadLE(Ity_V128, mkexpr(addr)) );
10916 DIP("mov%s %s,%s\n", wot, dis_buf,
10917 nameXMMReg(gregOfRexRM(pfx,modrm)));
10920 goto decode_success;
10923 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
10924 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
10925 if (have66noF2noF3(pfx) && insn[0] == 0x0F
10926 && (insn[1] == 0x29 || insn[1] == 0x11)) {
10927 modrm = getUChar(delta+2);
10928 if (epartIsReg(modrm)) {
10929 /* fall through; awaiting test case */
10931 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10932 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
10933 DIP("mov[ua]pd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
10936 goto decode_success;
10940 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, zeroing high 3/4 of xmm. */
10941 /* or from ireg64/m64 to xmm lo 1/2, zeroing high 1/2 of xmm. */
10942 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x6E) {
10943 vassert(sz == 2 || sz == 8);
10944 if (sz == 2) sz = 4;
10945 modrm = getUChar(delta+2);
10946 if (epartIsReg(modrm)) {
10950 gregOfRexRM(pfx,modrm),
10951 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
10953 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
10954 nameXMMReg(gregOfRexRM(pfx,modrm)));
10957 gregOfRexRM(pfx,modrm),
10958 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
10960 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
10961 nameXMMReg(gregOfRexRM(pfx,modrm)));
10964 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10967 gregOfRexRM(pfx,modrm),
10969 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
10970 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
10972 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
10973 nameXMMReg(gregOfRexRM(pfx,modrm)));
10975 goto decode_success;
10978 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
10979 /* or from xmm low 1/2 to ireg64 or m64. */
10980 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x7E) {
10981 if (sz == 2) sz = 4;
10982 vassert(sz == 4 || sz == 8);
10983 modrm = getUChar(delta+2);
10984 if (epartIsReg(modrm)) {
10987 putIReg32( eregOfRexRM(pfx,modrm),
10988 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
10989 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
10990 nameIReg32(eregOfRexRM(pfx,modrm)));
10992 putIReg64( eregOfRexRM(pfx,modrm),
10993 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
10994 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
10995 nameIReg64(eregOfRexRM(pfx,modrm)));
10998 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11000 storeLE( mkexpr(addr),
11002 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
11003 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
11004 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
11005 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
11007 goto decode_success;
11010 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
11011 if (have66noF2noF3(pfx) && sz == 2
11012 && insn[0] == 0x0F && insn[1] == 0x7F) {
11013 modrm = getUChar(delta+2);
11014 if (epartIsReg(modrm)) {
11016 putXMMReg( eregOfRexRM(pfx,modrm),
11017 getXMMReg(gregOfRexRM(pfx,modrm)) );
11018 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11019 nameXMMReg(eregOfRexRM(pfx,modrm)));
11021 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11023 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11024 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
11026 goto decode_success;
11029 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
11030 if (haveF3no66noF2(pfx) && sz == 4
11031 && insn[0] == 0x0F && insn[1] == 0x6F) {
11032 modrm = getUChar(delta+2);
11033 if (epartIsReg(modrm)) {
11034 putXMMReg( gregOfRexRM(pfx,modrm),
11035 getXMMReg( eregOfRexRM(pfx,modrm) ));
11036 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11037 nameXMMReg(gregOfRexRM(pfx,modrm)));
11040 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11041 putXMMReg( gregOfRexRM(pfx,modrm),
11042 loadLE(Ity_V128, mkexpr(addr)) );
11043 DIP("movdqu %s,%s\n", dis_buf,
11044 nameXMMReg(gregOfRexRM(pfx,modrm)));
11047 goto decode_success;
11050 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
11051 if (haveF3no66noF2(pfx) && sz == 4
11052 && insn[0] == 0x0F && insn[1] == 0x7F) {
11053 modrm = getUChar(delta+2);
11054 if (epartIsReg(modrm)) {
11055 goto decode_failure; /* awaiting test case */
11057 putXMMReg( eregOfRexRM(pfx,modrm),
11058 getXMMReg(gregOfRexRM(pfx,modrm)) );
11059 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11060 nameXMMReg(eregOfRexRM(pfx,modrm)));
11062 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11064 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11065 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
11067 goto decode_success;
11070 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
11071 if (haveF2no66noF3(pfx) && sz == 4
11072 && insn[0] == 0x0F && insn[1] == 0xD6) {
11073 modrm = getUChar(delta+2);
11074 if (epartIsReg(modrm)) {
11076 putMMXReg( gregLO3ofRM(modrm),
11077 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
11078 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11079 nameMMXReg(gregLO3ofRM(modrm)));
11081 goto decode_success;
11083 /* apparently no mem case for this insn */
11084 goto decode_failure;
11088 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
11089 /* These seems identical to MOVHPS. This instruction encoding is
11090 completely crazy. */
11091 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x16) {
11092 modrm = getUChar(delta+2);
11093 if (epartIsReg(modrm)) {
11094 /* fall through; apparently reg-reg is not possible */
11096 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11098 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
11099 loadLE(Ity_I64, mkexpr(addr)) );
11100 DIP("movhpd %s,%s\n", dis_buf,
11101 nameXMMReg( gregOfRexRM(pfx,modrm) ));
11102 goto decode_success;
11106 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
11107 /* Again, this seems identical to MOVHPS. */
11108 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x17) {
11109 if (!epartIsReg(insn[2])) {
11111 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11113 storeLE( mkexpr(addr),
11114 getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
11115 1/*upper lane*/ ) );
11116 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
11118 goto decode_success;
11120 /* else fall through */
11123 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
11124 /* Identical to MOVLPS ? */
11125 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x12) {
11126 modrm = getUChar(delta+2);
11127 if (epartIsReg(modrm)) {
11128 /* fall through; apparently reg-reg is not possible */
11130 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11132 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11134 loadLE(Ity_I64, mkexpr(addr)) );
11135 DIP("movlpd %s, %s\n",
11136 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11137 goto decode_success;
11141 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
11142 /* Identical to MOVLPS ? */
11143 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x13) {
11144 modrm = getUChar(delta+2);
11145 if (!epartIsReg(modrm)) {
11146 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11148 storeLE( mkexpr(addr),
11149 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11150 0/*lower lane*/ ) );
11151 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11153 goto decode_success;
11155 /* else fall through */
11158 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
11159 2 lowest bits of ireg(G) */
11160 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)
11161 && insn[0] == 0x0F && insn[1] == 0x50) {
11162 /* sz == 8 is a kludge to handle insns with REX.W redundantly
11163 set to 1, which has been known to happen:
11164 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
11165 20071106: see further comments on MOVMSKPS implementation above.
11167 modrm = getUChar(delta+2);
11168 if (epartIsReg(modrm)) {
11170 t0 = newTemp(Ity_I32);
11171 t1 = newTemp(Ity_I32);
11173 src = eregOfRexRM(pfx,modrm);
11174 assign( t0, binop( Iop_And32,
11175 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
11177 assign( t1, binop( Iop_And32,
11178 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
11180 putIReg32( gregOfRexRM(pfx,modrm),
11181 binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
11183 DIP("movmskpd %s,%s\n", nameXMMReg(src),
11184 nameIReg32(gregOfRexRM(pfx,modrm)));
11185 goto decode_success;
11187 /* else fall through */
11188 goto decode_failure;
11191 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
11192 if (have66noF2noF3(pfx) && sz == 2
11193 && insn[0] == 0x0F && insn[1] == 0xF7) {
11194 modrm = getUChar(delta+2);
11195 if (epartIsReg(modrm)) {
11196 IRTemp regD = newTemp(Ity_V128);
11197 IRTemp mask = newTemp(Ity_V128);
11198 IRTemp olddata = newTemp(Ity_V128);
11199 IRTemp newdata = newTemp(Ity_V128);
11200 addr = newTemp(Ity_I64);
11202 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
11203 assign( regD, getXMMReg( gregOfRexRM(pfx,modrm) ));
11205 /* Unfortunately can't do the obvious thing with SarN8x16
11206 here since that can't be re-emitted as SSE2 code - no such
11210 binop(Iop_64HLtoV128,
11212 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
11215 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
11217 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
11225 unop(Iop_NotV128, mkexpr(mask)))) );
11226 storeLE( mkexpr(addr), mkexpr(newdata) );
11229 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRexRM(pfx,modrm) ),
11230 nameXMMReg( gregOfRexRM(pfx,modrm) ) );
11231 goto decode_success;
11233 /* else fall through */
11236 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
11237 if (have66noF2noF3(pfx) && sz == 2
11238 && insn[0] == 0x0F && insn[1] == 0xE7) {
11239 modrm = getUChar(delta+2);
11240 if (!epartIsReg(modrm)) {
11241 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11242 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11243 DIP("movntdq %s,%s\n", dis_buf,
11244 nameXMMReg(gregOfRexRM(pfx,modrm)));
11246 goto decode_success;
11248 /* else fall through */
11249 goto decode_failure;
11252 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
11253 if (haveNo66noF2noF3(pfx) &&
11254 insn[0] == 0x0F && insn[1] == 0xC3) {
11255 vassert(sz == 4 || sz == 8);
11256 modrm = getUChar(delta+2);
11257 if (!epartIsReg(modrm)) {
11258 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11259 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
11260 DIP("movnti %s,%s\n", dis_buf,
11261 nameIRegG(sz, pfx, modrm));
11263 goto decode_success;
11265 /* else fall through */
11268 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
11269 or lo half xmm). */
11270 if (have66noF2noF3(pfx)
11271 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11272 && insn[0] == 0x0F && insn[1] == 0xD6) {
11273 modrm = getUChar(delta+2);
11274 if (epartIsReg(modrm)) {
11275 /* fall through, awaiting test case */
11276 /* dst: lo half copied, hi half zeroed */
11278 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11279 storeLE( mkexpr(addr),
11280 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
11281 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
11283 goto decode_success;
11287 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
11289 if (haveF3no66noF2(pfx) && sz == 4
11290 && insn[0] == 0x0F && insn[1] == 0xD6) {
11291 modrm = getUChar(delta+2);
11292 if (epartIsReg(modrm)) {
11294 putXMMReg( gregOfRexRM(pfx,modrm),
11295 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
11296 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
11297 nameXMMReg(gregOfRexRM(pfx,modrm)));
11299 goto decode_success;
11301 /* apparently no mem case for this insn */
11302 goto decode_failure;
11306 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
11307 G (lo half xmm). Upper half of G is zeroed out. */
11308 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
11309 G (lo half xmm). If E is mem, upper half of G is zeroed out.
11310 If E is reg, upper half of G is unchanged. */
11311 if ( (haveF2no66noF3(pfx)
11312 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11313 && insn[0] == 0x0F && insn[1] == 0x10)
11315 (haveF3no66noF2(pfx)
11316 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11317 && insn[0] == 0x0F && insn[1] == 0x7E)
11319 modrm = getUChar(delta+2);
11320 if (epartIsReg(modrm)) {
11321 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11322 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
11323 if (insn[1] == 0x7E/*MOVQ*/) {
11324 /* zero bits 127:64 */
11325 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
11327 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11328 nameXMMReg(gregOfRexRM(pfx,modrm)));
11331 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11332 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
11333 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11334 loadLE(Ity_I64, mkexpr(addr)) );
11335 DIP("movsd %s,%s\n", dis_buf,
11336 nameXMMReg(gregOfRexRM(pfx,modrm)));
11339 goto decode_success;
11342 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
11343 or lo half xmm). */
11344 if (haveF2no66noF3(pfx)
11345 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11346 && insn[0] == 0x0F && insn[1] == 0x11) {
11347 modrm = getUChar(delta+2);
11348 if (epartIsReg(modrm)) {
11349 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
11350 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
11351 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11352 nameXMMReg(eregOfRexRM(pfx,modrm)));
11355 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11356 storeLE( mkexpr(addr),
11357 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
11358 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11362 goto decode_success;
11365 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
11366 if (have66noF2noF3(pfx)
11367 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11368 && insn[0] == 0x0F && insn[1] == 0x59) {
11369 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulpd", Iop_Mul64Fx2 );
11370 goto decode_success;
11373 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
11374 if (haveF2no66noF3(pfx)
11375 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11376 && insn[0] == 0x0F && insn[1] == 0x59) {
11377 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "mulsd", Iop_Mul64F0x2 );
11378 goto decode_success;
11381 /* 66 0F 56 = ORPD -- G = G or E */
11382 if (have66noF2noF3(pfx) && sz == 2
11383 && insn[0] == 0x0F && insn[1] == 0x56) {
11384 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orpd", Iop_OrV128 );
11385 goto decode_success;
11388 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
11389 if (have66noF2noF3(pfx) && sz == 2
11390 && insn[0] == 0x0F && insn[1] == 0xC6) {
11392 IRTemp sV = newTemp(Ity_V128);
11393 IRTemp dV = newTemp(Ity_V128);
11394 IRTemp s1 = newTemp(Ity_I64);
11395 IRTemp s0 = newTemp(Ity_I64);
11396 IRTemp d1 = newTemp(Ity_I64);
11397 IRTemp d0 = newTemp(Ity_I64);
11400 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
11402 if (epartIsReg(modrm)) {
11403 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
11404 select = (Int)insn[3];
11406 DIP("shufpd $%d,%s,%s\n", select,
11407 nameXMMReg(eregOfRexRM(pfx,modrm)),
11408 nameXMMReg(gregOfRexRM(pfx,modrm)));
11410 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 );
11411 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11412 select = (Int)insn[2+alen];
11414 DIP("shufpd $%d,%s,%s\n", select,
11416 nameXMMReg(gregOfRexRM(pfx,modrm)));
11419 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11420 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11421 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11422 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11424 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
11425 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
11428 gregOfRexRM(pfx,modrm),
11429 binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
11435 goto decode_success;
11438 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
11439 if (have66noF2noF3(pfx) && sz == 2
11440 && insn[0] == 0x0F && insn[1] == 0x51) {
11441 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
11442 "sqrtpd", Iop_Sqrt64Fx2 );
11443 goto decode_success;
11446 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
11447 if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x51) {
11449 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta+2,
11450 "sqrtsd", Iop_Sqrt64F0x2 );
11451 goto decode_success;
11454 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
11455 if (have66noF2noF3(pfx) && sz == 2
11456 && insn[0] == 0x0F && insn[1] == 0x5C) {
11457 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subpd", Iop_Sub64Fx2 );
11458 goto decode_success;
11461 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
11462 if (haveF2no66noF3(pfx)
11463 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11464 && insn[0] == 0x0F && insn[1] == 0x5C) {
11465 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "subsd", Iop_Sub64F0x2 );
11466 goto decode_success;
11469 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
11470 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
11471 /* These just appear to be special cases of SHUFPS */
11472 if (have66noF2noF3(pfx)
11473 && sz == 2 /* could be 8 if rex also present */
11474 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
11475 IRTemp s1 = newTemp(Ity_I64);
11476 IRTemp s0 = newTemp(Ity_I64);
11477 IRTemp d1 = newTemp(Ity_I64);
11478 IRTemp d0 = newTemp(Ity_I64);
11479 IRTemp sV = newTemp(Ity_V128);
11480 IRTemp dV = newTemp(Ity_V128);
11481 Bool hi = toBool(insn[1] == 0x15);
11484 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
11486 if (epartIsReg(modrm)) {
11487 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
11489 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
11490 nameXMMReg(eregOfRexRM(pfx,modrm)),
11491 nameXMMReg(gregOfRexRM(pfx,modrm)));
11493 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11494 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11496 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
11498 nameXMMReg(gregOfRexRM(pfx,modrm)));
11501 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11502 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11503 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11504 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11507 putXMMReg( gregOfRexRM(pfx,modrm),
11508 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
11510 putXMMReg( gregOfRexRM(pfx,modrm),
11511 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
11514 goto decode_success;
11517 /* 66 0F 57 = XORPD -- G = G xor E */
11518 if (have66noF2noF3(pfx) && sz == 2
11519 && insn[0] == 0x0F && insn[1] == 0x57) {
11520 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorpd", Iop_XorV128 );
11521 goto decode_success;
11524 /* 66 0F 6B = PACKSSDW */
11525 if (have66noF2noF3(pfx) && sz == 2
11526 && insn[0] == 0x0F && insn[1] == 0x6B) {
11527 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11528 "packssdw", Iop_QNarrow32Sx4, True );
11529 goto decode_success;
11532 /* 66 0F 63 = PACKSSWB */
11533 if (have66noF2noF3(pfx) && sz == 2
11534 && insn[0] == 0x0F && insn[1] == 0x63) {
11535 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11536 "packsswb", Iop_QNarrow16Sx8, True );
11537 goto decode_success;
11540 /* 66 0F 67 = PACKUSWB */
11541 if (have66noF2noF3(pfx) && sz == 2
11542 && insn[0] == 0x0F && insn[1] == 0x67) {
11543 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11544 "packuswb", Iop_QNarrow16Ux8, True );
11545 goto decode_success;
11548 /* 66 0F FC = PADDB */
11549 if (have66noF2noF3(pfx) && sz == 2
11550 && insn[0] == 0x0F && insn[1] == 0xFC) {
11551 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11552 "paddb", Iop_Add8x16, False );
11553 goto decode_success;
11556 /* 66 0F FE = PADDD */
11557 if (have66noF2noF3(pfx) && sz == 2
11558 && insn[0] == 0x0F && insn[1] == 0xFE) {
11559 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11560 "paddd", Iop_Add32x4, False );
11561 goto decode_success;
11564 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11565 /* 0F D4 = PADDQ -- add 64x1 */
11566 if (haveNo66noF2noF3(pfx) && sz == 4
11567 && insn[0] == 0x0F && insn[1] == 0xD4) {
11569 delta = dis_MMXop_regmem_to_reg (
11570 vbi, pfx, delta+2, insn[1], "paddq", False );
11571 goto decode_success;
11574 /* 66 0F D4 = PADDQ */
11575 if (have66noF2noF3(pfx) && sz == 2
11576 && insn[0] == 0x0F && insn[1] == 0xD4) {
11577 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11578 "paddq", Iop_Add64x2, False );
11579 goto decode_success;
11582 /* 66 0F FD = PADDW */
11583 if (have66noF2noF3(pfx) && sz == 2
11584 && insn[0] == 0x0F && insn[1] == 0xFD) {
11585 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11586 "paddw", Iop_Add16x8, False );
11587 goto decode_success;
11590 /* 66 0F EC = PADDSB */
11591 if (have66noF2noF3(pfx) && sz == 2
11592 && insn[0] == 0x0F && insn[1] == 0xEC) {
11593 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11594 "paddsb", Iop_QAdd8Sx16, False );
11595 goto decode_success;
11598 /* 66 0F ED = PADDSW */
11599 if (have66noF2noF3(pfx) && sz == 2
11600 && insn[0] == 0x0F && insn[1] == 0xED) {
11601 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11602 "paddsw", Iop_QAdd16Sx8, False );
11603 goto decode_success;
11606 /* 66 0F DC = PADDUSB */
11607 if (have66noF2noF3(pfx) && sz == 2
11608 && insn[0] == 0x0F && insn[1] == 0xDC) {
11609 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11610 "paddusb", Iop_QAdd8Ux16, False );
11611 goto decode_success;
11614 /* 66 0F DD = PADDUSW */
11615 if (have66noF2noF3(pfx) && sz == 2
11616 && insn[0] == 0x0F && insn[1] == 0xDD) {
11617 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11618 "paddusw", Iop_QAdd16Ux8, False );
11619 goto decode_success;
11622 /* 66 0F DB = PAND */
11623 if (have66noF2noF3(pfx) && sz == 2
11624 && insn[0] == 0x0F && insn[1] == 0xDB) {
11625 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pand", Iop_AndV128 );
11626 goto decode_success;
11629 /* 66 0F DF = PANDN */
11630 if (have66noF2noF3(pfx) && sz == 2
11631 && insn[0] == 0x0F && insn[1] == 0xDF) {
11632 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "pandn", Iop_AndV128 );
11633 goto decode_success;
11636 /* 66 0F E0 = PAVGB */
11637 if (have66noF2noF3(pfx) && sz == 2
11638 && insn[0] == 0x0F && insn[1] == 0xE0) {
11639 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11640 "pavgb", Iop_Avg8Ux16, False );
11641 goto decode_success;
11644 /* 66 0F E3 = PAVGW */
11645 if (have66noF2noF3(pfx) && sz == 2
11646 && insn[0] == 0x0F && insn[1] == 0xE3) {
11647 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11648 "pavgw", Iop_Avg16Ux8, False );
11649 goto decode_success;
11652 /* 66 0F 74 = PCMPEQB */
11653 if (have66noF2noF3(pfx) && sz == 2
11654 && insn[0] == 0x0F && insn[1] == 0x74) {
11655 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11656 "pcmpeqb", Iop_CmpEQ8x16, False );
11657 goto decode_success;
11660 /* 66 0F 76 = PCMPEQD */
11661 if (have66noF2noF3(pfx) && sz == 2
11662 && insn[0] == 0x0F && insn[1] == 0x76) {
11663 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11664 "pcmpeqd", Iop_CmpEQ32x4, False );
11665 goto decode_success;
11668 /* 66 0F 75 = PCMPEQW */
11669 if (have66noF2noF3(pfx) && sz == 2
11670 && insn[0] == 0x0F && insn[1] == 0x75) {
11671 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11672 "pcmpeqw", Iop_CmpEQ16x8, False );
11673 goto decode_success;
11676 /* 66 0F 64 = PCMPGTB */
11677 if (have66noF2noF3(pfx) && sz == 2
11678 && insn[0] == 0x0F && insn[1] == 0x64) {
11679 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11680 "pcmpgtb", Iop_CmpGT8Sx16, False );
11681 goto decode_success;
11684 /* 66 0F 66 = PCMPGTD */
11685 if (have66noF2noF3(pfx) && sz == 2
11686 && insn[0] == 0x0F && insn[1] == 0x66) {
11687 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11688 "pcmpgtd", Iop_CmpGT32Sx4, False );
11689 goto decode_success;
11692 /* 66 0F 65 = PCMPGTW */
11693 if (have66noF2noF3(pfx) && sz == 2
11694 && insn[0] == 0x0F && insn[1] == 0x65) {
11695 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11696 "pcmpgtw", Iop_CmpGT16Sx8, False );
11697 goto decode_success;
11700 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
11701 zero-extend of it in ireg(G). */
11702 if (have66noF2noF3(pfx)
11703 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11704 && insn[0] == 0x0F && insn[1] == 0xC5) {
11706 if (epartIsReg(modrm)) {
11707 t5 = newTemp(Ity_V128);
11708 t4 = newTemp(Ity_I16);
11709 assign(t5, getXMMReg(eregOfRexRM(pfx,modrm)));
11710 breakup128to32s( t5, &t3, &t2, &t1, &t0 );
11711 switch (insn[3] & 7) {
11712 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
11713 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
11714 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
11715 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
11716 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
11717 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
11718 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
11719 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
11720 default: vassert(0);
11722 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t4)));
11723 DIP("pextrw $%d,%s,%s\n",
11724 (Int)insn[3], nameXMMReg(eregOfRexRM(pfx,modrm)),
11725 nameIReg32(gregOfRexRM(pfx,modrm)));
11727 goto decode_success;
11729 /* else fall through */
11730 /* note, if memory case is ever filled in, there is 1 byte after
11734 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
11735 put it into the specified lane of xmm(G). */
11736 if (have66noF2noF3(pfx)
11737 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11738 && insn[0] == 0x0F && insn[1] == 0xC4) {
11740 t4 = newTemp(Ity_I16);
11743 if (epartIsReg(modrm)) {
11744 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
11746 lane = insn[3+1-1];
11747 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
11748 nameIReg16(eregOfRexRM(pfx,modrm)),
11749 nameXMMReg(gregOfRexRM(pfx,modrm)));
11751 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
11752 1/*byte after the amode*/ );
11754 lane = insn[3+alen-1];
11755 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
11756 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
11758 nameXMMReg(gregOfRexRM(pfx,modrm)));
11761 putXMMRegLane16( gregOfRexRM(pfx,modrm), lane & 7, mkexpr(t4) );
11762 goto decode_success;
11765 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
11766 E(xmm or mem) to G(xmm) */
11767 if (have66noF2noF3(pfx) && sz == 2
11768 && insn[0] == 0x0F && insn[1] == 0xF5) {
11769 IRTemp s1V = newTemp(Ity_V128);
11770 IRTemp s2V = newTemp(Ity_V128);
11771 IRTemp dV = newTemp(Ity_V128);
11772 IRTemp s1Hi = newTemp(Ity_I64);
11773 IRTemp s1Lo = newTemp(Ity_I64);
11774 IRTemp s2Hi = newTemp(Ity_I64);
11775 IRTemp s2Lo = newTemp(Ity_I64);
11776 IRTemp dHi = newTemp(Ity_I64);
11777 IRTemp dLo = newTemp(Ity_I64);
11779 if (epartIsReg(modrm)) {
11780 assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) );
11782 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11783 nameXMMReg(gregOfRexRM(pfx,modrm)));
11785 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11786 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
11788 DIP("pmaddwd %s,%s\n", dis_buf,
11789 nameXMMReg(gregOfRexRM(pfx,modrm)));
11791 assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) );
11792 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
11793 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
11794 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
11795 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
11796 assign( dHi, mkIRExprCCall(
11797 Ity_I64, 0/*regparms*/,
11798 "amd64g_calculate_mmx_pmaddwd",
11799 &amd64g_calculate_mmx_pmaddwd,
11800 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
11802 assign( dLo, mkIRExprCCall(
11803 Ity_I64, 0/*regparms*/,
11804 "amd64g_calculate_mmx_pmaddwd",
11805 &amd64g_calculate_mmx_pmaddwd,
11806 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
11808 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
11809 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
11810 goto decode_success;
11813 /* 66 0F EE = PMAXSW -- 16x8 signed max */
11814 if (have66noF2noF3(pfx) && sz == 2
11815 && insn[0] == 0x0F && insn[1] == 0xEE) {
11816 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11817 "pmaxsw", Iop_Max16Sx8, False );
11818 goto decode_success;
11821 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
11822 if (have66noF2noF3(pfx) && sz == 2
11823 && insn[0] == 0x0F && insn[1] == 0xDE) {
11824 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11825 "pmaxub", Iop_Max8Ux16, False );
11826 goto decode_success;
11829 /* 66 0F EA = PMINSW -- 16x8 signed min */
11830 if (have66noF2noF3(pfx) && sz == 2
11831 && insn[0] == 0x0F && insn[1] == 0xEA) {
11832 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11833 "pminsw", Iop_Min16Sx8, False );
11834 goto decode_success;
11837 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
11838 if (have66noF2noF3(pfx) && sz == 2
11839 && insn[0] == 0x0F && insn[1] == 0xDA) {
11840 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11841 "pminub", Iop_Min8Ux16, False );
11842 goto decode_success;
11845 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in
11846 xmm(E), turn them into a byte, and put zero-extend of it in
11847 ireg(G). Doing this directly is just too cumbersome; give up
11848 therefore and call a helper. */
11849 /* ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
11850 if (have66noF2noF3(pfx)
11851 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11852 && insn[0] == 0x0F && insn[1] == 0xD7) {
11854 if (epartIsReg(modrm)) {
11855 t0 = newTemp(Ity_I64);
11856 t1 = newTemp(Ity_I64);
11857 assign(t0, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0));
11858 assign(t1, getXMMRegLane64(eregOfRexRM(pfx,modrm), 1));
11859 t5 = newTemp(Ity_I64);
11860 assign(t5, mkIRExprCCall(
11861 Ity_I64, 0/*regparms*/,
11862 "amd64g_calculate_sse_pmovmskb",
11863 &amd64g_calculate_sse_pmovmskb,
11864 mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
11865 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t5)));
11866 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11867 nameIReg32(gregOfRexRM(pfx,modrm)));
11869 goto decode_success;
11871 /* else fall through */
11874 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
11875 if (have66noF2noF3(pfx) && sz == 2
11876 && insn[0] == 0x0F && insn[1] == 0xE4) {
11877 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11878 "pmulhuw", Iop_MulHi16Ux8, False );
11879 goto decode_success;
11882 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
11883 if (have66noF2noF3(pfx) && sz == 2
11884 && insn[0] == 0x0F && insn[1] == 0xE5) {
11885 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11886 "pmulhw", Iop_MulHi16Sx8, False );
11887 goto decode_success;
11890 /* 66 0F D5 = PMULLW -- 16x8 multiply */
11891 if (have66noF2noF3(pfx) && sz == 2
11892 && insn[0] == 0x0F && insn[1] == 0xD5) {
11893 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11894 "pmullw", Iop_Mul16x8, False );
11895 goto decode_success;
11898 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11899 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11900 0 to form 64-bit result */
11901 if (haveNo66noF2noF3(pfx) && sz == 4
11902 && insn[0] == 0x0F && insn[1] == 0xF4) {
11903 IRTemp sV = newTemp(Ity_I64);
11904 IRTemp dV = newTemp(Ity_I64);
11905 t1 = newTemp(Ity_I32);
11906 t0 = newTemp(Ity_I32);
11910 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
11912 if (epartIsReg(modrm)) {
11913 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
11915 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
11916 nameMMXReg(gregLO3ofRM(modrm)));
11918 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11919 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
11921 DIP("pmuludq %s,%s\n", dis_buf,
11922 nameMMXReg(gregLO3ofRM(modrm)));
11925 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
11926 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
11927 putMMXReg( gregLO3ofRM(modrm),
11928 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
11929 goto decode_success;
11932 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11933 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
11935 /* This is a really poor translation -- could be improved if
11936 performance critical */
11937 if (have66noF2noF3(pfx) && sz == 2
11938 && insn[0] == 0x0F && insn[1] == 0xF4) {
11940 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11941 sV = newTemp(Ity_V128);
11942 dV = newTemp(Ity_V128);
11943 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11944 t1 = newTemp(Ity_I64);
11945 t0 = newTemp(Ity_I64);
11947 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
11949 if (epartIsReg(modrm)) {
11950 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
11952 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11953 nameXMMReg(gregOfRexRM(pfx,modrm)));
11955 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11956 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11958 DIP("pmuludq %s,%s\n", dis_buf,
11959 nameXMMReg(gregOfRexRM(pfx,modrm)));
11962 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
11963 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11965 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
11966 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) );
11967 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
11968 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkexpr(t1) );
11969 goto decode_success;
11972 /* 66 0F EB = POR */
11973 if (have66noF2noF3(pfx) && sz == 2
11974 && insn[0] == 0x0F && insn[1] == 0xEB) {
11975 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "por", Iop_OrV128 );
11976 goto decode_success;
11979 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
11980 from E(xmm or mem) to G(xmm) */
11981 if (have66noF2noF3(pfx) && sz == 2
11982 && insn[0] == 0x0F && insn[1] == 0xF6) {
11983 IRTemp s1V = newTemp(Ity_V128);
11984 IRTemp s2V = newTemp(Ity_V128);
11985 IRTemp dV = newTemp(Ity_V128);
11986 IRTemp s1Hi = newTemp(Ity_I64);
11987 IRTemp s1Lo = newTemp(Ity_I64);
11988 IRTemp s2Hi = newTemp(Ity_I64);
11989 IRTemp s2Lo = newTemp(Ity_I64);
11990 IRTemp dHi = newTemp(Ity_I64);
11991 IRTemp dLo = newTemp(Ity_I64);
11993 if (epartIsReg(modrm)) {
11994 assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) );
11996 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11997 nameXMMReg(gregOfRexRM(pfx,modrm)));
11999 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12000 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
12002 DIP("psadbw %s,%s\n", dis_buf,
12003 nameXMMReg(gregOfRexRM(pfx,modrm)));
12005 assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) );
12006 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
12007 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
12008 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
12009 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
12010 assign( dHi, mkIRExprCCall(
12011 Ity_I64, 0/*regparms*/,
12012 "amd64g_calculate_mmx_psadbw",
12013 &amd64g_calculate_mmx_psadbw,
12014 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
12016 assign( dLo, mkIRExprCCall(
12017 Ity_I64, 0/*regparms*/,
12018 "amd64g_calculate_mmx_psadbw",
12019 &amd64g_calculate_mmx_psadbw,
12020 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
12022 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
12023 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
12024 goto decode_success;
12027 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
12028 if (have66noF2noF3(pfx) && sz == 2
12029 && insn[0] == 0x0F && insn[1] == 0x70) {
12031 IRTemp sV, dV, s3, s2, s1, s0;
12032 s3 = s2 = s1 = s0 = IRTemp_INVALID;
12033 sV = newTemp(Ity_V128);
12034 dV = newTemp(Ity_V128);
12036 if (epartIsReg(modrm)) {
12037 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
12038 order = (Int)insn[3];
12040 DIP("pshufd $%d,%s,%s\n", order,
12041 nameXMMReg(eregOfRexRM(pfx,modrm)),
12042 nameXMMReg(gregOfRexRM(pfx,modrm)));
12044 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
12045 1/*byte after the amode*/ );
12046 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12047 order = (Int)insn[2+alen];
12049 DIP("pshufd $%d,%s,%s\n", order,
12051 nameXMMReg(gregOfRexRM(pfx,modrm)));
12053 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
12056 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
12058 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
12059 SEL((order>>2)&3), SEL((order>>0)&3) )
12061 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
12063 goto decode_success;
12066 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
12067 mem) to G(xmm), and copy lower half */
12068 if (haveF3no66noF2(pfx) && sz == 4
12069 && insn[0] == 0x0F && insn[1] == 0x70) {
12071 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
12072 s3 = s2 = s1 = s0 = IRTemp_INVALID;
12073 sV = newTemp(Ity_V128);
12074 dV = newTemp(Ity_V128);
12075 sVhi = newTemp(Ity_I64);
12076 dVhi = newTemp(Ity_I64);
12078 if (epartIsReg(modrm)) {
12079 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
12080 order = (Int)insn[3];
12082 DIP("pshufhw $%d,%s,%s\n", order,
12083 nameXMMReg(eregOfRexRM(pfx,modrm)),
12084 nameXMMReg(gregOfRexRM(pfx,modrm)));
12086 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
12087 1/*byte after the amode*/ );
12088 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12089 order = (Int)insn[2+alen];
12091 DIP("pshufhw $%d,%s,%s\n", order,
12093 nameXMMReg(gregOfRexRM(pfx,modrm)));
12095 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
12096 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
12099 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
12101 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
12102 SEL((order>>2)&3), SEL((order>>0)&3) )
12104 assign(dV, binop( Iop_64HLtoV128,
12106 unop(Iop_V128to64, mkexpr(sV))) );
12107 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
12109 goto decode_success;
12112 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
12113 mem) to G(xmm), and copy upper half */
12114 if (haveF2no66noF3(pfx) && sz == 4
12115 && insn[0] == 0x0F && insn[1] == 0x70) {
12117 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
12118 s3 = s2 = s1 = s0 = IRTemp_INVALID;
12119 sV = newTemp(Ity_V128);
12120 dV = newTemp(Ity_V128);
12121 sVlo = newTemp(Ity_I64);
12122 dVlo = newTemp(Ity_I64);
12124 if (epartIsReg(modrm)) {
12125 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
12126 order = (Int)insn[3];
12128 DIP("pshuflw $%d,%s,%s\n", order,
12129 nameXMMReg(eregOfRexRM(pfx,modrm)),
12130 nameXMMReg(gregOfRexRM(pfx,modrm)));
12132 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
12133 1/*byte after the amode*/ );
12134 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12135 order = (Int)insn[2+alen];
12137 DIP("pshuflw $%d,%s,%s\n", order,
12139 nameXMMReg(gregOfRexRM(pfx,modrm)));
12141 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
12142 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
12145 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
12147 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
12148 SEL((order>>2)&3), SEL((order>>0)&3) )
12150 assign(dV, binop( Iop_64HLtoV128,
12151 unop(Iop_V128HIto64, mkexpr(sV)),
12153 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
12155 goto decode_success;
12158 /* 66 0F 72 /6 ib = PSLLD by immediate */
12159 if (have66noF2noF3(pfx) && sz == 2
12160 && insn[0] == 0x0F && insn[1] == 0x72
12161 && epartIsReg(insn[2])
12162 && gregLO3ofRM(insn[2]) == 6) {
12163 delta = dis_SSE_shiftE_imm( pfx, delta+2, "pslld", Iop_ShlN32x4 );
12164 goto decode_success;
12167 /* 66 0F F2 = PSLLD by E */
12168 if (have66noF2noF3(pfx) && sz == 2
12169 && insn[0] == 0x0F && insn[1] == 0xF2) {
12170 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "pslld", Iop_ShlN32x4 );
12171 goto decode_success;
12174 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
12175 /* note, if mem case ever filled in, 1 byte after amode */
12176 if (have66noF2noF3(pfx) && sz == 2
12177 && insn[0] == 0x0F && insn[1] == 0x73
12178 && epartIsReg(insn[2])
12179 && gregLO3ofRM(insn[2]) == 7) {
12180 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
12181 Int imm = (Int)insn[3];
12182 Int reg = eregOfRexRM(pfx,insn[2]);
12183 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
12184 vassert(imm >= 0 && imm <= 255);
12187 sV = newTemp(Ity_V128);
12188 dV = newTemp(Ity_V128);
12189 hi64 = newTemp(Ity_I64);
12190 lo64 = newTemp(Ity_I64);
12191 hi64r = newTemp(Ity_I64);
12192 lo64r = newTemp(Ity_I64);
12195 putXMMReg(reg, mkV128(0x0000));
12196 goto decode_success;
12199 assign( sV, getXMMReg(reg) );
12200 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
12201 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
12204 assign( lo64r, mkexpr(lo64) );
12205 assign( hi64r, mkexpr(hi64) );
12209 assign( lo64r, mkU64(0) );
12210 assign( hi64r, mkexpr(lo64) );
12214 assign( lo64r, mkU64(0) );
12215 assign( hi64r, binop( Iop_Shl64,
12217 mkU8( 8*(imm-8) ) ));
12219 assign( lo64r, binop( Iop_Shl64,
12224 binop(Iop_Shl64, mkexpr(hi64),
12226 binop(Iop_Shr64, mkexpr(lo64),
12227 mkU8(8 * (8 - imm)) )
12231 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
12232 putXMMReg(reg, mkexpr(dV));
12233 goto decode_success;
12236 /* 66 0F 73 /6 ib = PSLLQ by immediate */
12237 if (have66noF2noF3(pfx) && sz == 2
12238 && insn[0] == 0x0F && insn[1] == 0x73
12239 && epartIsReg(insn[2])
12240 && gregLO3ofRM(insn[2]) == 6) {
12241 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllq", Iop_ShlN64x2 );
12242 goto decode_success;
12245 /* 66 0F F3 = PSLLQ by E */
12246 if (have66noF2noF3(pfx) && sz == 2
12247 && insn[0] == 0x0F && insn[1] == 0xF3) {
12248 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllq", Iop_ShlN64x2 );
12249 goto decode_success;
12252 /* 66 0F 71 /6 ib = PSLLW by immediate */
12253 if (have66noF2noF3(pfx) && sz == 2
12254 && insn[0] == 0x0F && insn[1] == 0x71
12255 && epartIsReg(insn[2])
12256 && gregLO3ofRM(insn[2]) == 6) {
12257 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllw", Iop_ShlN16x8 );
12258 goto decode_success;
12261 /* 66 0F F1 = PSLLW by E */
12262 if (have66noF2noF3(pfx) && sz == 2
12263 && insn[0] == 0x0F && insn[1] == 0xF1) {
12264 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllw", Iop_ShlN16x8 );
12265 goto decode_success;
12268 /* 66 0F 72 /4 ib = PSRAD by immediate */
12269 if (have66noF2noF3(pfx) && sz == 2
12270 && insn[0] == 0x0F && insn[1] == 0x72
12271 && epartIsReg(insn[2])
12272 && gregLO3ofRM(insn[2]) == 4) {
12273 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrad", Iop_SarN32x4 );
12274 goto decode_success;
12277 /* 66 0F E2 = PSRAD by E */
12278 if (have66noF2noF3(pfx) && sz == 2
12279 && insn[0] == 0x0F && insn[1] == 0xE2) {
12280 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrad", Iop_SarN32x4 );
12281 goto decode_success;
12284 /* 66 0F 71 /4 ib = PSRAW by immediate */
12285 if (have66noF2noF3(pfx) && sz == 2
12286 && insn[0] == 0x0F && insn[1] == 0x71
12287 && epartIsReg(insn[2])
12288 && gregLO3ofRM(insn[2]) == 4) {
12289 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psraw", Iop_SarN16x8 );
12290 goto decode_success;
12293 /* 66 0F E1 = PSRAW by E */
12294 if (have66noF2noF3(pfx) && sz == 2
12295 && insn[0] == 0x0F && insn[1] == 0xE1) {
12296 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psraw", Iop_SarN16x8 );
12297 goto decode_success;
12300 /* 66 0F 72 /2 ib = PSRLD by immediate */
12301 if (have66noF2noF3(pfx) && sz == 2
12302 && insn[0] == 0x0F && insn[1] == 0x72
12303 && epartIsReg(insn[2])
12304 && gregLO3ofRM(insn[2]) == 2) {
12305 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrld", Iop_ShrN32x4 );
12306 goto decode_success;
12309 /* 66 0F D2 = PSRLD by E */
12310 if (have66noF2noF3(pfx) && sz == 2
12311 && insn[0] == 0x0F && insn[1] == 0xD2) {
12312 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrld", Iop_ShrN32x4 );
12313 goto decode_success;
12316 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
12317 /* note, if mem case ever filled in, 1 byte after amode */
12318 if (have66noF2noF3(pfx) && sz == 2
12319 && insn[0] == 0x0F && insn[1] == 0x73
12320 && epartIsReg(insn[2])
12321 && gregLO3ofRM(insn[2]) == 3) {
12322 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
12323 Int imm = (Int)insn[3];
12324 Int reg = eregOfRexRM(pfx,insn[2]);
12325 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
12326 vassert(imm >= 0 && imm <= 255);
12329 sV = newTemp(Ity_V128);
12330 dV = newTemp(Ity_V128);
12331 hi64 = newTemp(Ity_I64);
12332 lo64 = newTemp(Ity_I64);
12333 hi64r = newTemp(Ity_I64);
12334 lo64r = newTemp(Ity_I64);
12337 putXMMReg(reg, mkV128(0x0000));
12338 goto decode_success;
12341 assign( sV, getXMMReg(reg) );
12342 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
12343 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
12346 assign( lo64r, mkexpr(lo64) );
12347 assign( hi64r, mkexpr(hi64) );
12351 assign( hi64r, mkU64(0) );
12352 assign( lo64r, mkexpr(hi64) );
12356 assign( hi64r, mkU64(0) );
12357 assign( lo64r, binop( Iop_Shr64,
12359 mkU8( 8*(imm-8) ) ));
12361 assign( hi64r, binop( Iop_Shr64,
12366 binop(Iop_Shr64, mkexpr(lo64),
12368 binop(Iop_Shl64, mkexpr(hi64),
12369 mkU8(8 * (8 - imm)) )
12374 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
12375 putXMMReg(reg, mkexpr(dV));
12376 goto decode_success;
12379 /* 66 0F 73 /2 ib = PSRLQ by immediate */
12380 if (have66noF2noF3(pfx) && sz == 2
12381 && insn[0] == 0x0F && insn[1] == 0x73
12382 && epartIsReg(insn[2])
12383 && gregLO3ofRM(insn[2]) == 2) {
12384 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlq", Iop_ShrN64x2 );
12385 goto decode_success;
12388 /* 66 0F D3 = PSRLQ by E */
12389 if (have66noF2noF3(pfx) && sz == 2
12390 && insn[0] == 0x0F && insn[1] == 0xD3) {
12391 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlq", Iop_ShrN64x2 );
12392 goto decode_success;
12395 /* 66 0F 71 /2 ib = PSRLW by immediate */
12396 if (have66noF2noF3(pfx) && sz == 2
12397 && insn[0] == 0x0F && insn[1] == 0x71
12398 && epartIsReg(insn[2])
12399 && gregLO3ofRM(insn[2]) == 2) {
12400 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlw", Iop_ShrN16x8 );
12401 goto decode_success;
12404 /* 66 0F D1 = PSRLW by E */
12405 if (have66noF2noF3(pfx) && sz == 2
12406 && insn[0] == 0x0F && insn[1] == 0xD1) {
12407 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlw", Iop_ShrN16x8 );
12408 goto decode_success;
12411 /* 66 0F F8 = PSUBB */
12412 if (have66noF2noF3(pfx) && sz == 2
12413 && insn[0] == 0x0F && insn[1] == 0xF8) {
12414 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12415 "psubb", Iop_Sub8x16, False );
12416 goto decode_success;
12419 /* 66 0F FA = PSUBD */
12420 if (have66noF2noF3(pfx) && sz == 2
12421 && insn[0] == 0x0F && insn[1] == 0xFA) {
12422 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12423 "psubd", Iop_Sub32x4, False );
12424 goto decode_success;
12427 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
12428 /* 0F FB = PSUBQ -- sub 64x1 */
12429 if (haveNo66noF2noF3(pfx) && sz == 4
12430 && insn[0] == 0x0F && insn[1] == 0xFB) {
12432 delta = dis_MMXop_regmem_to_reg (
12433 vbi, pfx, delta+2, insn[1], "psubq", False );
12434 goto decode_success;
12437 /* 66 0F FB = PSUBQ */
12438 if (have66noF2noF3(pfx) && sz == 2
12439 && insn[0] == 0x0F && insn[1] == 0xFB) {
12440 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12441 "psubq", Iop_Sub64x2, False );
12442 goto decode_success;
12445 /* 66 0F F9 = PSUBW */
12446 if (have66noF2noF3(pfx) && sz == 2
12447 && insn[0] == 0x0F && insn[1] == 0xF9) {
12448 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12449 "psubw", Iop_Sub16x8, False );
12450 goto decode_success;
12453 /* 66 0F E8 = PSUBSB */
12454 if (have66noF2noF3(pfx) && sz == 2
12455 && insn[0] == 0x0F && insn[1] == 0xE8) {
12456 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12457 "psubsb", Iop_QSub8Sx16, False );
12458 goto decode_success;
12461 /* 66 0F E9 = PSUBSW */
12462 if (have66noF2noF3(pfx) && sz == 2
12463 && insn[0] == 0x0F && insn[1] == 0xE9) {
12464 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12465 "psubsw", Iop_QSub16Sx8, False );
12466 goto decode_success;
12469 /* 66 0F D8 = PSUBUSB */
12470 if (have66noF2noF3(pfx) && sz == 2
12471 && insn[0] == 0x0F && insn[1] == 0xD8) {
12472 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12473 "psubusb", Iop_QSub8Ux16, False );
12474 goto decode_success;
12477 /* 66 0F D9 = PSUBUSW */
12478 if (have66noF2noF3(pfx) && sz == 2
12479 && insn[0] == 0x0F && insn[1] == 0xD9) {
12480 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12481 "psubusw", Iop_QSub16Ux8, False );
12482 goto decode_success;
12485 /* 66 0F 68 = PUNPCKHBW */
12486 if (have66noF2noF3(pfx) && sz == 2
12487 && insn[0] == 0x0F && insn[1] == 0x68) {
12488 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12490 Iop_InterleaveHI8x16, True );
12491 goto decode_success;
12494 /* 66 0F 6A = PUNPCKHDQ */
12495 if (have66noF2noF3(pfx) && sz == 2
12496 && insn[0] == 0x0F && insn[1] == 0x6A) {
12497 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12499 Iop_InterleaveHI32x4, True );
12500 goto decode_success;
12503 /* 66 0F 6D = PUNPCKHQDQ */
12504 if (have66noF2noF3(pfx) && sz == 2
12505 && insn[0] == 0x0F && insn[1] == 0x6D) {
12506 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12508 Iop_InterleaveHI64x2, True );
12509 goto decode_success;
12512 /* 66 0F 69 = PUNPCKHWD */
12513 if (have66noF2noF3(pfx) && sz == 2
12514 && insn[0] == 0x0F && insn[1] == 0x69) {
12515 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12517 Iop_InterleaveHI16x8, True );
12518 goto decode_success;
12521 /* 66 0F 60 = PUNPCKLBW */
12522 if (have66noF2noF3(pfx) && sz == 2
12523 && insn[0] == 0x0F && insn[1] == 0x60) {
12524 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12526 Iop_InterleaveLO8x16, True );
12527 goto decode_success;
12530 /* 66 0F 62 = PUNPCKLDQ */
12531 if (have66noF2noF3(pfx) && sz == 2
12532 && insn[0] == 0x0F && insn[1] == 0x62) {
12533 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12535 Iop_InterleaveLO32x4, True );
12536 goto decode_success;
12539 /* 66 0F 6C = PUNPCKLQDQ */
12540 if (have66noF2noF3(pfx) && sz == 2
12541 && insn[0] == 0x0F && insn[1] == 0x6C) {
12542 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12544 Iop_InterleaveLO64x2, True );
12545 goto decode_success;
12548 /* 66 0F 61 = PUNPCKLWD */
12549 if (have66noF2noF3(pfx) && sz == 2
12550 && insn[0] == 0x0F && insn[1] == 0x61) {
12551 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12553 Iop_InterleaveLO16x8, True );
12554 goto decode_success;
12557 /* 66 0F EF = PXOR */
12558 if (have66noF2noF3(pfx) && sz == 2
12559 && insn[0] == 0x0F && insn[1] == 0xEF) {
12560 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pxor", Iop_XorV128 );
12561 goto decode_success;
12564 //.. //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
12565 //.. //-- if (insn[0] == 0x0F && insn[1] == 0xAE
12566 //.. //-- && (!epartIsReg(insn[2]))
12567 //.. //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
12568 //.. //-- Bool store = gregOfRM(insn[2]) == 0;
12569 //.. //-- vg_assert(sz == 4);
12570 //.. //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
12571 //.. //-- t1 = LOW24(pair);
12572 //.. //-- eip += 2+HI8(pair);
12573 //.. //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
12574 //.. //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
12575 //.. //-- Lit16, (UShort)insn[2],
12576 //.. //-- TempReg, t1 );
12577 //.. //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
12578 //.. //-- goto decode_success;
12581 /* 0F AE /7 = CLFLUSH -- flush cache line */
12582 if (haveNo66noF2noF3(pfx) && sz == 4
12583 && insn[0] == 0x0F && insn[1] == 0xAE
12584 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7) {
12586 /* This is something of a hack. We need to know the size of the
12587 cache line containing addr. Since we don't (easily), assume
12588 256 on the basis that no real cache would have a line that
12589 big. It's safe to invalidate more stuff than we need, just
12591 ULong lineszB = 256ULL;
12593 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12596 /* Round addr down to the start of the containing block. */
12601 mkU64( ~(lineszB-1) ))) );
12603 stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) );
12605 irsb->jumpkind = Ijk_TInval;
12606 irsb->next = mkU64(guest_RIP_bbstart+delta);
12607 dres.whatNext = Dis_StopHere;
12609 DIP("clflush %s\n", dis_buf);
12610 goto decode_success;
12613 /* ---------------------------------------------------- */
12614 /* --- end of the SSE/SSE2 decoder. --- */
12615 /* ---------------------------------------------------- */
12617 /* ---------------------------------------------------- */
12618 /* --- start of the SSE3 decoder. --- */
12619 /* ---------------------------------------------------- */
12621 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
12622 duplicating some lanes (2:2:0:0). */
12623 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
12624 duplicating some lanes (3:3:1:1). */
12625 if (haveF3no66noF2(pfx) && sz == 4
12626 && insn[0] == 0x0F && (insn[1] == 0x12 || insn[1] == 0x16)) {
12627 IRTemp s3, s2, s1, s0;
12628 IRTemp sV = newTemp(Ity_V128);
12629 Bool isH = insn[1] == 0x16;
12630 s3 = s2 = s1 = s0 = IRTemp_INVALID;
12633 if (epartIsReg(modrm)) {
12634 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12635 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
12636 nameXMMReg(eregOfRexRM(pfx,modrm)),
12637 nameXMMReg(gregOfRexRM(pfx,modrm)));
12640 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12641 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12642 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
12644 nameXMMReg(gregOfRexRM(pfx,modrm)));
12648 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
12649 putXMMReg( gregOfRexRM(pfx,modrm),
12650 isH ? mk128from32s( s3, s3, s1, s1 )
12651 : mk128from32s( s2, s2, s0, s0 ) );
12652 goto decode_success;
12655 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
12656 duplicating some lanes (0:1:0:1). */
12657 if (haveF2no66noF3(pfx)
12658 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
12659 && insn[0] == 0x0F && insn[1] == 0x12) {
12660 IRTemp sV = newTemp(Ity_V128);
12661 IRTemp d0 = newTemp(Ity_I64);
12664 if (epartIsReg(modrm)) {
12665 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12666 DIP("movddup %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12667 nameXMMReg(gregOfRexRM(pfx,modrm)));
12669 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
12671 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12672 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
12673 DIP("movddup %s,%s\n", dis_buf,
12674 nameXMMReg(gregOfRexRM(pfx,modrm)));
12678 putXMMReg( gregOfRexRM(pfx,modrm),
12679 binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
12680 goto decode_success;
12683 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
12684 if (haveF2no66noF3(pfx) && sz == 4
12685 && insn[0] == 0x0F && insn[1] == 0xD0) {
12686 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
12687 IRTemp eV = newTemp(Ity_V128);
12688 IRTemp gV = newTemp(Ity_V128);
12689 IRTemp addV = newTemp(Ity_V128);
12690 IRTemp subV = newTemp(Ity_V128);
12691 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
12694 if (epartIsReg(modrm)) {
12695 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12696 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12697 nameXMMReg(gregOfRexRM(pfx,modrm)));
12700 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12701 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12702 DIP("addsubps %s,%s\n", dis_buf,
12703 nameXMMReg(gregOfRexRM(pfx,modrm)));
12707 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12709 assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
12710 assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
12712 breakup128to32s( addV, &a3, &a2, &a1, &a0 );
12713 breakup128to32s( subV, &s3, &s2, &s1, &s0 );
12715 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( a3, s2, a1, s0 ));
12716 goto decode_success;
12719 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
12720 if (have66noF2noF3(pfx) && sz == 2
12721 && insn[0] == 0x0F && insn[1] == 0xD0) {
12722 IRTemp eV = newTemp(Ity_V128);
12723 IRTemp gV = newTemp(Ity_V128);
12724 IRTemp addV = newTemp(Ity_V128);
12725 IRTemp subV = newTemp(Ity_V128);
12726 IRTemp a1 = newTemp(Ity_I64);
12727 IRTemp s0 = newTemp(Ity_I64);
12730 if (epartIsReg(modrm)) {
12731 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12732 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12733 nameXMMReg(gregOfRexRM(pfx,modrm)));
12736 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12737 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12738 DIP("addsubpd %s,%s\n", dis_buf,
12739 nameXMMReg(gregOfRexRM(pfx,modrm)));
12743 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12745 assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
12746 assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
12748 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
12749 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
12751 putXMMReg( gregOfRexRM(pfx,modrm),
12752 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
12753 goto decode_success;
12756 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
12757 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
12758 if (haveF2no66noF3(pfx) && sz == 4
12759 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
12760 IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
12761 IRTemp eV = newTemp(Ity_V128);
12762 IRTemp gV = newTemp(Ity_V128);
12763 IRTemp leftV = newTemp(Ity_V128);
12764 IRTemp rightV = newTemp(Ity_V128);
12765 Bool isAdd = insn[1] == 0x7C;
12766 HChar* str = isAdd ? "add" : "sub";
12767 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
12770 if (epartIsReg(modrm)) {
12771 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12772 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
12773 nameXMMReg(gregOfRexRM(pfx,modrm)));
12776 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12777 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12778 DIP("h%sps %s,%s\n", str, dis_buf,
12779 nameXMMReg(gregOfRexRM(pfx,modrm)));
12783 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12785 breakup128to32s( eV, &e3, &e2, &e1, &e0 );
12786 breakup128to32s( gV, &g3, &g2, &g1, &g0 );
12788 assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
12789 assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
12791 putXMMReg( gregOfRexRM(pfx,modrm),
12792 binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
12793 mkexpr(leftV), mkexpr(rightV) ) );
12794 goto decode_success;
12797 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
12798 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
12799 if (have66noF2noF3(pfx) && sz == 2
12800 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
12801 IRTemp e1 = newTemp(Ity_I64);
12802 IRTemp e0 = newTemp(Ity_I64);
12803 IRTemp g1 = newTemp(Ity_I64);
12804 IRTemp g0 = newTemp(Ity_I64);
12805 IRTemp eV = newTemp(Ity_V128);
12806 IRTemp gV = newTemp(Ity_V128);
12807 IRTemp leftV = newTemp(Ity_V128);
12808 IRTemp rightV = newTemp(Ity_V128);
12809 Bool isAdd = insn[1] == 0x7C;
12810 HChar* str = isAdd ? "add" : "sub";
12813 if (epartIsReg(modrm)) {
12814 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12815 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
12816 nameXMMReg(gregOfRexRM(pfx,modrm)));
12819 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12820 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12821 DIP("h%spd %s,%s\n", str, dis_buf,
12822 nameXMMReg(gregOfRexRM(pfx,modrm)));
12826 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12828 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
12829 assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
12830 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
12831 assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
12833 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
12834 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
12836 putXMMReg( gregOfRexRM(pfx,modrm),
12837 binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
12838 mkexpr(leftV), mkexpr(rightV) ) );
12839 goto decode_success;
12842 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
12843 if (haveF2no66noF3(pfx) && sz == 4
12844 && insn[0] == 0x0F && insn[1] == 0xF0) {
12846 if (epartIsReg(modrm)) {
12847 goto decode_failure;
12849 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12850 putXMMReg( gregOfRexRM(pfx,modrm),
12851 loadLE(Ity_V128, mkexpr(addr)) );
12852 DIP("lddqu %s,%s\n", dis_buf,
12853 nameXMMReg(gregOfRexRM(pfx,modrm)));
12856 goto decode_success;
12859 /* ---------------------------------------------------- */
12860 /* --- end of the SSE3 decoder. --- */
12861 /* ---------------------------------------------------- */
12863 /* ---------------------------------------------------- */
12864 /* --- start of the SSSE3 decoder. --- */
12865 /* ---------------------------------------------------- */
12867 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12868 Unsigned Bytes (MMX) */
12869 if (haveNo66noF2noF3(pfx)
12871 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
12872 IRTemp sV = newTemp(Ity_I64);
12873 IRTemp dV = newTemp(Ity_I64);
12874 IRTemp sVoddsSX = newTemp(Ity_I64);
12875 IRTemp sVevensSX = newTemp(Ity_I64);
12876 IRTemp dVoddsZX = newTemp(Ity_I64);
12877 IRTemp dVevensZX = newTemp(Ity_I64);
12881 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
12883 if (epartIsReg(modrm)) {
12884 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
12886 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
12887 nameMMXReg(gregLO3ofRM(modrm)));
12889 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
12890 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12892 DIP("pmaddubsw %s,%s\n", dis_buf,
12893 nameMMXReg(gregLO3ofRM(modrm)));
12896 /* compute dV unsigned x sV signed */
12898 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
12900 binop(Iop_SarN16x4,
12901 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
12904 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
12906 binop(Iop_ShrN16x4,
12907 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
12911 gregLO3ofRM(modrm),
12912 binop(Iop_QAdd16Sx4,
12913 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
12914 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
12917 goto decode_success;
12920 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12921 Unsigned Bytes (XMM) */
12922 if (have66noF2noF3(pfx)
12923 && (sz == 2 || /*redundant REX.W*/ sz == 8)
12924 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
12925 IRTemp sV = newTemp(Ity_V128);
12926 IRTemp dV = newTemp(Ity_V128);
12927 IRTemp sVoddsSX = newTemp(Ity_V128);
12928 IRTemp sVevensSX = newTemp(Ity_V128);
12929 IRTemp dVoddsZX = newTemp(Ity_V128);
12930 IRTemp dVevensZX = newTemp(Ity_V128);
12933 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12935 if (epartIsReg(modrm)) {
12936 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
12938 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12939 nameXMMReg(gregOfRexRM(pfx,modrm)));
12941 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
12942 gen_SEGV_if_not_16_aligned( addr );
12943 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12945 DIP("pmaddubsw %s,%s\n", dis_buf,
12946 nameXMMReg(gregOfRexRM(pfx,modrm)));
12949 /* compute dV unsigned x sV signed */
12951 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
12953 binop(Iop_SarN16x8,
12954 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
12957 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
12959 binop(Iop_ShrN16x8,
12960 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
12964 gregOfRexRM(pfx,modrm),
12965 binop(Iop_QAdd16Sx8,
12966 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
12967 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
12970 goto decode_success;
12973 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
12974 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
12975 mmx) and G to G (mmx). */
12976 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
12977 mmx) and G to G (mmx). */
12978 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
12980 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
12982 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
12984 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
12987 if (haveNo66noF2noF3(pfx)
12989 && insn[0] == 0x0F && insn[1] == 0x38
12990 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
12991 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
12992 HChar* str = "???";
12993 IROp opV64 = Iop_INVALID;
12994 IROp opCatO = Iop_CatOddLanes16x4;
12995 IROp opCatE = Iop_CatEvenLanes16x4;
12996 IRTemp sV = newTemp(Ity_I64);
12997 IRTemp dV = newTemp(Ity_I64);
13002 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
13003 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
13004 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
13005 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
13006 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
13007 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
13008 default: vassert(0);
13010 if (insn[2] == 0x02 || insn[2] == 0x06) {
13011 opCatO = Iop_InterleaveHI32x2;
13012 opCatE = Iop_InterleaveLO32x2;
13016 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13018 if (epartIsReg(modrm)) {
13019 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13021 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
13022 nameMMXReg(gregLO3ofRM(modrm)));
13024 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13025 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13027 DIP("ph%s %s,%s\n", str, dis_buf,
13028 nameMMXReg(gregLO3ofRM(modrm)));
13032 gregLO3ofRM(modrm),
13034 binop(opCatE,mkexpr(sV),mkexpr(dV)),
13035 binop(opCatO,mkexpr(sV),mkexpr(dV))
13038 goto decode_success;
13041 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
13042 xmm) and G to G (xmm). */
13043 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
13044 xmm) and G to G (xmm). */
13045 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
13047 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
13049 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
13051 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
13054 if (have66noF2noF3(pfx)
13055 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13056 && insn[0] == 0x0F && insn[1] == 0x38
13057 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
13058 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
13059 HChar* str = "???";
13060 IROp opV64 = Iop_INVALID;
13061 IROp opCatO = Iop_CatOddLanes16x4;
13062 IROp opCatE = Iop_CatEvenLanes16x4;
13063 IRTemp sV = newTemp(Ity_V128);
13064 IRTemp dV = newTemp(Ity_V128);
13065 IRTemp sHi = newTemp(Ity_I64);
13066 IRTemp sLo = newTemp(Ity_I64);
13067 IRTemp dHi = newTemp(Ity_I64);
13068 IRTemp dLo = newTemp(Ity_I64);
13073 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
13074 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
13075 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
13076 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
13077 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
13078 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
13079 default: vassert(0);
13081 if (insn[2] == 0x02 || insn[2] == 0x06) {
13082 opCatO = Iop_InterleaveHI32x2;
13083 opCatE = Iop_InterleaveLO32x2;
13086 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13088 if (epartIsReg(modrm)) {
13089 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
13090 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
13091 nameXMMReg(gregOfRexRM(pfx,modrm)));
13094 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13095 gen_SEGV_if_not_16_aligned( addr );
13096 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13097 DIP("ph%s %s,%s\n", str, dis_buf,
13098 nameXMMReg(gregOfRexRM(pfx,modrm)));
13102 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13103 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13104 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13105 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13107 /* This isn't a particularly efficient way to compute the
13108 result, but at least it avoids a proliferation of IROps,
13109 hence avoids complication all the backends. */
13111 gregOfRexRM(pfx,modrm),
13112 binop(Iop_64HLtoV128,
13114 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
13115 binop(opCatO,mkexpr(sHi),mkexpr(sLo))
13118 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
13119 binop(opCatO,mkexpr(dHi),mkexpr(dLo))
13123 goto decode_success;
13126 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
13128 if (haveNo66noF2noF3(pfx)
13130 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
13131 IRTemp sV = newTemp(Ity_I64);
13132 IRTemp dV = newTemp(Ity_I64);
13136 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13138 if (epartIsReg(modrm)) {
13139 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13141 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13142 nameMMXReg(gregLO3ofRM(modrm)));
13144 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13145 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13147 DIP("pmulhrsw %s,%s\n", dis_buf,
13148 nameMMXReg(gregLO3ofRM(modrm)));
13152 gregLO3ofRM(modrm),
13153 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
13155 goto decode_success;
13158 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
13160 if (have66noF2noF3(pfx)
13161 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13162 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
13163 IRTemp sV = newTemp(Ity_V128);
13164 IRTemp dV = newTemp(Ity_V128);
13165 IRTemp sHi = newTemp(Ity_I64);
13166 IRTemp sLo = newTemp(Ity_I64);
13167 IRTemp dHi = newTemp(Ity_I64);
13168 IRTemp dLo = newTemp(Ity_I64);
13171 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13173 if (epartIsReg(modrm)) {
13174 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13176 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13177 nameXMMReg(gregOfRexRM(pfx,modrm)));
13179 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13180 gen_SEGV_if_not_16_aligned( addr );
13181 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13183 DIP("pmulhrsw %s,%s\n", dis_buf,
13184 nameXMMReg(gregOfRexRM(pfx,modrm)));
13187 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13188 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13189 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13190 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13193 gregOfRexRM(pfx,modrm),
13194 binop(Iop_64HLtoV128,
13195 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
13196 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
13199 goto decode_success;
13202 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
13203 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
13204 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
13205 if (haveNo66noF2noF3(pfx)
13207 && insn[0] == 0x0F && insn[1] == 0x38
13208 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
13209 IRTemp sV = newTemp(Ity_I64);
13210 IRTemp dV = newTemp(Ity_I64);
13211 HChar* str = "???";
13215 case 0x08: laneszB = 1; str = "b"; break;
13216 case 0x09: laneszB = 2; str = "w"; break;
13217 case 0x0A: laneszB = 4; str = "d"; break;
13218 default: vassert(0);
13223 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13225 if (epartIsReg(modrm)) {
13226 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13228 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
13229 nameMMXReg(gregLO3ofRM(modrm)));
13231 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13232 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13234 DIP("psign%s %s,%s\n", str, dis_buf,
13235 nameMMXReg(gregLO3ofRM(modrm)));
13239 gregLO3ofRM(modrm),
13240 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
13242 goto decode_success;
13245 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
13246 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
13247 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
13248 if (have66noF2noF3(pfx)
13249 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13250 && insn[0] == 0x0F && insn[1] == 0x38
13251 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
13252 IRTemp sV = newTemp(Ity_V128);
13253 IRTemp dV = newTemp(Ity_V128);
13254 IRTemp sHi = newTemp(Ity_I64);
13255 IRTemp sLo = newTemp(Ity_I64);
13256 IRTemp dHi = newTemp(Ity_I64);
13257 IRTemp dLo = newTemp(Ity_I64);
13258 HChar* str = "???";
13262 case 0x08: laneszB = 1; str = "b"; break;
13263 case 0x09: laneszB = 2; str = "w"; break;
13264 case 0x0A: laneszB = 4; str = "d"; break;
13265 default: vassert(0);
13269 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13271 if (epartIsReg(modrm)) {
13272 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13274 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
13275 nameXMMReg(gregOfRexRM(pfx,modrm)));
13277 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13278 gen_SEGV_if_not_16_aligned( addr );
13279 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13281 DIP("psign%s %s,%s\n", str, dis_buf,
13282 nameXMMReg(gregOfRexRM(pfx,modrm)));
13285 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13286 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13287 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13288 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13291 gregOfRexRM(pfx,modrm),
13292 binop(Iop_64HLtoV128,
13293 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
13294 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
13297 goto decode_success;
13300 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
13301 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
13302 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
13303 if (haveNo66noF2noF3(pfx)
13305 && insn[0] == 0x0F && insn[1] == 0x38
13306 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
13307 IRTemp sV = newTemp(Ity_I64);
13308 HChar* str = "???";
13312 case 0x1C: laneszB = 1; str = "b"; break;
13313 case 0x1D: laneszB = 2; str = "w"; break;
13314 case 0x1E: laneszB = 4; str = "d"; break;
13315 default: vassert(0);
13321 if (epartIsReg(modrm)) {
13322 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13324 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
13325 nameMMXReg(gregLO3ofRM(modrm)));
13327 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13328 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13330 DIP("pabs%s %s,%s\n", str, dis_buf,
13331 nameMMXReg(gregLO3ofRM(modrm)));
13335 gregLO3ofRM(modrm),
13336 dis_PABS_helper( mkexpr(sV), laneszB )
13338 goto decode_success;
13341 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
13342 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
13343 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
13344 if (have66noF2noF3(pfx)
13345 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13346 && insn[0] == 0x0F && insn[1] == 0x38
13347 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
13348 IRTemp sV = newTemp(Ity_V128);
13349 IRTemp sHi = newTemp(Ity_I64);
13350 IRTemp sLo = newTemp(Ity_I64);
13351 HChar* str = "???";
13355 case 0x1C: laneszB = 1; str = "b"; break;
13356 case 0x1D: laneszB = 2; str = "w"; break;
13357 case 0x1E: laneszB = 4; str = "d"; break;
13358 default: vassert(0);
13363 if (epartIsReg(modrm)) {
13364 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13366 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
13367 nameXMMReg(gregOfRexRM(pfx,modrm)));
13369 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13370 gen_SEGV_if_not_16_aligned( addr );
13371 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13373 DIP("pabs%s %s,%s\n", str, dis_buf,
13374 nameXMMReg(gregOfRexRM(pfx,modrm)));
13377 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13378 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13381 gregOfRexRM(pfx,modrm),
13382 binop(Iop_64HLtoV128,
13383 dis_PABS_helper( mkexpr(sHi), laneszB ),
13384 dis_PABS_helper( mkexpr(sLo), laneszB )
13387 goto decode_success;
13390 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
13391 if (haveNo66noF2noF3(pfx) && sz == 4
13392 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
13393 IRTemp sV = newTemp(Ity_I64);
13394 IRTemp dV = newTemp(Ity_I64);
13395 IRTemp res = newTemp(Ity_I64);
13399 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13401 if (epartIsReg(modrm)) {
13402 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13403 d64 = (Long)insn[3+1];
13405 DIP("palignr $%d,%s,%s\n", (Int)d64,
13406 nameMMXReg(eregLO3ofRM(modrm)),
13407 nameMMXReg(gregLO3ofRM(modrm)));
13409 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 );
13410 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13411 d64 = (Long)insn[3+alen];
/* Fixed: format string previously read "palignr $%d%s,%s\n" -- missing the
   comma between the immediate and the memory operand; the register form
   above prints "palignr $%d,%s,%s\n". */
13413 DIP("palignr $%d,%s,%s\n", (Int)d64,
13415 nameMMXReg(gregLO3ofRM(modrm)));
/* result selection by shift amount: 0 passes sV through, 1..7 funnel-shift
   across the sV:dV pair, 8 passes dV, 9..15 shift dV right, >=16 is zero */
13419 assign( res, mkexpr(sV) );
13421 else if (d64 >= 1 && d64 <= 7) {
13424 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
13425 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
13428 else if (d64 == 8) {
13429 assign( res, mkexpr(dV) );
13431 else if (d64 >= 9 && d64 <= 15) {
13432 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
13434 else if (d64 >= 16 && d64 <= 255) {
13435 assign( res, mkU64(0) );
13440 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
13441 goto decode_success;
13444 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
13445 if (have66noF2noF3(pfx)
13446 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13447 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
13448 IRTemp sV = newTemp(Ity_V128);
13449 IRTemp dV = newTemp(Ity_V128);
13450 IRTemp sHi = newTemp(Ity_I64);
13451 IRTemp sLo = newTemp(Ity_I64);
13452 IRTemp dHi = newTemp(Ity_I64);
13453 IRTemp dLo = newTemp(Ity_I64);
13454 IRTemp rHi = newTemp(Ity_I64);
13455 IRTemp rLo = newTemp(Ity_I64);
13458 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13460 if (epartIsReg(modrm)) {
13461 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13462 d64 = (Long)insn[3+1];
13464 DIP("palignr $%d,%s,%s\n", (Int)d64,
13465 nameXMMReg(eregOfRexRM(pfx,modrm)),
13466 nameXMMReg(gregOfRexRM(pfx,modrm)));
13468 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 );
13469 gen_SEGV_if_not_16_aligned( addr );
13470 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13471 d64 = (Long)insn[3+alen];
13473 DIP("palignr $%d,%s,%s\n", (Int)d64,
13475 nameXMMReg(gregOfRexRM(pfx,modrm)));
/* treat the 128-bit pair dV:sV as four 64-bit chunks dHi:dLo:sHi:sLo and
   build each result half by case analysis on the byte shift amount;
   dis_PALIGNR_XMM_helper does the 1..7-byte funnel shift across a pair */
13478 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13479 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13480 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13481 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13484 assign( rHi, mkexpr(sHi) );
13485 assign( rLo, mkexpr(sLo) );
13487 else if (d64 >= 1 && d64 <= 7) {
13488 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d64) );
13489 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d64) );
13491 else if (d64 == 8) {
13492 assign( rHi, mkexpr(dLo) );
13493 assign( rLo, mkexpr(sHi) );
13495 else if (d64 >= 9 && d64 <= 15) {
13496 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d64-8) );
13497 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d64-8) );
13499 else if (d64 == 16) {
13500 assign( rHi, mkexpr(dHi) );
13501 assign( rLo, mkexpr(dLo) );
13503 else if (d64 >= 17 && d64 <= 23) {
13504 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-16))) );
13505 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d64-16) );
13507 else if (d64 == 24) {
13508 assign( rHi, mkU64(0) );
13509 assign( rLo, mkexpr(dHi) );
13511 else if (d64 >= 25 && d64 <= 31) {
13512 assign( rHi, mkU64(0) );
13513 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-24))) );
13515 else if (d64 >= 32 && d64 <= 255) {
13516 assign( rHi, mkU64(0) );
13517 assign( rLo, mkU64(0) );
13523 gregOfRexRM(pfx,modrm),
13524 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
13526 goto decode_success;
13529 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
13530 if (haveNo66noF2noF3(pfx)
13532 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
13533 IRTemp sV = newTemp(Ity_I64);
13534 IRTemp dV = newTemp(Ity_I64);
13538 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13540 if (epartIsReg(modrm)) {
13541 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13543 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13544 nameMMXReg(gregLO3ofRM(modrm)));
13546 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13547 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13549 DIP("pshufb %s,%s\n", dis_buf,
13550 nameMMXReg(gregLO3ofRM(modrm)));
/* shuffle dV's bytes using the low 3 bits of each sV byte as the lane
   index; a set top bit in an sV byte forces the result byte to zero */
13554 gregLO3ofRM(modrm),
13557 /* permute the lanes */
13561 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
13563 /* mask off lanes which have (index & 0x80) == 0x80 */
13564 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
13567 goto decode_success;
13570 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
13571 if (have66noF2noF3(pfx)
13572 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13573 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
13574 IRTemp sV = newTemp(Ity_V128);
13575 IRTemp dV = newTemp(Ity_V128);
13576 IRTemp sHi = newTemp(Ity_I64);
13577 IRTemp sLo = newTemp(Ity_I64);
13578 IRTemp dHi = newTemp(Ity_I64);
13579 IRTemp dLo = newTemp(Ity_I64);
13580 IRTemp rHi = newTemp(Ity_I64);
13581 IRTemp rLo = newTemp(Ity_I64);
13582 IRTemp sevens = newTemp(Ity_I64);
13583 IRTemp mask0x80hi = newTemp(Ity_I64);
13584 IRTemp mask0x80lo = newTemp(Ity_I64);
13585 IRTemp maskBit3hi = newTemp(Ity_I64);
13586 IRTemp maskBit3lo = newTemp(Ity_I64);
13587 IRTemp sAnd7hi = newTemp(Ity_I64);
13588 IRTemp sAnd7lo = newTemp(Ity_I64);
13589 IRTemp permdHi = newTemp(Ity_I64);
13590 IRTemp permdLo = newTemp(Ity_I64);
13593 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13595 if (epartIsReg(modrm)) {
13596 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13598 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13599 nameXMMReg(gregOfRexRM(pfx,modrm)));
13601 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13602 gen_SEGV_if_not_16_aligned( addr );
13603 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13605 DIP("pshufb %s,%s\n", dis_buf,
13606 nameXMMReg(gregOfRexRM(pfx,modrm)));
/* 128-bit shuffle synthesised from 64-bit Perm8x8 ops: each index byte
   selects across the full dHi:dLo pair, so bit 3 of the index chooses
   which Perm8x8 result (high or low half of dV) supplies the byte */
13609 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13610 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13611 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13612 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13614 assign( sevens, mkU64(0x0707070707070707ULL) );
13617 mask0x80hi = Not(SarN8x8(sHi,7))
13618 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
13619 sAnd7hi = And(sHi,sevens)
13620 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
13621 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
13622 rHi = And(permdHi,mask0x80hi)
13626 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
13631 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
13634 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
13641 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
13642 mkexpr(maskBit3hi)),
13644 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
13645 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
13647 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
13649 /* And the same for the lower half of the result. What fun. */
13653 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
13658 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
13661 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
13668 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
13669 mkexpr(maskBit3lo)),
13671 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
13672 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
13674 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
13677 gregOfRexRM(pfx,modrm),
13678 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
13680 goto decode_success;
13683 /* ---------------------------------------------------- */
13684 /* --- end of the SSSE3 decoder. --- */
13685 /* ---------------------------------------------------- */
13687 /* ---------------------------------------------------- */
13688 /* --- start of the SSE4 decoder --- */
13689 /* ---------------------------------------------------- */
13691 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
13692 Blend Packed Double Precision Floating-Point Values (XMM) */
13693 if ( have66noF2noF3( pfx )
13695 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0D ) {
13698 UShort imm8_mask_16;
13700 IRTemp dst_vec = newTemp(Ity_V128);
13701 IRTemp src_vec = newTemp(Ity_V128);
13702 IRTemp imm8_mask = newTemp(Ity_V128);
13705 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
13707 if ( epartIsReg( modrm ) ) {
13708 imm8 = (Int)insn[4];
13709 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
13711 DIP( "blendpd $%d, %s,%s\n", imm8,
13712 nameXMMReg( eregOfRexRM(pfx, modrm) ),
13713 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
13715 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
13716 1/* imm8 is 1 byte after the amode */ );
13717 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
13718 imm8 = (Int)insn[2+alen+1];
13720 DIP( "blendpd $%d, %s,%s\n",
13721 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* expand the two imm8 select bits into a per-64-bit-lane byte mask
   (mkV128 takes one mask bit per byte of the vector) */
13724 switch( imm8 & 3 ) {
13725 case 0: imm8_mask_16 = 0x0000; break;
13726 case 1: imm8_mask_16 = 0x00FF; break;
13727 case 2: imm8_mask_16 = 0xFF00; break;
13728 case 3: imm8_mask_16 = 0xFFFF; break;
13729 default: vassert(0); break;
13731 assign( imm8_mask, mkV128( imm8_mask_16 ) );
/* result = (src & mask) | (dst & ~mask) */
13733 putXMMReg( gregOfRexRM(pfx, modrm),
13735 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ),
13736 binop( Iop_AndV128, mkexpr(dst_vec),
13737 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
13739 goto decode_success;
13743 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
13744 Blend Packed Single Precision Floating-Point Values (XMM) */
13745 if ( have66noF2noF3( pfx )
13747 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0C ) {
13750 IRTemp dst_vec = newTemp(Ity_V128);
13751 IRTemp src_vec = newTemp(Ity_V128);
13755 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
13757 if ( epartIsReg( modrm ) ) {
13758 imm8 = (Int)insn[3+1];
13759 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
13761 DIP( "blendps $%d, %s,%s\n", imm8,
13762 nameXMMReg( eregOfRexRM(pfx, modrm) ),
13763 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
13765 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
13766 1/* imm8 is 1 byte after the amode */ );
13767 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
13768 imm8 = (Int)insn[3+alen];
/* Fixed: the memory-operand trace previously printed "blendpd" -- a
   copy-paste from the BLENDPD decoder above; this is BLENDPS. */
13770 DIP( "blendps $%d, %s,%s\n",
13771 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* table expands the four imm8 select bits into a per-byte lane mask;
   result = (src & mask) | (dst & ~mask) */
13774 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 0x0F0F,
13775 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 0xF0F0, 0xF0FF,
13776 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF };
13777 IRTemp imm8_mask = newTemp(Ity_V128);
13778 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
13780 putXMMReg( gregOfRexRM(pfx, modrm),
13782 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ),
13783 binop( Iop_AndV128, mkexpr(dst_vec),
13784 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
13786 goto decode_success;
13790 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
13791 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
13792 if ( have66noF2noF3( pfx )
13794 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x41 ) {
13797 IRTemp src_vec = newTemp(Ity_V128);
13798 IRTemp dst_vec = newTemp(Ity_V128);
13799 IRTemp and_vec = newTemp(Ity_V128);
13800 IRTemp sum_vec = newTemp(Ity_V128);
13804 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
13806 if ( epartIsReg( modrm ) ) {
13807 imm8 = (Int)insn[4];
13808 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
13810 DIP( "dppd $%d, %s,%s\n", imm8,
13811 nameXMMReg( eregOfRexRM(pfx, modrm) ),
13812 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
13814 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
13815 1/* imm8 is 1 byte after the amode */ );
13816 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
13817 imm8 = (Int)insn[2+alen+1];
13819 DIP( "dppd $%d, %s,%s\n",
13820 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* imm8[5:4] masks which products enter the sum; imm8[1:0] masks which
   result lanes receive the broadcast sum */
13823 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
13825 assign( and_vec, binop( Iop_AndV128,
13826 binop( Iop_Mul64Fx2,
13827 mkexpr(dst_vec), mkexpr(src_vec) ),
13828 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
/* horizontal add of the two masked products via interleave + scalar add */
13830 assign( sum_vec, binop( Iop_Add64F0x2,
13831 binop( Iop_InterleaveHI64x2,
13832 mkexpr(and_vec), mkexpr(and_vec) ),
13833 binop( Iop_InterleaveLO64x2,
13834 mkexpr(and_vec), mkexpr(and_vec) ) ) );
13836 putXMMReg( gregOfRexRM( pfx, modrm ),
13837 binop( Iop_AndV128,
13838 binop( Iop_InterleaveLO64x2,
13839 mkexpr(sum_vec), mkexpr(sum_vec) ),
13840 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
13842 goto decode_success;
13846 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
13847 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
13848 if ( have66noF2noF3( pfx )
13852 && insn[2] == 0x40 ) {
13855 IRTemp xmm1_vec = newTemp(Ity_V128);
13856 IRTemp xmm2_vec = newTemp(Ity_V128);
13857 IRTemp tmp_prod_vec = newTemp(Ity_V128);
13858 IRTemp prod_vec = newTemp(Ity_V128);
13859 IRTemp sum_vec = newTemp(Ity_V128);
13860 IRTemp v3, v2, v1, v0;
13861 v3 = v2 = v1 = v0 = IRTemp_INVALID;
13865 assign( xmm1_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
13867 if ( epartIsReg( modrm ) ) {
13868 imm8 = (Int)insn[4];
13869 assign( xmm2_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
13871 DIP( "dpps $%d, %s,%s\n", imm8,
13872 nameXMMReg( eregOfRexRM(pfx, modrm) ),
13873 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
13875 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
13876 1/* imm8 is 1 byte after the amode */ );
13877 assign( xmm2_vec, loadLE( Ity_V128, mkexpr(addr) ) );
13878 imm8 = (Int)insn[2+alen+1];
13880 DIP( "dpps $%d, %s,%s\n",
13881 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* imm8[7:4] masks the products, imm8[3:0] masks the result lanes */
13884 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
13885 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
13886 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF };
13888 assign( tmp_prod_vec,
13889 binop( Iop_AndV128,
13890 binop( Iop_Mul32Fx4, mkexpr(xmm1_vec), mkexpr(xmm2_vec) ),
13891 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
/* NOTE(review): the middle lanes v2/v1 are deliberately swapped here so
   that the two interleave+add rounds below sum all four products into
   every lane -- presumably intentional; do not "fix" without testing */
13892 breakup128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
13893 assign( prod_vec, mk128from32s( v3, v1, v2, v0 ) );
13895 assign( sum_vec, binop( Iop_Add32Fx4,
13896 binop( Iop_InterleaveHI32x4,
13897 mkexpr(prod_vec), mkexpr(prod_vec) ),
13898 binop( Iop_InterleaveLO32x4,
13899 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
13901 putXMMReg( gregOfRexRM(pfx, modrm),
13902 binop( Iop_AndV128,
13903 binop( Iop_Add32Fx4,
13904 binop( Iop_InterleaveHI32x4,
13905 mkexpr(sum_vec), mkexpr(sum_vec) ),
13906 binop( Iop_InterleaveLO32x4,
13907 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
13908 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
13910 goto decode_success;
13914 /* 66 0F 3A 21 /r ib = INSERTPS xmm1, xmm2/m32, imm8
13915 Insert Packed Single Precision Floating-Point Value (XMM) */
13916 if ( have66noF2noF3( pfx )
13918 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x21 ) {
13924 IRTemp dstVec = newTemp(Ity_V128);
13925 IRTemp srcDWord = newTemp(Ity_I32);
13929 assign( dstVec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
13931 if ( epartIsReg( modrm ) ) {
13932 IRTemp src_vec = newTemp(Ity_V128);
13933 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
13935 IRTemp src_lane_0 = IRTemp_INVALID;
13936 IRTemp src_lane_1 = IRTemp_INVALID;
13937 IRTemp src_lane_2 = IRTemp_INVALID;
13938 IRTemp src_lane_3 = IRTemp_INVALID;
13939 breakup128to32s( src_vec,
13940 &src_lane_3, &src_lane_2, &src_lane_1, &src_lane_0 );
/* register form: imm8[7:6] (COUNT_S) selects which source lane to insert */
13942 imm8 = (Int)insn[4];
13943 imm8_count_s = ((imm8 >> 6) & 3);
13944 switch( imm8_count_s ) {
13945 case 0: assign( srcDWord, mkexpr(src_lane_0) ); break;
13946 case 1: assign( srcDWord, mkexpr(src_lane_1) ); break;
13947 case 2: assign( srcDWord, mkexpr(src_lane_2) ); break;
13948 case 3: assign( srcDWord, mkexpr(src_lane_3) ); break;
13949 default: vassert(0); break;
13953 DIP( "insertps $%d, %s,%s\n", imm8,
13954 nameXMMReg( eregOfRexRM(pfx, modrm) ),
13955 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
13957 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
13958 1/* const imm8 is 1 byte after the amode */ );
13959 assign( srcDWord, loadLE( Ity_I32, mkexpr(addr) ) );
13960 imm8 = (Int)insn[2+alen+1];
13963 DIP( "insertps $%d, %s,%s\n",
13964 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* imm8[5:4] (COUNT_D) picks the destination lane; note these are IRTemp
   handle reassignments, not IR-level assigns */
13967 IRTemp dst_lane_0 = IRTemp_INVALID;
13968 IRTemp dst_lane_1 = IRTemp_INVALID;
13969 IRTemp dst_lane_2 = IRTemp_INVALID;
13970 IRTemp dst_lane_3 = IRTemp_INVALID;
13971 breakup128to32s( dstVec,
13972 &dst_lane_3, &dst_lane_2, &dst_lane_1, &dst_lane_0 );
13974 imm8_count_d = ((imm8 >> 4) & 3);
13975 switch( imm8_count_d ) {
13976 case 0: dst_lane_0 = srcDWord; break;
13977 case 1: dst_lane_1 = srcDWord; break;
13978 case 2: dst_lane_2 = srcDWord; break;
13979 case 3: dst_lane_3 = srcDWord; break;
13980 default: vassert(0); break;
/* imm8[3:0] (ZMASK) zeroes the selected result lanes */
13983 imm8_zmask = (imm8 & 15);
13984 IRTemp zero_32 = newTemp(Ity_I32);
13985 assign( zero_32, mkU32(0) );
13987 IRExpr* ire_vec_128 = mk128from32s(
13988 ((imm8_zmask & 8) == 8) ? zero_32 : dst_lane_3,
13989 ((imm8_zmask & 4) == 4) ? zero_32 : dst_lane_2,
13990 ((imm8_zmask & 2) == 2) ? zero_32 : dst_lane_1,
13991 ((imm8_zmask & 1) == 1) ? zero_32 : dst_lane_0 );
13993 putXMMReg( gregOfRexRM(pfx, modrm), ire_vec_128 );
13995 goto decode_success;
13999 /* 66 0F 3A 14 /r ib = PEXTRB r/m8, xmm, imm8
14000 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg. (XMM) */
14001 if ( have66noF2noF3( pfx )
14003 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x14 ) {
14006 IRTemp xmm_vec = newTemp(Ity_V128);
14007 IRTemp sel_lane = newTemp(Ity_I32);
14008 IRTemp shr_lane = newTemp(Ity_I32);
14011 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
14012 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
14014 if ( epartIsReg( modrm ) ) {
14015 imm8 = (Int)insn[3+1];
14017 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14018 imm8 = (Int)insn[3+alen];
/* imm8[3:2] picks the 32-bit lane, imm8[1:0] the byte within it */
14020 switch( (imm8 >> 2) & 3 ) {
14021 case 0: assign( sel_lane, mkexpr(t0) ); break;
14022 case 1: assign( sel_lane, mkexpr(t1) ); break;
14023 case 2: assign( sel_lane, mkexpr(t2) ); break;
14024 case 3: assign( sel_lane, mkexpr(t3) ); break;
14025 default: vassert(0);
14028 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
14030 if ( epartIsReg( modrm ) ) {
14031 putIReg64( eregOfRexRM(pfx,modrm),
14033 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
14036 DIP( "pextrb $%d, %s,%s\n", imm8,
14037 nameXMMReg( gregOfRexRM(pfx, modrm) ),
14038 nameIReg64( eregOfRexRM(pfx, modrm) ) );
14040 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
/* Fixed: trace previously printed "$%d, pextrb %s,%s" with the mnemonic
   after the immediate; reordered to match the register form above.
   (Header comment also corrected r/m16 -> r/m8: the store is Iop_32to8.) */
14042 DIP( "pextrb $%d, %s,%s\n",
14043 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
14046 goto decode_success;
14050 /* 66 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
14051 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
14052 Note that this insn has the same opcodes as PEXTRQ, but
14053 here the REX.W bit is _not_ present */
14054 if ( have66noF2noF3( pfx )
14055 && sz == 2 /* REX.W is _not_ present */
14056 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) {
14059 IRTemp xmm_vec = newTemp(Ity_V128);
14060 IRTemp src_dword = newTemp(Ity_I32);
14063 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
14064 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
/* only imm8[1:0] matters: it selects one of the four 32-bit lanes */
14066 if ( epartIsReg( modrm ) ) {
14067 imm8_10 = (Int)(insn[3+1] & 3);
14069 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14070 imm8_10 = (Int)(insn[3+alen] & 3);
14073 switch ( imm8_10 ) {
14074 case 0: assign( src_dword, mkexpr(t0) ); break;
14075 case 1: assign( src_dword, mkexpr(t1) ); break;
14076 case 2: assign( src_dword, mkexpr(t2) ); break;
14077 case 3: assign( src_dword, mkexpr(t3) ); break;
14078 default: vassert(0);
14081 if ( epartIsReg( modrm ) ) {
14082 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
14084 DIP( "pextrd $%d, %s,%s\n", imm8_10,
14085 nameXMMReg( gregOfRexRM(pfx, modrm) ),
14086 nameIReg32( eregOfRexRM(pfx, modrm) ) );
14088 storeLE( mkexpr(addr), mkexpr(src_dword) );
14090 DIP( "pextrd $%d, %s,%s\n",
14091 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
14094 goto decode_success;
14098 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
14099 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
14100 Note that this insn has the same opcodes as PEXTRD, but
14101 here the REX.W bit is present */
14102 if ( have66noF2noF3( pfx )
14103 && sz == 8 /* REX.W is present */
14104 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) {
14107 IRTemp xmm_vec = newTemp(Ity_V128);
14108 IRTemp src_qword = newTemp(Ity_I64);
14111 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
/* only imm8 bit 0 matters: it selects the low or high 64-bit half */
14113 if ( epartIsReg( modrm ) ) {
14114 imm8_0 = (Int)(insn[3+1] & 1);
14116 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14117 imm8_0 = (Int)(insn[3+alen] & 1);
14119 switch ( imm8_0 ) {
14120 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) ); break;
14121 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) ); break;
14122 default: vassert(0);
14125 if ( epartIsReg( modrm ) ) {
14126 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
14128 DIP( "pextrq $%d, %s,%s\n", imm8_0,
14129 nameXMMReg( gregOfRexRM(pfx, modrm) ),
14130 nameIReg64( eregOfRexRM(pfx, modrm) ) );
14132 storeLE( mkexpr(addr), mkexpr(src_qword) );
14134 DIP( "pextrq $%d, %s,%s\n",
14135 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
14138 goto decode_success;
14142 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
14143 Extract Word from xmm, store in mem or zero-extend + store in gen.reg. (XMM) */
14144 if ( have66noF2noF3( pfx )
14146 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x15 ) {
14149 IRTemp xmm_vec = newTemp(Ity_V128);
14150 IRTemp src_word = newTemp(Ity_I16);
14153 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
14154 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
/* imm8[2:0] selects one of the eight 16-bit lanes: even indices take the
   low half of a 32-bit lane, odd indices the high half */
14156 if ( epartIsReg( modrm ) ) {
14157 imm8_20 = (Int)(insn[3+1] & 7);
14159 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14160 imm8_20 = (Int)(insn[3+alen] & 7);
14163 switch ( imm8_20 ) {
14164 case 0: assign( src_word, unop(Iop_32to16, mkexpr(t0)) ); break;
14165 case 1: assign( src_word, unop(Iop_32HIto16, mkexpr(t0)) ); break;
14166 case 2: assign( src_word, unop(Iop_32to16, mkexpr(t1)) ); break;
14167 case 3: assign( src_word, unop(Iop_32HIto16, mkexpr(t1)) ); break;
14168 case 4: assign( src_word, unop(Iop_32to16, mkexpr(t2)) ); break;
14169 case 5: assign( src_word, unop(Iop_32HIto16, mkexpr(t2)) ); break;
14170 case 6: assign( src_word, unop(Iop_32to16, mkexpr(t3)) ); break;
14171 case 7: assign( src_word, unop(Iop_32HIto16, mkexpr(t3)) ); break;
14172 default: vassert(0);
/* register destination gets the word zero-extended to 64 bits; memory
   destination gets a plain 16-bit store */
14175 if ( epartIsReg( modrm ) ) {
14176 putIReg64( eregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(src_word)) );
14178 DIP( "pextrw $%d, %s,%s\n", imm8_20,
14179 nameXMMReg( gregOfRexRM(pfx, modrm) ),
14180 nameIReg64( eregOfRexRM(pfx, modrm) ) );
14182 storeLE( mkexpr(addr), mkexpr(src_word) );
14184 DIP( "pextrw $%d, %s,%s\n",
14185 imm8_20, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
14188 goto decode_success;
14192 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
14193 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
14194 if ( have66noF2noF3( pfx )
14195 && sz == 8 /* REX.W is present */
14196 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x22 ) {
14199 IRTemp src_elems = newTemp(Ity_I64);
14200 IRTemp src_vec = newTemp(Ity_V128);
14204 if ( epartIsReg( modrm ) ) {
14205 imm8_0 = (Int)(insn[3+1] & 1);
14206 assign( src_elems, getIReg64( eregOfRexRM(pfx,modrm) ) );
14208 DIP( "pinsrq $%d, %s,%s\n", imm8_0,
14209 nameIReg64( eregOfRexRM(pfx, modrm) ),
14210 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14212 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14213 imm8_0 = (Int)(insn[3+alen] & 1);
14214 assign( src_elems, loadLE( Ity_I64, mkexpr(addr) ) );
14216 DIP( "pinsrq $%d, %s,%s\n",
14217 imm8_0, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* place the source qword in the selected half (other half zero), then
   merge with the untouched half of the destination via mask + OR */
14221 if ( imm8_0 == 0 ) {
14223 assign( src_vec, binop( Iop_64HLtoV128, mkU64(0), mkexpr(src_elems) ) );
14226 assign( src_vec, binop( Iop_64HLtoV128, mkexpr(src_elems), mkU64(0) ) );
14229 putXMMReg( gregOfRexRM(pfx, modrm),
14230 binop( Iop_OrV128, mkexpr(src_vec),
14231 binop( Iop_AndV128,
14232 getXMMReg( gregOfRexRM(pfx, modrm) ),
14233 mkV128(mask) ) ) );
14235 goto decode_success;
14239 /* 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
14240 Maximum of Packed Signed Double Word Integers (XMM)
14242 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
14243 Minimum of Packed Signed Double Word Integers (XMM) */
14244 if ( have66noF2noF3( pfx )
14246 && insn[0] == 0x0F && insn[1] == 0x38
14247 && ( (insn[2] == 0x3D) || (insn[2] == 0x39) ) ) {
14249 IRTemp reg_vec = newTemp(Ity_V128);
14250 IRTemp rom_vec = newTemp(Ity_V128);
14251 IRTemp mask_vec = newTemp(Ity_V128);
14253 Bool isPMAX = (insn[2] == 0x3D) ? True : False;
14255 HChar* str = isPMAX ? "pmaxsd" : "pminsd";
14258 assign( reg_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
14260 if ( epartIsReg( modrm ) ) {
14261 assign( rom_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14263 DIP( "%s %s,%s\n", str,
14264 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14265 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14267 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14268 assign( rom_vec, loadLE( Ity_V128, mkexpr(addr) ) );
14270 DIP( "%s %s,%s\n", str, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* signed per-lane compare gives an all-ones/all-zeroes mask; min/max is
   then a lane-wise select between the two operands */
14273 assign( mask_vec, binop( Iop_CmpGT32Sx4, mkexpr(reg_vec), mkexpr(rom_vec) ) );
14275 IRTemp max_min_vec = newTemp(Ity_V128);
14277 assign( max_min_vec,
14279 binop( Iop_AndV128, mkexpr(rom_vec),
14280 unop( Iop_NotV128, mkexpr(mask_vec) ) ),
14281 binop( Iop_AndV128, mkexpr(reg_vec), mkexpr(mask_vec) ) ) );
14283 assign( max_min_vec,
14285 binop( Iop_AndV128, mkexpr(reg_vec),
14286 unop( Iop_NotV128, mkexpr(mask_vec) ) ),
14287 binop( Iop_AndV128, mkexpr(rom_vec), mkexpr(mask_vec) ) ) );
14290 putXMMReg( gregOfRexRM(pfx, modrm), mkexpr(max_min_vec) );
14292 goto decode_success;
14296 /* 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
14297 Maximum of Packed Unsigned Doubleword Integers (XMM) */
14298 if ( have66noF2noF3( pfx )
14300 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x3F ) {
14302 IRTemp reg_vec = newTemp(Ity_V128);
14303 IRTemp rom_vec = newTemp(Ity_V128);
14304 IRTemp mask_vec = newTemp(Ity_V128);
14305 IRTemp and_vec = newTemp(Ity_V128);
14306 IRTemp not_vec = newTemp(Ity_V128);
14309 assign( reg_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
14311 if ( epartIsReg( modrm ) ) {
14312 assign( rom_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14314 DIP( "pmaxud %s,%s\n",
14315 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14316 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14318 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14319 assign( rom_vec, loadLE( Ity_V128, mkexpr(addr) ) );
14321 DIP( "pmaxud %s,%s\n", dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14324 /* the foll. simulates Iop_CmpGT32Ux4 (not implemented)
14325 c.f. Hacker's Delight, S2-11, p.23 */
/* unsigned x > y  ==  signed (x) > (y) XOR sign(x) XOR sign(y) */
14327 binop( Iop_XorV128,
14328 binop( Iop_XorV128,
14329 binop( Iop_CmpGT32Sx4, mkexpr(reg_vec), mkexpr(rom_vec) ),
14330 binop( Iop_SarN32x4, mkexpr(reg_vec), mkU8(31) ) ),
14331 binop( Iop_SarN32x4, mkexpr(rom_vec), mkU8(31) ) ) );
/* select the larger lane from each operand using the comparison mask */
14333 assign( and_vec, binop( Iop_AndV128, mkexpr(reg_vec), mkexpr(mask_vec) ) );
14334 assign( not_vec, binop( Iop_AndV128, mkexpr(rom_vec),
14335 unop( Iop_NotV128, mkexpr(mask_vec) ) ) );
14337 putXMMReg( gregOfRexRM(pfx, modrm),
14338 binop( Iop_OrV128, mkexpr(not_vec), mkexpr(and_vec) ) );
14340 goto decode_success;
14344 /* 66 0f 38 20 /r = PMOVSXBW xmm1, xmm2/m64
14345 Packed Move with Sign Extend from Byte to Word (XMM) */
14346 if ( have66noF2noF3( pfx )
14348 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x20 ) {
14352 IRTemp srcVec = newTemp(Ity_V128);
14354 if ( epartIsReg( modrm ) ) {
14355 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14357 DIP( "pmovsxbw %s,%s\n",
14358 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14359 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14361 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14363 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
14365 DIP( "pmovsxbw %s,%s\n",
14366 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* widen bytes to words by interleaving into the high byte of each word,
   then arithmetic-shift each word right to sign-extend */
14369 putXMMReg( gregOfRexRM(pfx, modrm),
14370 binop( Iop_SarN16x8,
14371 binop( Iop_ShlN16x8,
14372 binop( Iop_InterleaveLO8x16,
14373 IRExpr_Const( IRConst_V128(0) ),
14378 goto decode_success;
14382 /* 66 0f 38 21 /r = PMOVSXBD xmm1, xmm2/m32
14383 Packed Move with Sign Extend from Byte to DWord (XMM) */
14384 if ( have66noF2noF3( pfx )
14386 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x21 ) {
14390 IRTemp srcVec = newTemp(Ity_V128);
14392 if ( epartIsReg( modrm ) ) {
14393 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14395 DIP( "pmovsxbd %s,%s\n",
14396 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14397 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14399 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14401 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
14403 DIP( "pmovsxbd %s,%s\n",
14404 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* two interleaves place each source byte in the top byte of a 32-bit
   lane; shifting right by 24 (arithmetic) sign-extends it to a dword */
14407 IRTemp zeroVec = newTemp(Ity_V128);
14408 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
14410 putXMMReg( gregOfRexRM(pfx, modrm),
14411 binop( Iop_SarN32x4,
14412 binop( Iop_ShlN32x4,
14413 binop( Iop_InterleaveLO8x16,
14415 binop( Iop_InterleaveLO8x16,
14417 mkexpr(srcVec) ) ),
14418 mkU8(24) ), mkU8(24) ) );
14420 goto decode_success;
14424 /* 66 0f 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
14425 Packed Move with Sign Extend from Byte to QWord (XMM) */
14426 if ( have66noF2noF3(pfx)
14428 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x22 ) {
14432 IRTemp srcBytes = newTemp(Ity_I16);
14434 if ( epartIsReg(modrm) ) {
14435 assign( srcBytes, getXMMRegLane16( eregOfRexRM(pfx, modrm), 0 ) );
14437 DIP( "pmovsxbq %s,%s\n",
14438 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14439 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14441 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14442 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
14444 DIP( "pmovsxbq %s,%s\n",
14445 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* sign-extend the two source bytes independently to 64 bits and pack
   them into the destination's high and low qwords */
14448 putXMMReg( gregOfRexRM( pfx, modrm ),
14449 binop( Iop_64HLtoV128,
14452 mkexpr(srcBytes) ) ),
14454 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
14456 goto decode_success;
14460 /* 66 0f 38 23 /r = PMOVSXWD xmm1, xmm2/m64
14461 Packed Move with Sign Extend from Word to DWord (XMM) */
14462 if ( have66noF2noF3( pfx )
14464 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x23 ) {
14468 IRTemp srcVec = newTemp(Ity_V128);
14470 if ( epartIsReg(modrm) ) {
14471 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14473 DIP( "pmovsxwd %s,%s\n",
14474 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14475 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14477 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14479 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
14481 DIP( "pmovsxwd %s,%s\n",
14482 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* interleave words into the high half of each dword, then arithmetic
   right shift to sign-extend */
14485 putXMMReg( gregOfRexRM(pfx, modrm),
14486 binop( Iop_SarN32x4,
14487 binop( Iop_ShlN32x4,
14488 binop( Iop_InterleaveLO16x8,
14489 IRExpr_Const( IRConst_V128(0) ),
14494 goto decode_success;
14498 /* 66 0f 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
14499 Packed Move with Sign Extend from Word to QWord (XMM) */
14500 if ( have66noF2noF3( pfx )
14502 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x24 ) {
14506 IRTemp srcBytes = newTemp(Ity_I32);
14508 if ( epartIsReg( modrm ) ) {
14509 assign( srcBytes, getXMMRegLane32( eregOfRexRM(pfx, modrm), 0 ) );
14511 DIP( "pmovsxwq %s,%s\n",
14512 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14513 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14515 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14516 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
14518 DIP( "pmovsxwq %s,%s\n",
14519 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* sign-extend the two source words independently to 64 bits and pack
   them into the destination's high and low qwords */
14522 putXMMReg( gregOfRexRM( pfx, modrm ),
14523 binop( Iop_64HLtoV128,
14525 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
14527 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
14529 goto decode_success;
14533 /* 66 0f 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
14534 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
14535 if ( have66noF2noF3( pfx )
14537 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x25 ) {
14541 IRTemp srcBytes = newTemp(Ity_I64);
14543 if ( epartIsReg(modrm) ) {
14544 assign( srcBytes, getXMMRegLane64( eregOfRexRM(pfx, modrm), 0 ) );
14546 DIP( "pmovsxdq %s,%s\n",
14547 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14548 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14550 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14551 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
14553 DIP( "pmovsxdq %s,%s\n",
14554 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
/* sign-extend the two source dwords independently to 64 bits and pack
   them into the destination's high and low qwords */
14557 putXMMReg( gregOfRexRM(pfx, modrm),
14558 binop( Iop_64HLtoV128,
14560 unop( Iop_64HIto32, mkexpr(srcBytes) ) ),
14562 unop( Iop_64to32, mkexpr(srcBytes) ) ) ) );
14564 goto decode_success;
/* SSE4.1 PMOVZXBW: zero-extend the eight low bytes of the source to
   eight 16-bit lanes.  Zero extension falls out directly from
   interleaving the source's low bytes with a zero vector. */
14568    /* 66 0f 38 30 /r = PMOVZXBW xmm1, xmm2/m64
14569       Packed Move with Zero Extend from Byte to Word (XMM) */
14570    if ( have66noF2noF3(pfx)
14572         && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x30 ) {
14576       IRTemp srcVec = newTemp(Ity_V128);
14578       if ( epartIsReg(modrm) ) {
14579          assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14581          DIP( "pmovzxbw %s,%s\n",
14582               nameXMMReg( eregOfRexRM(pfx, modrm) ),
14583               nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14585          addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14587                  unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
14589          DIP( "pmovzxbw %s,%s\n",
14590               dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
   /* interleave-LO with zeroes: each byte becomes 0x00:byte, i.e. a
      zero-extended 16-bit lane */
14593       putXMMReg( gregOfRexRM(pfx, modrm),
14594                  binop( Iop_InterleaveLO8x16,
14595                         IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
14597       goto decode_success;
/* SSE4.1 PMOVZXBD: zero-extend the four low bytes of the source to
   four 32-bit lanes — two successive interleaves with zero, each
   doubling the lane width. */
14601    /* 66 0f 38 31 /r = PMOVZXBD xmm1, xmm2/m32
14602       Packed Move with Zero Extend from Byte to DWord (XMM) */
14603    if ( have66noF2noF3( pfx )
14605         && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x31 ) {
14609       IRTemp srcVec = newTemp(Ity_V128);
14611       if ( epartIsReg(modrm) ) {
14612          assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14614          DIP( "pmovzxbd %s,%s\n",
14615               nameXMMReg( eregOfRexRM(pfx, modrm) ),
14616               nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14618          addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14620                  unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
14622          DIP( "pmovzxbd %s,%s\n",
14623               dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
   /* zeroVec is named so it can feed both interleave stages */
14626       IRTemp zeroVec = newTemp(Ity_V128);
14627       assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
14629       putXMMReg( gregOfRexRM( pfx, modrm ),
14630                  binop( Iop_InterleaveLO8x16,
14632                         binop( Iop_InterleaveLO8x16,
14633                                mkexpr(zeroVec), mkexpr(srcVec) ) ) );
14635       goto decode_success;
/* SSE4.1 PMOVZXBQ: zero-extend the two low bytes of the source to two
   64-bit lanes — three successive interleaves with zero (byte -> word
   -> dword -> qword widening). */
14639    /* 66 0f 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
14640       Packed Move with Zero Extend from Byte to QWord (XMM) */
14641    if ( have66noF2noF3( pfx )
14643         && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x32 ) {
14647       IRTemp srcVec = newTemp(Ity_V128);
14649       if ( epartIsReg(modrm) ) {
14650          assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14652          DIP( "pmovzxbq %s,%s\n",
14653               nameXMMReg( eregOfRexRM(pfx, modrm) ),
14654               nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14656          addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
   /* memory form reads only 16 bits, widened to a V128 for the
      interleave network below */
14658                  unop( Iop_32UtoV128,
14659                        unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ) ) ) );
14661          DIP( "pmovzxbq %s,%s\n",
14662               dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14665       IRTemp zeroVec = newTemp(Ity_V128);
14666       assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
14668       putXMMReg( gregOfRexRM( pfx, modrm ),
14669                  binop( Iop_InterleaveLO8x16,
14671                         binop( Iop_InterleaveLO8x16,
14673                                binop( Iop_InterleaveLO8x16,
14674                                       mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
14676       goto decode_success;
/* SSE4.1 PMOVZXWD: zero-extend the four low 16-bit words of the source
   to four 32-bit lanes, via a single interleave-LO with zero. */
14680    /* 66 0f 38 33 /r = PMOVZXWD xmm1, xmm2/m64
14681       Packed Move with Zero Extend from Word to DWord (XMM) */
14682    if ( have66noF2noF3( pfx )
14684         && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x33 ) {
14688       IRTemp srcVec = newTemp(Ity_V128);
14690       if ( epartIsReg(modrm) ) {
14691          assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14693          DIP( "pmovzxwd %s,%s\n",
14694               nameXMMReg( eregOfRexRM(pfx, modrm) ),
14695               nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14697          addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14699                  unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
14701          DIP( "pmovzxwd %s,%s\n",
14702               dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14705       putXMMReg( gregOfRexRM(pfx, modrm),
14706                  binop( Iop_InterleaveLO16x8,
14707                         IRExpr_Const( IRConst_V128(0) ),
14708                         mkexpr(srcVec) ) );
14710       goto decode_success;
/* SSE4.1 PMOVZXWQ: zero-extend the two low 16-bit words of the source
   to two 64-bit lanes — two successive interleaves with zero. */
14714    /* 66 0f 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
14715       Packed Move with Zero Extend from Word to QWord (XMM) */
14716    if ( have66noF2noF3( pfx )
14718         && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x34 ) {
14722       IRTemp srcVec = newTemp(Ity_V128);
14724       if ( epartIsReg( modrm ) ) {
14725          assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14727          DIP( "pmovzxwq %s,%s\n",
14728               nameXMMReg( eregOfRexRM(pfx, modrm) ),
14729               nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14731          addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14733                  unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
14735          DIP( "pmovzxwq %s,%s\n",
14736               dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14739       IRTemp zeroVec = newTemp( Ity_V128 );
14740       assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
14742       putXMMReg( gregOfRexRM( pfx, modrm ),
14743                  binop( Iop_InterleaveLO16x8,
14745                         binop( Iop_InterleaveLO16x8,
14746                                mkexpr(zeroVec), mkexpr(srcVec) ) ) );
14748       goto decode_success;
/* SSE4.1 PMOVZXDQ: zero-extend the two low 32-bit dwords of the source
   to two 64-bit lanes, via a single interleave-LO with zero. */
14752    /* 66 0f 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
14753       Packed Move with Zero Extend from DWord to QWord (XMM) */
14754    if ( have66noF2noF3( pfx )
14756         && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x35 ) {
14760       IRTemp srcVec = newTemp(Ity_V128);
14762       if ( epartIsReg(modrm) ) {
14763          assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14765          DIP( "pmovzxdq %s,%s\n",
14766               nameXMMReg( eregOfRexRM(pfx, modrm) ),
14767               nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14769          addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14771                  unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
14773          DIP( "pmovzxdq %s,%s\n",
14774               dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14777       putXMMReg( gregOfRexRM(pfx, modrm),
14778                  binop( Iop_InterleaveLO32x4,
14779                         IRExpr_Const( IRConst_V128(0) ),
14780                         mkexpr(srcVec) ) );
14782       goto decode_success;
14786    /* ---------------------------------------------------- */
14787    /* --- end of the SSE4 decoder                      --- */
14788    /* ---------------------------------------------------- */
14790    /*after_sse_decoders:*/
   /* Fall through to the main one-byte-opcode switch.  From here on,
      `opc` is the primary opcode byte and `delta` points past it. */
14792    /* Get the primary opcode. */
14793    opc = getUChar(delta); delta++;
14795    /* We get here if the current insn isn't SSE, or this CPU doesn't
14800    /* ------------------------ Control flow --------------- */
   /* RET imm16: pop return address and additionally pop imm16 bytes of
      stack args.  The actual RSP/RIP updates are on lines not visible
      in this chunk. */
14802    case 0xC2: /* RET imm16 */
14803       if (have66orF2orF3(pfx)) goto decode_failure;
14804       d64 = getUDisp16(delta);
14807       dres.whatNext = Dis_StopHere;
14808       DIP("ret %lld\n", d64);
14811    case 0xC3: /* RET */
14812       if (have66orF2(pfx)) goto decode_failure;
14813       /* F3 is acceptable on AMD. */
14815       dres.whatNext = Dis_StopHere;
14816       DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
   /* CALL rel32: push the return address, emit an ABI hint for the
      red zone, then either resteer (chase into the callee) or end the
      block with a call jump. */
14819    case 0xE8: /* CALL J4 */
14820       if (haveF2orF3(pfx)) goto decode_failure;
14821       d64 = getSDisp32(delta); delta += 4;
14822       d64 += (guest_RIP_bbstart+delta);
14823       /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
14824       t1 = newTemp(Ity_I64);
14825       assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
14826       putIReg64(R_RSP, mkexpr(t1));
14827       storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
14828       t2 = newTemp(Ity_I64);
14829       assign(t2, mkU64((Addr64)d64));
14830       make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
14831       if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
14832          /* follow into the call target. */
14833          dres.whatNext = Dis_ResteerU;
14834          dres.continueAt = d64;
14836          jmp_lit(Ijk_Call,d64);
14837          dres.whatNext = Dis_StopHere;
14839       DIP("call 0x%llx\n",d64);
14842 //.. //-- case 0xC8: /* ENTER */
14843 //.. //-- d32 = getUDisp16(eip); eip += 2;
14844 //.. //-- abyte = getUChar(delta); delta++;
14846 //.. //-- vg_assert(sz == 4);
14847 //.. //-- vg_assert(abyte == 0);
14849 //.. //-- t1 = newTemp(cb); t2 = newTemp(cb);
14850 //.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
14851 //.. //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
14852 //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
14853 //.. //-- uLiteral(cb, sz);
14854 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
14855 //.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
14856 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
14857 //.. //-- if (d32) {
14858 //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
14859 //.. //-- uLiteral(cb, d32);
14860 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
14862 //.. //-- DIP("enter 0x%x, 0x%x", d32, abyte);
   /* LEAVE: RSP := RBP, then pop RBP.  Always 64-bit in long mode. */
14865    case 0xC9: /* LEAVE */
14866       /* In 64-bit mode this defaults to a 64-bit operand size.  There
14867          is no way to encode a 32-bit variant.  Hence sz==4 but we do
14870          goto decode_failure;
14871       t1 = newTemp(Ity_I64);
14872       t2 = newTemp(Ity_I64);
14873       assign(t1, getIReg64(R_RBP));
14874       /* First PUT RSP looks redundant, but need it because RSP must
14875          always be up-to-date for Memcheck to work... */
14876       putIReg64(R_RSP, mkexpr(t1));
14877       assign(t2, loadLE(Ity_I64,mkexpr(t1)));
14878       putIReg64(R_RBP, mkexpr(t2));
14879       putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
14883 //.. //-- /* ---------------- Misc weird-ass insns --------------- */
14885 //.. //-- case 0x27: /* DAA */
14886 //.. //-- case 0x2F: /* DAS */
14887 //.. //-- t1 = newTemp(cb);
14888 //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t1);
14889 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */
14890 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
14891 //.. //-- uWiden(cb, 1, False);
14892 //.. //-- uInstr0(cb, CALLM_S, 0);
14893 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1);
14894 //.. //-- uInstr1(cb, CALLM, 0, Lit16,
14895 //.. //-- opc == 0x27 ? VGOFF_(helper_DAA) : VGOFF_(helper_DAS) );
14896 //.. //-- uFlagsRWU(cb, FlagsAC, FlagsSZACP, FlagO);
14897 //.. //-- uInstr1(cb, POP, 4, TempReg, t1);
14898 //.. //-- uInstr0(cb, CALLM_E, 0);
14899 //.. //-- uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, R_AL);
14900 //.. //-- DIP(opc == 0x27 ? "daa\n" : "das\n");
14903 //.. //-- case 0x37: /* AAA */
14904 //.. //-- case 0x3F: /* AAS */
14905 //.. //-- t1 = newTemp(cb);
14906 //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1);
14907 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */
14908 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
14909 //.. //-- uWiden(cb, 2, False);
14910 //.. //-- uInstr0(cb, CALLM_S, 0);
14911 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1);
14912 //.. //-- uInstr1(cb, CALLM, 0, Lit16,
14913 //.. //-- opc == 0x37 ? VGOFF_(helper_AAA) : VGOFF_(helper_AAS) );
14914 //.. //-- uFlagsRWU(cb, FlagA, FlagsAC, FlagsEmpty);
14915 //.. //-- uInstr1(cb, POP, 4, TempReg, t1);
14916 //.. //-- uInstr0(cb, CALLM_E, 0);
14917 //.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
14918 //.. //-- DIP(opc == 0x37 ? "aaa\n" : "aas\n");
14921 //.. //-- case 0xD4: /* AAM */
14922 //.. //-- case 0xD5: /* AAD */
14923 //.. //-- d32 = getUChar(delta); delta++;
14924 //.. //-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !");
14925 //.. //-- t1 = newTemp(cb);
14926 //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1);
14927 //.. //-- /* Widen %AX to 32 bits, so it's all defined when we push it. */
14928 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
14929 //.. //-- uWiden(cb, 2, False);
14930 //.. //-- uInstr0(cb, CALLM_S, 0);
14931 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1);
14932 //.. //-- uInstr1(cb, CALLM, 0, Lit16,
14933 //.. //-- opc == 0xD4 ? VGOFF_(helper_AAM) : VGOFF_(helper_AAD) );
14934 //.. //-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty);
14935 //.. //-- uInstr1(cb, POP, 4, TempReg, t1);
14936 //.. //-- uInstr0(cb, CALLM_E, 0);
14937 //.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
14938 //.. //-- DIP(opc == 0xD4 ? "aam\n" : "aad\n");
14941    /* ------------------------ CWD/CDQ -------------------- */
   /* 0x98 is CBW/CWDE/CDQE depending on operand size: sign-extend the
      lower half of rAX into the full width.  The sz-dispatch branches
      sit on lines not visible in this chunk. */
14943    case 0x98: /* CBW */
14944       if (haveF2orF3(pfx)) goto decode_failure;
14946          putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
   /* NOTE(review): this DIP string lacks a trailing '\n', unlike its
      siblings — looks like an oversight; confirm against upstream. */
14947          DIP(/*"cdqe\n"*/"cltq");
14951          putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
14956          putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
14960       goto decode_failure;
   /* 0x99: sign-extend rAX into rDX:rAX by arithmetic-shifting a copy
      of rAX right by (width-1) bits to replicate the sign bit. */
14962    case 0x99: /* CWD/CDQ/CQO */
14963       if (haveF2orF3(pfx)) goto decode_failure;
14964       vassert(sz == 2 || sz == 4 || sz == 8);
14967                      binop(mkSizedOp(ty,Iop_Sar8),
14969                            mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
14970       DIP(sz == 2 ? "cwd\n"
14971                   : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
14975    /* ------------------------ FPU ops -------------------- */
14977    case 0x9E: /* SAHF */
14982    case 0x9F: /* LAHF */
14987    case 0x9B: /* FWAIT */
   /* x87 escape opcodes D8..DF: delegated to dis_FPU.  A redundant
      rex.w prefix is tolerated only for the specific encodings listed
      below, since some compilers emit it pointlessly. */
15000       Bool redundantREXWok = False;
15002       if (haveF2orF3(pfx))
15003          goto decode_failure;
15005       /* kludge to tolerate redundant rex.w prefixes (should do this
15006          properly one day) */
15007       /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
15008       if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
15009          redundantREXWok = True;
15012              || (sz == 8 && redundantREXWok))
15013           && haveNo66noF2noF3(pfx)) {
15014          Long delta0    = delta;
15015          Bool decode_OK = False;
15016          delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
   /* decode_OK==False means dis_FPU rejected the insn; fail the
      whole decode (handling lines not visible in this chunk) */
15019             goto decode_failure;
15023       goto decode_failure;
15027    /* ------------------------ INT ------------------------ */
   /* INT3: one-byte breakpoint; delivers SIGTRAP with RIP at the
      following insn. */
15029    case 0xCC: /* INT 3 */
15030       jmp_lit(Ijk_SigTRAP, guest_RIP_bbstart + delta);
15031       dres.whatNext = Dis_StopHere;
   /* INT imm8: only vector 0x80 (Sys_int32, the Linux 32-bit syscall
      gate) is accepted; all other vectors fail the decode. */
15035    case 0xCD: { /* INT imm8 */
15036       IRJumpKind jk = Ijk_Boring;
15037       if (have66orF2orF3(pfx)) goto decode_failure;
15038       d64 = getUChar(delta); delta++;
15040          case 32:  jk = Ijk_Sys_int32; break;
15041          default:  goto decode_failure;
   /* The syscall may modify RIP unpredictably, so the assumed next
      RIP must be re-checked at run time. */
15043       guest_RIP_next_mustcheck = True;
15044       guest_RIP_next_assumed = guest_RIP_bbstart + delta;
15045       jmp_lit(jk, guest_RIP_next_assumed);
15046       /* It's important that all ArchRegs carry their up-to-date value
15047          at this point.  So we declare an end-of-block here, which
15048          forces any TempRegs caching ArchRegs to be flushed. */
15049       dres.whatNext = Dis_StopHere;
15050       DIP("int $0x%02x\n", (UInt)d64);
15054    /* ------------------------ Jcond, byte offset --------- */
   /* Unconditional direct jumps.  If the resteer callback approves the
      target, disassembly continues there (block chasing); otherwise
      the block ends with a literal jump. */
15056    case 0xEB: /* Jb (jump, byte offset) */
15057       if (haveF2orF3(pfx)) goto decode_failure;
15059          goto decode_failure; /* JRS added 2004 July 11 */
15060       d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
15062       if (resteerOkFn(callback_opaque,d64)) {
15063          dres.whatNext   = Dis_ResteerU;
15064          dres.continueAt = d64;
15066          jmp_lit(Ijk_Boring,d64);
15067          dres.whatNext = Dis_StopHere;
15069       DIP("jmp-8 0x%llx\n", d64);
15072    case 0xE9: /* Jv (jump, 16/32 offset) */
15073       if (haveF2orF3(pfx)) goto decode_failure;
15075          goto decode_failure; /* JRS added 2004 July 11 */
15076       d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
15078       if (resteerOkFn(callback_opaque,d64)) {
15079          dres.whatNext   = Dis_ResteerU;
15080          dres.continueAt = d64;
15082          jmp_lit(Ijk_Boring,d64);
15083          dres.whatNext = Dis_StopHere;
15085       DIP("jmp 0x%llx\n", d64);
   /* Conditional jumps, 8-bit displacement.  The condition code is
      (opc - 0x70), mapping directly onto AMD64Condcode.  Backward
      branches may be speculatively assumed taken, forward branches
      assumed not taken, when conditional chasing is enabled. */
15090    case 0x72: /* JBb/JNAEb (jump below) */
15091    case 0x73: /* JNBb/JAEb (jump not below) */
15092    case 0x74: /* JZb/JEb (jump zero) */
15093    case 0x75: /* JNZb/JNEb (jump not zero) */
15094    case 0x76: /* JBEb/JNAb (jump below or equal) */
15095    case 0x77: /* JNBEb/JAb (jump not below or equal) */
15096    case 0x78: /* JSb (jump negative) */
15097    case 0x79: /* JNSb (jump not negative) */
15098    case 0x7A: /* JP (jump parity even) */
15099    case 0x7B: /* JNP/JPO (jump parity odd) */
15100    case 0x7C: /* JLb/JNGEb (jump less) */
15101    case 0x7D: /* JGEb/JNLb (jump greater or equal) */
15102    case 0x7E: /* JLEb/JNGb (jump less or equal) */
15103    case 0x7F: /* JGb/JNLEb (jump greater) */
15105       HChar* comment  = "";
15106       if (haveF2orF3(pfx)) goto decode_failure;
15107       jmpDelta = getSDisp8(delta);
15108       vassert(-128 <= jmpDelta && jmpDelta < 128);
15109       d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
15112           && vex_control.guest_chase_cond
15113           && (Addr64)d64 != (Addr64)guest_RIP_bbstart
15115           && resteerOkFn( callback_opaque, d64) ) {
15116          /* Speculation: assume this backward branch is taken.  So we
15117             need to emit a side-exit to the insn following this one,
15118             on the negation of the condition, and continue at the
15119             branch target address (d64).  If we wind up back at the
15120             first instruction of the trace, just stop; it's better to
15121             let the IR loop unroller handle that case. */
   /* (1 ^ cc) is the negated condition — condcodes come in
      complementary even/odd pairs */
15123                            mk_amd64g_calculate_condition(
15124                               (AMD64Condcode)(1 ^ (opc - 0x70))),
15126                            IRConst_U64(guest_RIP_bbstart+delta) ) );
15127          dres.whatNext   = Dis_ResteerC;
15128          dres.continueAt = d64;
15129          comment = "(assumed taken)";
15133           && vex_control.guest_chase_cond
15134           && (Addr64)d64 != (Addr64)guest_RIP_bbstart
15136           && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
15137          /* Speculation: assume this forward branch is not taken.  So
15138             we need to emit a side-exit to d64 (the dest) and continue
15139             disassembling at the insn immediately following this
15142                            mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
15144                            IRConst_U64(d64) ) );
15145          dres.whatNext   = Dis_ResteerC;
15146          dres.continueAt = guest_RIP_bbstart+delta;
15147          comment = "(assumed not taken)";
15150          /* Conservative default translation - end the block at this
15152          jcc_01( (AMD64Condcode)(opc - 0x70),
15153                  guest_RIP_bbstart+delta,
15155          dres.whatNext = Dis_StopHere;
15157       DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment);
   /* JRCXZ/JECXZ: jump if rCX (or eCX under an address-size override)
      is zero.  Both variants compare against zero at 64 bits, widening
      eCX first in the ASO case. */
15162       /* JRCXZ or JECXZ, depending address size override. */
15163       if (have66orF2orF3(pfx)) goto decode_failure;
15164       d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
15166       if (haveASO(pfx)) {
15168          stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
15169                                   unop(Iop_32Uto64, getIReg32(R_RCX)),
15174          DIP("jecxz 0x%llx\n", d64);
15177          stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
15183          DIP("jrcxz 0x%llx\n", d64);
15187    case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
15188    case 0xE1: /* LOOPE  disp8: decrement count, jump if count != 0 && ZF==1 */
15189    case 0xE2: /* LOOP   disp8: decrement count, jump if count != 0 */
15190    { /* The docs say this uses rCX as a count depending on the
15191         address size override, not the operand one.  Since we don't
15192         handle address size overrides, I guess that means RCX. */
15193       IRExpr* zbit  = NULL;
15194       IRExpr* count = NULL;
15195       IRExpr* cond  = NULL;
15196       HChar*  xtra  = NULL;
15198       if (have66orF2orF3(pfx) || haveASO(pfx)) goto decode_failure;
15199       d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
   /* All three variants decrement RCX first; the opcode then selects
      what else (if anything) is ANDed into the branch condition. */
15201       putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
15203       count = getIReg64(R_RCX);
15204       cond = binop(Iop_CmpNE64, count, mkU64(0));
15211             zbit = mk_amd64g_calculate_condition( AMD64CondZ );
15212             cond = mkAnd1(cond, zbit);
15216             zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
15217             cond = mkAnd1(cond, zbit);
15222       stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64)) );
15224       DIP("loop%s 0x%llx\n", xtra, d64);
15228    /* ------------------------ IMUL ----------------------- */
15230    case 0x69: /* IMUL Iv, Ev, Gv */
15231       if (haveF2orF3(pfx)) goto decode_failure;
15232       delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
   /* NOTE(review): unlike 0x69, this case has no haveF2orF3 prefix
      check — possibly an inconsistency; confirm against upstream. */
15234    case 0x6B: /* IMUL Ib, Ev, Gv */
15235       delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
15238    /* ------------------------ MOV ------------------------ */
15240    case 0x88: /* MOV Gb,Eb */
15241       if (haveF2orF3(pfx)) goto decode_failure;
15242       delta = dis_mov_G_E(vbi, pfx, 1, delta);
15245    case 0x89: /* MOV Gv,Ev */
15246       if (haveF2orF3(pfx)) goto decode_failure;
15247       delta = dis_mov_G_E(vbi, pfx, sz, delta);
15250    case 0x8A: /* MOV Eb,Gb */
15251       if (haveF2orF3(pfx)) goto decode_failure;
15252       delta = dis_mov_E_G(vbi, pfx, 1, delta);
15255    case 0x8B: /* MOV Ev,Gv */
15256       if (haveF2orF3(pfx)) goto decode_failure;
15257       delta = dis_mov_E_G(vbi, pfx, sz, delta);
   /* LEA: address calculation only; memory operand is mandatory
      (register form is an invalid encoding). */
15260    case 0x8D: /* LEA M,Gv */
15261       if (haveF2orF3(pfx)) goto decode_failure;
15262       if (sz != 4 && sz != 8)
15263          goto decode_failure;
15264       modrm = getUChar(delta);
15265       if (epartIsReg(modrm))
15266          goto decode_failure;
15267       /* NOTE!  this is the one place where a segment override prefix
15268          has no effect on the address calculation.  Therefore we clear
15269          any segment override bits in pfx. */
15270       addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
15272       /* This is a hack.  But it isn't clear that really doing the
15273          calculation at 32 bits is really worth it.  Hence for leal,
15274          do the full 64-bit calculation and then truncate it. */
15275       putIRegG( sz, pfx, modrm,
15277                    ? unop(Iop_64to32, mkexpr(addr))
15280       DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
15281                             nameIRegG(sz,pfx,modrm));
15284 //.. case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
15285 //.. delta = dis_mov_Sw_Ew(sorb, sz, delta);
15288 //.. case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
15289 //.. delta = dis_mov_Ew_Sw(sorb, delta);
   /* MOV with 64-bit absolute moffs operand (A0-A3).  A0/A2 force the
      byte-sized variant and fall through into the generic handler;
      the sz-forcing line between A0/A2 and the check below is not
      visible in this chunk. */
15292    case 0xA0: /* MOV Ob,AL */
15293       if (have66orF2orF3(pfx)) goto decode_failure;
15295       /* Fall through ... */
15296    case 0xA1: /* MOV Ov,eAX */
15297       if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
15298          goto decode_failure;
15299       d64 = getDisp64(delta);
15302       addr = newTemp(Ity_I64);
15303       assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
15304       putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
15305       DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
15306           segRegTxt(pfx), d64,
15310    case 0xA2: /* MOV AL,Ob */
15311       if (have66orF2orF3(pfx)) goto decode_failure;
15313       /* Fall through ... */
15314    case 0xA3: /* MOV eAX,Ov */
15315       if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
15316          goto decode_failure;
15317       d64 = getDisp64(delta);
15320       addr = newTemp(Ity_I64);
15321       assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
15322       storeLE( mkexpr(addr), getIRegRAX(sz) );
15323       DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
15324                                   segRegTxt(pfx), d64);
15327    /* XXXX be careful here with moves to AH/BH/CH/DH */
   /* MOV imm8 into a byte register selected by the low 3 opcode bits
      (plus REX.B). */
15328    case 0xB0: /* MOV imm,AL */
15329    case 0xB1: /* MOV imm,CL */
15330    case 0xB2: /* MOV imm,DL */
15331    case 0xB3: /* MOV imm,BL */
15332    case 0xB4: /* MOV imm,AH */
15333    case 0xB5: /* MOV imm,CH */
15334    case 0xB6: /* MOV imm,DH */
15335    case 0xB7: /* MOV imm,BH */
15336       if (haveF2orF3(pfx)) goto decode_failure;
15337       d64 = getUChar(delta);
15339       putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
15340       DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
   /* MOV imm into a full-width register.  With REX.W this is movabsq,
      the only x86-64 encoding that carries a full 64-bit immediate;
      otherwise the immediate is at most 32 bits, sign-extended. */
15343    case 0xB8: /* MOV imm,eAX */
15344    case 0xB9: /* MOV imm,eCX */
15345    case 0xBA: /* MOV imm,eDX */
15346    case 0xBB: /* MOV imm,eBX */
15347    case 0xBC: /* MOV imm,eSP */
15348    case 0xBD: /* MOV imm,eBP */
15349    case 0xBE: /* MOV imm,eSI */
15350    case 0xBF: /* MOV imm,eDI */
15351       /* This is the one-and-only place where 64-bit literals are
15352          allowed in the instruction stream. */
15353       if (haveF2orF3(pfx)) goto decode_failure;
15355          d64 = getDisp64(delta);
15357          putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
15358          DIP("movabsq $%lld,%s\n", (Long)d64,
15359                                    nameIRegRexB(8,pfx,opc-0xB8));
15361          d64 = getSDisp(imin(4,sz),delta);
15362          delta += imin(4,sz);
15363          putIRegRexB(sz, pfx, opc-0xB8,
15364                          mkU(szToITy(sz), d64 & mkSizeMask(sz)));
15365          DIP("mov%c $%lld,%s\n", nameISize(sz),
15367                                  nameIRegRexB(sz,pfx,opc-0xB8));
   /* MOV imm into Ev (reg or memory).  C6 forces byte size; the
      sz-forcing lines are not visible in this chunk.  For the memory
      form, disAMode is told how many immediate bytes follow the
      addressing mode so RIP-relative addresses resolve correctly. */
15371    case 0xC6: /* MOV Ib,Eb */
15374    case 0xC7: /* MOV Iv,Ev */
15378       if (haveF2orF3(pfx)) goto decode_failure;
15379       modrm = getUChar(delta);
15380       if (epartIsReg(modrm)) {
15381          delta++; /* mod/rm byte */
15382          d64 = getSDisp(imin(4,sz),delta);
15383          delta += imin(4,sz);
15384          putIRegE(sz, pfx, modrm,
15385                       mkU(szToITy(sz), d64 & mkSizeMask(sz)));
15386          DIP("mov%c $%lld, %s\n", nameISize(sz),
15388                                   nameIRegE(sz,pfx,modrm));
15390          addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
15391                            /*xtra*/imin(4,sz) );
15393          d64 = getSDisp(imin(4,sz),delta);
15394          delta += imin(4,sz);
15395          storeLE(mkexpr(addr),
15396                  mkU(szToITy(sz), d64 & mkSizeMask(sz)));
15397          DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
15401    /* ------------------------ MOVx ------------------------ */
   /* 0x63 in 64-bit mode is MOVSXD (movslq): sign-extend r/m32 into a
      64-bit register.  Only the REX.W form is accepted here. */
15403    case 0x63: /* MOVSX */
15404       if (haveF2orF3(pfx)) goto decode_failure;
15405       if (haveREX(pfx) && 1==getRexW(pfx)) {
15407          /* movsx r/m32 to r64 */
15408          modrm = getUChar(delta);
15409          if (epartIsReg(modrm)) {
15411             putIRegG(8, pfx, modrm,
15413                               getIRegE(4, pfx, modrm)));
15414             DIP("movslq %s,%s\n",
15415                 nameIRegE(4, pfx, modrm),
15416                 nameIRegG(8, pfx, modrm));
15419             addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15421             putIRegG(8, pfx, modrm,
15423                               loadLE(Ity_I32, mkexpr(addr))));
15424             DIP("movslq %s,%s\n", dis_buf,
15425                 nameIRegG(8, pfx, modrm));
15429          goto decode_failure;
15432    /* ------------------------ opl imm, A ----------------- */
   /* ALU op, immediate against AL/eAX.  dis_op_imm_A args:
      (size, carry-in?, 8-bit base IROp widened internally,
      keep-result?, delta, mnemonic).  CMP/TEST pass keep=False so
      only flags are affected. */
15434    case 0x04: /* ADD Ib, AL */
15435       if (haveF2orF3(pfx)) goto decode_failure;
15436       delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
15438    case 0x05: /* ADD Iv, eAX */
15439       if (haveF2orF3(pfx)) goto decode_failure;
15440       delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
15443    case 0x0C: /* OR Ib, AL */
15444       if (haveF2orF3(pfx)) goto decode_failure;
15445       delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
15447    case 0x0D: /* OR Iv, eAX */
15448       if (haveF2orF3(pfx)) goto decode_failure;
15449       delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
15452    case 0x14: /* ADC Ib, AL */
15453       if (haveF2orF3(pfx)) goto decode_failure;
15454       delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
15456 //.. //--    case 0x15: /* ADC Iv, eAX */
15457 //.. //--       delta = dis_op_imm_A( sz, ADC, True, delta, "adc" );
15460    case 0x1C: /* SBB Ib, AL */
15461       if (haveF2orF3(pfx)) goto decode_failure;
15462       delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
15464 //.. //--    case 0x1D: /* SBB Iv, eAX */
15465 //.. //--       delta = dis_op_imm_A( sz, SBB, True, delta, "sbb" );
15468    case 0x24: /* AND Ib, AL */
15469       if (haveF2orF3(pfx)) goto decode_failure;
15470       delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
15472    case 0x25: /* AND Iv, eAX */
15473       if (haveF2orF3(pfx)) goto decode_failure;
15474       delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
15477    case 0x2C: /* SUB Ib, AL */
15478       if (haveF2orF3(pfx)) goto decode_failure;
15479       delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
15481    case 0x2D: /* SUB Iv, eAX */
15482       if (haveF2orF3(pfx)) goto decode_failure;
15483       delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
15486    case 0x34: /* XOR Ib, AL */
15487       if (haveF2orF3(pfx)) goto decode_failure;
15488       delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
15490    case 0x35: /* XOR Iv, eAX */
15491       if (haveF2orF3(pfx)) goto decode_failure;
15492       delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
15495    case 0x3C: /* CMP Ib, AL */
15496       if (haveF2orF3(pfx)) goto decode_failure;
15497       delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
15499    case 0x3D: /* CMP Iv, eAX */
15500       if (haveF2orF3(pfx)) goto decode_failure;
15501       delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
15504    case 0xA8: /* TEST Ib, AL */
15505       if (haveF2orF3(pfx)) goto decode_failure;
15506       delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
15508    case 0xA9: /* TEST Iv, eAX */
15509       if (haveF2orF3(pfx)) goto decode_failure;
15510       delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
15513    /* ------------------------ opl Ev, Gv ----------------- */
   /* ALU op, Ev source / Gv destination.  dis_op2_E_G args:
      (vbi, pfx, carry-in?, 8-bit base IROp, keep-result?, size,
      delta, mnemonic).  CMP/TEST discard the result (keep=False). */
15515    case 0x02: /* ADD Eb,Gb */
15516       if (haveF2orF3(pfx)) goto decode_failure;
15517       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
15519    case 0x03: /* ADD Ev,Gv */
15520       if (haveF2orF3(pfx)) goto decode_failure;
15521       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
15524    case 0x0A: /* OR Eb,Gb */
15525       if (haveF2orF3(pfx)) goto decode_failure;
15526       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
15528    case 0x0B: /* OR Ev,Gv */
15529       if (haveF2orF3(pfx)) goto decode_failure;
15530       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
15533    case 0x12: /* ADC Eb,Gb */
15534       if (haveF2orF3(pfx)) goto decode_failure;
15535       delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
15537    case 0x13: /* ADC Ev,Gv */
15538       if (haveF2orF3(pfx)) goto decode_failure;
15539       delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
15542    case 0x1A: /* SBB Eb,Gb */
15543       if (haveF2orF3(pfx)) goto decode_failure;
15544       delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
15546    case 0x1B: /* SBB Ev,Gv */
15547       if (haveF2orF3(pfx)) goto decode_failure;
15548       delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
15551    case 0x22: /* AND Eb,Gb */
15552       if (haveF2orF3(pfx)) goto decode_failure;
15553       delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
15555    case 0x23: /* AND Ev,Gv */
15556       if (haveF2orF3(pfx)) goto decode_failure;
15557       delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
15560    case 0x2A: /* SUB Eb,Gb */
15561       if (haveF2orF3(pfx)) goto decode_failure;
15562       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
15564    case 0x2B: /* SUB Ev,Gv */
15565       if (haveF2orF3(pfx)) goto decode_failure;
15566       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
15569    case 0x32: /* XOR Eb,Gb */
15570       if (haveF2orF3(pfx)) goto decode_failure;
15571       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
15573    case 0x33: /* XOR Ev,Gv */
15574       if (haveF2orF3(pfx)) goto decode_failure;
15575       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
15578    case 0x3A: /* CMP Eb,Gb */
15579       if (haveF2orF3(pfx)) goto decode_failure;
15580       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
15582    case 0x3B: /* CMP Ev,Gv */
15583       if (haveF2orF3(pfx)) goto decode_failure;
15584       delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
15587    case 0x84: /* TEST Eb,Gb */
15588       if (haveF2orF3(pfx)) goto decode_failure;
15589       delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" );
15591    case 0x85: /* TEST Ev,Gv */
15592       if (haveF2orF3(pfx)) goto decode_failure;
15593       delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" );
15596    /* ------------------------ opl Gv, Ev ----------------- */
   /* ALU op, Gv source / Ev destination — mirror image of the E,G
      group above, with the same dis_op2_* argument layout. */
15598    case 0x00: /* ADD Gb,Eb */
15599       if (haveF2orF3(pfx)) goto decode_failure;
15600       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
15602    case 0x01: /* ADD Gv,Ev */
15603       if (haveF2orF3(pfx)) goto decode_failure;
15604       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
15607    case 0x08: /* OR Gb,Eb */
15608       if (haveF2orF3(pfx)) goto decode_failure;
15609       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
15611    case 0x09: /* OR Gv,Ev */
15612       if (haveF2orF3(pfx)) goto decode_failure;
15613       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
15616    case 0x10: /* ADC Gb,Eb */
15617       if (haveF2orF3(pfx)) goto decode_failure;
15618       delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
15620    case 0x11: /* ADC Gv,Ev */
15621       if (haveF2orF3(pfx)) goto decode_failure;
15622       delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
15625    case 0x18: /* SBB Gb,Eb */
15626       if (haveF2orF3(pfx)) goto decode_failure;
15627       delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
15629    case 0x19: /* SBB Gv,Ev */
15630       if (haveF2orF3(pfx)) goto decode_failure;
15631       delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
15634    case 0x20: /* AND Gb,Eb */
15635       if (haveF2orF3(pfx)) goto decode_failure;
15636       delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
15638    case 0x21: /* AND Gv,Ev */
15639       if (haveF2orF3(pfx)) goto decode_failure;
15640       delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
15643    case 0x28: /* SUB Gb,Eb */
15644       if (haveF2orF3(pfx)) goto decode_failure;
15645       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
15647    case 0x29: /* SUB Gv,Ev */
15648       if (haveF2orF3(pfx)) goto decode_failure;
15649       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
15652    case 0x30: /* XOR Gb,Eb */
15653       if (haveF2orF3(pfx)) goto decode_failure;
15654       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
15656    case 0x31: /* XOR Gv,Ev */
15657       if (haveF2orF3(pfx)) goto decode_failure;
15658       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
15661    case 0x38: /* CMP Gb,Eb */
15662       if (haveF2orF3(pfx)) goto decode_failure;
15663       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
15665    case 0x39: /* CMP Gv,Ev */
15666       if (haveF2orF3(pfx)) goto decode_failure;
15667       delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
15670 /* ------------------------ POP ------------------------ */
15672 case 0x58: /* POP eAX */
15673 case 0x59: /* POP eCX */
15674 case 0x5A: /* POP eDX */
15675 case 0x5B: /* POP eBX */
15676 case 0x5D: /* POP eBP */
15677 case 0x5E: /* POP eSI */
15678 case 0x5F: /* POP eDI */
15679 case 0x5C: /* POP eSP */
15680 if (haveF2orF3(pfx)) goto decode_failure;
15681 vassert(sz == 2 || sz == 4 || sz == 8);
15683 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
15684 t1 = newTemp(szToITy(sz));
15685 t2 = newTemp(Ity_I64);
15686 assign(t2, getIReg64(R_RSP));
15687 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
15688 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
15689 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
15690 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
15693 case 0x9D: /* POPF */
15694 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
15695 So sz==4 actually means sz==8. */
15696 if (haveF2orF3(pfx)) goto decode_failure;
15697 vassert(sz == 2 || sz == 4);
15698 if (sz == 4) sz = 8;
15699 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
15700 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
15701 assign(t2, getIReg64(R_RSP));
15702 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
15703 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
15704 /* t1 is the flag word. Mask out everything except OSZACP and
15705 set the flags thunk to AMD64G_CC_OP_COPY. */
15706 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15707 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15708 stmt( IRStmt_Put( OFFB_CC_DEP1,
15711 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
15712 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
15713 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
15718 /* Also need to set the D flag, which is held in bit 10 of t1.
15719 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
15726 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
15729 mkU64(0xFFFFFFFFFFFFFFFFULL)))
15732 /* And set the ID flag */
15739 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
15745 DIP("popf%c\n", nameISize(sz));
15748 //.. case 0x61: /* POPA */
15749 //.. /* This is almost certainly wrong for sz==2. So ... */
15750 //.. if (sz != 4) goto decode_failure;
15752 //.. /* t5 is the old %ESP value. */
15753 //.. t5 = newTemp(Ity_I32);
15754 //.. assign( t5, getIReg(4, R_ESP) );
15756 //.. /* Reload all the registers, except %esp. */
15757 //.. putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
15758 //.. putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
15759 //.. putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
15760 //.. putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
15761 //.. /* ignore saved %ESP */
15762 //.. putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
15763 //.. putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
15764 //.. putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));
15766 //.. /* and move %ESP back up */
15767 //.. putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );
15769 //.. DIP("pusha%c\n", nameISize(sz));
15772 case 0x8F: { /* POPQ m64 / POPW m16 */
15775 /* There is no encoding for 32-bit pop in 64-bit mode.
15776 So sz==4 actually means sz==8. */
15777 if (haveF2orF3(pfx)) goto decode_failure;
15778 vassert(sz == 2 || sz == 4);
15779 if (sz == 4) sz = 8;
15780 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
15782 rm = getUChar(delta);
15784 /* make sure this instruction is correct POP */
15785 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
15786 goto decode_failure;
15787 /* and has correct size */
15790 t1 = newTemp(Ity_I64);
15791 t3 = newTemp(Ity_I64);
15792 assign( t1, getIReg64(R_RSP) );
15793 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
15795 /* Increase RSP; must be done before the STORE. Intel manual
15796 says: If the RSP register is used as a base register for
15797 addressing a destination operand in memory, the POP
15798 instruction computes the effective address of the operand
15799 after it increments the RSP register. */
15800 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
15802 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
15803 storeLE( mkexpr(addr), mkexpr(t3) );
15805 DIP("popl %s\n", dis_buf);
15811 //.. //-- case 0x1F: /* POP %DS */
15812 //.. //-- dis_pop_segreg( cb, R_DS, sz ); break;
15813 //.. //-- case 0x07: /* POP %ES */
15814 //.. //-- dis_pop_segreg( cb, R_ES, sz ); break;
15815 //.. //-- case 0x17: /* POP %SS */
15816 //.. //-- dis_pop_segreg( cb, R_SS, sz ); break;
15818 /* ------------------------ PUSH ----------------------- */
15820 case 0x50: /* PUSH eAX */
15821 case 0x51: /* PUSH eCX */
15822 case 0x52: /* PUSH eDX */
15823 case 0x53: /* PUSH eBX */
15824 case 0x55: /* PUSH eBP */
15825 case 0x56: /* PUSH eSI */
15826 case 0x57: /* PUSH eDI */
15827 case 0x54: /* PUSH eSP */
15828 /* This is the Right Way, in that the value to be pushed is
15829 established before %rsp is changed, so that pushq %rsp
15830 correctly pushes the old value. */
15831 if (haveF2orF3(pfx)) goto decode_failure;
15832 vassert(sz == 2 || sz == 4 || sz == 8);
15834 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
15835 ty = sz==2 ? Ity_I16 : Ity_I64;
15837 t2 = newTemp(Ity_I64);
15838 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
15839 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
15840 putIReg64(R_RSP, mkexpr(t2) );
15841 storeLE(mkexpr(t2),mkexpr(t1));
15842 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
15845 case 0x68: /* PUSH Iv */
15846 if (haveF2orF3(pfx)) goto decode_failure;
15847 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
15848 if (sz == 4) sz = 8;
15849 d64 = getSDisp(imin(4,sz),delta);
15850 delta += imin(4,sz);
15852 case 0x6A: /* PUSH Ib, sign-extended to sz */
15853 if (haveF2orF3(pfx)) goto decode_failure;
15854 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
15855 if (sz == 4) sz = 8;
15856 d64 = getSDisp8(delta); delta += 1;
15860 t1 = newTemp(Ity_I64);
15862 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
15863 putIReg64(R_RSP, mkexpr(t1) );
15864 /* stop mkU16 asserting if d32 is a negative 16-bit number
15868 storeLE( mkexpr(t1), mkU(ty,d64) );
15869 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
15872 case 0x9C: /* PUSHF */ {
15873 /* Note. There is no encoding for a 32-bit pushf in 64-bit
15874 mode. So sz==4 actually means sz==8. */
15875 /* 24 July 06: has also been seen with a redundant REX prefix,
15876 so must also allow sz==8. */
15877 if (haveF2orF3(pfx)) goto decode_failure;
15878 vassert(sz == 2 || sz == 4 || sz == 8);
15879 if (sz == 4) sz = 8;
15880 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
15882 t1 = newTemp(Ity_I64);
15883 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
15884 putIReg64(R_RSP, mkexpr(t1) );
15886 t2 = newTemp(Ity_I64);
15887 assign( t2, mk_amd64g_calculate_rflags_all() );
15889 /* Patch in the D flag. This can simply be a copy of bit 10 of
15890 baseBlock[OFFB_DFLAG]. */
15891 t3 = newTemp(Ity_I64);
15892 assign( t3, binop(Iop_Or64,
15895 IRExpr_Get(OFFB_DFLAG,Ity_I64),
15899 /* And patch in the ID flag. */
15900 t4 = newTemp(Ity_I64);
15901 assign( t4, binop(Iop_Or64,
15904 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
15909 /* if sz==2, the stored value needs to be narrowed. */
15911 storeLE( mkexpr(t1), unop(Iop_32to16,
15912 unop(Iop_64to32,mkexpr(t4))) );
15914 storeLE( mkexpr(t1), mkexpr(t4) );
15916 DIP("pushf%c\n", nameISize(sz));
15920 //.. case 0x60: /* PUSHA */
15921 //.. /* This is almost certainly wrong for sz==2. So ... */
15922 //.. if (sz != 4) goto decode_failure;
15924 //.. /* This is the Right Way, in that the value to be pushed is
15925 //.. established before %esp is changed, so that pusha
15926 //.. correctly pushes the old %esp value. New value of %esp is
15927 //.. pushed at start. */
15928 //.. /* t0 is the %ESP value we're going to push. */
15929 //.. t0 = newTemp(Ity_I32);
15930 //.. assign( t0, getIReg(4, R_ESP) );
15932 //.. /* t5 will be the new %ESP value. */
15933 //.. t5 = newTemp(Ity_I32);
15934 //.. assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );
15936 //.. /* Update guest state before prodding memory. */
15937 //.. putIReg(4, R_ESP, mkexpr(t5));
15939 //.. /* Dump all the registers. */
15940 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
15941 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
15942 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
15943 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
15944 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
15945 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
15946 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
15947 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );
15949 //.. DIP("pusha%c\n", nameISize(sz));
15953 //.. //-- case 0x0E: /* PUSH %CS */
15954 //.. //-- dis_push_segreg( cb, R_CS, sz ); break;
15955 //.. //-- case 0x1E: /* PUSH %DS */
15956 //.. //-- dis_push_segreg( cb, R_DS, sz ); break;
15957 //.. //-- case 0x06: /* PUSH %ES */
15958 //.. //-- dis_push_segreg( cb, R_ES, sz ); break;
15959 //.. //-- case 0x16: /* PUSH %SS */
15960 //.. //-- dis_push_segreg( cb, R_SS, sz ); break;
15962 //.. /* ------------------------ SCAS et al ----------------- */
15964 //.. case 0xA4: /* MOVS, no REP prefix */
15966 //.. dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
15969 //.. case 0xA6: /* CMPSb, no REP prefix */
15970 //.. //-- case 0xA7:
15971 //.. dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
15975 case 0xAC: /* LODS, no REP prefix */
15977 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
15980 //.. case 0xAE: /* SCAS, no REP prefix */
15982 //.. dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
15986 case 0xFC: /* CLD */
15987 if (haveF2orF3(pfx)) goto decode_failure;
15988 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
15992 case 0xFD: /* STD */
15993 if (haveF2orF3(pfx)) goto decode_failure;
15994 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
15998 case 0xF8: /* CLC */
15999 case 0xF9: /* STC */
16000 case 0xF5: /* CMC */
16001 t0 = newTemp(Ity_I64);
16002 t1 = newTemp(Ity_I64);
16003 assign( t0, mk_amd64g_calculate_rflags_all() );
16006 assign( t1, binop(Iop_And64, mkexpr(t0),
16007 mkU64(~AMD64G_CC_MASK_C)));
16011 assign( t1, binop(Iop_Or64, mkexpr(t0),
16012 mkU64(AMD64G_CC_MASK_C)));
16016 assign( t1, binop(Iop_Xor64, mkexpr(t0),
16017 mkU64(AMD64G_CC_MASK_C)));
16021 vpanic("disInstr(x64)(clc/stc/cmc)");
16023 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16024 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16025 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
16026 /* Set NDEP even though it isn't used. This makes redundant-PUT
16027 elimination of previous stores to this field work better. */
16028 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16031 //.. /* REPNE prefix insn */
16033 //.. Addr32 eip_orig = guest_eip_bbstart + delta - 1;
16034 //.. vassert(sorb == 0);
16035 //.. abyte = getUChar(delta); delta++;
16037 //.. if (abyte == 0x66) { sz = 2; abyte = getUChar(delta); delta++; }
16038 //.. whatNext = Dis_StopHere;
16040 //.. switch (abyte) {
16041 //.. /* According to the Intel manual, "repne movs" should never occur, but
16042 //.. * in practice it has happened, so allow for it here... */
16043 //.. case 0xA4: sz = 1; /* REPNE MOVS<sz> */
16044 //.. goto decode_failure;
16045 //.. //-- case 0xA5:
16046 //.. // dis_REP_op ( CondNZ, dis_MOVS, sz, eip_orig,
16047 //.. // guest_eip_bbstart+delta, "repne movs" );
16050 //.. //-- case 0xA6: sz = 1; /* REPNE CMPS<sz> */
16051 //.. //-- case 0xA7:
16052 //.. //-- dis_REP_op ( cb, CondNZ, dis_CMPS, sz, eip_orig, eip, "repne cmps" );
16055 //.. case 0xAE: sz = 1; /* REPNE SCAS<sz> */
16057 //.. dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig,
16058 //.. guest_eip_bbstart+delta, "repne scas" );
16062 //.. goto decode_failure;
16067 /* ------ AE: SCAS variants ------ */
16070 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
16072 goto decode_failure;
16073 if (haveF2(pfx) && !haveF3(pfx)) {
16076 dis_REP_op ( AMD64CondNZ, dis_SCAS, sz,
16077 guest_RIP_curr_instr,
16078 guest_RIP_bbstart+delta, "repne scas", pfx );
16079 dres.whatNext = Dis_StopHere;
16082 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
16084 goto decode_failure;
16085 if (!haveF2(pfx) && haveF3(pfx)) {
16088 dis_REP_op ( AMD64CondZ, dis_SCAS, sz,
16089 guest_RIP_curr_instr,
16090 guest_RIP_bbstart+delta, "repe scas", pfx );
16091 dres.whatNext = Dis_StopHere;
16094 /* AE/AF: scasb/scas{w,l,q} */
16095 if (!haveF2(pfx) && !haveF3(pfx)) {
16098 dis_string_op( dis_SCAS, sz, "scas", pfx );
16101 goto decode_failure;
16103 /* ------ A6, A7: CMPS variants ------ */
16106 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
16108 goto decode_failure;
16109 if (haveF3(pfx) && !haveF2(pfx)) {
16112 dis_REP_op ( AMD64CondZ, dis_CMPS, sz,
16113 guest_RIP_curr_instr,
16114 guest_RIP_bbstart+delta, "repe cmps", pfx );
16115 dres.whatNext = Dis_StopHere;
16118 goto decode_failure;
16120 /* ------ AA, AB: STOS variants ------ */
16123 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
16125 goto decode_failure;
16126 if (haveF3(pfx) && !haveF2(pfx)) {
16129 dis_REP_op ( AMD64CondAlways, dis_STOS, sz,
16130 guest_RIP_curr_instr,
16131 guest_RIP_bbstart+delta, "rep stos", pfx );
16132 dres.whatNext = Dis_StopHere;
16135 /* AA/AB: stosb/stos{w,l,q} */
16136 if (!haveF3(pfx) && !haveF2(pfx)) {
16139 dis_string_op( dis_STOS, sz, "stos", pfx );
16142 goto decode_failure;
16144 /* ------ A4, A5: MOVS variants ------ */
16147 /* F3 A4: rep movsb */
16149 goto decode_failure;
16150 if (haveF3(pfx) && !haveF2(pfx)) {
16153 dis_REP_op ( AMD64CondAlways, dis_MOVS, sz,
16154 guest_RIP_curr_instr,
16155 guest_RIP_bbstart+delta, "rep movs", pfx );
16156 dres.whatNext = Dis_StopHere;
16160 if (!haveF3(pfx) && !haveF2(pfx)) {
16163 dis_string_op( dis_MOVS, sz, "movs", pfx );
16166 goto decode_failure;
16169 /* ------------------------ XCHG ----------------------- */
16171 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
16172 prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock)
16173 and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is
16174 used with an explicit LOCK prefix, we don't want to end up with
16175 two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by
16176 the generic LOCK logic at the top of disInstr. */
16177 case 0x86: /* XCHG Gb,Eb */
16179 /* Fall through ... */
16180 case 0x87: /* XCHG Gv,Ev */
16181 if (haveF2orF3(pfx)) goto decode_failure;
16182 modrm = getUChar(delta);
16184 t1 = newTemp(ty); t2 = newTemp(ty);
16185 if (epartIsReg(modrm)) {
16186 assign(t1, getIRegE(sz, pfx, modrm));
16187 assign(t2, getIRegG(sz, pfx, modrm));
16188 putIRegG(sz, pfx, modrm, mkexpr(t1));
16189 putIRegE(sz, pfx, modrm, mkexpr(t2));
16191 DIP("xchg%c %s, %s\n",
16192 nameISize(sz), nameIRegG(sz, pfx, modrm),
16193 nameIRegE(sz, pfx, modrm));
16195 *expect_CAS = True;
16196 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16197 assign( t1, loadLE(ty, mkexpr(addr)) );
16198 assign( t2, getIRegG(sz, pfx, modrm) );
16199 casLE( mkexpr(addr),
16200 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
16201 putIRegG( sz, pfx, modrm, mkexpr(t1) );
16203 DIP("xchg%c %s, %s\n", nameISize(sz),
16204 nameIRegG(sz, pfx, modrm), dis_buf);
16208 case 0x90: /* XCHG eAX,eAX */
16209 /* detect and handle F3 90 (rep nop) specially */
16210 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
16211 DIP("rep nop (P4 pause)\n");
16212 /* "observe" the hint. The Vex client needs to be careful not
16213 to cause very long delays as a result, though. */
16214 jmp_lit(Ijk_Yield, guest_RIP_bbstart+delta);
16215 dres.whatNext = Dis_StopHere;
16218 /* detect and handle NOPs specially */
16219 if (/* F2/F3 probably change meaning completely */
16221 /* If REX.B is 1, we're not exchanging rAX with itself */
16222 && getRexB(pfx)==0 ) {
16226 /* else fall through to normal case. */
16227 case 0x91: /* XCHG rAX,rCX */
16228 case 0x92: /* XCHG rAX,rDX */
16229 case 0x93: /* XCHG rAX,rBX */
16230 case 0x94: /* XCHG rAX,rSP */
16231 case 0x95: /* XCHG rAX,rBP */
16232 case 0x96: /* XCHG rAX,rSI */
16233 case 0x97: /* XCHG rAX,rDI */
16235 /* guard against mutancy */
16236 if (haveF2orF3(pfx)) goto decode_failure;
16238 /* sz == 2 could legitimately happen, but we don't handle it yet */
16239 if (sz == 2) goto decode_failure; /* awaiting test case */
16241 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
16244 //.. //-- /* ------------------------ XLAT ----------------------- */
16246 //.. //-- case 0xD7: /* XLAT */
16247 //.. //-- t1 = newTemp(cb); t2 = newTemp(cb);
16248 //.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBX, TempReg, t1); /* get eBX */
16249 //.. //-- handleAddrOverrides( cb, sorb, t1 ); /* make t1 DS:eBX */
16250 //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t2); /* get AL */
16251 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we add it. */
16252 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t2);
16253 //.. //-- uWiden(cb, 1, False);
16254 //.. //-- uInstr2(cb, ADD, sz, TempReg, t2, TempReg, t1); /* add AL to eBX */
16255 //.. //-- uInstr2(cb, LOAD, 1, TempReg, t1, TempReg, t2); /* get byte at t1 into t2 */
16256 //.. //-- uInstr2(cb, PUT, 1, TempReg, t2, ArchReg, R_AL); /* put byte into AL */
16258 //.. //-- DIP("xlat%c [ebx]\n", nameISize(sz));
16261 /* ------------------------ IN / OUT ----------------------- */
16263 case 0xE4: /* IN imm8, AL */
16265 t1 = newTemp(Ity_I64);
16266 abyte = getUChar(delta); delta++;
16267 assign(t1, mkU64( abyte & 0xFF ));
16268 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
16270 case 0xE5: /* IN imm8, eAX */
16271 if (!(sz == 2 || sz == 4)) goto decode_failure;
16272 t1 = newTemp(Ity_I64);
16273 abyte = getUChar(delta); delta++;
16274 assign(t1, mkU64( abyte & 0xFF ));
16275 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
16277 case 0xEC: /* IN %DX, AL */
16279 t1 = newTemp(Ity_I64);
16280 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
16281 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
16284 case 0xED: /* IN %DX, eAX */
16285 if (!(sz == 2 || sz == 4)) goto decode_failure;
16286 t1 = newTemp(Ity_I64);
16287 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
16288 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
16292 /* At this point, sz indicates the width, and t1 is a 64-bit
16293 value giving port number. */
16295 if (haveF2orF3(pfx)) goto decode_failure;
16296 vassert(sz == 1 || sz == 2 || sz == 4);
16298 t2 = newTemp(Ity_I64);
16299 d = unsafeIRDirty_1_N(
16302 "amd64g_dirtyhelper_IN",
16303 &amd64g_dirtyhelper_IN,
16304 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
16306 /* do the call, dumping the result in t2. */
16307 stmt( IRStmt_Dirty(d) );
16308 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
16312 case 0xE6: /* OUT AL, imm8 */
16314 t1 = newTemp(Ity_I64);
16315 abyte = getUChar(delta); delta++;
16316 assign( t1, mkU64( abyte & 0xFF ) );
16317 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
16319 case 0xE7: /* OUT eAX, imm8 */
16320 if (!(sz == 2 || sz == 4)) goto decode_failure;
16321 t1 = newTemp(Ity_I64);
16322 abyte = getUChar(delta); delta++;
16323 assign( t1, mkU64( abyte & 0xFF ) );
16324 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
16326 case 0xEE: /* OUT AL, %DX */
16328 t1 = newTemp(Ity_I64);
16329 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
16330 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
16333 case 0xEF: /* OUT eAX, %DX */
16334 if (!(sz == 2 || sz == 4)) goto decode_failure;
16335 t1 = newTemp(Ity_I64);
16336 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
16337 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
16341 /* At this point, sz indicates the width, and t1 is a 64-bit
16342 value giving port number. */
16344 if (haveF2orF3(pfx)) goto decode_failure;
16345 vassert(sz == 1 || sz == 2 || sz == 4);
16347 d = unsafeIRDirty_0_N(
16349 "amd64g_dirtyhelper_OUT",
16350 &amd64g_dirtyhelper_OUT,
16351 mkIRExprVec_3( mkexpr(t1),
16352 widenUto64( getIRegRAX(sz) ),
16355 stmt( IRStmt_Dirty(d) );
16359 /* ------------------------ (Grp1 extensions) ---------- */
16361 case 0x80: /* Grp1 Ib,Eb */
16362 if (haveF2orF3(pfx)) goto decode_failure;
16363 modrm = getUChar(delta);
16364 am_sz = lengthAMode(pfx,delta);
16367 d64 = getSDisp8(delta + am_sz);
16368 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
16371 case 0x81: /* Grp1 Iv,Ev */
16372 if (haveF2orF3(pfx)) goto decode_failure;
16373 modrm = getUChar(delta);
16374 am_sz = lengthAMode(pfx,delta);
16376 d64 = getSDisp(d_sz, delta + am_sz);
16377 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
16380 case 0x83: /* Grp1 Ib,Ev */
16381 if (haveF2orF3(pfx)) goto decode_failure;
16382 modrm = getUChar(delta);
16383 am_sz = lengthAMode(pfx,delta);
16385 d64 = getSDisp8(delta + am_sz);
16386 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
16389 /* ------------------------ (Grp2 extensions) ---------- */
16391 case 0xC0: { /* Grp2 Ib,Eb */
16392 Bool decode_OK = True;
16393 if (haveF2orF3(pfx)) goto decode_failure;
16394 modrm = getUChar(delta);
16395 am_sz = lengthAMode(pfx,delta);
16397 d64 = getUChar(delta + am_sz);
16399 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
16400 mkU8(d64 & 0xFF), NULL, &decode_OK );
16401 if (!decode_OK) goto decode_failure;
16404 case 0xC1: { /* Grp2 Ib,Ev */
16405 Bool decode_OK = True;
16406 if (haveF2orF3(pfx)) goto decode_failure;
16407 modrm = getUChar(delta);
16408 am_sz = lengthAMode(pfx,delta);
16410 d64 = getUChar(delta + am_sz);
16411 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
16412 mkU8(d64 & 0xFF), NULL, &decode_OK );
16413 if (!decode_OK) goto decode_failure;
16416 case 0xD0: { /* Grp2 1,Eb */
16417 Bool decode_OK = True;
16418 if (haveF2orF3(pfx)) goto decode_failure;
16419 modrm = getUChar(delta);
16420 am_sz = lengthAMode(pfx,delta);
16424 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
16425 mkU8(d64), NULL, &decode_OK );
16426 if (!decode_OK) goto decode_failure;
16429 case 0xD1: { /* Grp2 1,Ev */
16430 Bool decode_OK = True;
16431 if (haveF2orF3(pfx)) goto decode_failure;
16432 modrm = getUChar(delta);
16433 am_sz = lengthAMode(pfx,delta);
16436 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
16437 mkU8(d64), NULL, &decode_OK );
16438 if (!decode_OK) goto decode_failure;
16441 case 0xD2: { /* Grp2 CL,Eb */
16442 Bool decode_OK = True;
16443 if (haveF2orF3(pfx)) goto decode_failure;
16444 modrm = getUChar(delta);
16445 am_sz = lengthAMode(pfx,delta);
16448 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
16449 getIRegCL(), "%cl", &decode_OK );
16450 if (!decode_OK) goto decode_failure;
16453 case 0xD3: { /* Grp2 CL,Ev */
16454 Bool decode_OK = True;
16455 if (haveF2orF3(pfx)) goto decode_failure;
16456 modrm = getUChar(delta);
16457 am_sz = lengthAMode(pfx,delta);
16459 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
16460 getIRegCL(), "%cl", &decode_OK );
16461 if (!decode_OK) goto decode_failure;
16465 /* ------------------------ (Grp3 extensions) ---------- */
16467 case 0xF6: { /* Grp3 Eb */
16468 Bool decode_OK = True;
16469 if (haveF2orF3(pfx)) goto decode_failure;
16470 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
16471 if (!decode_OK) goto decode_failure;
16474 case 0xF7: { /* Grp3 Ev */
16475 Bool decode_OK = True;
16476 if (haveF2orF3(pfx)) goto decode_failure;
16477 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
16478 if (!decode_OK) goto decode_failure;
16482 /* ------------------------ (Grp4 extensions) ---------- */
16484 case 0xFE: { /* Grp4 Eb */
16485 Bool decode_OK = True;
16486 if (haveF2orF3(pfx)) goto decode_failure;
16487 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
16488 if (!decode_OK) goto decode_failure;
16492 /* ------------------------ (Grp5 extensions) ---------- */
16494 case 0xFF: { /* Grp5 Ev */
16495 Bool decode_OK = True;
16496 if (haveF2orF3(pfx)) goto decode_failure;
16497 delta = dis_Grp5 ( vbi, pfx, sz, delta, &dres, &decode_OK );
16498 if (!decode_OK) goto decode_failure;
16502 /* ------------------------ Escapes to 2-byte opcodes -- */
16505 opc = getUChar(delta); delta++;
16508 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
16510 case 0xBA: { /* Grp8 Ib,Ev */
16511 Bool decode_OK = False;
16512 if (haveF2orF3(pfx)) goto decode_failure;
16513 modrm = getUChar(delta);
16514 am_sz = lengthAMode(pfx,delta);
16515 d64 = getSDisp8(delta + am_sz);
16516 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
16519 goto decode_failure;
16523 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
16525 case 0xBC: /* BSF Gv,Ev */
16526 if (haveF2orF3(pfx)) goto decode_failure;
16527 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
16529 case 0xBD: /* BSR Gv,Ev */
16530 if (haveF2orF3(pfx)) goto decode_failure;
16531 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
16534 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
16536 case 0xC8: /* BSWAP %eax */
16543 case 0xCF: /* BSWAP %edi */
16544 if (haveF2orF3(pfx)) goto decode_failure;
16545 /* According to the AMD64 docs, this insn can have size 4 or
16548 t1 = newTemp(Ity_I32);
16549 t2 = newTemp(Ity_I32);
16550 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
16553 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
16555 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
16556 mkU32(0x00FF0000)),
16558 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
16559 mkU32(0x0000FF00)),
16560 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
16561 mkU32(0x000000FF) )
16564 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
16565 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
16568 else if (sz == 8) {
16569 IRTemp m8 = newTemp(Ity_I64);
16570 IRTemp s8 = newTemp(Ity_I64);
16571 IRTemp m16 = newTemp(Ity_I64);
16572 IRTemp s16 = newTemp(Ity_I64);
16573 IRTemp m32 = newTemp(Ity_I64);
16574 t1 = newTemp(Ity_I64);
16575 t2 = newTemp(Ity_I64);
16576 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
16578 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
16582 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
16585 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
16590 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
16594 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
16597 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
16602 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
16606 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
16609 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
16614 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
16615 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
16618 goto decode_failure;
16621 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
16623 /* All of these are possible at sizes 2, 4 and 8, but until a
16624 size 2 test case shows up, only handle sizes 4 and 8. */
16626 case 0xA3: /* BT Gv,Ev */
16627 if (haveF2orF3(pfx)) goto decode_failure;
16628 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
16629 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone );
16631 case 0xB3: /* BTR Gv,Ev */
16632 if (haveF2orF3(pfx)) goto decode_failure;
16633 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
16634 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset );
16636 case 0xAB: /* BTS Gv,Ev */
16637 if (haveF2orF3(pfx)) goto decode_failure;
16638 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
16639 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet );
16641 case 0xBB: /* BTC Gv,Ev */
16642 if (haveF2orF3(pfx)) goto decode_failure;
16643 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
16644 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp );
16647 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
16651 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
16652 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
16653 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
16654 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
16655 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
16656 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
16657 case 0x48: /* CMOVSb (cmov negative) */
16658 case 0x49: /* CMOVSb (cmov not negative) */
16659 case 0x4A: /* CMOVP (cmov parity even) */
16660 case 0x4B: /* CMOVNP (cmov parity odd) */
16661 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
16662 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
16663 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
16664 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
16665 if (haveF2orF3(pfx)) goto decode_failure;
16666 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
16669 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
16671 case 0xB0: { /* CMPXCHG Gb,Eb */
16673 if (haveF2orF3(pfx)) goto decode_failure;
16674 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
16675 if (!ok) goto decode_failure;
16678 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
16680 if (haveF2orF3(pfx)) goto decode_failure;
16681 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
16682 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
16683 if (!ok) goto decode_failure;
16687 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
/* sz==4 selects CMPXCHG8B (two 32-bit halves), sz==8 selects
   CMPXCHG16B (two 64-bit halves).  elemTy is the per-half type. */
16688 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
16689 IRTemp expdHi = newTemp(elemTy);
16690 IRTemp expdLo = newTemp(elemTy);
16691 IRTemp dataHi = newTemp(elemTy);
16692 IRTemp dataLo = newTemp(elemTy);
16693 IRTemp oldHi = newTemp(elemTy);
16694 IRTemp oldLo = newTemp(elemTy);
16695 IRTemp flags_old = newTemp(Ity_I64);
16696 IRTemp flags_new = newTemp(Ity_I64);
16697 IRTemp success = newTemp(Ity_I1);
16698 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
16699 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
16700 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
16701 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
16702 IRTemp expdHi64 = newTemp(Ity_I64);
16703 IRTemp expdLo64 = newTemp(Ity_I64);
16705 /* Translate this using a DCAS, even if there is no LOCK
16706 prefix. Life is too short to bother with generating two
16707 different translations for the with/without-LOCK-prefix
/* Tell the caller that the generated IR contains an IRCAS, so the
   post-hoc sanity check in disInstr_AMD64 expects one. */
16709 *expect_CAS = True;
16711 /* Decode, and generate address. */
16712 if (have66orF2orF3(pfx)) goto decode_failure;
16713 if (sz != 4 && sz != 8) goto decode_failure;
/* CMPXCHG16B is only decodable if the host claims CX16 support. */
16714 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
16715 goto decode_failure;
16716 modrm = getUChar(delta);
/* Memory operand only; /1 is the required reg-field encoding. */
16717 if (epartIsReg(modrm)) goto decode_failure;
16718 if (gregLO3ofRM(modrm) != 1) goto decode_failure;
16719 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16722 /* cmpxchg16b requires an alignment check. */
16724 gen_SEGV_if_not_16_aligned( addr );
16726 /* Get the expected and new values. */
16727 assign( expdHi64, getIReg64(R_RDX) );
16728 assign( expdLo64, getIReg64(R_RAX) );
16730 /* These are the correctly-sized expected and new values.
16731 However, we also get expdHi64/expdLo64 above as 64-bits
16732 regardless, because we will need them later in the 32-bit
16733 case (paradoxically). */
16734 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
16735 : mkexpr(expdHi64) );
16736 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
16737 : mkexpr(expdLo64) );
/* New data to be stored comes from RCX:RBX, sized to match. */
16738 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
16739 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
16743 mkIRCAS( oldHi, oldLo,
16744 Iend_LE, mkexpr(addr),
16745 mkexpr(expdHi), mkexpr(expdLo),
16746 mkexpr(dataHi), mkexpr(dataLo)
16749 /* success when oldHi:oldLo == expdHi:expdLo */
/* NOTE(review): the surrounding assign(success, binop(opCasCmpEQ, ...,
   zero)) scaffolding is elided here; only the XOR arms are visible. */
16753 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
16754 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
16759 /* If the DCAS is successful, that is to say oldHi:oldLo ==
16760 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
16761 which is where they came from originally. Both the actual
16762 contents of these two regs, and any shadow values, are
16763 unchanged. If the DCAS fails then we're putting into
16764 RDX:RAX the value seen in memory. */
16765 /* Now of course there's a complication in the 32-bit case
16766 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
16767 unchanged; but if we use the same scheme as in the 64-bit
16768 case, we get hit by the standard rule that a write to the
16769 bottom 32 bits of an integer register zeros the upper 32
16770 bits. And so the upper halves of RDX and RAX mysteriously
16771 become zero. So we have to stuff back in the original
16772 64-bit values which we previously stashed in
16773 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
16774 /* It's just _so_ much fun ... */
16776 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
16777 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
16782 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
16783 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
16788 /* Copy the success bit into the Z flag and leave the others
/* Compute old rflags, clear Z, then OR in (success & 1) << Z-shift. */
16790 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
16794 binop(Iop_And64, mkexpr(flags_old),
16795 mkU64(~AMD64G_CC_MASK_Z)),
16798 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
16799 mkU8(AMD64G_CC_SHIFT_Z)) ));
/* Install the synthesised flags via the COPY thunk encoding. */
16801 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16802 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
16803 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16804 /* Set NDEP even though it isn't used. This makes
16805 redundant-PUT elimination of previous stores to this field
16807 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16809 /* Sheesh. Aren't you glad it was me and not you that had to
16810 write and validate all this grunge? */
/* NOTE(review): prints "cmpxchg8b" even when sz==8 (cmpxchg16b) --
   cosmetic disassembly-trace inaccuracy only; not changed here. */
16812 DIP("cmpxchg8b %s\n", dis_buf);
16817 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
16819 case 0xA2: { /* CPUID */
16820 /* Uses dirty helper:
16821 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
16822 declared to mod rax, wr rbx, rcx, rdx
16825 HChar* fName = NULL;
16826 void* fAddr = NULL;
16827 if (haveF2orF3(pfx)) goto decode_failure;
/* Pick the helper matching the capabilities we are simulating:
   exactly SSE3+CX16 gets the Core-2-like CPUID, else baseline. */
16828 if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
16829 |VEX_HWCAPS_AMD64_CX16)) {
16830 fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
16831 fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16;
16832 /* This is a Core-2-like machine */
16835 /* Give a CPUID for at least a baseline machine, no SSE2
16837 fName = "amd64g_dirtyhelper_CPUID_baseline";
16838 fAddr = &amd64g_dirtyhelper_CPUID_baseline;
16841 vassert(fName); vassert(fAddr);
16842 d = unsafeIRDirty_0_N ( 0/*regparms*/,
16843 fName, fAddr, mkIRExprVec_0() );
16844 /* declare guest state effects */
/* Helper reads/writes guest state directly, so it needs the
   guest-state pointer (BBP). */
16845 d->needsBBP = True;
/* RAX/RCX are Modify (inputs: leaf/subleaf, then overwritten);
   RBX/RDX are pure outputs, hence Write. */
16847 d->fxState[0].fx = Ifx_Modify;
16848 d->fxState[0].offset = OFFB_RAX;
16849 d->fxState[0].size = 8;
16850 d->fxState[1].fx = Ifx_Write;
16851 d->fxState[1].offset = OFFB_RBX;
16852 d->fxState[1].size = 8;
16853 d->fxState[2].fx = Ifx_Modify;
16854 d->fxState[2].offset = OFFB_RCX;
16855 d->fxState[2].size = 8;
16856 d->fxState[3].fx = Ifx_Write;
16857 d->fxState[3].offset = OFFB_RDX;
16858 d->fxState[3].size = 8;
16859 /* execute the dirty call, side-effecting guest state */
16860 stmt( IRStmt_Dirty(d) );
16861 /* CPUID is a serialising insn. So, just in case someone is
16862 using it as a memory fence ... */
16863 stmt( IRStmt_MBE(Imbe_Fence) );
16868 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
/* dis_movx_E_G args: (vbi, pfx, delta, src-size, dst-size, sign-extend?).
   0xB6/0xB7 zero-extend (False); 0xBE/0xBF sign-extend (True). */
16870 case 0xB6: /* MOVZXb Eb,Gv */
16871 if (haveF2orF3(pfx)) goto decode_failure;
16872 if (sz != 2 && sz != 4 && sz != 8)
16873 goto decode_failure;
16874 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
16876 case 0xB7: /* MOVZXw Ew,Gv */
16877 if (haveF2orF3(pfx)) goto decode_failure;
16878 if (sz != 4 && sz != 8)
16879 goto decode_failure;
16880 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
16883 case 0xBE: /* MOVSXb Eb,Gv */
16884 if (haveF2orF3(pfx)) goto decode_failure;
16885 if (sz != 2 && sz != 4 && sz != 8)
16886 goto decode_failure;
16887 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
16889 case 0xBF: /* MOVSXw Ew,Gv */
16890 if (haveF2orF3(pfx)) goto decode_failure;
16891 if (sz != 4 && sz != 8)
16892 goto decode_failure;
16893 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
16896 //.. //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
/* Dead x86-era code kept for reference by the original authors;
   left untouched here. */
16898 //.. //-- case 0xC3: /* MOVNTI Gv,Ev */
16899 //.. //-- vg_assert(sz == 4);
16900 //.. //-- modrm = getUChar(eip);
16901 //.. //-- vg_assert(!epartIsReg(modrm));
16902 //.. //-- t1 = newTemp(cb);
16903 //.. //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
16904 //.. //-- pair = disAMode ( cb, sorb, eip, dis_buf );
16905 //.. //-- t2 = LOW24(pair);
16906 //.. //-- eip += HI8(pair);
16907 //.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
16908 //.. //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
16911 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
16913 case 0xAF: /* IMUL Ev, Gv */
16914 if (haveF2orF3(pfx)) goto decode_failure;
16915 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
16918 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
/* Long-form NOP (0F 1F /0): decodes an address but performs no
   operation.  NOTE(review): the 'case' label line itself is elided. */
16921 if (haveF2orF3(pfx)) goto decode_failure;
16922 modrm = getUChar(delta);
16923 if (epartIsReg(modrm)) goto decode_failure;
16924 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16926 DIP("nop%c %s\n", nameISize(sz), dis_buf);
16929 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
16932 case 0x82: /* JBb/JNAEb (jump below) */
16933 case 0x83: /* JNBb/JAEb (jump not below) */
16934 case 0x84: /* JZb/JEb (jump zero) */
16935 case 0x85: /* JNZb/JNEb (jump not zero) */
16936 case 0x86: /* JBEb/JNAb (jump below or equal) */
16937 case 0x87: /* JNBEb/JAb (jump not below or equal) */
16938 case 0x88: /* JSb (jump negative) */
16939 case 0x89: /* JNSb (jump not negative) */
16940 case 0x8A: /* JP (jump parity even) */
16941 case 0x8B: /* JNP/JPO (jump parity odd) */
16942 case 0x8C: /* JLb/JNGEb (jump less) */
16943 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
16944 case 0x8E: /* JLEb/JNGb (jump less or equal) */
16945 case 0x8F: /* JGb/JNLEb (jump greater) */
16947 HChar* comment = "";
16948 if (haveF2orF3(pfx)) goto decode_failure;
/* Target = end-of-insn RIP + signed 32-bit displacement. */
16949 jmpDelta = getSDisp32(delta);
16950 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
/* Speculative chasing case 1: presumably gated on jmpDelta < 0
   (backward branch) in an elided condition line -- TODO confirm. */
16953 && vex_control.guest_chase_cond
16954 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
16956 && resteerOkFn( callback_opaque, d64) ) {
16957 /* Speculation: assume this backward branch is taken. So
16958 we need to emit a side-exit to the insn following this
16959 one, on the negation of the condition, and continue at
16960 the branch target address (d64). If we wind up back at
16961 the first instruction of the trace, just stop; it's
16962 better to let the IR loop unroller handle that case. */
/* XOR with 1 inverts an AMD64 condition code (cc pairs differ in
   the low bit), giving the side-exit's negated condition. */
16964 mk_amd64g_calculate_condition(
16965 (AMD64Condcode)(1 ^ (opc - 0x80))),
16967 IRConst_U64(guest_RIP_bbstart+delta) ) );
16968 dres.whatNext = Dis_ResteerC;
16969 dres.continueAt = d64;
16970 comment = "(assumed taken)";
16974 && vex_control.guest_chase_cond
16975 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
16977 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
16978 /* Speculation: assume this forward branch is not taken.
16979 So we need to emit a side-exit to d64 (the dest) and
16980 continue disassembling at the insn immediately
16981 following this one. */
16983 mk_amd64g_calculate_condition((AMD64Condcode)
16986 IRConst_U64(d64) ) );
16987 dres.whatNext = Dis_ResteerC;
16988 dres.continueAt = guest_RIP_bbstart+delta;
16989 comment = "(assumed not taken)";
16992 /* Conservative default translation - end the block at
16994 jcc_01( (AMD64Condcode)(opc - 0x80),
16995 guest_RIP_bbstart+delta,
16997 dres.whatNext = Dis_StopHere;
16999 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment);
17003 /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */
17004 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
17005 /* 0F 0D /1 -- prefetchw mem8 */
17006 if (have66orF2orF3(pfx)) goto decode_failure;
17007 modrm = getUChar(delta);
17008 if (epartIsReg(modrm)) goto decode_failure;
/* Only the /0 (prefetch) and /1 (prefetchw) forms are decoded. */
17009 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
17010 goto decode_failure;
17012 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
/* Prefetches are hints only -- no IR is generated beyond address
   decode; just print the right mnemonic. */
17015 switch (gregLO3ofRM(modrm)) {
17016 case 0: DIP("prefetch %s\n", dis_buf); break;
17017 case 1: DIP("prefetchw %s\n", dis_buf); break;
17018 default: vassert(0); /*NOTREACHED*/
17022 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
17023 case 0x31: { /* RDTSC */
17024 IRTemp val = newTemp(Ity_I64);
17025 IRExpr** args = mkIRExprVec_0();
17026 IRDirty* d = unsafeIRDirty_1_N (
17029 "amd64g_dirtyhelper_RDTSC",
17030 &amd64g_dirtyhelper_RDTSC,
17033 if (have66orF2orF3(pfx)) goto decode_failure;
17034 /* execute the dirty call, dumping the result in val. */
17035 stmt( IRStmt_Dirty(d) );
/* Split the 64-bit counter: high half to EDX, low half to EAX
   (32-bit writes zero the upper register halves, as RDTSC requires). */
17036 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
17037 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
17042 //.. /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
/* Dead x86-era segment-register code, retained commented-out. */
17044 //.. case 0xA1: /* POP %FS */
17045 //.. dis_pop_segreg( R_FS, sz ); break;
17046 //.. case 0xA9: /* POP %GS */
17047 //.. dis_pop_segreg( R_GS, sz ); break;
17049 //.. case 0xA0: /* PUSH %FS */
17050 //.. dis_push_segreg( R_FS, sz ); break;
17051 //.. case 0xA8: /* PUSH %GS */
17052 //.. dis_push_segreg( R_GS, sz ); break;
17054 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
17057 case 0x92: /* set-Bb/set-NAEb (set if below) */
17058 case 0x93: /* set-NBb/set-AEb (set if not below) */
17059 case 0x94: /* set-Zb/set-Eb (set if zero) */
17060 case 0x95: /* set-NZb/set-NEb (set if not zero) */
17061 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
17062 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
17063 case 0x98: /* set-Sb (set if negative) */
17064 case 0x99: /* set-NSb (set if not negative) */
17065 case 0x9A: /* set-P (set if parity even) */
17066 case 0x9B: /* set-NP (set if parity odd) */
17067 case 0x9C: /* set-Lb/set-NGEb (set if less) */
17068 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
17069 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
17070 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
17071 if (haveF2orF3(pfx)) goto decode_failure;
/* Evaluate the condition (opc-0x90 maps opcode to AMD64Condcode)
   to a 0/1 byte, then store it to the reg or mem destination. */
17072 t1 = newTemp(Ity_I8);
17073 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
17074 modrm = getUChar(delta);
17075 if (epartIsReg(modrm)) {
17077 putIRegE(1, pfx, modrm, mkexpr(t1));
17078 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
17079 nameIRegE(1,pfx,modrm));
17081 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17083 storeLE( mkexpr(addr), mkexpr(t1) );
17084 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
17088 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
/* Immediate forms: the imm8 shift count lives one byte past the
   amode, at delta + lengthAMode(...); d64 holds its offset. */
17090 case 0xA4: /* SHLDv imm8,Gv,Ev */
17091 modrm = getUChar(delta);
17092 d64 = delta + lengthAMode(pfx, delta);
17093 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
17094 delta = dis_SHLRD_Gv_Ev (
17095 vbi, pfx, delta, modrm, sz,
17096 mkU8(getUChar(d64)), True, /* literal */
17097 dis_buf, True /* left */ );
17099 case 0xA5: /* SHLDv %cl,Gv,Ev */
17100 modrm = getUChar(delta);
17101 delta = dis_SHLRD_Gv_Ev (
17102 vbi, pfx, delta, modrm, sz,
17103 getIRegCL(), False, /* not literal */
17104 "%cl", True /* left */ );
17107 case 0xAC: /* SHRDv imm8,Gv,Ev */
17108 modrm = getUChar(delta);
17109 d64 = delta + lengthAMode(pfx, delta);
17110 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
17111 delta = dis_SHLRD_Gv_Ev (
17112 vbi, pfx, delta, modrm, sz,
17113 mkU8(getUChar(d64)), True, /* literal */
17114 dis_buf, False /* right */ );
17116 case 0xAD: /* SHRDv %cl,Gv,Ev */
17117 modrm = getUChar(delta);
17118 delta = dis_SHLRD_Gv_Ev (
17119 vbi, pfx, delta, modrm, sz,
17120 getIRegCL(), False, /* not literal */
17121 "%cl", False /* right */);
17124 /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */
17125 case 0x05: /* SYSCALL */
/* Record the assumed fall-through RIP; disInstr_AMD64 will later
   assert it matches the actual decoded length. */
17126 guest_RIP_next_mustcheck = True;
17127 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
/* SYSCALL saves the return address in RCX, per the AMD64 ABI. */
17128 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
17129 /* It's important that all guest state is up-to-date
17130 at this point. So we declare an end-of-block here, which
17131 forces any cached guest state to be flushed. */
17132 jmp_lit(Ijk_Sys_syscall, guest_RIP_next_assumed);
17133 dres.whatNext = Dis_StopHere;
17137 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
17139 case 0xC0: { /* XADD Gb,Eb */
17140 Bool decode_OK = False;
17141 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
/* NOTE(review): the 'if (!decode_OK)' guard line is elided here. */
17143 goto decode_failure;
17146 case 0xC1: { /* XADD Gv,Ev */
17147 Bool decode_OK = False;
17148 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
17150 goto decode_failure;
17154 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
/* All MMX opcodes funnel into dis_MMX below; the case labels merely
   enumerate which second-byte opcodes are MMX-class. */
17158 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
17160 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
17161 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
17162 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
17163 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
17167 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
17170 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
17173 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
17177 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
17180 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
17183 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
17185 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
17186 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
17188 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
17192 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
17196 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
17198 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
17199 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
17200 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
17204 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
17208 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
17210 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
17211 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
17212 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
17213 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
17215 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
17219 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
17223 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
/* NOTE(review): delta0 appears unused on the visible lines (dis_MMX is
   called with delta-1 directly); its use may be on an elided line. */
17226 Long delta0 = delta-1;
17227 Bool decode_OK = False;
17229 /* If sz==2 this is SSE, and we assume sse idec has
17230 already spotted those cases by now. */
17231 if (sz != 4 && sz != 8)
17232 goto decode_failure;
17233 if (have66orF2orF3(pfx))
17234 goto decode_failure;
/* Back up one byte so dis_MMX re-reads the opcode itself. */
17236 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, delta-1 );
17239 goto decode_failure;
17244 case 0x0E: /* FEMMS */
17245 case 0x77: /* EMMS */
17247 goto decode_failure;
/* Mark the FP/MMX state as empty (tag word reset). */
17248 do_EMMS_preamble();
17252 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
17255 goto decode_failure;
17256 } /* switch (opc) for the 2-byte opcodes */
17257 goto decode_success;
17258 } /* case 0x0F: of primary opcode */
17260 /* ------------------------ ??? ------------------------ */
17264 /* All decode failures end up here. */
/* Dump the first six raw bytes of the undecodable insn for the log. */
17265 vex_printf("vex amd64->IR: unhandled instruction bytes: "
17266 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
17267 (Int)getUChar(delta_start+0),
17268 (Int)getUChar(delta_start+1),
17269 (Int)getUChar(delta_start+2),
17270 (Int)getUChar(delta_start+3),
17271 (Int)getUChar(delta_start+4),
17272 (Int)getUChar(delta_start+5) );
17274 /* Tell the dispatcher that this insn cannot be decoded, and so has
17275 not been executed, and (is currently) the next to be executed.
17276 RIP should be up-to-date since it made so at the start of each
17277 insn, but nevertheless be paranoid and update it again right
17279 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
17280 jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
17281 dres.whatNext = Dis_StopHere;
17283 /* We also need to say that a CAS is not expected now, regardless
17284 of what it might have been set to at the start of the function,
17285 since the IR that we've emitted just above (to synthesis a
17286 SIGILL) does not involve any CAS, and presumably no other IR has
17287 been emitted for this (non-decoded) insn. */
17288 *expect_CAS = False;
17291 } /* switch (opc) for the main (primary) opcode switch. */
17294 /* All decode successes end up here. */
/* Instruction length = bytes consumed since decode started. */
17296 dres.len = (Int)toUInt(delta - delta_start);
17304 /*------------------------------------------------------------*/
17305 /*--- Top-level fn ---*/
17306 /*------------------------------------------------------------*/
17308 /* Disassemble a single instruction into IR. The instruction
17309 is located in host memory at &guest_code[delta]. */
/* Top-level entry point: sets up the file-scope decode globals, runs the
   worker (disInstr_AMD64_WRK), then performs two sanity checks: the
   assumed next-RIP (for insns like SYSCALL) and the presence/absence of
   an IRCAS matching expect_CAS.
   NOTE(review): several lines of this function are elided from this
   excerpt (e.g. the put_IP/delta parameters, and the declarations of
   dres, i, x1, x2). */
17311 DisResult disInstr_AMD64 ( IRSB* irsb_IN,
17313 Bool (*resteerOkFn) ( void*, Addr64 ),
17315 void* callback_opaque,
17316 UChar* guest_code_IN,
17319 VexArch guest_arch,
17320 VexArchInfo* archinfo,
17321 VexAbiInfo* abiinfo,
17322 Bool host_bigendian_IN )
17325 Bool expect_CAS, has_CAS;
17328 /* Set globals (see top of this file) */
17329 vassert(guest_arch == VexArchAMD64);
17330 guest_code = guest_code_IN;
17332 host_is_bigendian = host_bigendian_IN;
17333 guest_RIP_curr_instr = guest_IP;
/* Recover the block's base address from the insn address and offset. */
17334 guest_RIP_bbstart = guest_IP - delta;
17336 /* We'll consult these after doing disInstr_AMD64_WRK. */
17337 guest_RIP_next_assumed = 0;
17338 guest_RIP_next_mustcheck = False;
/* x1/x2 bracket the statements the worker appends to irsb_IN, so the
   CAS scan below only inspects this insn's IR. */
17340 x1 = irsb_IN->stmts_used;
17341 expect_CAS = False;
17342 dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
17345 delta, archinfo, abiinfo );
17346 x2 = irsb_IN->stmts_used;
17349 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
17350 got it right. Failure of this assertion is serious and denotes
17351 a bug in disInstr. */
17352 if (guest_RIP_next_mustcheck
17353 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
17355 vex_printf("assumed next %%rip = 0x%llx\n",
17356 guest_RIP_next_assumed );
17357 vex_printf(" actual next %%rip = 0x%llx\n",
17358 guest_RIP_curr_instr + dres.len );
17359 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
17362 /* See comment at the top of disInstr_AMD64_WRK for meaning of
17363 expect_CAS. Here, we (sanity-)check for the presence/absence of
17364 IRCAS as directed by the returned expect_CAS value. */
17366 for (i = x1; i < x2; i++) {
/* NOTE(review): the 'has_CAS = True' body of this if is elided. */
17367 if (irsb_IN->stmts[i]->tag == Ist_CAS)
17371 if (expect_CAS != has_CAS) {
17372 /* inconsistency detected. re-disassemble the instruction so as
17373 to generate a useful error message; then assert. */
/* Turn on front-end tracing so the retry prints its decode steps. */
17374 vex_traceflags |= VEX_TRACE_FE;
17375 dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
17378 delta, archinfo, abiinfo );
17379 for (i = x1; i < x2; i++) {
17380 vex_printf("\t\t");
17381 ppIRStmt(irsb_IN->stmts[i]);
17384 /* Failure of this assertion is serious and denotes a bug in
17386 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
17394 /*--------------------------------------------------------------------*/
17395 /*--- end guest_amd64_toIR.c ---*/
17396 /*--------------------------------------------------------------------*/