2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_amd64_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 /* Translates AMD64 code to IR. */
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 64-bit value is being written.
45 * all arithmetic done at 64 bits
47 * no FP exceptions, except for handling stack over/underflow
49 * FP rounding mode observed only for float->int conversions and
50 int->float conversions which could lose accuracy, and for
51 float-to-float rounding. For all other operations,
52 round-to-nearest is used, regardless.
54 * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
55 simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
58 * some of the FCOM cases could do with testing -- not convinced
59 that the args are the right way round.
61 * FSAVE does not re-initialise the FPU; it should do
63 * FINIT not only initialises the FPU environment, it also zeroes
64 all the FP registers. It should leave the registers unchanged.
66 RDTSC returns zero, always.
68 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
69 per Intel docs this bit has no meaning anyway. Since PUSHF is the
70 only way to observe eflags[1], a proper fix would be to make that
73 This module uses global variables and so is not MT-safe (if that
74 should ever become relevant).
77 /* Notes re address size overrides (0x67).
79 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
80 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
81 and System Instructions"), Section 1.2.3 ("Address-Size Override
84 0x67 applies to all explicit memory references, causing the top
85 32 bits of the effective address to become zero.
87 0x67 has no effect on stack references (push/pop); these always
90 0x67 changes the interpretation of instructions which implicitly
91 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
106 /* "Special" instructions.
108 This instruction decoder can decode three special instructions
109 which mean nothing natively (are no-ops as far as regs/mem are
110 concerned) but have meaning for supporting Valgrind. A special
111 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
112 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
113 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
114 Following that, one of the following 3 are allowed (standard
115 interpretation in parentheses):
117 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
118 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
119 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
121 Any other bytes following the 16-byte preamble are illegal and
122 constitute a failure in instruction decoding. This all assumes
123 that the preamble will never occur except in specific code
124 fragments designed for Valgrind to catch.
126 No prefixes may precede a "Special" instruction.
129 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
130 insns: the side-exit back to the start of the insn is done with
131 Ijk_Boring. This is quite wrong, it should be done with
132 Ijk_NoRedir, since otherwise the side exit, which is intended to
133 restart the instruction for whatever reason, could go somewhere
134 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
135 no-redir jumps performance critical, at least for rep-prefixed
136 instructions, since all iterations thereof would involve such a
137 jump. It's not such a big deal with casLE since the side exit is
138 only taken if the CAS fails, that is, the location is contended,
139 which is relatively unlikely.
141 Note also, the test for CAS success vs failure is done using
142 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
143 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
144 shouldn't definedness-check these comparisons. See
145 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
146 background/rationale.
149 /* LOCK prefixed instructions. These are translated using IR-level
150 CAS statements (IRCAS) and are believed to preserve atomicity, even
151 from the point of view of some other process racing against a
152 simulated one (presumably they communicate via a shared memory
155 Handlers which are aware of LOCK prefixes are:
156 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
157 dis_cmpxchg_G_E (cmpxchg)
158 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
162 dis_Grp8_Imm (bts, btc, btr)
163 dis_bt_G_E (bts, btc, btr)
168 #include "libvex_basictypes.h"
169 #include "libvex_ir.h"
171 #include "libvex_guest_amd64.h"
173 #include "main_util.h"
174 #include "main_globals.h"
175 #include "guest_generic_bb_to_IR.h"
176 #include "guest_generic_x87.h"
177 #include "guest_amd64_defs.h"
180 /*------------------------------------------------------------*/
182 /*------------------------------------------------------------*/
184 /* These are set at the start of the translation of an insn, right
185 down in disInstr_AMD64, so that we don't have to pass them around
186 endlessly. They are all constant during the translation of any
189 /* These are set at the start of the translation of a BB, so
190 that we don't have to pass them around endlessly. */
192 /* We need to know this to do sub-register accesses correctly. */
193 static Bool host_is_bigendian;
195 /* Pointer to the guest code area (points to start of BB, not to the
196 insn being processed). */
197 static UChar* guest_code;
199 /* The guest address corresponding to guest_code[0]. */
200 static Addr64 guest_RIP_bbstart;
202 /* The guest address for the instruction currently being
204 static Addr64 guest_RIP_curr_instr;
206 /* The IRSB* into which we're generating code. */
209 /* For ensuring that %rip-relative addressing is done right. A read
210 of %rip generates the address of the next instruction. It may be
211 that we don't conveniently know that inside disAMode(). For sanity
212 checking, if the next insn %rip is needed, we make a guess at what
213 it is, record that guess here, and set the accompanying Bool to
214 indicate that -- after this insn's decode is finished -- that guess
215 needs to be checked. */
217 /* At the start of each insn decode, is set to (0, False).
218 After the decode, if _mustcheck is now True, _assumed is
221 static Addr64 guest_RIP_next_assumed;
222 static Bool guest_RIP_next_mustcheck;
225 /*------------------------------------------------------------*/
226 /*--- Helpers for constructing IR. ---*/
227 /*------------------------------------------------------------*/
229 /* Generate a new temporary of the given type. */
230 static IRTemp newTemp ( IRType ty )
232 vassert(isPlausibleIRType(ty));
233 return newIRTemp( irsb->tyenv, ty );
236 /* Add a statement to the list held by "irsb". */
237 static void stmt ( IRStmt* st )
239 addStmtToIRSB( irsb, st );
242 /* Generate a statement "dst := e". */
243 static void assign ( IRTemp dst, IRExpr* e )
245 stmt( IRStmt_WrTmp(dst, e) );
248 static IRExpr* unop ( IROp op, IRExpr* a )
250 return IRExpr_Unop(op, a);
253 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
255 return IRExpr_Binop(op, a1, a2);
258 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
260 return IRExpr_Triop(op, a1, a2, a3);
263 static IRExpr* mkexpr ( IRTemp tmp )
265 return IRExpr_RdTmp(tmp);
268 static IRExpr* mkU8 ( ULong i )
271 return IRExpr_Const(IRConst_U8( (UChar)i ));
274 static IRExpr* mkU16 ( ULong i )
276 vassert(i < 0x10000ULL);
277 return IRExpr_Const(IRConst_U16( (UShort)i ));
280 static IRExpr* mkU32 ( ULong i )
282 vassert(i < 0x100000000ULL);
283 return IRExpr_Const(IRConst_U32( (UInt)i ));
286 static IRExpr* mkU64 ( ULong i )
288 return IRExpr_Const(IRConst_U64(i));
291 static IRExpr* mkU ( IRType ty, ULong i )
294 case Ity_I8: return mkU8(i);
295 case Ity_I16: return mkU16(i);
296 case Ity_I32: return mkU32(i);
297 case Ity_I64: return mkU64(i);
298 default: vpanic("mkU(amd64)");
302 static void storeLE ( IRExpr* addr, IRExpr* data )
304 stmt( IRStmt_Store(Iend_LE, addr, data) );
307 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
309 return IRExpr_Load(Iend_LE, ty, addr);
312 static IROp mkSizedOp ( IRType ty, IROp op8 )
314 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
316 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
317 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
318 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
319 || op8 == Iop_CasCmpNE8
320 || op8 == Iop_Not8 );
322 case Ity_I8: return 0 +op8;
323 case Ity_I16: return 1 +op8;
324 case Ity_I32: return 2 +op8;
325 case Ity_I64: return 3 +op8;
326 default: vpanic("mkSizedOp(amd64)");
331 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
333 if (szSmall == 1 && szBig == 4) {
334 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
336 if (szSmall == 1 && szBig == 2) {
337 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
339 if (szSmall == 2 && szBig == 4) {
340 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
342 if (szSmall == 1 && szBig == 8 && !signd) {
343 return unop(Iop_8Uto64, src);
345 if (szSmall == 1 && szBig == 8 && signd) {
346 return unop(Iop_8Sto64, src);
348 if (szSmall == 2 && szBig == 8 && !signd) {
349 return unop(Iop_16Uto64, src);
351 if (szSmall == 2 && szBig == 8 && signd) {
352 return unop(Iop_16Sto64, src);
354 vpanic("doScalarWidening(amd64)");
359 /*------------------------------------------------------------*/
360 /*--- Debugging output ---*/
361 /*------------------------------------------------------------*/
363 /* Bomb out if we can't handle something. */
364 __attribute__ ((noreturn))
365 static void unimplemented ( HChar* str )
367 vex_printf("amd64toIR: unimplemented feature\n");
371 #define DIP(format, args...) \
372 if (vex_traceflags & VEX_TRACE_FE) \
373 vex_printf(format, ## args)
375 #define DIS(buf, format, args...) \
376 if (vex_traceflags & VEX_TRACE_FE) \
377 vex_sprintf(buf, format, ## args)
380 /*------------------------------------------------------------*/
381 /*--- Offsets of various parts of the amd64 guest state. ---*/
382 /*------------------------------------------------------------*/
384 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX)
385 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX)
386 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX)
387 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX)
388 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP)
389 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP)
390 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI)
391 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI)
392 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8)
393 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9)
394 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10)
395 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11)
396 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12)
397 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13)
398 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14)
399 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15)
401 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP)
403 #define OFFB_FS_ZERO offsetof(VexGuestAMD64State,guest_FS_ZERO)
404 #define OFFB_GS_0x60 offsetof(VexGuestAMD64State,guest_GS_0x60)
406 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP)
407 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1)
408 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2)
409 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP)
411 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0])
412 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0])
413 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG)
414 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG)
415 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG)
416 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP)
417 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210)
418 #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
420 //.. #define OFFB_CS offsetof(VexGuestX86State,guest_CS)
421 //.. #define OFFB_DS offsetof(VexGuestX86State,guest_DS)
422 //.. #define OFFB_ES offsetof(VexGuestX86State,guest_ES)
423 //.. #define OFFB_FS offsetof(VexGuestX86State,guest_FS)
424 //.. #define OFFB_GS offsetof(VexGuestX86State,guest_GS)
425 //.. #define OFFB_SS offsetof(VexGuestX86State,guest_SS)
426 //.. #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT)
427 //.. #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT)
429 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND)
430 #define OFFB_XMM0 offsetof(VexGuestAMD64State,guest_XMM0)
431 #define OFFB_XMM1 offsetof(VexGuestAMD64State,guest_XMM1)
432 #define OFFB_XMM2 offsetof(VexGuestAMD64State,guest_XMM2)
433 #define OFFB_XMM3 offsetof(VexGuestAMD64State,guest_XMM3)
434 #define OFFB_XMM4 offsetof(VexGuestAMD64State,guest_XMM4)
435 #define OFFB_XMM5 offsetof(VexGuestAMD64State,guest_XMM5)
436 #define OFFB_XMM6 offsetof(VexGuestAMD64State,guest_XMM6)
437 #define OFFB_XMM7 offsetof(VexGuestAMD64State,guest_XMM7)
438 #define OFFB_XMM8 offsetof(VexGuestAMD64State,guest_XMM8)
439 #define OFFB_XMM9 offsetof(VexGuestAMD64State,guest_XMM9)
440 #define OFFB_XMM10 offsetof(VexGuestAMD64State,guest_XMM10)
441 #define OFFB_XMM11 offsetof(VexGuestAMD64State,guest_XMM11)
442 #define OFFB_XMM12 offsetof(VexGuestAMD64State,guest_XMM12)
443 #define OFFB_XMM13 offsetof(VexGuestAMD64State,guest_XMM13)
444 #define OFFB_XMM14 offsetof(VexGuestAMD64State,guest_XMM14)
445 #define OFFB_XMM15 offsetof(VexGuestAMD64State,guest_XMM15)
446 #define OFFB_XMM16 offsetof(VexGuestAMD64State,guest_XMM16)
448 #define OFFB_EMWARN offsetof(VexGuestAMD64State,guest_EMWARN)
449 #define OFFB_TISTART offsetof(VexGuestAMD64State,guest_TISTART)
450 #define OFFB_TILEN offsetof(VexGuestAMD64State,guest_TILEN)
452 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR)
455 /*------------------------------------------------------------*/
456 /*--- Helper bits and pieces for deconstructing the ---*/
457 /*--- amd64 insn stream. ---*/
458 /*------------------------------------------------------------*/
460 /* This is the AMD64 register encoding -- integer regs. */
478 //.. #define R_AL (0+R_EAX)
479 //.. #define R_AH (4+R_EAX)
481 /* This is the Intel register encoding -- segment regs. */
490 /* Various simple conversions */
492 static ULong extend_s_8to64 ( UChar x )
494 return (ULong)((((Long)x) << 56) >> 56);
497 static ULong extend_s_16to64 ( UShort x )
499 return (ULong)((((Long)x) << 48) >> 48);
502 static ULong extend_s_32to64 ( UInt x )
504 return (ULong)((((Long)x) << 32) >> 32);
507 /* Figure out whether the mod and rm parts of a modRM byte refer to a
508 register or memory. If so, the byte will have the form 11XXXYYY,
509 where YYY is the register number. */
511 static Bool epartIsReg ( UChar mod_reg_rm )
513 return toBool(0xC0 == (mod_reg_rm & 0xC0));
516 /* Extract the 'g' field from a modRM byte. This only produces 3
517 bits, which is not a complete register number. You should avoid
518 this function if at all possible. */
520 static Int gregLO3ofRM ( UChar mod_reg_rm )
522 return (Int)( (mod_reg_rm >> 3) & 7 );
525 /* Ditto the 'e' field of a modRM byte. */
527 static Int eregLO3ofRM ( UChar mod_reg_rm )
529 return (Int)(mod_reg_rm & 0x7);
532 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
534 static UChar getUChar ( Long delta )
536 UChar v = guest_code[delta+0];
540 static UInt getUDisp16 ( Long delta )
542 UInt v = guest_code[delta+1]; v <<= 8;
543 v |= guest_code[delta+0];
547 //.. static UInt getUDisp ( Int size, Long delta )
550 //.. case 4: return getUDisp32(delta);
551 //.. case 2: return getUDisp16(delta);
552 //.. case 1: return getUChar(delta);
553 //.. default: vpanic("getUDisp(x86)");
555 //.. return 0; /*notreached*/
559 /* Get a byte value out of the insn stream and sign-extend to 64
561 static Long getSDisp8 ( Long delta )
563 return extend_s_8to64( guest_code[delta] );
566 /* Get a 16-bit value out of the insn stream and sign-extend to 64
568 static Long getSDisp16 ( Long delta )
570 UInt v = guest_code[delta+1]; v <<= 8;
571 v |= guest_code[delta+0];
572 return extend_s_16to64( (UShort)v );
575 /* Get a 32-bit value out of the insn stream and sign-extend to 64
577 static Long getSDisp32 ( Long delta )
579 UInt v = guest_code[delta+3]; v <<= 8;
580 v |= guest_code[delta+2]; v <<= 8;
581 v |= guest_code[delta+1]; v <<= 8;
582 v |= guest_code[delta+0];
583 return extend_s_32to64( v );
586 /* Get a 64-bit value out of the insn stream. */
587 static Long getDisp64 ( Long delta )
590 v |= guest_code[delta+7]; v <<= 8;
591 v |= guest_code[delta+6]; v <<= 8;
592 v |= guest_code[delta+5]; v <<= 8;
593 v |= guest_code[delta+4]; v <<= 8;
594 v |= guest_code[delta+3]; v <<= 8;
595 v |= guest_code[delta+2]; v <<= 8;
596 v |= guest_code[delta+1]; v <<= 8;
597 v |= guest_code[delta+0];
601 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
602 if this is called with size==8. Should not happen. */
603 static Long getSDisp ( Int size, Long delta )
606 case 4: return getSDisp32(delta);
607 case 2: return getSDisp16(delta);
608 case 1: return getSDisp8(delta);
609 default: vpanic("getSDisp(amd64)");
613 static ULong mkSizeMask ( Int sz )
616 case 1: return 0x00000000000000FFULL;
617 case 2: return 0x000000000000FFFFULL;
618 case 4: return 0x00000000FFFFFFFFULL;
619 case 8: return 0xFFFFFFFFFFFFFFFFULL;
620 default: vpanic("mkSzMask(amd64)");
624 static Int imin ( Int a, Int b )
626 return (a < b) ? a : b;
629 static IRType szToITy ( Int n )
632 case 1: return Ity_I8;
633 case 2: return Ity_I16;
634 case 4: return Ity_I32;
635 case 8: return Ity_I64;
636 default: vex_printf("\nszToITy(%d)\n", n);
637 vpanic("szToITy(amd64)");
642 /*------------------------------------------------------------*/
643 /*--- For dealing with prefixes. ---*/
644 /*------------------------------------------------------------*/
646 /* The idea is to pass around an int holding a bitmask summarising
647 info from the prefixes seen on the current instruction, including
648 info from the REX byte. This info is used in various places, but
649 most especially when making sense of register fields in
652 The top 16 bits of the prefix are 0x3141, just as a hacky way
653 to ensure it really is a valid prefix.
655 Things you can safely assume about a well-formed prefix:
656 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
657 * if REX is not present then REXW,REXR,REXX,REXB will read
659 * F2 and F3 will not both be 1.
664 #define PFX_ASO (1<<0) /* address-size override present (0x67) */
665 #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */
666 #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */
667 #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */
668 #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */
669 #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */
670 #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */
671 #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */
672 #define PFX_F2 (1<<8) /* REP/REPE/REPZ prefix present (0xF2) */
673 #define PFX_F3 (1<<9) /* REPNE/REPNZ prefix present (0xF3) */
674 #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */
675 #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */
676 #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */
677 #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */
678 #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */
679 #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */
681 #define PFX_EMPTY 0x31410000
683 static Bool IS_VALID_PFX ( Prefix pfx ) {
684 return toBool((pfx & 0xFFFF0000) == PFX_EMPTY);
687 static Bool haveREX ( Prefix pfx ) {
688 return toBool(pfx & PFX_REX);
691 static Int getRexW ( Prefix pfx ) {
692 return (pfx & PFX_REXW) ? 1 : 0;
694 /* Apparently unused.
695 static Int getRexR ( Prefix pfx ) {
696 return (pfx & PFX_REXR) ? 1 : 0;
699 static Int getRexX ( Prefix pfx ) {
700 return (pfx & PFX_REXX) ? 1 : 0;
702 static Int getRexB ( Prefix pfx ) {
703 return (pfx & PFX_REXB) ? 1 : 0;
706 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
707 completely changes what instruction it really is. */
708 static Bool haveF2orF3 ( Prefix pfx ) {
709 return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
711 static Bool haveF2 ( Prefix pfx ) {
712 return toBool((pfx & PFX_F2) > 0);
714 static Bool haveF3 ( Prefix pfx ) {
715 return toBool((pfx & PFX_F3) > 0);
718 static Bool have66 ( Prefix pfx ) {
719 return toBool((pfx & PFX_66) > 0);
721 static Bool haveASO ( Prefix pfx ) {
722 return toBool((pfx & PFX_ASO) > 0);
725 /* Return True iff pfx has 66 set and F2 and F3 clear */
726 static Bool have66noF2noF3 ( Prefix pfx )
729 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
732 /* Return True iff pfx has F2 set and 66 and F3 clear */
733 static Bool haveF2no66noF3 ( Prefix pfx )
736 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
739 /* Return True iff pfx has F3 set and 66 and F2 clear */
740 static Bool haveF3no66noF2 ( Prefix pfx )
743 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
746 /* Return True iff pfx has F3 set and F2 clear */
747 static Bool haveF3noF2 ( Prefix pfx )
750 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
753 /* Return True iff pfx has F2 set and F3 clear */
754 static Bool haveF2noF3 ( Prefix pfx )
757 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
760 /* Return True iff pfx has 66, F2 and F3 clear */
761 static Bool haveNo66noF2noF3 ( Prefix pfx )
764 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
767 /* Return True iff pfx has any of 66, F2 and F3 set */
768 static Bool have66orF2orF3 ( Prefix pfx )
770 return toBool( ! haveNo66noF2noF3(pfx) );
773 /* Return True iff pfx has 66 or F2 set */
774 static Bool have66orF2 ( Prefix pfx )
776 return toBool((pfx & (PFX_66|PFX_F2)) > 0);
779 /* Clear all the segment-override bits in a prefix. */
780 static Prefix clearSegBits ( Prefix p )
783 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
787 /*------------------------------------------------------------*/
788 /*--- For dealing with integer registers ---*/
789 /*------------------------------------------------------------*/
791 /* This is somewhat complex. The rules are:
793 For 64, 32 and 16 bit register references, the e or g fields in the
794 modrm bytes supply the low 3 bits of the register number. The
795 fourth (most-significant) bit of the register number is supplied by
796 the REX byte, if it is present; else that bit is taken to be zero.
798 The REX.R bit supplies the high bit corresponding to the g register
799 field, and the REX.B bit supplies the high bit corresponding to the
800 e register field (when the mod part of modrm indicates that modrm's
801 e component refers to a register and not to memory).
803 The REX.X bit supplies a high register bit for certain registers
804 in SIB address modes, and is generally rarely used.
806 For 8 bit register references, the presence of the REX byte itself
807 has significance. If there is no REX present, then the 3-bit
808 number extracted from the modrm e or g field is treated as an index
809 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
810 old x86 encoding scheme.
812 But if there is a REX present, the register reference is
813 interpreted in the same way as for 64/32/16-bit references: a high
814 bit is extracted from REX, giving a 4-bit number, and the denoted
815 register is the lowest 8 bits of the 16 integer registers denoted
816 by the number. In particular, values 3 through 7 of this sequence
817 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
820 The REX.W bit has no bearing at all on register numbers. Instead
821 its presence indicates that the operand size is to be overridden
822 from its default value (32 bits) to 64 bits instead. This is in
823 the same fashion that an 0x66 prefix indicates the operand size is
824 to be overridden from 32 bits down to 16 bits. When both REX.W and
825 0x66 are present there is a conflict, and REX.W takes precedence.
827 Rather than try to handle this complexity using a single huge
828 function, several smaller ones are provided. The aim is to make it
829 as difficult as possible to screw up register decoding in a subtle
830 and hard-to-track-down way.
832 Because these routines fish around in the host's memory (that is,
833 in the guest state area) for sub-parts of guest registers, their
834 correctness depends on the host's endianness. So far these
835 routines only work for little-endian hosts. Those for which
836 endianness is important have assertions to ensure sanity.
840 /* About the simplest question you can ask: where do the 64-bit
841 integer registers live (in the guest state) ? */
843 static Int integerGuestReg64Offset ( UInt reg )
846 case R_RAX: return OFFB_RAX;
847 case R_RCX: return OFFB_RCX;
848 case R_RDX: return OFFB_RDX;
849 case R_RBX: return OFFB_RBX;
850 case R_RSP: return OFFB_RSP;
851 case R_RBP: return OFFB_RBP;
852 case R_RSI: return OFFB_RSI;
853 case R_RDI: return OFFB_RDI;
854 case R_R8: return OFFB_R8;
855 case R_R9: return OFFB_R9;
856 case R_R10: return OFFB_R10;
857 case R_R11: return OFFB_R11;
858 case R_R12: return OFFB_R12;
859 case R_R13: return OFFB_R13;
860 case R_R14: return OFFB_R14;
861 case R_R15: return OFFB_R15;
862 default: vpanic("integerGuestReg64Offset(amd64)");
867 /* Produce the name of an integer register, for printing purposes.
868 reg is a number in the range 0 .. 15 that has been generated from a
869 3-bit reg-field number and a REX extension bit. irregular denotes
870 the case where sz==1 and no REX byte is present. */
873 HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
875 static HChar* ireg64_names[16]
876 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
877 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
878 static HChar* ireg32_names[16]
879 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
880 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
881 static HChar* ireg16_names[16]
882 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
883 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
884 static HChar* ireg8_names[16]
885 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
886 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
887 static HChar* ireg8_irregular[8]
888 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };
895 vassert(irregular == False);
899 case 8: return ireg64_names[reg];
900 case 4: return ireg32_names[reg];
901 case 2: return ireg16_names[reg];
902 case 1: if (irregular) {
903 return ireg8_irregular[reg];
905 return ireg8_names[reg];
907 default: vpanic("nameIReg(amd64)");
911 /* Using the same argument conventions as nameIReg, produce the
912 guest state offset of an integer register. */
915 Int offsetIReg ( Int sz, UInt reg, Bool irregular )
922 vassert(irregular == False);
925 /* Deal with irregular case -- sz==1 and no REX present */
926 if (sz == 1 && irregular) {
928 case R_RSP: return 1+ OFFB_RAX;
929 case R_RBP: return 1+ OFFB_RCX;
930 case R_RSI: return 1+ OFFB_RDX;
931 case R_RDI: return 1+ OFFB_RBX;
932 default: break; /* use the normal case */
937 return integerGuestReg64Offset(reg);
941 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
943 static IRExpr* getIRegCL ( void )
945 vassert(!host_is_bigendian);
946 return IRExpr_Get( OFFB_RCX, Ity_I8 );
950 /* Write to the %AH register. */
952 static void putIRegAH ( IRExpr* e )
954 vassert(!host_is_bigendian);
955 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
956 stmt( IRStmt_Put( OFFB_RAX+1, e ) );
960 /* Read/write various widths of %RAX, as it has various
961 special-purpose uses. */
963 static HChar* nameIRegRAX ( Int sz )
966 case 1: return "%al";
967 case 2: return "%ax";
968 case 4: return "%eax";
969 case 8: return "%rax";
970 default: vpanic("nameIRegRAX(amd64)");
974 static IRExpr* getIRegRAX ( Int sz )
976 vassert(!host_is_bigendian);
978 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
979 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
980 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
981 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
982 default: vpanic("getIRegRAX(amd64)");
986 static void putIRegRAX ( Int sz, IRExpr* e )
988 IRType ty = typeOfIRExpr(irsb->tyenv, e);
989 vassert(!host_is_bigendian);
991 case 8: vassert(ty == Ity_I64);
992 stmt( IRStmt_Put( OFFB_RAX, e ));
994 case 4: vassert(ty == Ity_I32);
995 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
997 case 2: vassert(ty == Ity_I16);
998 stmt( IRStmt_Put( OFFB_RAX, e ));
1000 case 1: vassert(ty == Ity_I8);
1001 stmt( IRStmt_Put( OFFB_RAX, e ));
1003 default: vpanic("putIRegRAX(amd64)");
1008 /* Read/write various widths of %RDX, as it has various
1009 special-purpose uses. */
1011 static HChar* nameIRegRDX ( Int sz )
1014 case 1: return "%dl";
1015 case 2: return "%dx";
1016 case 4: return "%edx";
1017 case 8: return "%rdx";
1018 default: vpanic("nameIRegRDX(amd64)");
1022 static IRExpr* getIRegRDX ( Int sz )
1024 vassert(!host_is_bigendian);
1026 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
1027 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
1028 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1029 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
1030 default: vpanic("getIRegRDX(amd64)");
1034 static void putIRegRDX ( Int sz, IRExpr* e )
1036 vassert(!host_is_bigendian);
1037 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1039 case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
1041 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
1043 case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
1045 case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
1047 default: vpanic("putIRegRDX(amd64)");
1052 /* Simplistic functions to deal with the integer registers as a
1053 straightforward bank of 16 64-bit regs. */
1055 static IRExpr* getIReg64 ( UInt regno )
1057 return IRExpr_Get( integerGuestReg64Offset(regno),
1061 static void putIReg64 ( UInt regno, IRExpr* e )
1063 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1064 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
1067 static HChar* nameIReg64 ( UInt regno )
1069 return nameIReg( 8, regno, False );
1073 /* Simplistic functions to deal with the lower halves of integer
1074 registers as a straightforward bank of 16 32-bit regs. */
1076 static IRExpr* getIReg32 ( UInt regno )
1078 vassert(!host_is_bigendian);
1079 return unop(Iop_64to32,
1080 IRExpr_Get( integerGuestReg64Offset(regno),
1084 static void putIReg32 ( UInt regno, IRExpr* e )
1086 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1087 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1088 unop(Iop_32Uto64,e) ) );
1091 static HChar* nameIReg32 ( UInt regno )
1093 return nameIReg( 4, regno, False );
1097 /* Simplistic functions to deal with the lower quarters of integer
1098 registers as a straightforward bank of 16 16-bit regs. */
1100 static IRExpr* getIReg16 ( UInt regno )
1102 vassert(!host_is_bigendian);
1103 return IRExpr_Get( integerGuestReg64Offset(regno),
1107 static HChar* nameIReg16 ( UInt regno )
1109 return nameIReg( 2, regno, False );
1113 /* Sometimes what we know is a 3-bit register number, a REX byte, and
1114 which field of the REX byte is to be used to extend to a 4-bit
1115 number. These functions cater for that situation.
1117 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
1119 vassert(lo3bits < 8);
1120 vassert(IS_VALID_PFX(pfx));
1121 return getIReg64( lo3bits | (getRexX(pfx) << 3) );
1124 static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
1126 vassert(lo3bits < 8);
1127 vassert(IS_VALID_PFX(pfx));
1128 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
1131 static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1133 vassert(lo3bits < 8);
1134 vassert(IS_VALID_PFX(pfx));
1135 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1136 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
1137 toBool(sz==1 && !haveREX(pfx)) );
1140 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1142 vassert(lo3bits < 8);
1143 vassert(IS_VALID_PFX(pfx));
1144 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1147 return unop(Iop_64to32,
1149 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1150 toBool(sz==1 && !haveREX(pfx)) ),
1156 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1157 toBool(sz==1 && !haveREX(pfx)) ),
1163 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
1165 vassert(lo3bits < 8);
1166 vassert(IS_VALID_PFX(pfx));
1167 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1168 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1170 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1171 toBool(sz==1 && !haveREX(pfx)) ),
1172 sz==4 ? unop(Iop_32Uto64,e) : e
1177 /* Functions for getting register numbers from modrm bytes and REX
1178 when we don't have to consider the complexities of integer subreg
1181 /* Extract the g reg field from a modRM byte, and augment it using the
1182 REX.R bit from the supplied REX byte. The R bit usually is
1183 associated with the g register field.
1185 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1187 Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
1188 reg += (pfx & PFX_REXR) ? 8 : 0;
1192 /* Extract the e reg field from a modRM byte, and augment it using the
1193 REX.B bit from the supplied REX byte. The B bit usually is
1194 associated with the e register field (when modrm indicates e is a
1197 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1200 vassert(epartIsReg(mod_reg_rm));
1201 rm = (Int)(mod_reg_rm & 0x7);
1202 rm += (pfx & PFX_REXB) ? 8 : 0;
1207 /* General functions for dealing with integer register access. */
1209 /* Produce the guest state offset for a reference to the 'g' register
1210 field in a modrm byte, taking into account REX (or its absence),
1211 and the size of the access.
1213 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1216 vassert(!host_is_bigendian);
1217 vassert(IS_VALID_PFX(pfx));
1218 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1219 reg = gregOfRexRM( pfx, mod_reg_rm );
1220 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1224 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1228 return unop(Iop_64to32,
1229 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1232 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1238 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1240 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1242 e = unop(Iop_32Uto64,e);
1244 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
1248 HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1250 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
1251 toBool(sz==1 && !haveREX(pfx)) );
1255 /* Produce the guest state offset for a reference to the 'e' register
1256 field in a modrm byte, taking into account REX (or its absence),
1257 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1258 denotes a memory access rather than a register access.
1260 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1263 vassert(!host_is_bigendian);
1264 vassert(IS_VALID_PFX(pfx));
1265 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1266 reg = eregOfRexRM( pfx, mod_reg_rm );
1267 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1271 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1275 return unop(Iop_64to32,
1276 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1279 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1285 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1287 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1289 e = unop(Iop_32Uto64,e);
1291 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
1295 HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1297 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
1298 toBool(sz==1 && !haveREX(pfx)) );
1302 /*------------------------------------------------------------*/
1303 /*--- For dealing with XMM registers ---*/
1304 /*------------------------------------------------------------*/
1306 //.. static Int segmentGuestRegOffset ( UInt sreg )
1308 //.. switch (sreg) {
1309 //.. case R_ES: return OFFB_ES;
1310 //.. case R_CS: return OFFB_CS;
1311 //.. case R_SS: return OFFB_SS;
1312 //.. case R_DS: return OFFB_DS;
1313 //.. case R_FS: return OFFB_FS;
1314 //.. case R_GS: return OFFB_GS;
1315 //.. default: vpanic("segmentGuestRegOffset(x86)");
1319 static Int xmmGuestRegOffset ( UInt xmmreg )
1322 case 0: return OFFB_XMM0;
1323 case 1: return OFFB_XMM1;
1324 case 2: return OFFB_XMM2;
1325 case 3: return OFFB_XMM3;
1326 case 4: return OFFB_XMM4;
1327 case 5: return OFFB_XMM5;
1328 case 6: return OFFB_XMM6;
1329 case 7: return OFFB_XMM7;
1330 case 8: return OFFB_XMM8;
1331 case 9: return OFFB_XMM9;
1332 case 10: return OFFB_XMM10;
1333 case 11: return OFFB_XMM11;
1334 case 12: return OFFB_XMM12;
1335 case 13: return OFFB_XMM13;
1336 case 14: return OFFB_XMM14;
1337 case 15: return OFFB_XMM15;
1338 default: vpanic("xmmGuestRegOffset(amd64)");
1342 /* Lanes of vector registers are always numbered from zero being the
1343 least significant lane (rightmost in the register). */
1345 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
1347 /* Correct for little-endian host only. */
1348 vassert(!host_is_bigendian);
1349 vassert(laneno >= 0 && laneno < 8);
1350 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
1353 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
1355 /* Correct for little-endian host only. */
1356 vassert(!host_is_bigendian);
1357 vassert(laneno >= 0 && laneno < 4);
1358 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
1361 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
1363 /* Correct for little-endian host only. */
1364 vassert(!host_is_bigendian);
1365 vassert(laneno >= 0 && laneno < 2);
1366 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
1369 //.. static IRExpr* getSReg ( UInt sreg )
1371 //.. return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
1374 //.. static void putSReg ( UInt sreg, IRExpr* e )
1376 //.. vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1377 //.. stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
1380 static IRExpr* getXMMReg ( UInt xmmreg )
1382 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1385 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1387 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1390 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1392 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1395 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1397 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1400 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1402 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1405 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1407 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1410 static void putXMMReg ( UInt xmmreg, IRExpr* e )
1412 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1413 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1416 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1418 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1419 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1422 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1424 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1425 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1428 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1430 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1431 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1434 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1436 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1437 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1440 static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
1442 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1443 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
1446 static IRExpr* mkV128 ( UShort mask )
1448 return IRExpr_Const(IRConst_V128(mask));
1451 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1453 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1454 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
1455 return unop(Iop_64to1,
1458 unop(Iop_1Uto64,y)));
1461 /* Generate a compare-and-swap operation, operating on memory at
1462 'addr'. The expected value is 'expVal' and the new value is
1463 'newVal'. If the operation fails, then transfer control (with a
1464 no-redir jump (XXX no -- see comment at top of this file)) to
1465 'restart_point', which is presumably the address of the guest
1466 instruction again -- retrying, essentially. */
1467 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1468 Addr64 restart_point )
1471 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1472 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1473 IRTemp oldTmp = newTemp(tyE);
1474 IRTemp expTmp = newTemp(tyE);
1475 vassert(tyE == tyN);
1476 vassert(tyE == Ity_I64 || tyE == Ity_I32
1477 || tyE == Ity_I16 || tyE == Ity_I8);
1478 assign(expTmp, expVal);
1479 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1480 NULL, mkexpr(expTmp), NULL, newVal );
1481 stmt( IRStmt_CAS(cas) );
1483 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1484 mkexpr(oldTmp), mkexpr(expTmp) ),
1485 Ijk_Boring, /*Ijk_NoRedir*/
1486 IRConst_U64( restart_point )
1491 /*------------------------------------------------------------*/
1492 /*--- Helpers for %rflags. ---*/
1493 /*------------------------------------------------------------*/
1495 /* -------------- Evaluating the flags-thunk. -------------- */
1497 /* Build IR to calculate all the eflags from stored
1498 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1500 static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1503 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1504 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1505 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1506 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1511 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1514 /* Exclude OP and NDEP from definedness checking. We're only
1515 interested in DEP1 and DEP2. */
1516 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1520 /* Build IR to calculate some particular condition from stored
1521 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1523 static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1526 = mkIRExprVec_5( mkU64(cond),
1527 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1528 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1529 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1530 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1535 "amd64g_calculate_condition", &amd64g_calculate_condition,
1538 /* Exclude the requested condition, OP and NDEP from definedness
1539 checking. We're only interested in DEP1 and DEP2. */
1540 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
1541 return unop(Iop_64to1, call);
1544 /* Build IR to calculate just the carry flag from stored
1545 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1546 static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1549 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1550 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1551 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1552 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1557 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1560 /* Exclude OP and NDEP from definedness checking. We're only
1561 interested in DEP1 and DEP2. */
1562 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1567 /* -------------- Building the flags-thunk. -------------- */
1569 /* The machinery in this section builds the flag-thunk following a
1570 flag-setting operation. Hence the various setFlags_* functions.
1573 static Bool isAddSub ( IROp op8 )
1575 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
1578 static Bool isLogic ( IROp op8 )
1580 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
1583 /* U-widen 8/16/32/64 bit int expr to 64. */
1584 static IRExpr* widenUto64 ( IRExpr* e )
1586 switch (typeOfIRExpr(irsb->tyenv,e)) {
1587 case Ity_I64: return e;
1588 case Ity_I32: return unop(Iop_32Uto64, e);
1589 case Ity_I16: return unop(Iop_16Uto64, e);
1590 case Ity_I8: return unop(Iop_8Uto64, e);
1591 default: vpanic("widenUto64");
1595 /* S-widen 8/16/32/64 bit int expr to 32. */
1596 static IRExpr* widenSto64 ( IRExpr* e )
1598 switch (typeOfIRExpr(irsb->tyenv,e)) {
1599 case Ity_I64: return e;
1600 case Ity_I32: return unop(Iop_32Sto64, e);
1601 case Ity_I16: return unop(Iop_16Sto64, e);
1602 case Ity_I8: return unop(Iop_8Sto64, e);
1603 default: vpanic("widenSto64");
1607 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1608 of these combinations make sense. */
1609 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1611 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
1612 if (src_ty == dst_ty)
1614 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1615 return unop(Iop_32to16, e);
1616 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1617 return unop(Iop_32to8, e);
1618 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1619 return unop(Iop_64to32, e);
1620 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
1621 return unop(Iop_64to16, e);
1622 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
1623 return unop(Iop_64to8, e);
1625 vex_printf("\nsrc, dst tys are: ");
1630 vpanic("narrowTo(amd64)");
1634 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1635 auto-sized up to the real op. */
1638 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1642 case Ity_I8: ccOp = 0; break;
1643 case Ity_I16: ccOp = 1; break;
1644 case Ity_I32: ccOp = 2; break;
1645 case Ity_I64: ccOp = 3; break;
1646 default: vassert(0);
1649 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1650 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1651 default: ppIROp(op8);
1652 vpanic("setFlags_DEP1_DEP2(amd64)");
1654 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1655 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1656 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
1660 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
1663 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1667 case Ity_I8: ccOp = 0; break;
1668 case Ity_I16: ccOp = 1; break;
1669 case Ity_I32: ccOp = 2; break;
1670 case Ity_I64: ccOp = 3; break;
1671 default: vassert(0);
1676 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1677 default: ppIROp(op8);
1678 vpanic("setFlags_DEP1(amd64)");
1680 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1681 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1682 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1686 /* For shift operations, we put in the result and the undershifted
1687 result. Except if the shift amount is zero, the thunk is left
1690 static void setFlags_DEP1_DEP2_shift ( IROp op64,
1698 case Ity_I8: ccOp = 0; break;
1699 case Ity_I16: ccOp = 1; break;
1700 case Ity_I32: ccOp = 2; break;
1701 case Ity_I64: ccOp = 3; break;
1702 default: vassert(0);
1707 /* Both kinds of right shifts are handled by the same thunk
1711 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1712 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1713 default: ppIROp(op64);
1714 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1717 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1718 stmt( IRStmt_Put( OFFB_CC_OP,
1719 IRExpr_Mux0X( mkexpr(guard),
1720 IRExpr_Get(OFFB_CC_OP,Ity_I64),
1722 stmt( IRStmt_Put( OFFB_CC_DEP1,
1723 IRExpr_Mux0X( mkexpr(guard),
1724 IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
1725 widenUto64(mkexpr(res)))) );
1726 stmt( IRStmt_Put( OFFB_CC_DEP2,
1727 IRExpr_Mux0X( mkexpr(guard),
1728 IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
1729 widenUto64(mkexpr(resUS)))) );
1733 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1734 the former value of the carry flag, which unfortunately we have to
1737 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1739 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1742 case Ity_I8: ccOp += 0; break;
1743 case Ity_I16: ccOp += 1; break;
1744 case Ity_I32: ccOp += 2; break;
1745 case Ity_I64: ccOp += 3; break;
1746 default: vassert(0);
1749 /* This has to come first, because calculating the C flag
1750 may require reading all four thunk fields. */
1751 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1752 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1753 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
1754 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1758 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1762 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
1766 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
1769 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
1772 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
1775 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
1778 vpanic("setFlags_MUL(amd64)");
1780 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
1781 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
1785 /* -------------- Condition codes. -------------- */
1787 /* Condition codes, using the AMD encoding. */
1789 static HChar* name_AMD64Condcode ( AMD64Condcode cond )
1792 case AMD64CondO: return "o";
1793 case AMD64CondNO: return "no";
1794 case AMD64CondB: return "b";
1795 case AMD64CondNB: return "ae"; /*"nb";*/
1796 case AMD64CondZ: return "e"; /*"z";*/
1797 case AMD64CondNZ: return "ne"; /*"nz";*/
1798 case AMD64CondBE: return "be";
1799 case AMD64CondNBE: return "a"; /*"nbe";*/
1800 case AMD64CondS: return "s";
1801 case AMD64CondNS: return "ns";
1802 case AMD64CondP: return "p";
1803 case AMD64CondNP: return "np";
1804 case AMD64CondL: return "l";
1805 case AMD64CondNL: return "ge"; /*"nl";*/
1806 case AMD64CondLE: return "le";
1807 case AMD64CondNLE: return "g"; /*"nle";*/
1808 case AMD64CondAlways: return "ALWAYS";
1809 default: vpanic("name_AMD64Condcode");
1814 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
1815 /*OUT*/Bool* needInvert )
1817 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
1822 *needInvert = False;
1828 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1830 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1833 Optionally, generate a store for the 'tres' value. This can either
1834 be a normal store, or it can be a cas-with-possible-failure style
1837 if taddr is IRTemp_INVALID, then no store is generated.
1839 if taddr is not IRTemp_INVALID, then a store (using taddr as
1840 the address) is generated:
1842 if texpVal is IRTemp_INVALID then a normal store is
1843 generated, and restart_point must be zero (it is irrelevant).
1845 if texpVal is not IRTemp_INVALID then a cas-style store is
1846 generated. texpVal is the expected value, restart_point
1847 is the restart point if the store fails, and texpVal must
1848 have the same type as tres.
1851 static void helper_ADC ( Int sz,
1852 IRTemp tres, IRTemp ta1, IRTemp ta2,
1853 /* info about optional store: */
1854 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1857 IRType ty = szToITy(sz);
1858 IRTemp oldc = newTemp(Ity_I64);
1859 IRTemp oldcn = newTemp(ty);
1860 IROp plus = mkSizedOp(ty, Iop_Add8);
1861 IROp xor = mkSizedOp(ty, Iop_Xor8);
1863 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1866 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
1867 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
1868 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
1869 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
1870 default: vassert(0);
1873 /* oldc = old carry flag, 0 or 1 */
1874 assign( oldc, binop(Iop_And64,
1875 mk_amd64g_calculate_rflags_c(),
1878 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
1880 assign( tres, binop(plus,
1881 binop(plus,mkexpr(ta1),mkexpr(ta2)),
1884 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1885 start of this function. */
1886 if (taddr != IRTemp_INVALID) {
1887 if (texpVal == IRTemp_INVALID) {
1888 vassert(restart_point == 0);
1889 storeLE( mkexpr(taddr), mkexpr(tres) );
1891 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1892 /* .. and hence 'texpVal' has the same type as 'tres'. */
1893 casLE( mkexpr(taddr),
1894 mkexpr(texpVal), mkexpr(tres), restart_point );
1898 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
1899 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
1900 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
1901 mkexpr(oldcn)) )) );
1902 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1906 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
1907 appropriately. As with helper_ADC, possibly generate a store of
1908 the result -- see comments on helper_ADC for details.
1910 static void helper_SBB ( Int sz,
1911 IRTemp tres, IRTemp ta1, IRTemp ta2,
1912 /* info about optional store: */
1913 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1916 IRType ty = szToITy(sz);
1917 IRTemp oldc = newTemp(Ity_I64);
1918 IRTemp oldcn = newTemp(ty);
1919 IROp minus = mkSizedOp(ty, Iop_Sub8);
1920 IROp xor = mkSizedOp(ty, Iop_Xor8);
1922 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1925 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
1926 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
1927 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
1928 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
1929 default: vassert(0);
1932 /* oldc = old carry flag, 0 or 1 */
1933 assign( oldc, binop(Iop_And64,
1934 mk_amd64g_calculate_rflags_c(),
1937 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
1939 assign( tres, binop(minus,
1940 binop(minus,mkexpr(ta1),mkexpr(ta2)),
1943 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1944 start of this function. */
1945 if (taddr != IRTemp_INVALID) {
1946 if (texpVal == IRTemp_INVALID) {
1947 vassert(restart_point == 0);
1948 storeLE( mkexpr(taddr), mkexpr(tres) );
1950 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1951 /* .. and hence 'texpVal' has the same type as 'tres'. */
1952 casLE( mkexpr(taddr),
1953 mkexpr(texpVal), mkexpr(tres), restart_point );
1957 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
1958 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
1959 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
1960 mkexpr(oldcn)) )) );
1961 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1965 /* -------------- Helpers for disassembly printing. -------------- */
1967 static HChar* nameGrp1 ( Int opc_aux )
1969 static HChar* grp1_names[8]
1970 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1971 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
1972 return grp1_names[opc_aux];
1975 static HChar* nameGrp2 ( Int opc_aux )
1977 static HChar* grp2_names[8]
1978 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1979 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
1980 return grp2_names[opc_aux];
1983 static HChar* nameGrp4 ( Int opc_aux )
1985 static HChar* grp4_names[8]
1986 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1987 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
1988 return grp4_names[opc_aux];
1991 static HChar* nameGrp5 ( Int opc_aux )
1993 static HChar* grp5_names[8]
1994 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1995 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
1996 return grp5_names[opc_aux];
1999 static HChar* nameGrp8 ( Int opc_aux )
2001 static HChar* grp8_names[8]
2002 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2003 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
2004 return grp8_names[opc_aux];
2007 //.. static HChar* nameSReg ( UInt sreg )
2009 //.. switch (sreg) {
2010 //.. case R_ES: return "%es";
2011 //.. case R_CS: return "%cs";
2012 //.. case R_SS: return "%ss";
2013 //.. case R_DS: return "%ds";
2014 //.. case R_FS: return "%fs";
2015 //.. case R_GS: return "%gs";
2016 //.. default: vpanic("nameSReg(x86)");
2020 static HChar* nameMMXReg ( Int mmxreg )
2022 static HChar* mmx_names[8]
2023 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2024 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2025 return mmx_names[mmxreg];
2028 static HChar* nameXMMReg ( Int xmmreg )
2030 static HChar* xmm_names[16]
2031 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2032 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2033 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2034 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2035 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2036 return xmm_names[xmmreg];
2039 static HChar* nameMMXGran ( Int gran )
2046 default: vpanic("nameMMXGran(amd64,guest)");
2050 static HChar nameISize ( Int size )
2057 default: vpanic("nameISize(amd64)");
2062 /*------------------------------------------------------------*/
2063 /*--- JMP helpers ---*/
2064 /*------------------------------------------------------------*/
2066 static void jmp_lit( IRJumpKind kind, Addr64 d64 )
2068 irsb->next = mkU64(d64);
2069 irsb->jumpkind = kind;
2072 static void jmp_treg( IRJumpKind kind, IRTemp t )
2074 irsb->next = mkexpr(t);
2075 irsb->jumpkind = kind;
2079 void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
2082 AMD64Condcode condPos;
2083 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2085 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2087 IRConst_U64(d64_false) ) );
2088 irsb->next = mkU64(d64_true);
2089 irsb->jumpkind = Ijk_Boring;
2091 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2093 IRConst_U64(d64_true) ) );
2094 irsb->next = mkU64(d64_false);
2095 irsb->jumpkind = Ijk_Boring;
2099 /* Let new_rsp be the %rsp value after a call/return. Let nia be the
2100 guest address of the next instruction to be executed.
2102 This function generates an AbiHint to say that -128(%rsp)
2103 .. -1(%rsp) should now be regarded as uninitialised.
2106 void make_redzone_AbiHint ( VexAbiInfo* vbi,
2107 IRTemp new_rsp, IRTemp nia, HChar* who )
2109 Int szB = vbi->guest_stack_redzone_size;
2112 /* A bit of a kludge. Currently the only AbI we've guested AMD64
2113 for is ELF. So just check it's the expected 128 value
2115 vassert(szB == 128);
2117 if (0) vex_printf("AbiHint: %s\n", who);
2118 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
2119 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
2121 stmt( IRStmt_AbiHint(
2122 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
2129 /*------------------------------------------------------------*/
2130 /*--- Disassembling addressing modes ---*/
2131 /*------------------------------------------------------------*/
2134 HChar* segRegTxt ( Prefix pfx )
2136 if (pfx & PFX_CS) return "%cs:";
2137 if (pfx & PFX_DS) return "%ds:";
2138 if (pfx & PFX_ES) return "%es:";
2139 if (pfx & PFX_FS) return "%fs:";
2140 if (pfx & PFX_GS) return "%gs:";
2141 if (pfx & PFX_SS) return "%ss:";
2142 return ""; /* no override */
2146 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2147 linear address by adding any required segment override as indicated
2148 by sorb, and also dealing with any address size override
2151 IRExpr* handleAddrOverrides ( VexAbiInfo* vbi,
2152 Prefix pfx, IRExpr* virtual )
2154 /* --- segment overrides --- */
2156 if (vbi->guest_amd64_assume_fs_is_zero) {
2157 /* Note that this is a linux-kernel specific hack that relies
2158 on the assumption that %fs is always zero. */
2159 /* return virtual + guest_FS_ZERO. */
2160 virtual = binop(Iop_Add64, virtual,
2161 IRExpr_Get(OFFB_FS_ZERO, Ity_I64));
2163 unimplemented("amd64 %fs segment override");
2168 if (vbi->guest_amd64_assume_gs_is_0x60) {
2169 /* Note that this is a darwin-kernel specific hack that relies
2170 on the assumption that %gs is always 0x60. */
2171 /* return virtual + guest_GS_0x60. */
2172 virtual = binop(Iop_Add64, virtual,
2173 IRExpr_Get(OFFB_GS_0x60, Ity_I64));
2175 unimplemented("amd64 %gs segment override");
2179 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2181 /* --- address size override --- */
2183 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2190 //.. IRType hWordTy;
2191 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2194 //.. /* the common case - no override */
2195 //.. return virtual;
2197 //.. switch (sorb) {
2198 //.. case 0x3E: sreg = R_DS; break;
2199 //.. case 0x26: sreg = R_ES; break;
2200 //.. case 0x64: sreg = R_FS; break;
2201 //.. case 0x65: sreg = R_GS; break;
2202 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2205 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2207 //.. seg_selector = newTemp(Ity_I32);
2208 //.. ldt_ptr = newTemp(hWordTy);
2209 //.. gdt_ptr = newTemp(hWordTy);
2210 //.. r64 = newTemp(Ity_I64);
2212 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2213 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2214 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2217 //.. Call this to do the translation and limit checks:
2218 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2219 //.. UInt seg_selector, UInt virtual_addr )
2226 //.. "x86g_use_seg_selector",
2227 //.. &x86g_use_seg_selector,
2228 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2229 //.. mkexpr(seg_selector), virtual)
2233 //.. /* If the high 32 of the result are non-zero, there was a
2234 //.. failure in address translation. In which case, make a
2239 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2241 //.. IRConst_U32( guest_eip_curr_instr )
2245 //.. /* otherwise, here's the translated result. */
2246 //.. return unop(Iop_64to32, mkexpr(r64));
2250 /* Generate IR to calculate an address indicated by a ModRM and
2251 following SIB bytes. The expression, and the number of bytes in
2252 the address mode, are returned (the latter in *len). Note that
2253 this fn should not be called if the R/M part of the address denotes
2254 a register instead of memory. If print_codegen is true, text of
2255 the addressing mode is placed in buf.
2257 The computed address is stored in a new tempreg, and the
2258 identity of the tempreg is returned.
2260 extra_bytes holds the number of bytes after the amode, as supplied
2261 by the caller. This is needed to make sense of %rip-relative
2262 addresses. Note that the value that *len is set to is only the
2263 length of the amode itself and does not include the value supplied
/* Park the given 64-bit address expression in a freshly allocated
   Ity_I64 temporary, so callers of disAMode always receive the
   computed effective address by temp identity.  (The 'return tmp;'
   appears to be elided from this excerpt.) */
2267 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2269 IRTemp tmp = newTemp(Ity_I64);
2270 assign( tmp, addr64 );
/* Decode the ModRM (+ optional SIB, + optional displacement) bytes
   starting at guest offset 'delta' and return a new Ity_I64 temp
   holding the effective address.  *len receives the number of
   amode bytes consumed; 'buf' receives disassembly text; the
   segment/address-size overrides in 'pfx' are applied via
   handleAddrOverrides.  'extra_bytes' is the count of insn bytes
   following the amode, needed to resolve %rip-relative addressing.
   Must not be called when the R/M field denotes a register. */
2275 IRTemp disAMode ( /*OUT*/Int* len,
2276 VexAbiInfo* vbi, Prefix pfx, Long delta,
2277 /*OUT*/HChar* buf, Int extra_bytes )
2279 UChar mod_reg_rm = getUChar(delta);
2283 vassert(extra_bytes >= 0 && extra_bytes < 10);
2285 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2286 jump table seems a bit excessive.
/* Compress mod(2 bits) + rm(3 bits) into a 5-bit switch index. */
2288 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2289 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2290 /* is now XX0XXYYY */
2291 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2292 switch (mod_reg_rm) {
2294 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2295 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2297 case 0x00: case 0x01: case 0x02: case 0x03:
2298 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2299 { UChar rm = toUChar(mod_reg_rm & 7);
2300 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2302 return disAMode_copy2tmp(
2303 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
2306 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2307 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2309 case 0x08: case 0x09: case 0x0A: case 0x0B:
2310 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2311 { UChar rm = toUChar(mod_reg_rm & 7);
2312 Long d = getSDisp8(delta);
2314 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2316 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2319 return disAMode_copy2tmp(
2320 handleAddrOverrides(vbi, pfx,
2321 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2324 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2325 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2327 case 0x10: case 0x11: case 0x12: case 0x13:
2328 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2329 { UChar rm = toUChar(mod_reg_rm & 7);
2330 Long d = getSDisp32(delta);
2331 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2333 return disAMode_copy2tmp(
2334 handleAddrOverrides(vbi, pfx,
2335 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2338 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2339 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2340 case 0x18: case 0x19: case 0x1A: case 0x1B:
2341 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2342 vpanic("disAMode(amd64): not an addr!");
2344 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2345 correctly at the start of handling each instruction. */
2347 { Long d = getSDisp32(delta);
2349 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
2350 /* We need to know the next instruction's start address.
2351 Try and figure out what it is, record the guess, and ask
2352 the top-level driver logic (bbToIR_AMD64) to check we
2353 guessed right, after the instruction is completely
2355 guest_RIP_next_mustcheck = True;
2356 guest_RIP_next_assumed = guest_RIP_bbstart
2357 + delta+4 + extra_bytes;
2358 return disAMode_copy2tmp(
2359 handleAddrOverrides(vbi, pfx,
2360 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
2365 /* SIB, with no displacement. Special cases:
2366 -- %rsp cannot act as an index value.
2367 If index_r indicates %rsp, zero is used for the index.
2368 -- when mod is zero and base indicates RBP or R13, base is
2369 instead a 32-bit sign-extended literal.
2370 It's all madness, I tell you. Extract %index, %base and
2371 scale from the SIB byte. The value denoted is then:
2372 | %index == %RSP && (%base == %RBP || %base == %R13)
2373 = d32 following SIB byte
2374 | %index == %RSP && !(%base == %RBP || %base == %R13)
2376 | %index != %RSP && (%base == %RBP || %base == %R13)
2377 = d32 following SIB byte + (%index << scale)
2378 | %index != %RSP && !(%base == %RBP || %base == %R13)
2379 = %base + (%index << scale)
2381 UChar sib = getUChar(delta);
2382 UChar scale = toUChar((sib >> 6) & 3);
2383 UChar index_r = toUChar((sib >> 3) & 7);
2384 UChar base_r = toUChar(sib & 7);
2385 /* correct since #(R13) == 8 + #(RBP) */
2386 Bool base_is_BPor13 = toBool(base_r == R_RBP);
/* %rsp can only be "no index" when REX.X is clear; with REX.X set
   the encoding denotes %r12, which is a legal index. */
2387 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
2390 if ((!index_is_SP) && (!base_is_BPor13)) {
2392 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2393 nameIRegRexB(8,pfx,base_r),
2394 nameIReg64rexX(pfx,index_r));
2396 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2397 nameIRegRexB(8,pfx,base_r),
2398 nameIReg64rexX(pfx,index_r), 1<<scale);
2403 handleAddrOverrides(vbi, pfx,
2405 getIRegRexB(8,pfx,base_r),
2406 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2410 if ((!index_is_SP) && base_is_BPor13) {
2411 Long d = getSDisp32(delta);
2412 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
2413 nameIReg64rexX(pfx,index_r), 1<<scale);
2417 handleAddrOverrides(vbi, pfx,
2419 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2424 if (index_is_SP && (!base_is_BPor13)) {
2425 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
2427 return disAMode_copy2tmp(
2428 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
2431 if (index_is_SP && base_is_BPor13) {
2432 Long d = getSDisp32(delta);
2433 DIS(buf, "%s%lld", segRegTxt(pfx), d);
2435 return disAMode_copy2tmp(
2436 handleAddrOverrides(vbi, pfx, mkU64(d)));
2442 /* SIB, with 8-bit displacement. Special cases:
2443 -- %esp cannot act as an index value.
2444 If index_r indicates %esp, zero is used for the index.
2449 = d8 + %base + (%index << scale)
2452 UChar sib = getUChar(delta);
2453 UChar scale = toUChar((sib >> 6) & 3);
2454 UChar index_r = toUChar((sib >> 3) & 7);
2455 UChar base_r = toUChar(sib & 7);
2456 Long d = getSDisp8(delta+1);
2458 if (index_r == R_RSP && 0==getRexX(pfx)) {
2459 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2460 d, nameIRegRexB(8,pfx,base_r));
2462 return disAMode_copy2tmp(
2463 handleAddrOverrides(vbi, pfx,
2464 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2467 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2468 nameIRegRexB(8,pfx,base_r),
2469 nameIReg64rexX(pfx,index_r));
2471 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2472 nameIRegRexB(8,pfx,base_r),
2473 nameIReg64rexX(pfx,index_r), 1<<scale);
2478 handleAddrOverrides(vbi, pfx,
2481 getIRegRexB(8,pfx,base_r),
2483 getIReg64rexX(pfx,index_r), mkU8(scale))),
2486 vassert(0); /*NOTREACHED*/
2489 /* SIB, with 32-bit displacement. Special cases:
2490 -- %rsp cannot act as an index value.
2491 If index_r indicates %rsp, zero is used for the index.
2496 = d32 + %base + (%index << scale)
2499 UChar sib = getUChar(delta);
2500 UChar scale = toUChar((sib >> 6) & 3);
2501 UChar index_r = toUChar((sib >> 3) & 7);
2502 UChar base_r = toUChar(sib & 7);
2503 Long d = getSDisp32(delta+1);
2505 if (index_r == R_RSP && 0==getRexX(pfx)) {
2506 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2507 d, nameIRegRexB(8,pfx,base_r));
2509 return disAMode_copy2tmp(
2510 handleAddrOverrides(vbi, pfx,
2511 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2514 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2515 nameIRegRexB(8,pfx,base_r),
2516 nameIReg64rexX(pfx,index_r));
2518 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2519 nameIRegRexB(8,pfx,base_r),
2520 nameIReg64rexX(pfx,index_r), 1<<scale);
2525 handleAddrOverrides(vbi, pfx,
2528 getIRegRexB(8,pfx,base_r),
2530 getIReg64rexX(pfx,index_r), mkU8(scale))),
2533 vassert(0); /*NOTREACHED*/
/* Unrecognised encoding: cannot happen given the 5-bit compression
   above, but keep the panic as a defensive default. */
2537 vpanic("disAMode(amd64)");
2538 return 0; /*notreached*/
2543 /* Figure out the number of (insn-stream) bytes constituting the amode
2544 beginning at delta. Is useful for getting hold of literals beyond
2545 the end of the amode before it has been disassembled. */
/* Compute only the LENGTH (in insn-stream bytes) of the amode
   starting at 'delta', without generating any IR.  Mirrors the
   case structure of disAMode above; useful for locating literals
   that follow the amode before the amode itself is disassembled. */
2547 static UInt lengthAMode ( Prefix pfx, Long delta )
2549 UChar mod_reg_rm = getUChar(delta);
2552 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2553 jump table seems a bit excessive.
/* Same mod/rm compression as in disAMode. */
2555 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2556 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2557 /* is now XX0XXYYY */
2558 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2559 switch (mod_reg_rm) {
2561 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2562 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2564 case 0x00: case 0x01: case 0x02: case 0x03:
2565 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2568 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2569 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2571 case 0x08: case 0x09: case 0x0A: case 0x0B:
2572 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2575 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2576 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2578 case 0x10: case 0x11: case 0x12: case 0x13:
2579 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2582 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2583 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2584 /* Not an address, but still handled. */
2585 case 0x18: case 0x19: case 0x1A: case 0x1B:
2586 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2594 /* SIB, with no displacement. */
2595 UChar sib = getUChar(delta);
2596 UChar base_r = toUChar(sib & 7);
2597 /* correct since #(R13) == 8 + #(RBP) */
2598 Bool base_is_BPor13 = toBool(base_r == R_RBP);
/* mod==00 with base RBP/R13 means a d32 follows the SIB byte. */
2600 if (base_is_BPor13) {
2607 /* SIB, with 8-bit displacement. */
2611 /* SIB, with 32-bit displacement. */
2616 vpanic("lengthAMode(amd64)");
2617 return 0; /*notreached*/
2622 /*------------------------------------------------------------*/
2623 /*--- Disassembling common idioms ---*/
2624 /*------------------------------------------------------------*/
2626 /* Handle binary integer instructions of the form
2629 Is passed a ptr to the modRM byte, the actual operation, and the
2630 data size. Returns the address advanced completely over this
2633 E(src) is reg-or-mem
2636 If E is reg, --> GET %G, tmp
2640 If E is mem and OP is not reversible,
2641 --> (getAddr E) -> tmpa
2647 If E is mem and OP is reversible
2648 --> (getAddr E) -> tmpa
/* Disassemble a binary integer op of the form "OP E, G": E (reg or
   memory) is the source, G (reg) is the destination.  Builds the IR
   for the operation, sets the rflags thunk, and returns delta
   advanced over the whole instruction.  addSubCarry selects the
   ADC/SBB variants, which go through helper_ADC/helper_SBB. */
2654 ULong dis_op2_E_G ( VexAbiInfo* vbi,
2665 IRType ty = szToITy(size);
2666 IRTemp dst1 = newTemp(ty);
2667 IRTemp src = newTemp(ty);
2668 IRTemp dst0 = newTemp(ty);
2669 UChar rm = getUChar(delta0);
2670 IRTemp addr = IRTemp_INVALID;
2672 /* addSubCarry == True indicates the intended operation is
2673 add-with-carry or subtract-with-borrow. */
2675 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
2679 if (epartIsReg(rm)) {
2680 /* Specially handle XOR reg,reg, because that doesn't really
2681 depend on reg, and doing the obvious thing potentially
2682 generates a spurious value check failure due to the bogus
2684 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
2685 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
2686 if (False && op8 == Iop_Sub8)
2687 vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
/* xor r,r / sbb r,r: result is 0 regardless of r's value. */
2688 putIRegG(size,pfx,rm, mkU(ty,0));
2691 assign( dst0, getIRegG(size,pfx,rm) );
2692 assign( src, getIRegE(size,pfx,rm) );
2694 if (addSubCarry && op8 == Iop_Add8) {
2695 helper_ADC( size, dst1, dst0, src,
2696 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2697 putIRegG(size, pfx, rm, mkexpr(dst1));
2699 if (addSubCarry && op8 == Iop_Sub8) {
2700 helper_SBB( size, dst1, dst0, src,
2701 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2702 putIRegG(size, pfx, rm, mkexpr(dst1));
2704 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
/* add/sub-family flags depend on both operands; logic-family on
   the result only. */
2706 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2708 setFlags_DEP1(op8, dst1, ty);
2710 putIRegG(size, pfx, rm, mkexpr(dst1));
2713 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
2714 nameIRegE(size,pfx,rm),
2715 nameIRegG(size,pfx,rm));
2718 /* E refers to memory */
2719 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
2720 assign( dst0, getIRegG(size,pfx,rm) );
2721 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
2723 if (addSubCarry && op8 == Iop_Add8) {
2724 helper_ADC( size, dst1, dst0, src,
2725 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2726 putIRegG(size, pfx, rm, mkexpr(dst1));
2728 if (addSubCarry && op8 == Iop_Sub8) {
2729 helper_SBB( size, dst1, dst0, src,
2730 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2731 putIRegG(size, pfx, rm, mkexpr(dst1));
2733 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2735 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2737 setFlags_DEP1(op8, dst1, ty);
2739 putIRegG(size, pfx, rm, mkexpr(dst1));
2742 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
2743 dis_buf, nameIRegG(size, pfx, rm));
2750 /* Handle binary integer instructions of the form
2753 Is passed a ptr to the modRM byte, the actual operation, and the
2754 data size. Returns the address advanced completely over this
2758 E(dst) is reg-or-mem
2760 If E is reg, --> GET %E, tmp
2764 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble a binary integer op of the form "OP G, E": G (reg) is
   the source, E (reg or memory) is the destination.  When E is
   memory and a LOCK prefix is present, the store is done with a
   compare-and-swap (casLE) so the read-modify-write is atomic.
   Returns delta advanced over the whole instruction. */
2770 ULong dis_op2_G_E ( VexAbiInfo* vbi,
2781 IRType ty = szToITy(size);
2782 IRTemp dst1 = newTemp(ty);
2783 IRTemp src = newTemp(ty);
2784 IRTemp dst0 = newTemp(ty);
2785 UChar rm = getUChar(delta0);
2786 IRTemp addr = IRTemp_INVALID;
2788 /* addSubCarry == True indicates the intended operation is
2789 add-with-carry or subtract-with-borrow. */
2791 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
2795 if (epartIsReg(rm)) {
2796 /* Specially handle XOR reg,reg, because that doesn't really
2797 depend on reg, and doing the obvious thing potentially
2798 generates a spurious value check failure due to the bogus
2799 dependency. Ditto SBB reg,reg. */
2800 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
2801 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
2802 putIRegE(size,pfx,rm, mkU(ty,0));
2805 assign(dst0, getIRegE(size,pfx,rm));
2806 assign(src, getIRegG(size,pfx,rm));
2808 if (addSubCarry && op8 == Iop_Add8) {
2809 helper_ADC( size, dst1, dst0, src,
2810 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2811 putIRegE(size, pfx, rm, mkexpr(dst1));
2813 if (addSubCarry && op8 == Iop_Sub8) {
2814 helper_SBB( size, dst1, dst0, src,
2815 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2816 putIRegE(size, pfx, rm, mkexpr(dst1));
2818 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2820 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2822 setFlags_DEP1(op8, dst1, ty);
2824 putIRegE(size, pfx, rm, mkexpr(dst1));
2827 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
2828 nameIRegG(size,pfx,rm),
2829 nameIRegE(size,pfx,rm));
2833 /* E refers to memory */
2835 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
2836 assign(dst0, loadLE(ty,mkexpr(addr)));
2837 assign(src, getIRegG(size,pfx,rm));
2839 if (addSubCarry && op8 == Iop_Add8) {
2840 if (pfx & PFX_LOCK) {
2841 /* cas-style store */
2842 helper_ADC( size, dst1, dst0, src,
2843 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
2846 helper_ADC( size, dst1, dst0, src,
2847 /*store*/addr, IRTemp_INVALID, 0 );
2850 if (addSubCarry && op8 == Iop_Sub8) {
2851 if (pfx & PFX_LOCK) {
2852 /* cas-style store */
2853 helper_SBB( size, dst1, dst0, src,
2854 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
2857 helper_SBB( size, dst1, dst0, src,
2858 /*store*/addr, IRTemp_INVALID, 0 );
2861 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
/* LOCKed RMW: store via CAS with the originally-loaded value as
   the expected value; on mismatch the guest restarts at this insn. */
2863 if (pfx & PFX_LOCK) {
2864 if (0) vex_printf("locked case\n" );
2865 casLE( mkexpr(addr),
2866 mkexpr(dst0)/*expval*/,
2867 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
2869 if (0) vex_printf("nonlocked case\n");
2870 storeLE(mkexpr(addr), mkexpr(dst1));
2874 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2876 setFlags_DEP1(op8, dst1, ty);
2879 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
2880 nameIRegG(size,pfx,rm), dis_buf);
2886 /* Handle move instructions of the form
2889 Is passed a ptr to the modRM byte, and the data size. Returns
2890 the address advanced completely over this instruction.
2892 E(src) is reg-or-mem
2895 If E is reg, --> GET %E, tmpv
2898 If E is mem --> (getAddr E) -> tmpa
/* Disassemble "MOV E, G": copy E (reg or memory) into register G.
   Flags are unaffected.  Returns delta advanced over the insn. */
2903 ULong dis_mov_E_G ( VexAbiInfo* vbi,
2909 UChar rm = getUChar(delta0);
2912 if (epartIsReg(rm)) {
2913 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
2914 DIP("mov%c %s,%s\n", nameISize(size),
2915 nameIRegE(size,pfx,rm),
2916 nameIRegG(size,pfx,rm));
2920 /* E refers to memory */
2922 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
2923 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
2924 DIP("mov%c %s,%s\n", nameISize(size),
2926 nameIRegG(size,pfx,rm));
2932 /* Handle move instructions of the form
2935 Is passed a ptr to the modRM byte, and the data size. Returns
2936 the address advanced completely over this instruction.
2939 E(dst) is reg-or-mem
2941 If E is reg, --> GET %G, tmp
2944 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble "MOV G, E": copy register G into E (reg or memory).
   Flags are unaffected.  Returns delta advanced over the insn. */
2949 ULong dis_mov_G_E ( VexAbiInfo* vbi,
2955 UChar rm = getUChar(delta0);
2958 if (epartIsReg(rm)) {
2959 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
2960 DIP("mov%c %s,%s\n", nameISize(size),
2961 nameIRegG(size,pfx,rm),
2962 nameIRegE(size,pfx,rm));
2966 /* E refers to memory */
2968 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
2969 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
2970 DIP("mov%c %s,%s\n", nameISize(size),
2971 nameIRegG(size,pfx,rm),
2978 /* op $immediate, AL/AX/EAX/RAX. */
/* Disassemble "OP $imm, AL/AX/EAX/RAX".  The immediate is at most
   4 bytes (sign-extended for the 8-byte case), hence imin(size,4).
   'carrying' selects the ADC/SBB helper variants.  Sets the rflags
   thunk and writes the result back to RAX (sub-)register. */
2980 ULong dis_op_imm_A ( Int size,
2987 Int size4 = imin(size,4);
2988 IRType ty = szToITy(size);
2989 IRTemp dst0 = newTemp(ty);
2990 IRTemp src = newTemp(ty);
2991 IRTemp dst1 = newTemp(ty);
2992 Long lit = getSDisp(size4,delta);
2993 assign(dst0, getIRegRAX(size));
/* Truncate the sign-extended literal to the operation width. */
2994 assign(src, mkU(ty,lit & mkSizeMask(size)));
2996 if (isAddSub(op8) && !carrying) {
2997 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2998 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3003 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3004 setFlags_DEP1(op8, dst1, ty);
3007 if (op8 == Iop_Add8 && carrying) {
3008 helper_ADC( size, dst1, dst0, src,
3009 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3012 if (op8 == Iop_Sub8 && carrying) {
3013 helper_SBB( size, dst1, dst0, src,
3014 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3017 vpanic("dis_op_imm_A(amd64,guest)");
3020 putIRegRAX(size, mkexpr(dst1));
3022 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
3023 lit, nameIRegRAX(size));
3028 /* Sign- and Zero-extending moves. */
/* Disassemble MOVSX/MOVZX: widen E (szs bytes, reg or memory) into
   register G (szd bytes), sign- or zero-extending per sign_extend.
   Returns delta advanced over the insn. */
3030 ULong dis_movx_E_G ( VexAbiInfo* vbi,
3032 Long delta, Int szs, Int szd, Bool sign_extend )
3034 UChar rm = getUChar(delta);
3035 if (epartIsReg(rm)) {
3036 putIRegG(szd, pfx, rm,
3038 szs,szd,sign_extend,
3039 getIRegE(szs,pfx,rm)));
3040 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3043 nameIRegE(szs,pfx,rm),
3044 nameIRegG(szd,pfx,rm));
3048 /* E refers to memory */
3052 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
3053 putIRegG(szd, pfx, rm,
3055 szs,szd,sign_extend,
3056 loadLE(szToITy(szs),mkexpr(addr))));
3057 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3061 nameIRegG(szd,pfx,rm));
3067 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3068 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
/* Generate IR for DIV/IDIV: divide the double-width dividend held in
   RDX:RAX (or EDX:EAX / DX:AX / AX) by the sz-byte value in 't',
   leaving quotient in RAX(-part) and remainder in RDX(-part) — or
   AL/AH for the 1-byte case.  signed_divide selects IDIV semantics. */
3070 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3072 /* special-case the 64-bit case */
3074 IROp op = signed_divide ? Iop_DivModS128to64
3075 : Iop_DivModU128to64;
3076 IRTemp src128 = newTemp(Ity_I128);
3077 IRTemp dst128 = newTemp(Ity_I128);
3078 assign( src128, binop(Iop_64HLto128,
3080 getIReg64(R_RAX)) );
/* DivMod result packs remainder in the high half, quotient low. */
3081 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
3082 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3083 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
3085 IROp op = signed_divide ? Iop_DivModS64to32
3086 : Iop_DivModU64to32;
3087 IRTemp src64 = newTemp(Ity_I64);
3088 IRTemp dst64 = newTemp(Ity_I64);
3092 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3094 binop(op, mkexpr(src64), mkexpr(t)) );
3095 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3096 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
/* 16-bit: widen dividend and divisor to the 32-bit DivMod form. */
3099 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3100 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3101 assign( src64, unop(widen3264,
3105 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
3106 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3107 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
/* 8-bit: dividend is AX; quotient goes to AL, remainder to AH. */
3111 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3112 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3113 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3114 assign( src64, unop(widen3264,
3115 unop(widen1632, getIRegRAX(2))) );
3117 binop(op, mkexpr(src64),
3118 unop(widen1632, unop(widen816, mkexpr(t)))) );
3119 putIRegRAX( 1, unop(Iop_16to8,
3121 unop(Iop_64to32,mkexpr(dst64)))) );
3122 putIRegAH( unop(Iop_16to8,
3124 unop(Iop_64HIto32,mkexpr(dst64)))) );
3128 vpanic("codegen_div(amd64)");
/* Disassemble Group 1 extended opcodes (immediate-operand ALU ops:
   ADD/OR/ADC/SBB/AND/SUB/XOR/CMP selected by the reg field of the
   modRM byte).  am_sz = amode length, d_sz = immediate length,
   d64 = the (sign-extended) immediate.  CMP (case 7) computes flags
   only and writes no result.  LOCKed memory forms store via CAS. */
3134 ULong dis_Grp1 ( VexAbiInfo* vbi,
3136 Long delta, UChar modrm,
3137 Int am_sz, Int d_sz, Int sz, Long d64 )
3141 IRType ty = szToITy(sz);
3142 IRTemp dst1 = newTemp(ty);
3143 IRTemp src = newTemp(ty);
3144 IRTemp dst0 = newTemp(ty);
3145 IRTemp addr = IRTemp_INVALID;
3146 IROp op8 = Iop_INVALID;
3147 ULong mask = mkSizeMask(sz);
3149 switch (gregLO3ofRM(modrm)) {
3150 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3151 case 2: break; // ADC
3152 case 3: break; // SBB
3153 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
/* case 7 is CMP: same op as SUB but the result is discarded. */
3154 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
3156 default: vpanic("dis_Grp1(amd64): unhandled case");
3159 if (epartIsReg(modrm)) {
3160 vassert(am_sz == 1);
3162 assign(dst0, getIRegE(sz,pfx,modrm));
3163 assign(src, mkU(ty,d64 & mask));
3165 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3166 helper_ADC( sz, dst1, dst0, src,
3167 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3169 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3170 helper_SBB( sz, dst1, dst0, src,
3171 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3173 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3175 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3177 setFlags_DEP1(op8, dst1, ty);
/* Write back, except for CMP (reg field 7). */
3180 if (gregLO3ofRM(modrm) < 7)
3181 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3183 delta += (am_sz + d_sz);
3184 DIP("%s%c $%lld, %s\n",
3185 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
3186 nameIRegE(sz,pfx,modrm));
3188 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3190 assign(dst0, loadLE(ty,mkexpr(addr)));
3191 assign(src, mkU(ty,d64 & mask));
3193 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3194 if (pfx & PFX_LOCK) {
3195 /* cas-style store */
3196 helper_ADC( sz, dst1, dst0, src,
3197 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3200 helper_ADC( sz, dst1, dst0, src,
3201 /*store*/addr, IRTemp_INVALID, 0 );
3204 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3205 if (pfx & PFX_LOCK) {
3206 /* cas-style store */
3207 helper_SBB( sz, dst1, dst0, src,
3208 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3211 helper_SBB( sz, dst1, dst0, src,
3212 /*store*/addr, IRTemp_INVALID, 0 );
3215 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3216 if (gregLO3ofRM(modrm) < 7) {
3217 if (pfx & PFX_LOCK) {
3218 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3219 mkexpr(dst1)/*newVal*/,
3220 guest_RIP_curr_instr );
3222 storeLE(mkexpr(addr), mkexpr(dst1));
3226 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3228 setFlags_DEP1(op8, dst1, ty);
3231 delta += (len+d_sz);
3232 DIP("%s%c $%lld, %s\n",
3233 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
3240 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
/* Disassemble Group 2 extended opcodes: the shift/rotate family
   (ROL/ROR/RCL/RCR/SHL/SHR/SAR, selected by the reg field of the
   modRM byte).  shift_expr is an 8-bit-typed IR expression giving
   the shift/rotate amount.  *decode_OK is cleared for the invalid
   encoding (reg field 6).  Returns delta advanced over the insn. */
3244 ULong dis_Grp2 ( VexAbiInfo* vbi,
3246 Long delta, UChar modrm,
3247 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
3248 HChar* shift_expr_txt, Bool* decode_OK )
3250 /* delta on entry points at the modrm byte. */
3253 Bool isShift, isRotate, isRotateC;
3254 IRType ty = szToITy(sz);
3255 IRTemp dst0 = newTemp(ty);
3256 IRTemp dst1 = newTemp(ty);
3257 IRTemp addr = IRTemp_INVALID;
3261 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3263 /* Put value to shift/rotate in dst0. */
3264 if (epartIsReg(modrm)) {
3265 assign(dst0, getIRegE(sz, pfx, modrm));
3266 delta += (am_sz + d_sz);
3268 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3269 assign(dst0, loadLE(ty,mkexpr(addr)));
3270 delta += len + d_sz;
/* Classify: 4/5/7 = SHL/SHR/SAR, 0/1 = ROL/ROR, 2/3 = RCL/RCR. */
3274 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 7: isShift = True; }
3277 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
3280 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
3282 if (gregLO3ofRM(modrm) == 6) {
3287 if (!isShift && !isRotate && !isRotateC) {
3289 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3293 /* Call a helper; this insn is so ridiculous it does not deserve
3294 better. One problem is, the helper has to calculate both the
3295 new value and the new flags. This is more than 64 bits, and
3296 there is no way to return more than 64 bits from the helper.
3297 Hence the crude and obvious solution is to call it twice,
3298 using the sign of the sz field to indicate whether it is the
3299 value or rflags result we want.
3301 Bool left = toBool(gregLO3ofRM(modrm) == 2);
3303 IRExpr** argsRFLAGS;
3305 IRTemp new_value = newTemp(Ity_I64);
3306 IRTemp new_rflags = newTemp(Ity_I64);
3307 IRTemp old_rflags = newTemp(Ity_I64);
3309 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
/* First call: compute the rotated-through-carry value. */
3312 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3313 widenUto64(shift_expr), /* rotate amount */
3320 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3321 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
/* Second call: compute the resulting rflags. */
3327 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3328 widenUto64(shift_expr), /* rotate amount */
3335 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3336 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3341 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
/* Install the helper-computed flags verbatim via the COPY thunk. */
3342 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3343 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3344 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3345 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3351 IRTemp pre64 = newTemp(Ity_I64);
3352 IRTemp res64 = newTemp(Ity_I64);
3353 IRTemp res64ss = newTemp(Ity_I64);
3354 IRTemp shift_amt = newTemp(Ity_I8);
3355 UChar mask = toUChar(sz==8 ? 63 : 31);
3358 switch (gregLO3ofRM(modrm)) {
3359 case 4: op64 = Iop_Shl64; break;
3360 case 5: op64 = Iop_Shr64; break;
3361 case 7: op64 = Iop_Sar64; break;
3363 default: vpanic("dis_Grp2:shift"); break;
3366 /* Widen the value to be shifted to 64 bits, do the shift, and
3367 narrow back down. This seems surprisingly long-winded, but
3368 unfortunately the AMD semantics requires that 8/16/32-bit
3369 shifts give defined results for shift values all the way up
3370 to 32, and this seems the simplest way to do it. It has the
3371 advantage that the only IR level shifts generated are of 64
3372 bit values, and the shift amount is guaranteed to be in the
3373 range 0 .. 63, thereby observing the IR semantics requiring
3374 all shift values to be in the range 0 .. 2^word_size-1.
3376 Therefore the shift amount is masked with 63 for 64-bit shifts
3377 and 31 for all others.
3379 /* shift_amt = shift_expr & MASK, regardless of operation size */
3380 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
3382 /* suitably widen the value to be shifted to 64 bits. */
3383 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3384 : widenUto64(mkexpr(dst0)) );
3386 /* res64 = pre64 `shift` shift_amt */
3387 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3389 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3395 mkexpr(shift_amt), mkU8(1)),
3398 /* Build the flags thunk. */
3399 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3401 /* Narrow the result back down. */
3402 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3404 } /* if (isShift) */
3408 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3409 : (ty==Ity_I32 ? 2 : 3));
3410 Bool left = toBool(gregLO3ofRM(modrm) == 0);
3411 IRTemp rot_amt = newTemp(Ity_I8);
3412 IRTemp rot_amt64 = newTemp(Ity_I8);
3413 IRTemp oldFlags = newTemp(Ity_I64);
3414 UChar mask = toUChar(sz==8 ? 63 : 31);
3416 /* rot_amt = shift_expr & mask */
3417 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3418 expressions never shift beyond the word size and thus remain
3420 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
3423 assign(rot_amt, mkexpr(rot_amt64));
/* For sub-64-bit sizes, additionally reduce mod the word size. */
3425 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
3429 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3431 binop( mkSizedOp(ty,Iop_Or8),
3432 binop( mkSizedOp(ty,Iop_Shl8),
3436 binop( mkSizedOp(ty,Iop_Shr8),
3438 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3442 ccOp += AMD64G_CC_OP_ROLB;
3444 } else { /* right */
3446 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3448 binop( mkSizedOp(ty,Iop_Or8),
3449 binop( mkSizedOp(ty,Iop_Shr8),
3453 binop( mkSizedOp(ty,Iop_Shl8),
3455 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3459 ccOp += AMD64G_CC_OP_RORB;
3463 /* dst1 now holds the rotated value. Build flag thunk. We
3464 need the resulting value for this, and the previous flags.
3465 Except don't set it if the rotate count is zero. */
3467 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3469 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
/* Mux0X keeps the old thunk when rot_amt64 == 0 (rotate by zero
   leaves flags unchanged). */
3470 stmt( IRStmt_Put( OFFB_CC_OP,
3471 IRExpr_Mux0X( mkexpr(rot_amt64),
3472 IRExpr_Get(OFFB_CC_OP,Ity_I64),
3474 stmt( IRStmt_Put( OFFB_CC_DEP1,
3475 IRExpr_Mux0X( mkexpr(rot_amt64),
3476 IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
3477 widenUto64(mkexpr(dst1)))) );
3478 stmt( IRStmt_Put( OFFB_CC_DEP2,
3479 IRExpr_Mux0X( mkexpr(rot_amt64),
3480 IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
3482 stmt( IRStmt_Put( OFFB_CC_NDEP,
3483 IRExpr_Mux0X( mkexpr(rot_amt64),
3484 IRExpr_Get(OFFB_CC_NDEP,Ity_I64),
3485 mkexpr(oldFlags))) );
3486 } /* if (isRotate) */
3488 /* Save result, and finish up. */
3489 if (epartIsReg(modrm)) {
3490 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3491 if (vex_traceflags & VEX_TRACE_FE) {
3493 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3495 vex_printf("%s", shift_expr_txt);
3497 ppIRExpr(shift_expr);
3498 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
3501 storeLE(mkexpr(addr), mkexpr(dst1));
3502 if (vex_traceflags & VEX_TRACE_FE) {
3504 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3506 vex_printf("%s", shift_expr_txt);
3508 ppIRExpr(shift_expr);
3509 vex_printf(", %s\n", dis_buf);
3516 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
/* Disassemble Group 8 extended opcodes with an immediate bit index:
   BT/BTS/BTR/BTC (reg field 4..7 of modRM).  src_val is the d8 bit
   offset; delta on entry points at the modrm byte.  The selected bit
   is copied to the carry flag; BTS/BTR/BTC also write the modified
   value back (via CAS when LOCKed).  *decode_OK is cleared for any
   unsupported encoding. */
3518 ULong dis_Grp8_Imm ( VexAbiInfo* vbi,
3520 Long delta, UChar modrm,
3521 Int am_sz, Int sz, ULong src_val,
3524 /* src_val denotes a d8.
3525 And delta on entry points at the modrm byte. */
3527 IRType ty = szToITy(sz);
3528 IRTemp t2 = newTemp(Ity_I64);
3529 IRTemp t2m = newTemp(Ity_I64);
3530 IRTemp t_addr = IRTemp_INVALID;
3534 /* we're optimists :-) */
3537 /* Limit src_val -- the bit offset -- to something within a word.
3538 The Intel docs say that literal offsets larger than a word are
3539 masked in this way. */
3541 case 2: src_val &= 15; break;
3542 case 4: src_val &= 31; break;
3543 case 8: src_val &= 63; break;
3544 default: *decode_OK = False; return delta;
3547 /* Invent a mask suitable for the operation. */
3548 switch (gregLO3ofRM(modrm)) {
3549 case 4: /* BT */ mask = 0; break;
3550 case 5: /* BTS */ mask = 1ULL << src_val; break;
3551 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
3552 case 7: /* BTC */ mask = 1ULL << src_val; break;
3553 /* If this needs to be extended, probably simplest to make a
3554 new function to handle the other cases (0 .. 3). The
3555 Intel docs do however not indicate any use for 0 .. 3, so
3556 we don't expect this to happen. */
3557 default: *decode_OK = False; return delta;
3560 /* Fetch the value to be tested and modified into t2, which is
3561 64-bits wide regardless of sz. */
3562 if (epartIsReg(modrm)) {
3563 vassert(am_sz == 1);
3564 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
3565 delta += (am_sz + 1);
3566 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3568 src_val, nameIRegE(sz,pfx,modrm));
3571 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
3573 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
3574 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3579 /* Compute the new value into t2m, if non-BT. */
3580 switch (gregLO3ofRM(modrm)) {
3584 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
3587 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
3590 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
3593 /*NOTREACHED*/ /*the previous switch guards this*/
3597 /* Write the result back, if non-BT. */
3598 if (gregLO3ofRM(modrm) != 4 /* BT */) {
3599 if (epartIsReg(modrm)) {
3600 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
3602 if (pfx & PFX_LOCK) {
3603 casLE( mkexpr(t_addr),
3604 narrowTo(ty, mkexpr(t2))/*expd*/,
3605 narrowTo(ty, mkexpr(t2m))/*new*/,
3606 guest_RIP_curr_instr );
3608 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
3613 /* Copy relevant bit from t2 into the carry flag. */
3614 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
3615 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3616 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3620 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
3623 /* Set NDEP even though it isn't used. This makes redundant-PUT
3624 elimination of previous stores to this field work better. */
3625 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3631 /* Signed/unsigned widening multiply. Generate IR to multiply the
3632 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
3633 RDX:RAX/EDX:EAX/DX:AX/AX.
/* sz selects the operand width (8/4/2/1 bytes); syned selects
   signed (IMUL) vs unsigned (MUL) widening semantics.  Each width
   uses the corresponding Iop_MullS*/Iop_MullU* and splits the double-
   width product into hi/lo halves.  tmp_txt is only for disassembly
   printing.  NOTE(review): sampled listing -- the switch skeleton and
   some braces are elided between numbered lines. */
3635 static void codegen_mulL_A_D ( Int sz, Bool syned,
3636 IRTemp tmp, HChar* tmp_txt )
3638 IRType ty = szToITy(sz);
3639 IRTemp t1 = newTemp(ty);
3641 assign( t1, getIRegRAX(sz) );
3645 IRTemp res128 = newTemp(Ity_I128);
3646 IRTemp resHi = newTemp(Ity_I64);
3647 IRTemp resLo = newTemp(Ity_I64);
3648 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
/* tBaseOp is the byte-sized thunk op; setFlags_MUL presumably adjusts
   it for the actual width -- TODO confirm against setFlags_MUL. */
3649 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3650 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
3651 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3652 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
3653 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
3654 putIReg64(R_RDX, mkexpr(resHi));
3655 putIReg64(R_RAX, mkexpr(resLo));
3659 IRTemp res64 = newTemp(Ity_I64);
3660 IRTemp resHi = newTemp(Ity_I32);
3661 IRTemp resLo = newTemp(Ity_I32);
3662 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
3663 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3664 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
3665 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3666 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
3667 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
3668 putIRegRDX(4, mkexpr(resHi));
3669 putIRegRAX(4, mkexpr(resLo));
3673 IRTemp res32 = newTemp(Ity_I32);
3674 IRTemp resHi = newTemp(Ity_I16);
3675 IRTemp resLo = newTemp(Ity_I16);
3676 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
3677 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3678 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
3679 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3680 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
3681 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
3682 putIRegRDX(2, mkexpr(resHi));
3683 putIRegRAX(2, mkexpr(resLo));
3687 IRTemp res16 = newTemp(Ity_I16);
3688 IRTemp resHi = newTemp(Ity_I8);
3689 IRTemp resLo = newTemp(Ity_I8);
3690 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
3691 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3692 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
3693 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3694 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
3695 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
/* 8-bit case: the whole 16-bit product goes to AX (AH:AL), matching
   the architecture -- DX is not written for byte multiplies. */
3696 putIRegRAX(2, mkexpr(res16));
3701 vpanic("codegen_mulL_A_D(amd64)");
3703 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
3707 /* Group 3 extended opcodes. */
/* Decode F6/F7 /0../7: TEST Ev,Iv / NOT / NEG / MUL / IMUL / DIV / IDIV.
   Two symmetric arms: register E-operand vs memory E-operand; the memory
   arm honours the LOCK prefix for NOT/NEG via casLE.  Widening multiplies
   delegate to codegen_mulL_A_D; divides delegate to codegen_div.  Returns
   the updated delta; *decode_OK is cleared on bad /r fields.
   NOTE(review): sampled listing -- switch skeletons / returns are
   partially elided between numbered lines. */
3709 ULong dis_Grp3 ( VexAbiInfo* vbi,
3710 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
3717 IRType ty = szToITy(sz);
3718 IRTemp t1 = newTemp(ty);
3719 IRTemp dst1, src, dst0;
3721 modrm = getUChar(delta);
3722 if (epartIsReg(modrm)) {
3723 switch (gregLO3ofRM(modrm)) {
3724 case 0: { /* TEST */
/* Immediate is at most 4 bytes even for 64-bit operand size
   (sign-extended), hence imin(4,sz). */
3726 d64 = getSDisp(imin(4,sz), delta);
3727 delta += imin(4,sz);
3729 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
3730 getIRegE(sz,pfx,modrm),
3731 mkU(ty, d64 & mkSizeMask(sz))));
3732 setFlags_DEP1( Iop_And8, dst1, ty );
3733 DIP("test%c $%lld, %s\n",
3735 nameIRegE(sz, pfx, modrm));
3743 putIRegE(sz, pfx, modrm,
3744 unop(mkSizedOp(ty,Iop_Not8),
3745 getIRegE(sz, pfx, modrm)));
3746 DIP("not%c %s\n", nameISize(sz),
3747 nameIRegE(sz, pfx, modrm));
/* NEG: computed as 0 - src so the standard SUB flags thunk applies. */
3754 assign(dst0, mkU(ty,0));
3755 assign(src, getIRegE(sz, pfx, modrm));
3756 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
3758 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
3759 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3760 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
3762 case 4: /* MUL (unsigned widening) */
3765 assign(src, getIRegE(sz,pfx,modrm));
3766 codegen_mulL_A_D ( sz, False, src,
3767 nameIRegE(sz,pfx,modrm) );
3769 case 5: /* IMUL (signed widening) */
3772 assign(src, getIRegE(sz,pfx,modrm));
3773 codegen_mulL_A_D ( sz, True, src,
3774 nameIRegE(sz,pfx,modrm) );
3778 assign( t1, getIRegE(sz, pfx, modrm) );
3779 codegen_div ( sz, t1, False );
3780 DIP("div%c %s\n", nameISize(sz),
3781 nameIRegE(sz, pfx, modrm));
3785 assign( t1, getIRegE(sz, pfx, modrm) );
3786 codegen_div ( sz, t1, True );
3787 DIP("idiv%c %s\n", nameISize(sz),
3788 nameIRegE(sz, pfx, modrm));
3792 vpanic("Grp3(amd64,R)");
/* Memory-operand arm: disAMode must know whether an immediate
   follows (TEST), so RIP-relative addressing is computed right. */
3795 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
3796 /* we have to inform disAMode of any immediate
3798 gregLO3ofRM(modrm)==0/*TEST*/
3804 assign(t1, loadLE(ty,mkexpr(addr)));
3805 switch (gregLO3ofRM(modrm)) {
3806 case 0: { /* TEST */
3807 d64 = getSDisp(imin(4,sz), delta);
3808 delta += imin(4,sz);
3810 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
3812 mkU(ty, d64 & mkSizeMask(sz))));
3813 setFlags_DEP1( Iop_And8, dst1, ty );
3814 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
3822 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
3823 if (pfx & PFX_LOCK) {
3824 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
3825 guest_RIP_curr_instr );
3827 storeLE( mkexpr(addr), mkexpr(dst1) );
3829 DIP("not%c %s\n", nameISize(sz), dis_buf);
3835 assign(dst0, mkU(ty,0));
3836 assign(src, mkexpr(t1));
3837 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
3839 if (pfx & PFX_LOCK) {
3840 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
3841 guest_RIP_curr_instr );
3843 storeLE( mkexpr(addr), mkexpr(dst1) );
3845 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
3846 DIP("neg%c %s\n", nameISize(sz), dis_buf);
3848 case 4: /* MUL (unsigned widening) */
3849 codegen_mulL_A_D ( sz, False, t1, dis_buf );
3852 codegen_mulL_A_D ( sz, True, t1, dis_buf );
3855 codegen_div ( sz, t1, False );
3856 DIP("div%c %s\n", nameISize(sz), dis_buf);
3859 codegen_div ( sz, t1, True );
3860 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
3864 vpanic("Grp3(amd64,M)");
3871 /* Group 4 extended opcodes. */
/* Decode FE /0,/1: INC Eb / DEC Eb (byte-sized only).  Register and
   memory arms; the memory arm uses casLE when LOCK is present.  Flags
   are set via setFlags_INC_DEC, which preserves CF as the architecture
   requires for INC/DEC.  NOTE(review): sampled listing. */
3873 ULong dis_Grp4 ( VexAbiInfo* vbi,
3874 Prefix pfx, Long delta, Bool* decode_OK )
3880 IRTemp t1 = newTemp(ty);
3881 IRTemp t2 = newTemp(ty);
3885 modrm = getUChar(delta);
3886 if (epartIsReg(modrm)) {
3887 assign(t1, getIRegE(1, pfx, modrm));
3888 switch (gregLO3ofRM(modrm)) {
3890 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
3891 putIRegE(1, pfx, modrm, mkexpr(t2));
3892 setFlags_INC_DEC( True, t2, ty );
3895 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
3896 putIRegE(1, pfx, modrm, mkexpr(t2));
3897 setFlags_INC_DEC( False, t2, ty );
3904 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
3905 nameIRegE(1, pfx, modrm));
3907 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
3908 assign( t1, loadLE(ty, mkexpr(addr)) );
3909 switch (gregLO3ofRM(modrm)) {
3911 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
3912 if (pfx & PFX_LOCK) {
3913 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
3914 guest_RIP_curr_instr );
3916 storeLE( mkexpr(addr), mkexpr(t2) );
3918 setFlags_INC_DEC( True, t2, ty );
3921 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
3922 if (pfx & PFX_LOCK) {
3923 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
3924 guest_RIP_curr_instr );
3926 storeLE( mkexpr(addr), mkexpr(t2) );
3928 setFlags_INC_DEC( False, t2, ty );
3935 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
3941 /* Group 5 extended opcodes. */
/* Decode FF /0../6: INC Ev / DEC Ev / CALL Ev / JMP Ev / PUSH Ev.
   CALL and JMP are indirect transfers: they load the target into t3,
   (for CALL) push the return RIP, then end the block via jmp_treg with
   dres->whatNext = Dis_StopHere.  In 64-bit mode operand size for
   CALL/JMP is forced to 8; PUSH has no 32-bit encoding so sz==4 is
   promoted to 8.  Memory-arm INC/DEC honour LOCK through casLE.
   NOTE(review): sampled listing -- case skeletons partially elided. */
3943 ULong dis_Grp5 ( VexAbiInfo* vbi,
3944 Prefix pfx, Int sz, Long delta,
3945 DisResult* dres, Bool* decode_OK )
3950 IRTemp addr = IRTemp_INVALID;
3951 IRType ty = szToITy(sz);
3952 IRTemp t1 = newTemp(ty);
3953 IRTemp t2 = IRTemp_INVALID;
3954 IRTemp t3 = IRTemp_INVALID;
3959 modrm = getUChar(delta);
3960 if (epartIsReg(modrm)) {
3961 assign(t1, getIRegE(sz,pfx,modrm));
3962 switch (gregLO3ofRM(modrm)) {
3965 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3966 mkexpr(t1), mkU(ty,1)));
3967 setFlags_INC_DEC( True, t2, ty );
3968 putIRegE(sz,pfx,modrm, mkexpr(t2));
3972 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3973 mkexpr(t1), mkU(ty,1)));
3974 setFlags_INC_DEC( False, t2, ty );
3975 putIRegE(sz,pfx,modrm, mkexpr(t2));
3977 case 2: /* call Ev */
3978 /* Ignore any sz value and operate as if sz==8. */
3979 if (!(sz == 4 || sz == 8)) goto unhandled;
3981 t3 = newTemp(Ity_I64);
3982 assign(t3, getIRegE(sz,pfx,modrm));
3983 t2 = newTemp(Ity_I64);
/* Push the return address (next insn) onto the stack. */
3984 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
3985 putIReg64(R_RSP, mkexpr(t2));
3986 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
3987 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
3988 jmp_treg(Ijk_Call,t3);
3989 dres->whatNext = Dis_StopHere;
3992 case 4: /* jmp Ev */
3993 /* Ignore any sz value and operate as if sz==8. */
3994 if (!(sz == 4 || sz == 8)) goto unhandled;
3996 t3 = newTemp(Ity_I64);
3997 assign(t3, getIRegE(sz,pfx,modrm));
3998 jmp_treg(Ijk_Boring,t3);
3999 dres->whatNext = Dis_StopHere;
4007 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4008 showSz ? nameISize(sz) : ' ',
4009 nameIRegE(sz, pfx, modrm));
4011 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
/* Only INC/DEC (and other non-transfer cases) pre-load t1; CALL/JMP/
   PUSH load the full 64 bits themselves below. */
4012 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
4013 && gregLO3ofRM(modrm) != 6) {
4014 assign(t1, loadLE(ty,mkexpr(addr)));
4016 switch (gregLO3ofRM(modrm)) {
4019 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4020 mkexpr(t1), mkU(ty,1)));
4021 if (pfx & PFX_LOCK) {
4022 casLE( mkexpr(addr),
4023 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4025 storeLE(mkexpr(addr),mkexpr(t2));
4027 setFlags_INC_DEC( True, t2, ty );
4031 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4032 mkexpr(t1), mkU(ty,1)));
4033 if (pfx & PFX_LOCK) {
4034 casLE( mkexpr(addr),
4035 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4037 storeLE(mkexpr(addr),mkexpr(t2));
4039 setFlags_INC_DEC( False, t2, ty );
4041 case 2: /* call Ev */
4042 /* Ignore any sz value and operate as if sz==8. */
4043 if (!(sz == 4 || sz == 8)) goto unhandled;
4045 t3 = newTemp(Ity_I64);
4046 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4047 t2 = newTemp(Ity_I64);
4048 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4049 putIReg64(R_RSP, mkexpr(t2));
4050 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
4051 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
4052 jmp_treg(Ijk_Call,t3);
4053 dres->whatNext = Dis_StopHere;
4056 case 4: /* JMP Ev */
4057 /* Ignore any sz value and operate as if sz==8. */
4058 if (!(sz == 4 || sz == 8)) goto unhandled;
4060 t3 = newTemp(Ity_I64);
4061 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4062 jmp_treg(Ijk_Boring,t3);
4063 dres->whatNext = Dis_StopHere;
4066 case 6: /* PUSH Ev */
4067 /* There is no encoding for 32-bit operand size; hence ... */
4068 if (sz == 4) sz = 8;
4069 if (!(sz == 8 || sz == 2)) goto unhandled;
4071 t3 = newTemp(Ity_I64);
4072 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4073 t2 = newTemp(Ity_I64);
4074 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4075 putIReg64(R_RSP, mkexpr(t2) );
4076 storeLE( mkexpr(t2), mkexpr(t3) );
4079 goto unhandled; /* awaiting test case */
4087 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4088 showSz ? nameISize(sz) : ' ',
4095 /*------------------------------------------------------------*/
4096 /*--- Disassembling string ops (including REP prefixes) ---*/
4097 /*------------------------------------------------------------*/
4099 /* Code shared by all the string ops */
/* Compute the per-iteration pointer increment for a string op into
   t_inc: +/-sz depending on the direction flag (DFLAG, which the guest
   state apparently stores as +1/-1 -- the shift scales it by the
   operand size; TODO confirm DFLAG representation). */
4101 void dis_string_op_increment ( Int sz, IRTemp t_inc )
4104 if (sz == 8 || sz == 4 || sz == 2) {
4106 if (sz == 4) logSz = 2;
4107 if (sz == 8) logSz = 3;
4109 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4113 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
/* Dispatch a non-REP string op: compute the direction increment, then
   invoke the op-specific body (dis_MOVS/dis_LODS/...) via dis_OP. */
4118 void dis_string_op( void (*dis_OP)( Int, IRTemp ),
4119 Int sz, HChar* name, Prefix pfx )
4121 IRTemp t_inc = newTemp(Ity_I64);
4122 /* Really we ought to inspect the override prefixes, but we don't.
4123 The following assertion catches any resulting sillyness. */
4124 vassert(pfx == clearSegBits(pfx));
4125 dis_string_op_increment(sz, t_inc);
4126 dis_OP( sz, t_inc );
4127 DIP("%s%c\n", name, nameISize(sz));
/* One MOVS step: copy sz bytes from [RSI] to [RDI], then advance both
   pointers by t_inc (which already carries the direction sign). */
4131 void dis_MOVS ( Int sz, IRTemp t_inc )
4133 IRType ty = szToITy(sz);
4134 IRTemp td = newTemp(Ity_I64); /* RDI */
4135 IRTemp ts = newTemp(Ity_I64); /* RSI */
4137 assign( td, getIReg64(R_RDI) );
4138 assign( ts, getIReg64(R_RSI) );
4140 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4142 putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
4143 putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
/* One LODS step: load sz bytes from [RSI] into rAX, advance RSI. */
4147 void dis_LODS ( Int sz, IRTemp t_inc )
4149 IRType ty = szToITy(sz);
4150 IRTemp ts = newTemp(Ity_I64); /* RSI */
4152 assign( ts, getIReg64(R_RSI) );
4154 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4156 putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
/* One STOS step: store rAX (sz bytes) to [RDI], advance RDI. */
4160 void dis_STOS ( Int sz, IRTemp t_inc )
4162 IRType ty = szToITy(sz);
4163 IRTemp ta = newTemp(ty); /* rAX */
4164 IRTemp td = newTemp(Ity_I64); /* RDI */
4166 assign( ta, getIRegRAX(sz) );
4168 assign( td, getIReg64(R_RDI) );
4170 storeLE( mkexpr(td), mkexpr(ta) );
4172 putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
/* One CMPS step: compare [RSI] with [RDI] (flags from SUB of the two
   loaded values; note the operand order tsv - tdv), advance both. */
4176 void dis_CMPS ( Int sz, IRTemp t_inc )
4178 IRType ty = szToITy(sz);
4179 IRTemp tdv = newTemp(ty); /* (RDI) */
4180 IRTemp tsv = newTemp(ty); /* (RSI) */
4181 IRTemp td = newTemp(Ity_I64); /* RDI */
4182 IRTemp ts = newTemp(Ity_I64); /* RSI */
4184 assign( td, getIReg64(R_RDI) );
4186 assign( ts, getIReg64(R_RSI) );
4188 assign( tdv, loadLE(ty,mkexpr(td)) );
4190 assign( tsv, loadLE(ty,mkexpr(ts)) );
4192 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4194 putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
4196 putIReg64(R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
/* One SCAS step: compare rAX with [RDI] (flags from rAX - mem),
   advance RDI.  RSI is not involved. */
4200 void dis_SCAS ( Int sz, IRTemp t_inc )
4202 IRType ty = szToITy(sz);
4203 IRTemp ta = newTemp(ty); /* rAX */
4204 IRTemp td = newTemp(Ity_I64); /* RDI */
4205 IRTemp tdv = newTemp(ty); /* (RDI) */
4207 assign( ta, getIRegRAX(sz) );
4209 assign( td, getIReg64(R_RDI) );
4211 assign( tdv, loadLE(ty,mkexpr(td)) );
4213 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4215 putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
4219 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4220 the insn is the last one in the basic block, and so emit a jump to
4221 the next insn, rather than just falling through. */
/* One REP iteration per translated block: exit to rip_next when RCX
   is zero, otherwise decrement RCX, run one step of the string op,
   and loop back to rip (re-executing this very insn).  For REPE/REPNE,
   the condition-code test decides between looping and falling out. */
4223 void dis_REP_op ( AMD64Condcode cond,
4224 void (*dis_OP)(Int, IRTemp),
4225 Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
4228 IRTemp t_inc = newTemp(Ity_I64);
4229 IRTemp tc = newTemp(Ity_I64); /* RCX */
4231 /* Really we ought to inspect the override prefixes, but we don't.
4232 The following assertion catches any resulting sillyness. */
4233 vassert(pfx == clearSegBits(pfx));
4235 assign( tc, getIReg64(R_RCX) );
4237 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,mkexpr(tc),mkU64(0)),
4239 IRConst_U64(rip_next) ) );
4241 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
4243 dis_string_op_increment(sz, t_inc);
4246 if (cond == AMD64CondAlways) {
/* Plain REP: unconditionally re-execute this insn. */
4247 jmp_lit(Ijk_Boring,rip);
4249 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4251 IRConst_U64(rip) ) );
4252 jmp_lit(Ijk_Boring,rip_next);
4254 DIP("%s%c\n", name, nameISize(sz));
4258 /*------------------------------------------------------------*/
4259 /*--- Arithmetic, etc. ---*/
4260 /*------------------------------------------------------------*/
4262 /* IMUL E, G. Supplied eip points to the modR/M byte. */
/* Two-operand IMUL (0F AF): G := G * E, keeping only the low half of
   the product; flags come from setFlags_MUL with the signed thunk op. */
4264 ULong dis_mul_E_G ( VexAbiInfo* vbi,
4271 UChar rm = getUChar(delta0);
4272 IRType ty = szToITy(size);
4273 IRTemp te = newTemp(ty);
4274 IRTemp tg = newTemp(ty);
4275 IRTemp resLo = newTemp(ty);
4277 assign( tg, getIRegG(size, pfx, rm) );
4278 if (epartIsReg(rm)) {
4279 assign( te, getIRegE(size, pfx, rm) );
4281 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
4282 assign( te, loadLE(ty,mkexpr(addr)) );
4285 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4287 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4289 putIRegG(size, pfx, rm, mkexpr(resLo) );
4291 if (epartIsReg(rm)) {
4292 DIP("imul%c %s, %s\n", nameISize(size),
4293 nameIRegE(size,pfx,rm),
4294 nameIRegG(size,pfx,rm));
4297 DIP("imul%c %s, %s\n", nameISize(size),
4299 nameIRegG(size,pfx,rm));
4305 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
/* Three-operand IMUL (6B/69): G := E * imm.  litsize bounds the
   encoded immediate (at most 4 bytes, sign-extended); the immediate
   is then masked to the operand size before use. */
4307 ULong dis_imul_I_E_G ( VexAbiInfo* vbi,
4316 UChar rm = getUChar(delta);
4317 IRType ty = szToITy(size);
4318 IRTemp te = newTemp(ty);
4319 IRTemp tl = newTemp(ty);
4320 IRTemp resLo = newTemp(ty);
4322 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
4324 if (epartIsReg(rm)) {
4325 assign(te, getIRegE(size, pfx, rm));
4328 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
4330 assign(te, loadLE(ty, mkexpr(addr)));
4333 d64 = getSDisp(imin(4,litsize),delta);
4334 delta += imin(4,litsize);
4336 d64 &= mkSizeMask(size);
4337 assign(tl, mkU(ty,d64));
4339 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4341 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4343 putIRegG(size, pfx, rm, mkexpr(resLo));
4345 DIP("imul%c $%lld, %s, %s\n",
4346 nameISize(size), d64,
4347 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4348 nameIRegG(size,pfx,rm) );
4353 /* Generate an IR sequence to do a popcount operation on the supplied
4354 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4355 Ity_I16, Ity_I32 or Ity_I64 only. */
/* Classic divide-and-conquer bit counting: at step i, pairs of
   2^i-bit fields are summed using mask[i] and a shift by shift[i]
   (shift values assigned on elided lines -- presumably 1,2,4,... ).
   One unrolled sequence per supported width. */
4356 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
4359 if (ty == Ity_I16) {
4360 IRTemp old = IRTemp_INVALID;
4361 IRTemp nyu = IRTemp_INVALID;
4362 IRTemp mask[4], shift[4];
4363 for (i = 0; i < 4; i++) {
4364 mask[i] = newTemp(ty);
4367 assign(mask[0], mkU16(0x5555));
4368 assign(mask[1], mkU16(0x3333));
4369 assign(mask[2], mkU16(0x0F0F));
4370 assign(mask[3], mkU16(0x00FF));
4372 for (i = 0; i < 4; i++) {
4380 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
4386 if (ty == Ity_I32) {
4387 IRTemp old = IRTemp_INVALID;
4388 IRTemp nyu = IRTemp_INVALID;
4389 IRTemp mask[5], shift[5];
4390 for (i = 0; i < 5; i++) {
4391 mask[i] = newTemp(ty);
4394 assign(mask[0], mkU32(0x55555555));
4395 assign(mask[1], mkU32(0x33333333));
4396 assign(mask[2], mkU32(0x0F0F0F0F));
4397 assign(mask[3], mkU32(0x00FF00FF));
4398 assign(mask[4], mkU32(0x0000FFFF));
4400 for (i = 0; i < 5; i++) {
4408 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
4414 if (ty == Ity_I64) {
4415 IRTemp old = IRTemp_INVALID;
4416 IRTemp nyu = IRTemp_INVALID;
4417 IRTemp mask[6], shift[6];
4418 for (i = 0; i < 6; i++) {
4419 mask[i] = newTemp(ty);
4422 assign(mask[0], mkU64(0x5555555555555555ULL));
4423 assign(mask[1], mkU64(0x3333333333333333ULL));
4424 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
4425 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
4426 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
4427 assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
4429 for (i = 0; i < 6; i++) {
4437 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
4448 /* Generate an IR sequence to do a count-leading-zeroes operation on
4449 the supplied IRTemp, and return a new IRTemp holding the result.
4450 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4451 the argument is zero, return the number of bits in the word (the
4452 natural semantics). */
/* Strategy: widen to 64 bits, left-align by shifting up by (64 - width)
   so the narrow value's leading zeroes line up with bit 63, then apply
   Clz64, special-casing zero input to return the type's bit width. */
4453 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
4455 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
4457 IRTemp src64 = newTemp(Ity_I64);
4458 assign(src64, widenUto64( mkexpr(src) ));
4460 IRTemp src64x = newTemp(Ity_I64);
4462 binop(Iop_Shl64, mkexpr(src64),
4463 mkU8(64 - 8 * sizeofIRType(ty))));
4465 // Clz64 has undefined semantics when its input is zero, so
4466 // special-case around that.
4467 IRTemp res64 = newTemp(Ity_I64);
4471 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0))),
4472 unop(Iop_Clz64, mkexpr(src64x)),
4473 mkU64(8 * sizeofIRType(ty))
4476 IRTemp res = newTemp(ty);
4477 assign(res, narrowTo(ty, mkexpr(res64)));
4482 /*------------------------------------------------------------*/
4484 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
4486 /*------------------------------------------------------------*/
4488 /* --- Helper functions for dealing with the register stack. --- */
4490 /* --- Set the emulation-warning pseudo-register. --- */
/* e must be Ity_I32; written straight to the EMWARN guest-state slot. */
4492 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
4494 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
4495 stmt( IRStmt_Put( OFFB_EMWARN, e ) );
4498 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
/* Used as the value returned when reading an empty x87 register. */
4500 static IRExpr* mkQNaN64 ( void )
4502 /* QNaN is 0 2047 1 0(51times)
4503 == 0b 11111111111b 1 0(51times)
4504 == 0x7FF8 0000 0000 0000
4506 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
4509 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
/* FTOP indexes the rotating x87 register file; stored as Ity_I32. */
4511 static IRExpr* get_ftop ( void )
4513 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
4516 static void put_ftop ( IRExpr* e )
4518 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
4519 stmt( IRStmt_Put( OFFB_FTOP, e ) );
4522 /* --------- Get/put the C3210 bits. --------- */
/* The x87 condition-code bits (C3..C0), kept as a 64-bit guest slot. */
4524 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
4526 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
4529 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
4531 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
4532 stmt( IRStmt_Put( OFFB_FC3210, e ) );
4535 /* --------- Get/put the FPU rounding mode. --------- */
/* FPROUND is stored as Ity_I64 in guest state but handled as I32 here;
   the accessors narrow/widen at the boundary. */
4536 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
4538 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
4541 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
4543 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
4544 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
4548 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
4549 /* Produces a value in 0 .. 3, which is encoded as per the type
4550 IRRoundingMode. Since the guest_FPROUND value is also encoded as
4551 per IRRoundingMode, we merely need to get it and mask it for
4554 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
4556 return binop( Iop_And32, get_fpround(), mkU32(3) );
/* Deliberately ignores the guest rounding mode: used where the
   implementation always rounds to nearest (see XXXROUNDINGFIXME
   call sites). */
4559 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
4561 return mkU32(Irrm_NEAREST);
4565 /* --------- Get/set FP register tag bytes. --------- */
4567 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
/* Tags live in an 8-entry Ity_I8 circular array indexed by FTOP+i. */
4569 static void put_ST_TAG ( Int i, IRExpr* value )
4572 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
4573 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
4574 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
4577 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
4578 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
4580 static IRExpr* get_ST_TAG ( Int i )
4582 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
4583 return IRExpr_GetI( descr, get_ftop(), i );
4587 /* --------- Get/set FP registers. --------- */
4589 /* Given i, and some expression e, emit 'ST(i) = e' and set the
4590 register's tag to indicate the register is full. The previous
4591 state of the register is not checked. */
4593 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
4596 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
4597 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
4598 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
4599 /* Mark the register as in-use. */
4600 put_ST_TAG(i, mkU8(1));
4603 /* Given i, and some expression e, emit
4604 ST(i) = is_full(i) ? NaN : e
4605 and set the tag accordingly.
/* Checked store: writing to an already-full slot yields a QNaN
   (stack-fault-ish behaviour) rather than the supplied value. */
4608 static void put_ST ( Int i, IRExpr* value )
4610 put_ST_UNCHECKED( i,
4611 IRExpr_Mux0X( get_ST_TAG(i),
4614 /* non-0 means full */
4621 /* Given i, generate an expression yielding 'ST(i)'. */
4623 static IRExpr* get_ST_UNCHECKED ( Int i )
4625 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
4626 return IRExpr_GetI( descr, get_ftop(), i );
4630 /* Given i, generate an expression yielding
4631 is_full(i) ? ST(i) : NaN
/* Checked read: empty slots read as QNaN instead of stale data. */
4634 static IRExpr* get_ST ( Int i )
4637 IRExpr_Mux0X( get_ST_TAG(i),
4640 /* non-0 means full */
4641 get_ST_UNCHECKED(i));
4645 /* Adjust FTOP downwards by one register. */
/* x87 push: the stack grows toward lower FTOP values. */
4647 static void fp_push ( void )
4649 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
4652 /* Adjust FTOP upwards by one register, and mark the vacated register
/* x87 pop: clear the tag of ST(0) (mark empty) before bumping FTOP. */
4655 static void fp_pop ( void )
4657 put_ST_TAG(0, mkU8(0));
4658 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4661 /* Clear the C2 bit of the FPU status register, for
4662 sin/cos/tan/sincos. */
/* C2 clear == "argument was in range"; see the header note about the
   simulation always claiming -2^63 <= arg <= 2^63. */
4664 static void clear_C2 ( void )
4666 put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) );
4669 /* Invent a plausible-looking FPU status word value:
4670 ((ftop & 7) << 11) | (c3210 & 0x4700)
/* Only TOP and the C3..C0 bits are synthesised; exception-status bits
   are not tracked and read as zero. */
4672 static IRExpr* get_FPU_sw ( void )
4678 binop(Iop_And32, get_ftop(), mkU32(7)),
4680 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
4686 /* ------------------------------------------------------- */
4687 /* Given all that stack-mangling junk, we can now go ahead
4688 and describe FP instructions.
4691 /* ST(0) = ST(0) `op` mem64/32(addr)
4692 Need to check ST(0)'s tag on read, but not on write.
/* dbl selects double- (F64) vs single-real (F32, widened) memory
   operand.  Rounding mode is faked to nearest (XXXROUNDINGFIXME). */
4695 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
4698 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
4702 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4704 loadLE(Ity_F64,mkexpr(addr))
4709 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4711 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
4717 /* ST(0) = mem64/32(addr) `op` ST(0)
4718 Need to check ST(0)'s tag on read, but not on write.
/* Reversed-operand variant of fp_do_op_mem_ST_0 (for FSUBR/FDIVR):
   memory operand is the left argument of op. */
4721 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
4724 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
4728 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4729 loadLE(Ity_F64,mkexpr(addr)),
4735 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4736 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
4743 /* ST(dst) = ST(dst) `op` ST(src).
4744 Check dst and src tags when reading but not on write.
/* Register-register x87 binop; pop_after additionally pops the stack
   (the "P" instruction forms). */
4747 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
4750 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
4754 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4762 /* ST(dst) = ST(src) `op` ST(dst).
4763 Check dst and src tags when reading but not on write.
/* Reversed-operand register-register variant (FSUBR/FDIVR forms). */
4766 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
4769 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
4773 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4781 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
/* FUCOMI/FCOMI(P): compare ST(0) with ST(i) and deposit the result in
   the rflags Z/P/C via the COPY thunk (CmpF64 result massaged on an
   elided line).  Acknowledged approximations are noted below. */
4782 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
4784 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
4785 /* This is a bit of a hack (and isn't really right). It sets
4786 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
4787 documentation implies A and S are unchanged.
4789 /* It's also fishy in that it is used both for COMIP and
4790 UCOMIP, and they aren't the same (although similar). */
4791 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
4792 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
4797 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
4806 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
/* Saturating narrow mimicking x87 FIST overflow behaviour: any value
   outside the I16 range collapses to -32768 ("integer indefinite").
   The range test uses the unsigned-wrap trick on e32+32768 (the compare
   itself is on an elided line -- TODO confirm). */
4808 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
4810 IRTemp t32 = newTemp(Ity_I32);
4817 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
4820 unop(Iop_32to16, mkexpr(t32)));
4825 ULong dis_FPU ( /*OUT*/Bool* decode_ok,
4826 VexAbiInfo* vbi, Prefix pfx, Long delta )
4833 /* On entry, delta points at the second byte of the insn (the modrm
4835 UChar first_opcode = getUChar(delta-1);
4836 UChar modrm = getUChar(delta+0);
4838 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
4840 if (first_opcode == 0xD8) {
4843 /* bits 5,4,3 are an opcode extension, and the modRM also
4844 specifies an address. */
4845 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
4848 switch (gregLO3ofRM(modrm)) {
4850 case 0: /* FADD single-real */
4851 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
4854 case 1: /* FMUL single-real */
4855 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
4858 //.. case 2: /* FCOM single-real */
4859 //.. DIP("fcoms %s\n", dis_buf);
4860 //.. /* This forces C1 to zero, which isn't right. */
4862 //.. binop( Iop_And32,
4863 //.. binop(Iop_Shl32,
4864 //.. binop(Iop_CmpF64,
4866 //.. unop(Iop_F32toF64,
4867 //.. loadLE(Ity_F32,mkexpr(addr)))),
4873 //.. case 3: /* FCOMP single-real */
4874 //.. DIP("fcomps %s\n", dis_buf);
4875 //.. /* This forces C1 to zero, which isn't right. */
4877 //.. binop( Iop_And32,
4878 //.. binop(Iop_Shl32,
4879 //.. binop(Iop_CmpF64,
4881 //.. unop(Iop_F32toF64,
4882 //.. loadLE(Ity_F32,mkexpr(addr)))),
4889 case 4: /* FSUB single-real */
4890 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
4893 case 5: /* FSUBR single-real */
4894 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
4897 case 6: /* FDIV single-real */
4898 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
4901 case 7: /* FDIVR single-real */
4902 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
4906 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
4907 vex_printf("first_opcode == 0xD8\n");
4914 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
4915 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
4918 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
4919 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
4922 /* Dunno if this is right */
4923 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
4924 r_dst = (UInt)modrm - 0xD0;
4925 DIP("fcom %%st(0),%%st(%d)\n", r_dst);
4926 /* This forces C1 to zero, which isn't right. */
4931 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
4937 /* Dunno if this is right */
4938 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
4939 r_dst = (UInt)modrm - 0xD8;
4940 DIP("fcomp %%st(0),%%st(%d)\n", r_dst);
4941 /* This forces C1 to zero, which isn't right. */
4946 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
4953 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
4954 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
4957 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
4958 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
4961 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
4962 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
4965 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
4966 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
4975 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
4977 if (first_opcode == 0xD9) {
4980 /* bits 5,4,3 are an opcode extension, and the modRM also
4981 specifies an address. */
4982 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
4985 switch (gregLO3ofRM(modrm)) {
4987 case 0: /* FLD single-real */
4988 DIP("flds %s\n", dis_buf);
4990 put_ST(0, unop(Iop_F32toF64,
4991 loadLE(Ity_F32, mkexpr(addr))));
4994 case 2: /* FST single-real */
4995 DIP("fsts %s\n", dis_buf);
4996 storeLE(mkexpr(addr),
4997 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5000 case 3: /* FSTP single-real */
5001 DIP("fstps %s\n", dis_buf);
5002 storeLE(mkexpr(addr),
5003 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5007 case 4: { /* FLDENV m28 */
5008 /* Uses dirty helper:
5009 VexEmWarn amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */
5010 IRTemp ew = newTemp(Ity_I32);
5011 IRTemp w64 = newTemp(Ity_I64);
5012 IRDirty* d = unsafeIRDirty_0_N (
5014 "amd64g_dirtyhelper_FLDENV",
5015 &amd64g_dirtyhelper_FLDENV,
5016 mkIRExprVec_1( mkexpr(addr) )
5020 /* declare we're reading memory */
5022 d->mAddr = mkexpr(addr);
5025 /* declare we're writing guest state */
5028 d->fxState[0].fx = Ifx_Write;
5029 d->fxState[0].offset = OFFB_FTOP;
5030 d->fxState[0].size = sizeof(UInt);
5032 d->fxState[1].fx = Ifx_Write;
5033 d->fxState[1].offset = OFFB_FPTAGS;
5034 d->fxState[1].size = 8 * sizeof(UChar);
5036 d->fxState[2].fx = Ifx_Write;
5037 d->fxState[2].offset = OFFB_FPROUND;
5038 d->fxState[2].size = sizeof(ULong);
5040 d->fxState[3].fx = Ifx_Write;
5041 d->fxState[3].offset = OFFB_FC3210;
5042 d->fxState[3].size = sizeof(ULong);
5044 stmt( IRStmt_Dirty(d) );
5046 /* ew contains any emulation warning we may need to
5047 issue. If needed, side-exit to the next insn,
5048 reporting the warning, so that Valgrind's dispatcher
5049 sees the warning. */
5050 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
5051 put_emwarn( mkexpr(ew) );
5054 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5056 IRConst_U64( guest_RIP_bbstart+delta )
5060 DIP("fldenv %s\n", dis_buf);
5064 case 5: {/* FLDCW */
5065 /* The only thing we observe in the control word is the
5066 rounding mode. Therefore, pass the 16-bit value
5067 (x87 native-format control word) to a clean helper,
5068 getting back a 64-bit value, the lower half of which
5069 is the FPROUND value to store, and the upper half of
5070 which is the emulation-warning token which may be
5073 /* ULong amd64h_check_fldcw ( ULong ); */
5074 IRTemp t64 = newTemp(Ity_I64);
5075 IRTemp ew = newTemp(Ity_I32);
5076 DIP("fldcw %s\n", dis_buf);
5077 assign( t64, mkIRExprCCall(
5078 Ity_I64, 0/*regparms*/,
5079 "amd64g_check_fldcw",
5080 &amd64g_check_fldcw,
5083 loadLE(Ity_I16, mkexpr(addr)))
5088 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5089 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5090 put_emwarn( mkexpr(ew) );
5091 /* Finally, if an emulation warning was reported,
5092 side-exit to the next insn, reporting the warning,
5093 so that Valgrind's dispatcher sees the warning. */
5096 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5098 IRConst_U64( guest_RIP_bbstart+delta )
5104 case 6: { /* FNSTENV m28 */
5105 /* Uses dirty helper:
5106 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
5107 IRDirty* d = unsafeIRDirty_0_N (
5109 "amd64g_dirtyhelper_FSTENV",
5110 &amd64g_dirtyhelper_FSTENV,
5111 mkIRExprVec_1( mkexpr(addr) )
5114 /* declare we're writing memory */
5116 d->mAddr = mkexpr(addr);
5119 /* declare we're reading guest state */
5122 d->fxState[0].fx = Ifx_Read;
5123 d->fxState[0].offset = OFFB_FTOP;
5124 d->fxState[0].size = sizeof(UInt);
5126 d->fxState[1].fx = Ifx_Read;
5127 d->fxState[1].offset = OFFB_FPTAGS;
5128 d->fxState[1].size = 8 * sizeof(UChar);
5130 d->fxState[2].fx = Ifx_Read;
5131 d->fxState[2].offset = OFFB_FPROUND;
5132 d->fxState[2].size = sizeof(ULong);
5134 d->fxState[3].fx = Ifx_Read;
5135 d->fxState[3].offset = OFFB_FC3210;
5136 d->fxState[3].size = sizeof(ULong);
5138 stmt( IRStmt_Dirty(d) );
5140 DIP("fnstenv %s\n", dis_buf);
5144 case 7: /* FNSTCW */
5145 /* Fake up a native x87 FPU control word. The only
5146 thing it depends on is FPROUND[1:0], so call a clean
5147 helper to cook it up. */
5148 /* ULong amd64g_create_fpucw ( ULong fpround ) */
5149 DIP("fnstcw %s\n", dis_buf);
5155 "amd64g_create_fpucw", &amd64g_create_fpucw,
5156 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5163 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5164 vex_printf("first_opcode == 0xD9\n");
5172 case 0xC0 ... 0xC7: /* FLD %st(?) */
5173 r_src = (UInt)modrm - 0xC0;
5174 DIP("fld %%st(%u)\n", r_src);
5175 t1 = newTemp(Ity_F64);
5176 assign(t1, get_ST(r_src));
5178 put_ST(0, mkexpr(t1));
5181 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5182 r_src = (UInt)modrm - 0xC8;
5183 DIP("fxch %%st(%u)\n", r_src);
5184 t1 = newTemp(Ity_F64);
5185 t2 = newTemp(Ity_F64);
5186 assign(t1, get_ST(0));
5187 assign(t2, get_ST(r_src));
5188 put_ST_UNCHECKED(0, mkexpr(t2));
5189 put_ST_UNCHECKED(r_src, mkexpr(t1));
5192 case 0xE0: /* FCHS */
5194 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5197 case 0xE1: /* FABS */
5199 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5202 case 0xE5: { /* FXAM */
5203 /* This is an interesting one. It examines %st(0),
5204 regardless of whether the tag says it's empty or not.
5205 Here, just pass both the tag (in our format) and the
5206 value (as a double, actually a ULong) to a helper
5209 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5210 unop(Iop_ReinterpF64asI64,
5211 get_ST_UNCHECKED(0)) );
5212 put_C3210(mkIRExprCCall(
5215 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5222 case 0xE8: /* FLD1 */
5225 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5226 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
5229 case 0xE9: /* FLDL2T */
5232 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5233 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
5236 case 0xEA: /* FLDL2E */
5239 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5240 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
5243 case 0xEB: /* FLDPI */
5246 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5247 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
5250 case 0xEC: /* FLDLG2 */
5253 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5254 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
5257 case 0xED: /* FLDLN2 */
5260 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5261 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
5264 case 0xEE: /* FLDZ */
5267 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5268 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
5271 case 0xF0: /* F2XM1 */
5275 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5279 case 0xF1: /* FYL2X */
5283 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5289 case 0xF2: /* FPTAN */
5293 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5296 put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
5297 clear_C2(); /* HACK */
5300 case 0xF3: /* FPATAN */
5304 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5310 case 0xF4: { /* FXTRACT */
5311 IRTemp argF = newTemp(Ity_F64);
5312 IRTemp sigF = newTemp(Ity_F64);
5313 IRTemp expF = newTemp(Ity_F64);
5314 IRTemp argI = newTemp(Ity_I64);
5315 IRTemp sigI = newTemp(Ity_I64);
5316 IRTemp expI = newTemp(Ity_I64);
5318 assign( argF, get_ST(0) );
5319 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
5322 Ity_I64, 0/*regparms*/,
5323 "x86amd64g_calculate_FXTRACT",
5324 &x86amd64g_calculate_FXTRACT,
5325 mkIRExprVec_2( mkexpr(argI),
5326 mkIRExpr_HWord(0)/*sig*/ ))
5330 Ity_I64, 0/*regparms*/,
5331 "x86amd64g_calculate_FXTRACT",
5332 &x86amd64g_calculate_FXTRACT,
5333 mkIRExprVec_2( mkexpr(argI),
5334 mkIRExpr_HWord(1)/*exp*/ ))
5336 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
5337 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
5339 put_ST_UNCHECKED(0, mkexpr(expF) );
5342 put_ST(0, mkexpr(sigF) );
5346 case 0xF5: { /* FPREM1 -- IEEE compliant */
5347 IRTemp a1 = newTemp(Ity_F64);
5348 IRTemp a2 = newTemp(Ity_F64);
5350 /* Do FPREM1 twice, once to get the remainder, and once
5351 to get the C3210 flag values. */
5352 assign( a1, get_ST(0) );
5353 assign( a2, get_ST(1) );
5356 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5361 triop(Iop_PRem1C3210F64,
5362 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5368 case 0xF7: /* FINCSTP */
5370 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5373 case 0xF8: { /* FPREM -- not IEEE compliant */
5374 IRTemp a1 = newTemp(Ity_F64);
5375 IRTemp a2 = newTemp(Ity_F64);
5377 /* Do FPREM twice, once to get the remainder, and once
5378 to get the C3210 flag values. */
5379 assign( a1, get_ST(0) );
5380 assign( a2, get_ST(1) );
5383 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5388 triop(Iop_PRemC3210F64,
5389 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5395 case 0xF9: /* FYL2XP1 */
5398 triop(Iop_Yl2xp1F64,
5399 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5405 case 0xFA: /* FSQRT */
5409 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5413 case 0xFB: { /* FSINCOS */
5414 IRTemp a1 = newTemp(Ity_F64);
5415 assign( a1, get_ST(0) );
5419 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5424 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5426 clear_C2(); /* HACK */
5430 case 0xFC: /* FRNDINT */
5433 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
5436 case 0xFD: /* FSCALE */
5440 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5445 case 0xFE: /* FSIN */
5449 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5451 clear_C2(); /* HACK */
5454 case 0xFF: /* FCOS */
5458 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5460 clear_C2(); /* HACK */
5469 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
5471 if (first_opcode == 0xDA) {
5475 /* bits 5,4,3 are an opcode extension, and the modRM also
5476 specifies an address. */
5478 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5480 switch (gregLO3ofRM(modrm)) {
5482 case 0: /* FIADD m32int */ /* ST(0) += m32int */
5483 DIP("fiaddl %s\n", dis_buf);
5487 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
5488 DIP("fimull %s\n", dis_buf);
5492 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
5493 DIP("fisubl %s\n", dis_buf);
5497 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
5498 DIP("fisubrl %s\n", dis_buf);
5502 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
5503 DIP("fisubl %s\n", dis_buf);
5507 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
5508 DIP("fidivrl %s\n", dis_buf);
5515 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5518 loadLE(Ity_I32, mkexpr(addr)))));
5524 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5526 loadLE(Ity_I32, mkexpr(addr))),
5531 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5532 vex_printf("first_opcode == 0xDA\n");
5541 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
5542 r_src = (UInt)modrm - 0xC0;
5543 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
5547 mk_amd64g_calculate_condition(AMD64CondB)),
5548 get_ST(0), get_ST(r_src)) );
5551 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
5552 r_src = (UInt)modrm - 0xC8;
5553 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
5557 mk_amd64g_calculate_condition(AMD64CondZ)),
5558 get_ST(0), get_ST(r_src)) );
5561 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
5562 r_src = (UInt)modrm - 0xD0;
5563 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
5567 mk_amd64g_calculate_condition(AMD64CondBE)),
5568 get_ST(0), get_ST(r_src)) );
5571 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
5572 r_src = (UInt)modrm - 0xD8;
5573 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
5577 mk_amd64g_calculate_condition(AMD64CondP)),
5578 get_ST(0), get_ST(r_src)) );
5581 case 0xE9: /* FUCOMPP %st(0),%st(1) */
5582 DIP("fucompp %%st(0),%%st(1)\n");
5583 /* This forces C1 to zero, which isn't right. */
5588 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5603 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
5605 if (first_opcode == 0xDB) {
5608 /* bits 5,4,3 are an opcode extension, and the modRM also
5609 specifies an address. */
5610 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5613 switch (gregLO3ofRM(modrm)) {
5615 case 0: /* FILD m32int */
5616 DIP("fildl %s\n", dis_buf);
5618 put_ST(0, unop(Iop_I32StoF64,
5619 loadLE(Ity_I32, mkexpr(addr))));
5622 case 1: /* FISTTPL m32 (SSE3) */
5623 DIP("fisttpl %s\n", dis_buf);
5624 storeLE( mkexpr(addr),
5625 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
5629 case 2: /* FIST m32 */
5630 DIP("fistl %s\n", dis_buf);
5631 storeLE( mkexpr(addr),
5632 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
5635 case 3: /* FISTP m32 */
5636 DIP("fistpl %s\n", dis_buf);
5637 storeLE( mkexpr(addr),
5638 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
5642 case 5: { /* FLD extended-real */
5643 /* Uses dirty helper:
5644 ULong amd64g_loadF80le ( ULong )
5645 addr holds the address. First, do a dirty call to
5646 get hold of the data. */
5647 IRTemp val = newTemp(Ity_I64);
5648 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
5650 IRDirty* d = unsafeIRDirty_1_N (
5653 "amd64g_dirtyhelper_loadF80le",
5654 &amd64g_dirtyhelper_loadF80le,
5657 /* declare that we're reading memory */
5659 d->mAddr = mkexpr(addr);
5662 /* execute the dirty call, dumping the result in val. */
5663 stmt( IRStmt_Dirty(d) );
5665 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
5667 DIP("fldt %s\n", dis_buf);
5671 case 7: { /* FSTP extended-real */
5672 /* Uses dirty helper:
5673 void amd64g_storeF80le ( ULong addr, ULong data )
5676 = mkIRExprVec_2( mkexpr(addr),
5677 unop(Iop_ReinterpF64asI64, get_ST(0)) );
5679 IRDirty* d = unsafeIRDirty_0_N (
5681 "amd64g_dirtyhelper_storeF80le",
5682 &amd64g_dirtyhelper_storeF80le,
5685 /* declare we're writing memory */
5687 d->mAddr = mkexpr(addr);
5690 /* execute the dirty call. */
5691 stmt( IRStmt_Dirty(d) );
5694 DIP("fstpt\n %s", dis_buf);
5699 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5700 vex_printf("first_opcode == 0xDB\n");
5709 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
5710 r_src = (UInt)modrm - 0xC0;
5711 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
5715 mk_amd64g_calculate_condition(AMD64CondNB)),
5716 get_ST(0), get_ST(r_src)) );
5719 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
5720 r_src = (UInt)modrm - 0xC8;
5721 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
5726 mk_amd64g_calculate_condition(AMD64CondNZ)),
5733 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
5734 r_src = (UInt)modrm - 0xD0;
5735 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
5740 mk_amd64g_calculate_condition(AMD64CondNBE)),
5747 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
5748 r_src = (UInt)modrm - 0xD8;
5749 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
5754 mk_amd64g_calculate_condition(AMD64CondNP)),
5766 /* Uses dirty helper:
5767 void amd64g_do_FINIT ( VexGuestAMD64State* ) */
5768 IRDirty* d = unsafeIRDirty_0_N (
5770 "amd64g_dirtyhelper_FINIT",
5771 &amd64g_dirtyhelper_FINIT,
5776 /* declare we're writing guest state */
5779 d->fxState[0].fx = Ifx_Write;
5780 d->fxState[0].offset = OFFB_FTOP;
5781 d->fxState[0].size = sizeof(UInt);
5783 d->fxState[1].fx = Ifx_Write;
5784 d->fxState[1].offset = OFFB_FPREGS;
5785 d->fxState[1].size = 8 * sizeof(ULong);
5787 d->fxState[2].fx = Ifx_Write;
5788 d->fxState[2].offset = OFFB_FPTAGS;
5789 d->fxState[2].size = 8 * sizeof(UChar);
5791 d->fxState[3].fx = Ifx_Write;
5792 d->fxState[3].offset = OFFB_FPROUND;
5793 d->fxState[3].size = sizeof(ULong);
5795 d->fxState[4].fx = Ifx_Write;
5796 d->fxState[4].offset = OFFB_FC3210;
5797 d->fxState[4].size = sizeof(ULong);
5799 stmt( IRStmt_Dirty(d) );
5805 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
5806 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
5809 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
5810 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
5819 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
5821 if (first_opcode == 0xDC) {
5824 /* bits 5,4,3 are an opcode extension, and the modRM also
5825 specifies an address. */
5826 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5829 switch (gregLO3ofRM(modrm)) {
5831 case 0: /* FADD double-real */
5832 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
5835 case 1: /* FMUL double-real */
5836 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
5839 //.. case 2: /* FCOM double-real */
5840 //.. DIP("fcoml %s\n", dis_buf);
5841 //.. /* This forces C1 to zero, which isn't right. */
5843 //.. binop( Iop_And32,
5844 //.. binop(Iop_Shl32,
5845 //.. binop(Iop_CmpF64,
5847 //.. loadLE(Ity_F64,mkexpr(addr))),
5853 case 3: /* FCOMP double-real */
5854 DIP("fcompl %s\n", dis_buf);
5855 /* This forces C1 to zero, which isn't right. */
5862 loadLE(Ity_F64,mkexpr(addr))),
5869 case 4: /* FSUB double-real */
5870 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
5873 case 5: /* FSUBR double-real */
5874 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
5877 case 6: /* FDIV double-real */
5878 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
5881 case 7: /* FDIVR double-real */
5882 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
5886 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5887 vex_printf("first_opcode == 0xDC\n");
5896 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
5897 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
5900 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
5901 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
5904 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
5905 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
5908 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
5909 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
5912 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
5913 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
5916 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
5917 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
5927 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
5929 if (first_opcode == 0xDD) {
5933 /* bits 5,4,3 are an opcode extension, and the modRM also
5934 specifies an address. */
5935 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5938 switch (gregLO3ofRM(modrm)) {
5940 case 0: /* FLD double-real */
5941 DIP("fldl %s\n", dis_buf);
5943 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
5946 case 1: /* FISTTPQ m64 (SSE3) */
5947 DIP("fistppll %s\n", dis_buf);
5948 storeLE( mkexpr(addr),
5949 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
5953 case 2: /* FST double-real */
5954 DIP("fstl %s\n", dis_buf);
5955 storeLE(mkexpr(addr), get_ST(0));
5958 case 3: /* FSTP double-real */
5959 DIP("fstpl %s\n", dis_buf);
5960 storeLE(mkexpr(addr), get_ST(0));
5964 //.. case 4: { /* FRSTOR m108 */
5965 //.. /* Uses dirty helper:
5966 //.. VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
5967 //.. IRTemp ew = newTemp(Ity_I32);
5968 //.. IRDirty* d = unsafeIRDirty_0_N (
5970 //.. "x86g_dirtyhelper_FRSTOR",
5971 //.. &x86g_dirtyhelper_FRSTOR,
5972 //.. mkIRExprVec_1( mkexpr(addr) )
5974 //.. d->needsBBP = True;
5976 //.. /* declare we're reading memory */
5977 //.. d->mFx = Ifx_Read;
5978 //.. d->mAddr = mkexpr(addr);
5979 //.. d->mSize = 108;
5981 //.. /* declare we're writing guest state */
5982 //.. d->nFxState = 5;
5984 //.. d->fxState[0].fx = Ifx_Write;
5985 //.. d->fxState[0].offset = OFFB_FTOP;
5986 //.. d->fxState[0].size = sizeof(UInt);
5988 //.. d->fxState[1].fx = Ifx_Write;
5989 //.. d->fxState[1].offset = OFFB_FPREGS;
5990 //.. d->fxState[1].size = 8 * sizeof(ULong);
5992 //.. d->fxState[2].fx = Ifx_Write;
5993 //.. d->fxState[2].offset = OFFB_FPTAGS;
5994 //.. d->fxState[2].size = 8 * sizeof(UChar);
5996 //.. d->fxState[3].fx = Ifx_Write;
5997 //.. d->fxState[3].offset = OFFB_FPROUND;
5998 //.. d->fxState[3].size = sizeof(UInt);
6000 //.. d->fxState[4].fx = Ifx_Write;
6001 //.. d->fxState[4].offset = OFFB_FC3210;
6002 //.. d->fxState[4].size = sizeof(UInt);
6004 //.. stmt( IRStmt_Dirty(d) );
6006 //.. /* ew contains any emulation warning we may need to
6007 //.. issue. If needed, side-exit to the next insn,
6008 //.. reporting the warning, so that Valgrind's dispatcher
6009 //.. sees the warning. */
6010 //.. put_emwarn( mkexpr(ew) );
6013 //.. binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
6015 //.. IRConst_U32( ((Addr32)guest_eip_bbstart)+delta)
6019 //.. DIP("frstor %s\n", dis_buf);
6023 //.. case 6: { /* FNSAVE m108 */
6024 //.. /* Uses dirty helper:
6025 //.. void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
6026 //.. IRDirty* d = unsafeIRDirty_0_N (
6028 //.. "x86g_dirtyhelper_FSAVE",
6029 //.. &x86g_dirtyhelper_FSAVE,
6030 //.. mkIRExprVec_1( mkexpr(addr) )
6032 //.. d->needsBBP = True;
6033 //.. /* declare we're writing memory */
6034 //.. d->mFx = Ifx_Write;
6035 //.. d->mAddr = mkexpr(addr);
6036 //.. d->mSize = 108;
6038 //.. /* declare we're reading guest state */
6039 //.. d->nFxState = 5;
6041 //.. d->fxState[0].fx = Ifx_Read;
6042 //.. d->fxState[0].offset = OFFB_FTOP;
6043 //.. d->fxState[0].size = sizeof(UInt);
6045 //.. d->fxState[1].fx = Ifx_Read;
6046 //.. d->fxState[1].offset = OFFB_FPREGS;
6047 //.. d->fxState[1].size = 8 * sizeof(ULong);
6049 //.. d->fxState[2].fx = Ifx_Read;
6050 //.. d->fxState[2].offset = OFFB_FPTAGS;
6051 //.. d->fxState[2].size = 8 * sizeof(UChar);
6053 //.. d->fxState[3].fx = Ifx_Read;
6054 //.. d->fxState[3].offset = OFFB_FPROUND;
6055 //.. d->fxState[3].size = sizeof(UInt);
6057 //.. d->fxState[4].fx = Ifx_Read;
6058 //.. d->fxState[4].offset = OFFB_FC3210;
6059 //.. d->fxState[4].size = sizeof(UInt);
6061 //.. stmt( IRStmt_Dirty(d) );
6063 //.. DIP("fnsave %s\n", dis_buf);
6067 case 7: { /* FNSTSW m16 */
6068 IRExpr* sw = get_FPU_sw();
6069 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
6070 storeLE( mkexpr(addr), sw );
6071 DIP("fnstsw %s\n", dis_buf);
6076 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6077 vex_printf("first_opcode == 0xDD\n");
6084 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6085 r_dst = (UInt)modrm - 0xC0;
6086 DIP("ffree %%st(%u)\n", r_dst);
6087 put_ST_TAG ( r_dst, mkU8(0) );
6090 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6091 r_dst = (UInt)modrm - 0xD0;
6092 DIP("fst %%st(0),%%st(%u)\n", r_dst);
6093 /* P4 manual says: "If the destination operand is a
6094 non-empty register, the invalid-operation exception
6095 is not generated. Hence put_ST_UNCHECKED. */
6096 put_ST_UNCHECKED(r_dst, get_ST(0));
6099 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6100 r_dst = (UInt)modrm - 0xD8;
6101 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
6102 /* P4 manual says: "If the destination operand is a
6103 non-empty register, the invalid-operation exception
6104 is not generated. Hence put_ST_UNCHECKED. */
6105 put_ST_UNCHECKED(r_dst, get_ST(0));
6109 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6110 r_dst = (UInt)modrm - 0xE0;
6111 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
6112 /* This forces C1 to zero, which isn't right. */
6117 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6123 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6124 r_dst = (UInt)modrm - 0xE8;
6125 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
6126 /* This forces C1 to zero, which isn't right. */
6131 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6144 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6146 if (first_opcode == 0xDE) {
6150 /* bits 5,4,3 are an opcode extension, and the modRM also
6151 specifies an address. */
6153 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6156 switch (gregLO3ofRM(modrm)) {
6158 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6159 DIP("fiaddw %s\n", dis_buf);
6163 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6164 DIP("fimulw %s\n", dis_buf);
6168 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6169 DIP("fisubw %s\n", dis_buf);
6173 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6174 DIP("fisubrw %s\n", dis_buf);
6178 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6179 DIP("fisubw %s\n", dis_buf);
6183 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6184 DIP("fidivrw %s\n", dis_buf);
6191 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6195 loadLE(Ity_I16, mkexpr(addr))))));
6201 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6204 loadLE(Ity_I16, mkexpr(addr)))),
6209 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6210 vex_printf("first_opcode == 0xDE\n");
6219 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6220 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
6223 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6224 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
6227 case 0xD9: /* FCOMPP %st(0),%st(1) */
6228 DIP("fcompp %%st(0),%%st(1)\n");
6229 /* This forces C1 to zero, which isn't right. */
6234 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6242 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6243 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
6246 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6247 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
6250 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6251 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
6254 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6255 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
6265 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
6267 if (first_opcode == 0xDF) {
6271 /* bits 5,4,3 are an opcode extension, and the modRM also
6272 specifies an address. */
6273 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6276 switch (gregLO3ofRM(modrm)) {
6278 case 0: /* FILD m16int */
6279 DIP("fildw %s\n", dis_buf);
6281 put_ST(0, unop(Iop_I32StoF64,
6283 loadLE(Ity_I16, mkexpr(addr)))));
6286 case 1: /* FISTTPS m16 (SSE3) */
6287 DIP("fisttps %s\n", dis_buf);
6288 storeLE( mkexpr(addr),
6289 x87ishly_qnarrow_32_to_16(
6290 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
6294 case 2: /* FIST m16 */
6295 DIP("fists %s\n", dis_buf);
6296 storeLE( mkexpr(addr),
6297 x87ishly_qnarrow_32_to_16(
6298 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
6301 case 3: /* FISTP m16 */
6302 DIP("fistps %s\n", dis_buf);
6303 storeLE( mkexpr(addr),
6304 x87ishly_qnarrow_32_to_16(
6305 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
6309 case 5: /* FILD m64 */
6310 DIP("fildll %s\n", dis_buf);
6312 put_ST(0, binop(Iop_I64StoF64,
6314 loadLE(Ity_I64, mkexpr(addr))));
6317 case 7: /* FISTP m64 */
6318 DIP("fistpll %s\n", dis_buf);
6319 storeLE( mkexpr(addr),
6320 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
6325 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6326 vex_printf("first_opcode == 0xDF\n");
6335 case 0xC0: /* FFREEP %st(0) */
6336 DIP("ffreep %%st(%d)\n", 0);
6337 put_ST_TAG ( 0, mkU8(0) );
6341 case 0xE0: /* FNSTSW %ax */
6342 DIP("fnstsw %%ax\n");
6343 /* Invent a plausible-looking FPU status word value and
6345 ((ftop & 7) << 11) | (c3210 & 0x4700)
6352 binop(Iop_And32, get_ftop(), mkU32(7)),
6355 unop(Iop_64to32, get_C3210()),
6360 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
6361 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
6364 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
6365 /* not really right since COMIP != UCOMIP */
6366 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
6388 /*------------------------------------------------------------*/
6390 /*--- MMX INSTRUCTIONS ---*/
6392 /*------------------------------------------------------------*/
6394 /* Effect of MMX insns on x87 FPU state (table 11-2 of
6395 IA32 arch manual, volume 3):
6397 Read from, or write to MMX register (viz, any insn except EMMS):
6398 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
6399 * FP stack pointer set to zero
6402 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
6403 * FP stack pointer set to zero
/* Prepare guest FPU state before any MMX insn other than EMMS: per
   the IA32 semantics summarised in the table above, every FP tag is
   set to Valid (non-zero).
   NOTE(review): this extract is missing the original lines between
   the ones shown (likely the opening brace, the declaration of 'i',
   and possibly a put_ftop() call) -- confirm against the full file. */
6406 static void do_MMX_preamble ( void )
/* FPTAGS is modelled as an 8-entry array of I8 values in guest state */
6409 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
6410 IRExpr* zero = mkU32(0);
/* tag value 1 == Valid (non-empty) */
6411 IRExpr* tag1 = mkU8(1);
/* mark all 8 tags as Valid */
6413 for (i = 0; i < 8; i++)
6414 stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
/* Prepare guest FPU state for EMMS: per the IA32 semantics summarised
   in the table above, every FP tag is set to Invalid (zero), i.e. all
   stack slots become empty.
   NOTE(review): this extract is missing the original lines between
   the ones shown (likely the opening brace, the declaration of 'i',
   and possibly a put_ftop() call) -- confirm against the full file. */
6417 static void do_EMMS_preamble ( void )
/* FPTAGS is modelled as an 8-entry array of I8 values in guest state */
6420 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
6421 IRExpr* zero = mkU32(0);
/* tag value 0 == Invalid (empty) -- contrast tag1 in do_MMX_preamble */
6422 IRExpr* tag0 = mkU8(0);
/* mark all 8 tags as Invalid */
6424 for (i = 0; i < 8; i++)
6425 stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
/* Read MMX register archreg (0..7) as a 64-bit IR expression.  The
   MMX registers live in the FPREGS area of the guest state, one
   8-byte slot per register, hence the OFFB_FPREGS + 8*archreg
   addressing. */
6429 static IRExpr* getMMXReg ( UInt archreg )
6431 vassert(archreg < 8);
6432 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
/* Write the 64-bit IR expression e into MMX register archreg (0..7),
   i.e. into the corresponding 8-byte slot of the FPREGS area of the
   guest state.  e must be of type Ity_I64 (checked below). */
6436 static void putMMXReg ( UInt archreg, IRExpr* e )
6438 vassert(archreg < 8);
/* reject anything that is not a 64-bit value */
6439 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
6440 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
6444 /* Helper for non-shift MMX insns. Note this is incomplete in the
6445 sense that it does not first call do_MMX_preamble() -- that is the
6446 responsibility of its caller. */
/* Disassemble one non-shift MMX binary op of the form
      G(reg) = G(reg) `op` E(reg-or-mem)
   dispatching on the second opcode byte.  Caller is responsible for
   having called do_MMX_preamble() first (see comment above).
   NOTE(review): this extract is missing several original lines
   (the remaining parameters around lines 6450-6453, the declarations
   of opc/name/len/eLeft/invG/hAddr, the argL/argR selection logic
   around lines 6560-6567, the delta advance and the final return) --
   the comments below describe only what is visible. */
6449 ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi,
6454 Bool show_granularity )
/* decode the modrm byte: G field names the dst MMX reg, E field the
   src reg or memory operand */
6457 UChar modrm = getUChar(delta);
6458 Bool isReg = epartIsReg(modrm);
6459 IRExpr* argL = NULL;
6460 IRExpr* argR = NULL;
6461 IRExpr* argG = NULL;
6462 IRExpr* argE = NULL;
6463 IRTemp res = newTemp(Ity_I64);
/* op stays Iop_INVALID for the cases routed to a clean helper via
   the XXX macro below (pmaddwd, psadbw) */
6466 IROp op = Iop_INVALID;
6468 HChar* hName = NULL;
/* XXX(_name): select a clean-helper implementation instead of a
   single IROp; records both the helper's address and its name */
6471 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
6474 /* Original MMX ones */
6475 case 0xFC: op = Iop_Add8x8; break;
6476 case 0xFD: op = Iop_Add16x4; break;
6477 case 0xFE: op = Iop_Add32x2; break;
/* saturating signed adds (PADDSB/PADDSW) */
6479 case 0xEC: op = Iop_QAdd8Sx8; break;
6480 case 0xED: op = Iop_QAdd16Sx4; break;
/* saturating unsigned adds (PADDUSB/PADDUSW) */
6482 case 0xDC: op = Iop_QAdd8Ux8; break;
6483 case 0xDD: op = Iop_QAdd16Ux4; break;
6485 case 0xF8: op = Iop_Sub8x8; break;
6486 case 0xF9: op = Iop_Sub16x4; break;
6487 case 0xFA: op = Iop_Sub32x2; break;
/* saturating signed subs (PSUBSB/PSUBSW) */
6489 case 0xE8: op = Iop_QSub8Sx8; break;
6490 case 0xE9: op = Iop_QSub16Sx4; break;
/* saturating unsigned subs (PSUBUSB/PSUBUSW) */
6492 case 0xD8: op = Iop_QSub8Ux8; break;
6493 case 0xD9: op = Iop_QSub16Ux4; break;
6495 case 0xE5: op = Iop_MulHi16Sx4; break;
6496 case 0xD5: op = Iop_Mul16x4; break;
/* PMADDWD has no single IROp; use a clean helper */
6497 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
6499 case 0x74: op = Iop_CmpEQ8x8; break;
6500 case 0x75: op = Iop_CmpEQ16x4; break;
6501 case 0x76: op = Iop_CmpEQ32x2; break;
6503 case 0x64: op = Iop_CmpGT8Sx8; break;
6504 case 0x65: op = Iop_CmpGT16Sx4; break;
6505 case 0x66: op = Iop_CmpGT32Sx2; break;
/* pack/unpack ops set eLeft -- presumably this swaps the argE/argG
   order given to the binop; the assignment logic (orig. lines
   ~6560-6567) is not visible here, so verify against the full file */
6507 case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
6508 case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
6509 case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
6511 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
6512 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
6513 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
6515 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
6516 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
6517 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
6519 case 0xDB: op = Iop_And64; break;
/* PANDN: invG makes the G argument get complemented first (see the
   Iop_Not64 applied to argG below) */
6520 case 0xDF: op = Iop_And64; invG = True; break;
6521 case 0xEB: op = Iop_Or64; break;
6522 case 0xEF: /* Possibly do better here if argL and argR are the same. */
6524 op = Iop_Xor64; break;
6526 /* Introduced in SSE1 */
6527 case 0xE0: op = Iop_Avg8Ux8; break;
6528 case 0xE3: op = Iop_Avg16Ux4; break;
6529 case 0xEE: op = Iop_Max16Sx4; break;
6530 case 0xDE: op = Iop_Max8Ux8; break;
6531 case 0xEA: op = Iop_Min16Sx4; break;
6532 case 0xDA: op = Iop_Min8Ux8; break;
6533 case 0xE4: op = Iop_MulHi16Ux4; break;
/* PSADBW also has no single IROp; clean helper again */
6534 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
6536 /* Introduced in SSE2 */
6537 case 0xD4: op = Iop_Add64; break;
6538 case 0xFB: op = Iop_Sub64; break;
/* unrecognised second opcode byte: dump it and give up */
6541 vex_printf("\n0x%x\n", (Int)opc);
6542 vpanic("dis_MMXop_regmem_to_reg");
/* fetch the G operand; complement it for PANDN (invG) */
6547 argG = getMMXReg(gregLO3ofRM(modrm));
6549 argG = unop(Iop_Not64, argG);
/* fetch the E operand: either another MMX reg ... */
6553 argE = getMMXReg(eregLO3ofRM(modrm));
/* ... or a 64-bit load from the decoded effective address */
6556 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6558 argE = loadLE(Ity_I64, mkexpr(addr));
/* exactly one of (op) / (hName,hAddr) must have been chosen above */
6569 if (op != Iop_INVALID) {
6570 vassert(hName == NULL);
6571 vassert(hAddr == NULL);
/* simple case: a single IR binop computes the result */
6572 assign(res, binop(op, argL, argR));
6574 vassert(hName != NULL);
6575 vassert(hAddr != NULL);
/* helper case: call the clean helper with both 64-bit args */
6579 0/*regparms*/, hName, hAddr,
6580 mkIRExprVec_2( argL, argR )
/* result always lands in the G register */
6585 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
/* disassembly printing; granularity suffix (b/w/d/q) comes from the
   low 2 opcode bits when requested */
6587 DIP("%s%s %s, %s\n",
6588 name, show_granularity ? nameMMXGran(opc & 3) : "",
6589 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
6590 nameMMXReg(gregLO3ofRM(modrm)) );
6596 /* Vector by scalar shift of G by the amount specified at the bottom
6597 of E. This is a straight copy of dis_SSE_shiftG_byE. */
/* Vector-by-scalar MMX shift: shifts MMX register G by the amount held
   in the low 64 bits of E (an MMX register or a 64-bit memory operand),
   writing the result back to G.  Returns the updated decode offset.
   NOTE(review): this extract is elided (interior lines missing); the
   comments below describe only what is visible here. */
6599 static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi,
6600 Prefix pfx, Long delta,
6601 HChar* opname, IROp op )
6607 UChar rm = getUChar(delta);
6608 IRTemp g0 = newTemp(Ity_I64);
6609 IRTemp g1 = newTemp(Ity_I64);
6610 IRTemp amt = newTemp(Ity_I64);
6611 IRTemp amt8 = newTemp(Ity_I8);
/* Fetch the 64-bit shift amount either from an MMX register or from
   memory, emitting the disassembly trace either way. */
6613 if (epartIsReg(rm)) {
6614 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
6615 DIP("%s %s,%s\n", opname,
6616 nameMMXReg(eregLO3ofRM(rm)),
6617 nameMMXReg(gregLO3ofRM(rm)) );
6620 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
6621 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
6622 DIP("%s %s,%s\n", opname,
6624 nameMMXReg(gregLO3ofRM(rm)) );
6627 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
/* The IR shift ops take an I8 amount; narrow the 64-bit count. */
6628 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
/* Classify the op so out-of-range counts can be special-cased:
   'size' is the bound at or above which the architectural result is
   all-zeroes (shl/shr) or fully sign-propagated (sar). */
6630 shl = shr = sar = False;
/* NOTE(review): Iop_ShlN16x4 uses size = 32 while ShrN16x4/SarN16x4
   use 16 -- looks inconsistent with PSLLW (counts >= 16 must zero each
   lane).  It may deliberately rely on host shift behavior for counts
   16..31; confirm before changing. */
6633 case Iop_ShlN16x4: shl = True; size = 32; break;
6634 case Iop_ShlN32x2: shl = True; size = 32; break;
6635 case Iop_Shl64: shl = True; size = 64; break;
6636 case Iop_ShrN16x4: shr = True; size = 16; break;
6637 case Iop_ShrN32x2: shr = True; size = 32; break;
6638 case Iop_Shr64: shr = True; size = 64; break;
6639 case Iop_SarN16x4: sar = True; size = 16; break;
6640 case Iop_SarN32x2: sar = True; size = 32; break;
6641 default: vassert(0);
/* shl/shr: select the real shift only when amt < size (else zero). */
6648 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
6650 binop(op, mkexpr(g0), mkexpr(amt8))
/* sar: counts >= size behave as an arithmetic shift by size-1. */
6658 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
6659 binop(op, mkexpr(g0), mkU8(size-1)),
6660 binop(op, mkexpr(g0), mkexpr(amt8))
6667 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
6672 /* Vector by scalar shift of E by an immediate byte. This is a
6673 straight copy of dis_SSE_shiftE_imm. */
/* Vector-by-immediate MMX shift: shifts MMX register E in place by an
   immediate byte taken from the instruction stream.  Only register
   forms exist; the /r field selects the operation group.
   NOTE(review): extract is elided; comments cover visible lines only. */
6676 ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op )
6679 UChar rm = getUChar(delta);
6680 IRTemp e0 = newTemp(Ity_I64);
6681 IRTemp e1 = newTemp(Ity_I64);
/* E must be a register, and /2, /4, /6 select SRL, SRA, SLL. */
6683 vassert(epartIsReg(rm));
6684 vassert(gregLO3ofRM(rm) == 2
6685 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
/* Immediate shift count is the byte following the modrm byte. */
6686 amt = getUChar(delta+1);
6688 DIP("%s $%d,%s\n", opname,
6690 nameMMXReg(eregLO3ofRM(rm)) );
6692 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
/* Classify the op; 'size' is the lane width, at or above which the
   result is zero (shl/shr) or a shift by size-1 (sar). */
6694 shl = shr = sar = False;
6697 case Iop_ShlN16x4: shl = True; size = 16; break;
6698 case Iop_ShlN32x2: shl = True; size = 32; break;
6699 case Iop_Shl64: shl = True; size = 64; break;
6700 case Iop_SarN16x4: sar = True; size = 16; break;
6701 case Iop_SarN32x2: sar = True; size = 32; break;
6702 case Iop_ShrN16x4: shr = True; size = 16; break;
6703 case Iop_ShrN32x2: shr = True; size = 32; break;
6704 case Iop_Shr64: shr = True; size = 64; break;
6705 default: vassert(0);
/* The count is a compile-time constant, so the out-of-range case is
   resolved here at decode time rather than with a run-time Mux. */
6709 assign( e1, amt >= size
6711 : binop(op, mkexpr(e0), mkU8(amt))
6715 assign( e1, amt >= size
6716 ? binop(op, mkexpr(e0), mkU8(size-1))
6717 : binop(op, mkexpr(e0), mkU8(amt))
6723 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
6728 /* Completely handle all MMX instructions except emms. */
/* Top-level MMX decoder: handles every MMX instruction except EMMS.
   On success advances and returns 'delta'; on an undecodable byte
   sequence sets *decode_ok = False via the mmx_decode_failure path.
   NOTE(review): this extract is elided (the opcode 'switch' header,
   closing braces and several guard lines are missing); comments
   describe only what is visible. */
6731 ULong dis_MMX ( Bool* decode_ok,
6732 VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
6737 UChar opc = getUChar(delta);
6740 /* dis_MMX handles all insns except emms. */
/* MOVD: 32-bit GPR or mem32 -> MMX reg.  The value lands in the low
   half; the 32HLto64 pairing builds the 64-bit MMX value. */
6747 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
6748 modrm = getUChar(delta);
6749 if (epartIsReg(modrm)) {
6753 binop( Iop_32HLto64,
6755 getIReg32(eregOfRexRM(pfx,modrm)) ) );
6756 DIP("movd %s, %s\n",
6757 nameIReg32(eregOfRexRM(pfx,modrm)),
6758 nameMMXReg(gregLO3ofRM(modrm)));
6760 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6764 binop( Iop_32HLto64,
6766 loadLE(Ity_I32, mkexpr(addr)) ) );
6767 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
/* MOVD/MOVQ: 64-bit GPR or mem64 -> MMX reg (REX.W form). */
6772 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
6773 modrm = getUChar(delta);
6774 if (epartIsReg(modrm)) {
6776 putMMXReg( gregLO3ofRM(modrm),
6777 getIReg64(eregOfRexRM(pfx,modrm)) );
6778 DIP("movd %s, %s\n",
6779 nameIReg64(eregOfRexRM(pfx,modrm)),
6780 nameMMXReg(gregLO3ofRM(modrm)));
6782 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6784 putMMXReg( gregLO3ofRM(modrm),
6785 loadLE(Ity_I64, mkexpr(addr)) );
6786 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
6790 goto mmx_decode_failure;
/* MOVD: MMX reg -> 32-bit GPR or mem32 (low half of the MMX reg). */
6796 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
6797 modrm = getUChar(delta);
6798 if (epartIsReg(modrm)) {
6800 putIReg32( eregOfRexRM(pfx,modrm),
6801 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
6802 DIP("movd %s, %s\n",
6803 nameMMXReg(gregLO3ofRM(modrm)),
6804 nameIReg32(eregOfRexRM(pfx,modrm)));
6806 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6808 storeLE( mkexpr(addr),
6809 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
6810 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
/* MOVD/MOVQ: MMX reg -> 64-bit GPR or mem64 (REX.W form). */
6815 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
6816 modrm = getUChar(delta);
6817 if (epartIsReg(modrm)) {
6819 putIReg64( eregOfRexRM(pfx,modrm),
6820 getMMXReg(gregLO3ofRM(modrm)) );
6821 DIP("movd %s, %s\n",
6822 nameMMXReg(gregLO3ofRM(modrm)),
6823 nameIReg64(eregOfRexRM(pfx,modrm)));
6825 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6827 storeLE( mkexpr(addr),
6828 getMMXReg(gregLO3ofRM(modrm)) );
6829 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
6832 goto mmx_decode_failure;
/* MOVQ load form: a REX.W prefix with no 66/F2/F3 is tolerated as
   redundant rather than rejected. */
6837 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
6839 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6840 goto mmx_decode_failure;
6841 modrm = getUChar(delta);
6842 if (epartIsReg(modrm)) {
6844 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
6845 DIP("movq %s, %s\n",
6846 nameMMXReg(eregLO3ofRM(modrm)),
6847 nameMMXReg(gregLO3ofRM(modrm)));
6849 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6851 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
6852 DIP("movq %s, %s\n",
6853 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
/* MOVQ/MOVNTQ store form: only the memory destination is handled;
   the reg-reg encoding of this opcode is rejected (see comment). */
6858 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
6860 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6861 goto mmx_decode_failure;
6862 modrm = getUChar(delta);
6863 if (epartIsReg(modrm)) {
6864 /* Fall through. The assembler doesn't appear to generate
6866 goto mmx_decode_failure;
6868 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6870 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
6871 DIP("mov(nt)q %s, %s\n",
6872 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
/* Binary MMX arithmetic/logical/compare/pack/unpack ops: each case
   forwards to dis_MMXop_regmem_to_reg, which maps opc to an IROp or
   helper; the final Bool says whether the mnemonic takes a b/w/d/q
   granularity suffix. */
6878 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
6880 goto mmx_decode_failure;
6881 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
6885 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
6887 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6888 goto mmx_decode_failure;
6889 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
6893 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
6895 goto mmx_decode_failure;
6896 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
6901 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
6903 goto mmx_decode_failure;
6904 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
6908 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
6910 goto mmx_decode_failure;
6911 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
6915 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
6917 goto mmx_decode_failure;
6918 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
6921 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
6923 goto mmx_decode_failure;
6924 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
6927 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
6929 goto mmx_decode_failure;
6930 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
6933 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
6935 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
6940 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
6942 goto mmx_decode_failure;
6943 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
6948 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
6950 goto mmx_decode_failure;
6951 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
6954 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
6956 goto mmx_decode_failure;
6957 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
6960 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
6962 goto mmx_decode_failure;
6963 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
6966 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
6968 goto mmx_decode_failure;
6969 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
6974 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
6976 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6977 goto mmx_decode_failure;
6978 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
6983 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
6985 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
6986 goto mmx_decode_failure;
6987 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
6990 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
6992 goto mmx_decode_failure;
6993 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
6996 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
6998 goto mmx_decode_failure;
6999 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
7002 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7004 goto mmx_decode_failure;
7005 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
7008 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7010 goto mmx_decode_failure;
7011 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
/* Shift-by-register forms: delegate to dis_MMX_shiftG_byE. */
7014 # define SHIFT_BY_REG(_name,_op) \
7015 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
7018 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7019 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
7020 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
7021 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
7023 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7024 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
7025 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
7026 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
7028 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7029 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
7030 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
7032 # undef SHIFT_BY_REG
/* Shift-by-immediate forms (opcodes 0x71/0x72/0x73): the modrm /r
   field (bits 5:3 of the following byte) selects SRL(2)/SRA(4)/SHL(6);
   delegate to dis_MMX_shiftE_imm. */
7037 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
7038 UChar byte2, subopc;
7040 goto mmx_decode_failure;
7041 byte2 = getUChar(delta); /* amode / sub-opcode */
7042 subopc = toUChar( (byte2 >> 3) & 7 );
7044 # define SHIFT_BY_IMM(_name,_op) \
7045 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7048 if (subopc == 2 /*SRL*/ && opc == 0x71)
7049 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
7050 else if (subopc == 2 /*SRL*/ && opc == 0x72)
7051 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
7052 else if (subopc == 2 /*SRL*/ && opc == 0x73)
7053 SHIFT_BY_IMM("psrlq", Iop_Shr64);
7055 else if (subopc == 4 /*SAR*/ && opc == 0x71)
7056 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
7057 else if (subopc == 4 /*SAR*/ && opc == 0x72)
7058 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
7060 else if (subopc == 6 /*SHL*/ && opc == 0x71)
7061 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
7062 else if (subopc == 6 /*SHL*/ && opc == 0x72)
7063 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
7064 else if (subopc == 6 /*SHL*/ && opc == 0x73)
7065 SHIFT_BY_IMM("psllq", Iop_Shl64);
7067 else goto mmx_decode_failure;
7069 # undef SHIFT_BY_IMM
/* MASKMOVQ: byte-masked store of G to [RDI].  Each byte of the mask
   is the sign bit of the corresponding byte of regM, broadcast via an
   arithmetic shift by 7; new memory = (regD & mask) | (old & ~mask).
   Register-only form, sz==4 required. */
7074 IRTemp addr = newTemp(Ity_I64);
7075 IRTemp regD = newTemp(Ity_I64);
7076 IRTemp regM = newTemp(Ity_I64);
7077 IRTemp mask = newTemp(Ity_I64);
7078 IRTemp olddata = newTemp(Ity_I64);
7079 IRTemp newdata = newTemp(Ity_I64);
7081 modrm = getUChar(delta);
7082 if (sz != 4 || (!epartIsReg(modrm)))
7083 goto mmx_decode_failure;
7086 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
7087 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
7088 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
7089 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
7090 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
7098 unop(Iop_Not64, mkexpr(mask)))) );
7099 storeLE( mkexpr(addr), mkexpr(newdata) );
7100 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
7101 nameMMXReg( gregLO3ofRM(modrm) ) );
7105 /* --- MMX decode failure --- */
7109 return delta; /* ignored */
7118 /*------------------------------------------------------------*/
7119 /*--- More misc arithmetic and other obscure insns. ---*/
7120 /*------------------------------------------------------------*/
7122 /* Generate base << amt with vacated places filled with stuff
7123 from xtra. amt guaranteed in 0 .. 63. */
/* Build an IR expression for a 64-bit left shift of 'base' by 'amt'
   (amt guaranteed in 0..63 by the caller), with the vacated low bits
   filled from the top of 'xtra' -- the SHLD-style double shift.
   NOTE(review): the amt == 0 special case mentioned in the original
   comment is in elided text not visible here. */
7125 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
7129 else (base << amt) | (xtra >>u (64-amt))
7136 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
7137 binop(Iop_Shr64, mkexpr(xtra),
7138 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7143 /* Generate base >>u amt with vacated places filled with stuff
7144 from xtra. amt guaranteed in 0 .. 63. */
/* Build an IR expression for a 64-bit unsigned right shift of 'base'
   by 'amt' (amt guaranteed in 0..63), with the vacated high bits
   filled from the bottom of 'xtra' -- the SHRD-style double shift.
   NOTE(review): the amt == 0 special case is in elided text. */
7146 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7150 else (base >>u amt) | (xtra << (64-amt))
7157 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7158 binop(Iop_Shl64, mkexpr(xtra),
7159 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7164 /* Double length left and right shifts. Apparently only required in
7165 v-size (no b- variant). */
/* Decode SHLD/SHRD Gv,Ev (double-length shifts).  E is the
   destination that gets shifted; G supplies the fill bits.  The shift
   amount may be an immediate or CL (amt_is_literal distinguishes).
   Flags are set via the standard shift thunk using the result and the
   "one-less" sub-shift.  Returns the updated decode offset.
   NOTE(review): extract is elided; comments cover visible lines. */
7167 ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi,
7169 Long delta, UChar modrm,
7172 Bool amt_is_literal,
7173 HChar* shift_amt_txt,
7176 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7177 for printing it. And eip on entry points at the modrm byte. */
7181 IRType ty = szToITy(sz);
7182 IRTemp gsrc = newTemp(ty);
7183 IRTemp esrc = newTemp(ty);
7184 IRTemp addr = IRTemp_INVALID;
7185 IRTemp tmpSH = newTemp(Ity_I8);
7186 IRTemp tmpSS = newTemp(Ity_I8);
7187 IRTemp tmp64 = IRTemp_INVALID;
7188 IRTemp res64 = IRTemp_INVALID;
7189 IRTemp rss64 = IRTemp_INVALID;
7190 IRTemp resTy = IRTemp_INVALID;
7191 IRTemp rssTy = IRTemp_INVALID;
/* Architectural masking of the count: mod 64 for 64-bit operands,
   mod 32 otherwise. */
7192 Int mask = sz==8 ? 63 : 31;
7194 vassert(sz == 2 || sz == 4 || sz == 8);
7196 /* The E-part is the destination; this is shifted. The G-part
7197 supplies bits to be shifted into the E-part, but is not
7200 If shifting left, form a double-length word with E at the top
7201 and G at the bottom, and shift this left. The result is then in
7204 If shifting right, form a double-length word with G at the top
7205 and E at the bottom, and shift this right. The result is then
7208 /* Fetch the operands. */
7210 assign( gsrc, getIRegG(sz, pfx, modrm) );
7212 if (epartIsReg(modrm)) {
7214 assign( esrc, getIRegE(sz, pfx, modrm) );
7215 DIP("sh%cd%c %s, %s, %s\n",
7216 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7218 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
/* Memory form: an immediate amount means one extra byte follows the
   amode, which disAMode must know to compute the length. */
7220 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
7221 /* # bytes following amode */
7222 amt_is_literal ? 1 : 0 );
7224 assign( esrc, loadLE(ty, mkexpr(addr)) );
7225 DIP("sh%cd%c %s, %s, %s\n",
7226 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7228 nameIRegG(sz, pfx, modrm), dis_buf);
7231 /* Calculate the masked shift amount (tmpSH), the masked subshift
7232 amount (tmpSS), the shifted value (res64) and the subshifted
/* tmpSS = (tmpSH - 1) masked: the "shift by one less" value the flag
   thunk needs to recover the last bit shifted out. */
7235 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
7236 assign( tmpSS, binop(Iop_And8,
7237 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
7240 tmp64 = newTemp(Ity_I64);
7241 res64 = newTemp(Ity_I64);
7242 rss64 = newTemp(Ity_I64);
/* Narrow cases build a 64-bit double-length value and shift that;
   the 16-bit cases replicate gsrc to keep the fill bits correct for
   counts up to 31 (the count is masked mod 32, not mod 16). */
7244 if (sz == 2 || sz == 4) {
7246 /* G is xtra; E is data */
7247 /* what a freaking nightmare: */
7248 if (sz == 4 && left_shift) {
7249 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
7252 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7256 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
7260 if (sz == 4 && !left_shift) {
7261 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
7262 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7263 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
7266 if (sz == 2 && left_shift) {
7269 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
7270 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
7272 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
7275 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7277 /* subshift formed by shifting [esrc'0000'0000'0000] */
7281 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
7287 if (sz == 2 && !left_shift) {
7290 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
7291 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
7293 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
7294 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7295 /* subshift formed by shifting [0000'0000'0000'esrc] */
7296 assign( rss64, binop(Iop_Shr64,
7297 unop(Iop_16Uto64, mkexpr(esrc)),
/* 64-bit case: use the double-shift helpers directly. */
7305 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
7306 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
7308 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
7309 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
/* Narrow the 64-bit results back to the operand type. */
7314 resTy = newTemp(ty);
7315 rssTy = newTemp(ty);
7316 assign( resTy, narrowTo(ty, mkexpr(res64)) );
7317 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
7319 /* Put result back and write the flags thunk. */
7320 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
7321 resTy, rssTy, ty, tmpSH );
7323 if (epartIsReg(modrm)) {
7324 putIRegE(sz, pfx, modrm, mkexpr(resTy));
7326 storeLE( mkexpr(addr), mkexpr(resTy) );
/* Account for the immediate byte, if the amount was literal. */
7329 if (amt_is_literal) delta++;
7334 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
/* The four BT-family variants: plain test, set, reset, complement. */
7337 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
/* Mnemonic suffix for a BtOp: "" / "s" / "r" / "c", as in
   bt/bts/btr/btc.  Panics on an out-of-range value. */
7339 static HChar* nameBtOp ( BtOp op )
7342 case BtOpNone: return "";
7343 case BtOpSet: return "s";
7344 case BtOpReset: return "r";
7345 case BtOpComp: return "c";
7346 default: vpanic("nameBtOp(amd64)");
/* Decode BT/BTS/BTR/BTC Gv,Ev.  Strategy: reduce everything to a
   single-byte memory operation.  A register destination is first
   spilled to the client's stack (see the red-zone kludge below) so
   the bit address arithmetic is uniform.  Sets only the C flag
   architecturally; the other flags are forced to zero via the COPY
   thunk.  Returns the updated decode offset.
   NOTE(review): extract is elided; comments cover visible lines. */
7352 ULong dis_bt_G_E ( VexAbiInfo* vbi,
7353 Prefix pfx, Int sz, Long delta, BtOp op )
7358 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
7359 t_addr1, t_rsp, t_mask, t_new;
7361 vassert(sz == 2 || sz == 4 || sz == 8);
7363 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
7364 = t_addr0 = t_addr1 = t_rsp
7365 = t_mask = t_new = IRTemp_INVALID;
7367 t_fetched = newTemp(Ity_I8);
7368 t_new = newTemp(Ity_I8);
7369 t_bitno0 = newTemp(Ity_I64);
7370 t_bitno1 = newTemp(Ity_I64);
7371 t_bitno2 = newTemp(Ity_I8);
7372 t_addr1 = newTemp(Ity_I64);
7373 modrm = getUChar(delta);
/* Bit index comes from G, sign-widened to 64 bits (it is a signed
   displacement for the memory form). */
7375 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
7377 if (epartIsReg(modrm)) {
7379 /* Get it onto the client's stack. Oh, this is a horrible
7380 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
7381 Because of the ELF ABI stack redzone, there may be live data
7382 up to 128 bytes below %RSP. So we can't just push it on the
7383 stack, else we may wind up trashing live data, and causing
7384 impossible-to-find simulation errors. (Yes, this did
7385 happen.) So we need to drop RSP before at least 128 before
7386 pushing it. That unfortunately means hitting Memcheck's
7387 fast-case painting code. Ideally we should drop more than
7388 128, to reduce the chances of breaking buggy programs that
7389 have live data below -128(%RSP). Memcheck fast-cases moves
7390 of 288 bytes due to the need to handle ppc64-linux quickly,
7391 so let's use 288. Of course the real fix is to get rid of
7392 this kludge entirely. */
7393 t_rsp = newTemp(Ity_I64);
7394 t_addr0 = newTemp(Ity_I64);
7396 vassert(vbi->guest_stack_redzone_size == 128);
7397 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
7398 putIReg64(R_RSP, mkexpr(t_rsp));
7400 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
7402 /* Make t_addr0 point at it. */
7403 assign( t_addr0, mkexpr(t_rsp) );
7405 /* Mask out upper bits of the shift amount, since we're doing a
/* Register form: bit index wraps at the operand width. */
7407 assign( t_bitno1, binop(Iop_And64,
7409 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
/* Memory form: bit index is unbounded (byte offset below). */
7412 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
7414 assign( t_bitno1, mkexpr(t_bitno0) );
7417 /* At this point: t_addr0 is the address being operated on. If it
7418 was a reg, we will have pushed it onto the client's stack.
7419 t_bitno1 is the bit number, suitably masked in the case of a
7422 /* Now the main sequence. */
/* Byte address = base + (bitno >> 3); arithmetic shift so negative
   indices address bytes below the base. */
7426 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
7428 /* t_addr1 now holds effective address */
7432 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
7434 /* t_bitno2 contains offset of bit within byte */
7436 if (op != BtOpNone) {
7437 t_mask = newTemp(Ity_I8);
7438 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
7441 /* t_mask is now a suitable byte mask */
7443 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
/* Compute the updated byte: set (OR), complement (XOR) or
   reset (AND NOT) the selected bit. */
7445 if (op != BtOpNone) {
7449 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
7453 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
7457 binop(Iop_And8, mkexpr(t_fetched),
7458 unop(Iop_Not8, mkexpr(t_mask))) );
7461 vpanic("dis_bt_G_E(amd64)");
/* LOCKed memory form must use compare-and-swap for atomicity. */
7463 if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) {
7464 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
7465 mkexpr(t_new)/*new*/,
7466 guest_RIP_curr_instr );
7468 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
7472 /* Side effect done; now get selected bit into Carry flag */
7473 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
7474 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7475 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
7480 unop(Iop_8Uto64, mkexpr(t_fetched)),
7484 /* Set NDEP even though it isn't used. This makes redundant-PUT
7485 elimination of previous stores to this field work better. */
7486 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7488 /* Move reg operand from stack back to reg */
7489 if (epartIsReg(modrm)) {
7490 /* t_rsp still points at it. */
7491 /* only write the reg if actually modifying it; doing otherwise
7492 zeroes the top half erroneously when doing btl due to
7493 standard zero-extend rule */
7495 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
/* Undo the 288-byte RSP adjustment made above. */
7496 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
7499 DIP("bt%s%c %s, %s\n",
7500 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
7501 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
7508 /* Handle BSF/BSR. Only v-size seems necessary. */
/* Decode BSF/BSR Ev,Gv (fwds selects BSF).  Widens the source to 64
   bits, uses Ctz64/Clz64, and special-cases source == 0 (result is
   "dst unchanged", Z flag set) since the IR count ops are undefined
   on zero.  Only the Z flag is defined; the rest are forced to zero.
   NOTE(review): extract is elided; comments cover visible lines. */
7510 ULong dis_bs_E_G ( VexAbiInfo* vbi,
7511 Prefix pfx, Int sz, Long delta, Bool fwds )
7517 IRType ty = szToITy(sz);
7518 IRTemp src = newTemp(ty);
7519 IRTemp dst = newTemp(ty);
7520 IRTemp src64 = newTemp(Ity_I64);
7521 IRTemp dst64 = newTemp(Ity_I64);
7522 IRTemp src8 = newTemp(Ity_I8);
7524 vassert(sz == 8 || sz == 4 || sz == 2);
7526 modrm = getUChar(delta);
7527 isReg = epartIsReg(modrm);
7530 assign( src, getIRegE(sz, pfx, modrm) );
7533 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7535 assign( src, loadLE(ty, mkexpr(addr)) );
7538 DIP("bs%c%c %s, %s\n",
7539 fwds ? 'f' : 'r', nameISize(sz),
7540 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
7541 nameIRegG(sz, pfx, modrm));
7543 /* First, widen src to 64 bits if it is not already. */
7544 assign( src64, widenUto64(mkexpr(src)) );
7546 /* Generate an 8-bit expression which is zero iff the
7547 original is zero, and nonzero otherwise */
7551 mkexpr(src64), mkU64(0))) );
7553 /* Flags: Z is 1 iff source value is zero. All others
7554 are undefined -- we force them to zero. */
7555 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7556 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
7559 IRExpr_Mux0X( mkexpr(src8),
7561 mkU64(AMD64G_CC_MASK_Z),
7566 /* Set NDEP even though it isn't used. This makes redundant-PUT
7567 elimination of previous stores to this field work better. */
7568 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7570 /* Result: iff source value is zero, we can't use
7571 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
7572 But anyway, amd64 semantics say the result is undefined in
7573 such situations. Hence handle the zero case specially. */
7575 /* Bleh. What we compute:
7577 bsf64: if src == 0 then {dst is unchanged}
7580 bsr64: if src == 0 then {dst is unchanged}
7581 else 63 - Clz64(src)
7583 bsf32: if src == 0 then {dst is unchanged}
7584 else Ctz64(32Uto64(src))
7586 bsr32: if src == 0 then {dst is unchanged}
7587 else 63 - Clz64(32Uto64(src))
7589 bsf16: if src == 0 then {dst is unchanged}
7590 else Ctz64(32Uto64(16Uto32(src)))
7592 bsr16: if src == 0 then {dst is unchanged}
7593 else 63 - Clz64(32Uto64(16Uto32(src)))
7596 /* The main computation, guarding against zero. */
7600 /* src == 0 -- leave dst unchanged */
7601 widenUto64( getIRegG( sz, pfx, modrm ) ),
7603 fwds ? unop(Iop_Ctz64, mkexpr(src64))
7606 unop(Iop_Clz64, mkexpr(src64)))
/* Narrow the 64-bit result back to the operand size. */
7611 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
7614 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
7616 assign( dst, mkexpr(dst64) );
7618 /* dump result back */
7619 putIRegG( sz, pfx, modrm, mkexpr(dst) );
7625 /* swap rAX with the reg specified by reg and REX.B */
/* XCHG rAX with the register named by regLo3 + REX.B.  Only 4- and
   8-byte operand sizes are handled here.
   NOTE(review): the if/else split between the sz==8 and sz==4 paths
   is in elided text; both bodies are visible below. */
7627 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
7629 IRType ty = szToITy(sz);
7630 IRTemp t1 = newTemp(ty);
7631 IRTemp t2 = newTemp(ty);
7632 vassert(sz == 4 || sz == 8);
7633 vassert(regLo3 < 8);
/* 64-bit swap via two temporaries. */
7635 assign( t1, getIReg64(R_RAX) );
7636 assign( t2, getIRegRexB(8, pfx, regLo3) );
7637 putIReg64( R_RAX, mkexpr(t2) );
7638 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
/* 32-bit swap; the 32-bit puts zero the upper halves per the
   standard amd64 zero-extension rule. */
7640 assign( t1, getIReg32(R_RAX) );
7641 assign( t2, getIRegRexB(4, pfx, regLo3) );
7642 putIReg32( R_RAX, mkexpr(t2) );
7643 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
7645 DIP("xchg%c %s, %s\n",
7646 nameISize(sz), nameIRegRAX(sz),
7647 nameIRegRexB(sz,pfx, regLo3));
/* SAHF: load S,Z,A,P,C from %AH into the flags thunk, keeping the
   old O flag.  Implemented by switching the thunk to COPY mode with
   DEP1 = (old flags & O) | (AH & SZACP-mask). */
7652 void codegen_SAHF ( void )
7654 /* Set the flags to:
7655 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
7656 -- retain the old O flag
7657 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
7658 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
7660 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
7661 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
7662 IRTemp oldflags = newTemp(Ity_I64);
/* Materialise the current flags before overwriting the thunk. */
7663 assign( oldflags, mk_amd64g_calculate_rflags_all() );
7664 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7665 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7666 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
7667 stmt( IRStmt_Put( OFFB_CC_DEP1,
7669 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
/* AH is bits 15:8 of RAX, hence the shift by 8. */
7671 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
/* LAHF: AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF).  Computes the flags,
   masks to S,Z,A,P,C, and splices the byte into bits 15:8 of RAX,
   leaving the rest of RAX untouched. */
7679 void codegen_LAHF ( void )
7681 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
7682 IRExpr* rax_with_hole;
7685 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
7686 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
7688 IRTemp flags = newTemp(Ity_I64);
7689 assign( flags, mk_amd64g_calculate_rflags_all() );
/* Clear bits 15:8 of RAX so the new AH byte can be OR-ed in. */
7692 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
7694 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
7697 = binop(Iop_Or64, rax_with_hole,
7698 binop(Iop_Shl64, new_byte, mkU8(8)));
7699 putIReg64(R_RAX, new_rax);
/* Decode CMPXCHG G,E.  Three sub-cases: reg-reg and unlocked reg-mem
   get a 'naive' compare/Mux sequence; a LOCKed memory destination
   uses IRCAS for atomicity.  Flags are set as for CMP rAX,dest.
   NOTE(review): extract is elided; comments cover visible lines. */
7704 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
7713 IRType ty = szToITy(size);
7714 IRTemp acc = newTemp(ty);
7715 IRTemp src = newTemp(ty);
7716 IRTemp dest = newTemp(ty);
7717 IRTemp dest2 = newTemp(ty);
7718 IRTemp acc2 = newTemp(ty);
7719 IRTemp cond8 = newTemp(Ity_I8);
7720 IRTemp addr = IRTemp_INVALID;
7721 UChar rm = getUChar(delta0);
7723 /* There are 3 cases to consider:
7725 reg-reg: ignore any lock prefix, generate sequence based
7728 reg-mem, not locked: ignore any lock prefix, generate sequence
7731 reg-mem, locked: use IRCAS
/* Case 1: reg-reg. */
7734 if (epartIsReg(rm)) {
7736 assign( dest, getIRegE(size, pfx, rm) );
7738 assign( src, getIRegG(size, pfx, rm) );
7739 assign( acc, getIRegRAX(size) );
/* Flags from acc - dest; cond8 is 1 iff equal (Z set). */
7740 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
7741 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
/* On match: dest <- src; on mismatch: rAX <- dest. */
7742 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
7743 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
7744 putIRegRAX(size, mkexpr(acc2));
7745 putIRegE(size, pfx, rm, mkexpr(dest2));
7746 DIP("cmpxchg%c %s,%s\n", nameISize(size),
7747 nameIRegG(size,pfx,rm),
7748 nameIRegE(size,pfx,rm) );
/* Case 2: reg-mem, not locked -- plain load/store version. */
7750 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
7752 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7753 assign( dest, loadLE(ty, mkexpr(addr)) );
7755 assign( src, getIRegG(size, pfx, rm) );
7756 assign( acc, getIRegRAX(size) );
7757 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
7758 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
7759 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
7760 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
7761 putIRegRAX(size, mkexpr(acc2));
7762 storeLE( mkexpr(addr), mkexpr(dest2) );
7763 DIP("cmpxchg%c %s,%s\n", nameISize(size),
7764 nameIRegG(size,pfx,rm), dis_buf);
/* Case 3: reg-mem, locked -- atomic via IRCAS; 'dest' receives the
   old memory value from the CAS itself. */
7766 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
7768 /* src is new value. acc is expected value. dest is old value.
7769 Compute success from the output of the IRCAS, and steer the
7770 new value for RAX accordingly: in case of success, RAX is
7772 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7774 assign( src, getIRegG(size, pfx, rm) );
7775 assign( acc, getIRegRAX(size) );
7777 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
7778 NULL, mkexpr(acc), NULL, mkexpr(src) )
7780 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
7781 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
7782 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
7783 putIRegRAX(size, mkexpr(acc2));
7784 DIP("cmpxchg%c %s,%s\n", nameISize(size),
7785 nameIRegG(size,pfx,rm), dis_buf);
7794 /* Handle conditional move instructions of the form
7795 cmovcc E(reg-or-mem), G(reg)
7797 E(src) is reg-or-mem
7800 If E is reg, --> GET %E, tmps
7805 If E is mem --> (getAddr E) -> tmpa
/* Decode CMOVcc E,G: conditionally move E (reg or mem) into G when
   condition 'cond' holds, else keep G.  Note the memory form always
   performs the load, matching hardware behavior.  Returns the
   updated decode offset.
   NOTE(review): extract is elided; comments cover visible lines. */
7812 ULong dis_cmov_E_G ( VexAbiInfo* vbi,
7818 UChar rm = getUChar(delta0);
7822 IRType ty = szToITy(sz);
7823 IRTemp tmps = newTemp(ty);
7824 IRTemp tmpd = newTemp(ty);
/* Register source: select between old G (tmpd) and E (tmps) on the
   computed condition. */
7826 if (epartIsReg(rm)) {
7827 assign( tmps, getIRegE(sz, pfx, rm) );
7828 assign( tmpd, getIRegG(sz, pfx, rm) );
7830 putIRegG( sz, pfx, rm,
7831 IRExpr_Mux0X( unop(Iop_1Uto8,
7832 mk_amd64g_calculate_condition(cond)),
7836 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
7837 nameIRegE(sz,pfx,rm),
7838 nameIRegG(sz,pfx,rm));
7842 /* E refers to memory */
7844 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7845 assign( tmps, loadLE(ty, mkexpr(addr)) );
7846 assign( tmpd, getIRegG(sz, pfx, rm) );
7848 putIRegG( sz, pfx, rm,
7849 IRExpr_Mux0X( unop(Iop_1Uto8,
7850 mk_amd64g_calculate_condition(cond)),
7855 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
7857 nameIRegG(sz,pfx,rm));
/* Decode XADD G,E: E <- E + G, G <- old E, with ADD-style flags.
   Three sub-cases mirroring dis_cmpxchg_G_E: naive sequences for
   reg-reg and unlocked reg-mem, and casLE for a LOCKed memory
   destination.  Sets *decode_ok accordingly (in elided text).
   NOTE(review): extract is elided; comments cover visible lines. */
7864 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
7866 Prefix pfx, Int sz, Long delta0 )
7869 UChar rm = getUChar(delta0);
7872 IRType ty = szToITy(sz);
7873 IRTemp tmpd = newTemp(ty);
7874 IRTemp tmpt0 = newTemp(ty);
7875 IRTemp tmpt1 = newTemp(ty);
7877 /* There are 3 cases to consider:
7879 reg-reg: ignore any lock prefix,
7880 generate 'naive' (non-atomic) sequence
7882 reg-mem, not locked: ignore any lock prefix, generate 'naive'
7883 (non-atomic) sequence
7885 reg-mem, locked: use IRCAS
/* Case 1: reg-reg.  tmpd = old E, tmpt0 = G, tmpt1 = sum. */
7888 if (epartIsReg(rm)) {
7890 assign( tmpd, getIRegE(sz, pfx, rm) );
7891 assign( tmpt0, getIRegG(sz, pfx, rm) );
7892 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
7893 mkexpr(tmpd), mkexpr(tmpt0)) );
7894 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
7895 putIRegG(sz, pfx, rm, mkexpr(tmpd));
7896 putIRegE(sz, pfx, rm, mkexpr(tmpt1));
7897 DIP("xadd%c %s, %s\n",
7898 nameISize(sz), nameIRegG(sz,pfx,rm),
7899 nameIRegE(sz,pfx,rm));
/* Case 2: reg-mem, not locked -- plain load/store. */
7903 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
7905 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7906 assign( tmpd, loadLE(ty, mkexpr(addr)) );
7907 assign( tmpt0, getIRegG(sz, pfx, rm) );
7908 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
7909 mkexpr(tmpd), mkexpr(tmpt0)) );
7910 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
7911 storeLE( mkexpr(addr), mkexpr(tmpt1) );
7912 putIRegG(sz, pfx, rm, mkexpr(tmpd));
7913 DIP("xadd%c %s, %s\n",
7914 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
/* Case 3: reg-mem, locked -- atomic read-modify-write via casLE,
   expecting the previously loaded value. */
7918 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
7920 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
7921 assign( tmpd, loadLE(ty, mkexpr(addr)) );
7922 assign( tmpt0, getIRegG(sz, pfx, rm) );
7923 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
7924 mkexpr(tmpd), mkexpr(tmpt0)) );
7925 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
7926 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
7927 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
7928 putIRegG(sz, pfx, rm, mkexpr(tmpd));
7929 DIP("xadd%c %s, %s\n",
7930 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
7938 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
7941 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
7945 //.. UChar rm = getUChar(delta0);
7946 //.. HChar dis_buf[50];
7948 //.. if (epartIsReg(rm)) {
7949 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
7950 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
7951 //.. return 1+delta0;
7953 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
7954 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
7955 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
7956 //.. return len+delta0;
7960 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
7961 //.. dst is ireg and sz==4, zero out top half of it. */
7964 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
7970 //.. UChar rm = getUChar(delta0);
7971 //.. HChar dis_buf[50];
7973 //.. vassert(sz == 2 || sz == 4);
7975 //.. if (epartIsReg(rm)) {
7977 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
7979 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
7981 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
7982 //.. return 1+delta0;
7984 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
7985 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
7986 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
7987 //.. return len+delta0;
7993 //.. void dis_push_segreg ( UInt sreg, Int sz )
7995 //.. IRTemp t1 = newTemp(Ity_I16);
7996 //.. IRTemp ta = newTemp(Ity_I32);
7997 //.. vassert(sz == 2 || sz == 4);
7999 //.. assign( t1, getSReg(sreg) );
8000 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8001 //.. putIReg(4, R_ESP, mkexpr(ta));
8002 //.. storeLE( mkexpr(ta), mkexpr(t1) );
8004 //.. DIP("pushw %s\n", nameSReg(sreg));
8008 //.. void dis_pop_segreg ( UInt sreg, Int sz )
8010 //.. IRTemp t1 = newTemp(Ity_I16);
8011 //.. IRTemp ta = newTemp(Ity_I32);
8012 //.. vassert(sz == 2 || sz == 4);
8014 //.. assign( ta, getIReg(4, R_ESP) );
8015 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8017 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8018 //.. putSReg( sreg, mkexpr(t1) );
8019 //.. DIP("pop %s\n", nameSReg(sreg));
/* Disassemble RET $d64 (near return, popping an extra d64 bytes of
   stack arguments; d64 == 0 for a plain RET).  Pops the return
   address, bumps RSP by 8+d64, emits an ABI redzone hint, and jumps
   to the popped address with jump-kind Ijk_Ret. */
8023 void dis_ret ( VexAbiInfo* vbi, ULong d64 )
8025 IRTemp t1 = newTemp(Ity_I64);   /* RSP on entry */
8026 IRTemp t2 = newTemp(Ity_I64);   /* return address loaded from [RSP] */
8027 IRTemp t3 = newTemp(Ity_I64);   /* new RSP: old + 8 + d64 */
8028 assign(t1, getIReg64(R_RSP));
8029 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
8030 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
8031 putIReg64(R_RSP, mkexpr(t3));
8032 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
8033 jmp_treg(Ijk_Ret,t2);
8037 /*------------------------------------------------------------*/
8038 /*--- SSE/SSE2/SSE3 helpers ---*/
8039 /*------------------------------------------------------------*/
8041 /* Worker function; do not call directly.
8042 Handles full width G = G `op` E and G = (not G) `op` E.
/* Worker for full-width (128-bit) SSE binops: G = G `op` E, or
   G = (not G) `op` E when invertG is set (see the two thin wrappers
   below).  E may be an XMM register or a 128-bit memory operand.
   NOTE(review): the binop application and return statements are in
   gaps of this extract. */
8045 static ULong dis_SSE_E_to_G_all_wrk (
8047 Prefix pfx, Long delta,
8048 HChar* opname, IROp op,
8055 UChar rm = getUChar(delta);
/* gpart is G, optionally bitwise-inverted (for ANDN-style ops). */
8057 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
8058 : getXMMReg(gregOfRexRM(pfx,rm));
8059 if (epartIsReg(rm)) {
8060 putXMMReg( gregOfRexRM(pfx,rm),
8062 getXMMReg(eregOfRexRM(pfx,rm))) );
8063 DIP("%s %s,%s\n", opname,
8064 nameXMMReg(eregOfRexRM(pfx,rm)),
8065 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* E is a 128-bit memory operand. */
8068 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8069 putXMMReg( gregOfRexRM(pfx,rm),
8071 loadLE(Ity_V128, mkexpr(addr))) );
8072 DIP("%s %s,%s\n", opname,
8074 nameXMMReg(gregOfRexRM(pfx,rm)) );
8080 /* All lanes SSE binary operation, G = G `op` E. */
/* All-lanes SSE binop, G = G `op` E: thin wrapper over the worker
   with invertG = False. */
8083 ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi,
8084 Prefix pfx, Long delta,
8085 HChar* opname, IROp op )
8087 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
8090 /* All lanes SSE binary operation, G = (not G) `op` E. */
/* All-lanes SSE binop with inverted G, G = (not G) `op` E: thin
   wrapper over the worker with invertG = True (used for ANDNPS etc). */
8093 ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi,
8094 Prefix pfx, Long delta,
8095 HChar* opname, IROp op )
8097 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
8101 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
/* Lowest 32-bit lane only SSE binop, G = G `op` E (scalar-single
   forms such as ADDSS).  'op' is expected to be a *F0x4-style op that
   only touches lane 0, so the reg-reg case can pass whole registers. */
8103 static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi,
8104 Prefix pfx, Long delta,
8105 HChar* opname, IROp op )
8110 UChar rm = getUChar(delta);
8111 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8112 if (epartIsReg(rm)) {
8113 putXMMReg( gregOfRexRM(pfx,rm),
8115 getXMMReg(eregOfRexRM(pfx,rm))) );
8116 DIP("%s %s,%s\n", opname,
8117 nameXMMReg(eregOfRexRM(pfx,rm)),
8118 nameXMMReg(gregOfRexRM(pfx,rm)) );
8121 /* We can only do a 32-bit memory read, so the upper 3/4 of the
8122 E operand needs to be made simply of zeroes. */
8123 IRTemp epart = newTemp(Ity_V128);
8124 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
/* Iop_32UtoV128 zero-extends the loaded 32 bits into a V128. */
8125 assign( epart, unop( Iop_32UtoV128,
8126 loadLE(Ity_I32, mkexpr(addr))) );
8127 putXMMReg( gregOfRexRM(pfx,rm),
8128 binop(op, gpart, mkexpr(epart)) );
8129 DIP("%s %s,%s\n", opname,
8131 nameXMMReg(gregOfRexRM(pfx,rm)) );
8137 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
/* Lowest 64-bit lane only SSE binop, G = G `op` E (scalar-double
   forms such as ADDSD).  Structure mirrors dis_SSE_E_to_G_lo32 but
   with a 64-bit memory load zero-extended into a V128. */
8139 static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi,
8140 Prefix pfx, Long delta,
8141 HChar* opname, IROp op )
8146 UChar rm = getUChar(delta);
8147 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8148 if (epartIsReg(rm)) {
8149 putXMMReg( gregOfRexRM(pfx,rm),
8151 getXMMReg(eregOfRexRM(pfx,rm))) );
8152 DIP("%s %s,%s\n", opname,
8153 nameXMMReg(eregOfRexRM(pfx,rm)),
8154 nameXMMReg(gregOfRexRM(pfx,rm)) );
8157 /* We can only do a 64-bit memory read, so the upper half of the
8158 E operand needs to be made simply of zeroes. */
8159 IRTemp epart = newTemp(Ity_V128);
8160 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8161 assign( epart, unop( Iop_64UtoV128,
8162 loadLE(Ity_I64, mkexpr(addr))) );
8163 putXMMReg( gregOfRexRM(pfx,rm),
8164 binop(op, gpart, mkexpr(epart)) );
8165 DIP("%s %s,%s\n", opname,
8167 nameXMMReg(gregOfRexRM(pfx,rm)) );
8173 /* All lanes unary SSE operation, G = op(E). */
/* All-lanes unary SSE op, G = op(E) (e.g. SQRTPS/RCPPS style).  E is
   an XMM register or a full 128-bit memory operand. */
8175 static ULong dis_SSE_E_to_G_unary_all (
8177 Prefix pfx, Long delta,
8178 HChar* opname, IROp op
8184 UChar rm = getUChar(delta);
8185 if (epartIsReg(rm)) {
8186 putXMMReg( gregOfRexRM(pfx,rm),
8187 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) );
8188 DIP("%s %s,%s\n", opname,
8189 nameXMMReg(eregOfRexRM(pfx,rm)),
8190 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* E is a 128-bit memory operand. */
8193 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8194 putXMMReg( gregOfRexRM(pfx,rm),
8195 unop(op, loadLE(Ity_V128, mkexpr(addr))) );
8196 DIP("%s %s,%s\n", opname,
8198 nameXMMReg(gregOfRexRM(pfx,rm)) );
8204 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
/* Lowest 32-bit lane only unary SSE op, G = op(E) (e.g. SQRTSS).
   The upper 96 bits of the result come from the old G, so the low 32
   bits of E are patched into a copy of G before applying 'op'. */
8206 static ULong dis_SSE_E_to_G_unary_lo32 (
8208 Prefix pfx, Long delta,
8209 HChar* opname, IROp op
8212 /* First we need to get the old G value and patch the low 32 bits
8213 of the E operand into it. Then apply op and write back to G. */
8217 UChar rm = getUChar(delta);
8218 IRTemp oldG0 = newTemp(Ity_V128);   /* G as currently held */
8219 IRTemp oldG1 = newTemp(Ity_V128);   /* G with E's lane 0 spliced in */
8221 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8223 if (epartIsReg(rm)) {
8225 binop( Iop_SetV128lo32,
8227 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
8228 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8229 DIP("%s %s,%s\n", opname,
8230 nameXMMReg(eregOfRexRM(pfx,rm)),
8231 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* E is a 32-bit memory operand. */
8234 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8236 binop( Iop_SetV128lo32,
8238 loadLE(Ity_I32, mkexpr(addr)) ));
8239 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8240 DIP("%s %s,%s\n", opname,
8242 nameXMMReg(gregOfRexRM(pfx,rm)) );
8248 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
/* Lowest 64-bit lane only unary SSE op, G = op(E) (e.g. SQRTSD).
   Mirrors dis_SSE_E_to_G_unary_lo32 with 64-bit lane splicing. */
8250 static ULong dis_SSE_E_to_G_unary_lo64 (
8252 Prefix pfx, Long delta,
8253 HChar* opname, IROp op
8256 /* First we need to get the old G value and patch the low 64 bits
8257 of the E operand into it. Then apply op and write back to G. */
8261 UChar rm = getUChar(delta);
8262 IRTemp oldG0 = newTemp(Ity_V128);   /* G as currently held */
8263 IRTemp oldG1 = newTemp(Ity_V128);   /* G with E's lane 0 spliced in */
8265 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8267 if (epartIsReg(rm)) {
8269 binop( Iop_SetV128lo64,
8271 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
8272 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8273 DIP("%s %s,%s\n", opname,
8274 nameXMMReg(eregOfRexRM(pfx,rm)),
8275 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* E is a 64-bit memory operand. */
8278 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8280 binop( Iop_SetV128lo64,
8282 loadLE(Ity_I64, mkexpr(addr)) ));
8283 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8284 DIP("%s %s,%s\n", opname,
8286 nameXMMReg(gregOfRexRM(pfx,rm)) );
8292 /* SSE integer binary operation:
8293 G = G `op` E (eLeft == False)
8294 G = E `op` G (eLeft == True)
/* SSE integer binop with selectable operand order:
   G = G `op` E when eLeft == False, G = E `op` G when eLeft == True
   (the latter matters for non-commutative ops such as subtracts). */
8296 static ULong dis_SSEint_E_to_G(
8298 Prefix pfx, Long delta,
8299 HChar* opname, IROp op,
8306 UChar rm = getUChar(delta);
8307 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8308 IRExpr* epart = NULL;
8309 if (epartIsReg(rm)) {
8310 epart = getXMMReg(eregOfRexRM(pfx,rm));
8311 DIP("%s %s,%s\n", opname,
8312 nameXMMReg(eregOfRexRM(pfx,rm)),
8313 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* E is a 128-bit memory operand. */
8316 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8317 epart = loadLE(Ity_V128, mkexpr(addr));
8318 DIP("%s %s,%s\n", opname,
8320 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* Apply the op with the operand order requested by eLeft. */
8323 putXMMReg( gregOfRexRM(pfx,rm),
8324 eLeft ? binop(op, epart, gpart)
8325 : binop(op, gpart, epart) );
8330 /* Helper for doing SSE FP comparisons. */
/* Map an SSE compare immediate (imm8 predicate, CMPPS/CMPPD/CMPSS/
   CMPSD encoding) plus lane width (sz: 4 or 8 bytes) and all-lanes
   flag to the matching VEX IR compare op.  Predicates 0..3 (EQ, LT,
   LE, UNORD) map directly; the handling for 4..7 (the negated forms,
   presumably via *needNot) is in a gap of this extract -- TODO
   confirm.  Panics on any combination not covered. */
8332 static void findSSECmpOp ( Bool* needNot, IROp* op,
8333 Int imm8, Bool all_lanes, Int sz )
8343 if (sz == 4 && all_lanes) {
8345 case 0: *op = Iop_CmpEQ32Fx4; return;
8346 case 1: *op = Iop_CmpLT32Fx4; return;
8347 case 2: *op = Iop_CmpLE32Fx4; return;
8348 case 3: *op = Iop_CmpUN32Fx4; return;
8352 if (sz == 4 && !all_lanes) {
8354 case 0: *op = Iop_CmpEQ32F0x4; return;
8355 case 1: *op = Iop_CmpLT32F0x4; return;
8356 case 2: *op = Iop_CmpLE32F0x4; return;
8357 case 3: *op = Iop_CmpUN32F0x4; return;
8361 if (sz == 8 && all_lanes) {
8363 case 0: *op = Iop_CmpEQ64Fx2; return;
8364 case 1: *op = Iop_CmpLT64Fx2; return;
8365 case 2: *op = Iop_CmpLE64Fx2; return;
8366 case 3: *op = Iop_CmpUN64Fx2; return;
8370 if (sz == 8 && !all_lanes) {
8372 case 0: *op = Iop_CmpEQ64F0x2; return;
8373 case 1: *op = Iop_CmpLT64F0x2; return;
8374 case 2: *op = Iop_CmpLE64F0x2; return;
8375 case 3: *op = Iop_CmpUN64F0x2; return;
/* No decodable combination: internal error. */
8379 vpanic("findSSECmpOp(amd64,guest)");
8382 /* Handles SSE 32F/64F comparisons. */
/* Handle SSE 32F/64F comparisons (CMPPS/CMPPD/CMPSS/CMPSD).  Does
   the 'plain' compare chosen by findSSECmpOp, then applies the
   negation demanded by the predicate: full NotV128 for all-lanes
   forms, or an XOR mask limited to the low lane for scalar forms. */
8384 static ULong dis_SSEcmp_E_to_G ( VexAbiInfo* vbi,
8385 Prefix pfx, Long delta,
8386 HChar* opname, Bool all_lanes, Int sz )
8391 Bool needNot = False;
8392 IROp op = Iop_INVALID;
8393 IRTemp plain = newTemp(Ity_V128);   /* un-negated compare result */
8394 UChar rm = getUChar(delta);
8396 vassert(sz == 4 || sz == 8);
8397 if (epartIsReg(rm)) {
/* imm8 predicate byte follows the modrm byte. */
8398 imm8 = getUChar(delta+1);
8399 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
8400 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
8401 getXMMReg(eregOfRexRM(pfx,rm))) );
8403 DIP("%s $%d,%s,%s\n", opname,
8405 nameXMMReg(eregOfRexRM(pfx,rm)),
8406 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* E is memory; imm8 follows the addressing-mode bytes (alen). */
8408 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
8409 imm8 = getUChar(delta+alen);
8410 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
/* Scalar forms only load 4 or 8 bytes, zero-extended to V128. */
8414 getXMMReg(gregOfRexRM(pfx,rm)),
8415 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
8416 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
8417 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
8421 DIP("%s $%d,%s,%s\n", opname,
8424 nameXMMReg(gregOfRexRM(pfx,rm)) );
/* Negated predicate, all lanes: invert the whole vector. */
8427 if (needNot && all_lanes) {
8428 putXMMReg( gregOfRexRM(pfx,rm),
8429 unop(Iop_NotV128, mkexpr(plain)) );
/* Negated predicate, scalar: invert only the low 32/64 bits.  The
   mask argument to mkV128 selects which 8-bit chunks are 0xFF. */
8432 if (needNot && !all_lanes) {
8433 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
8434 putXMMReg( gregOfRexRM(pfx,rm),
8435 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
8438 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
8445 /* Vector by scalar shift of G by the amount specified at the bottom
/* Vector-by-scalar shift of G by the amount in the bottom 64 bits of
   E (PSLLW/PSLLD/... with a register or memory count).  Out-of-range
   counts produce all-zeroes for logical shifts and a (size-1)-bit
   arithmetic shift for SAR forms, per the x86 semantics. */
8448 static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi,
8449 Prefix pfx, Long delta,
8450 HChar* opname, IROp op )
8456 UChar rm = getUChar(delta);
8457 IRTemp g0 = newTemp(Ity_V128);   /* G before the shift */
8458 IRTemp g1 = newTemp(Ity_V128);   /* G after the shift */
8459 IRTemp amt = newTemp(Ity_I32);   /* shift count, low 32 bits of E */
8460 IRTemp amt8 = newTemp(Ity_I8);   /* count narrowed for the IR shift op */
8461 if (epartIsReg(rm)) {
8462 assign( amt, getXMMRegLane32(eregOfRexRM(pfx,rm), 0) );
8463 DIP("%s %s,%s\n", opname,
8464 nameXMMReg(eregOfRexRM(pfx,rm)),
8465 nameXMMReg(gregOfRexRM(pfx,rm)) );
8468 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8469 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
8470 DIP("%s %s,%s\n", opname,
8472 nameXMMReg(gregOfRexRM(pfx,rm)) );
8475 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
8476 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
8478 shl = shr = sar = False;
/* 'size' is the lane width, used to guard against out-of-range
   counts below.
   NOTE(review): size = 32 for Iop_ShlN16x8 looks wrong -- its lanes
   are 16 bits wide, and the sibling dis_SSE_shiftE_imm uses 16 for
   the same op.  With 32, counts 16..31 bypass the range guard and
   reach a ShlN16x8 with an over-wide shift; PSLLW requires counts
   > 15 to zero the destination.  Verify and fix upstream. */
8481 case Iop_ShlN16x8: shl = True; size = 32; break;
8482 case Iop_ShlN32x4: shl = True; size = 32; break;
8483 case Iop_ShlN64x2: shl = True; size = 64; break;
8484 case Iop_SarN16x8: sar = True; size = 16; break;
8485 case Iop_SarN32x4: sar = True; size = 32; break;
8486 case Iop_ShrN16x8: shr = True; size = 16; break;
8487 case Iop_ShrN32x4: shr = True; size = 32; break;
8488 case Iop_ShrN64x2: shr = True; size = 64; break;
8489 default: vassert(0);
/* Logical shifts: in-range count -> real shift, else zeroes (the
   Mux0X scaffolding around these operands is in a gap here). */
8497 binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64(size))),
8499 binop(op, mkexpr(g0), mkexpr(amt8))
/* Arithmetic shifts: out-of-range count saturates to size-1. */
8508 binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64(size))),
8509 binop(op, mkexpr(g0), mkU8(size-1)),
8510 binop(op, mkexpr(g0), mkexpr(amt8))
8517 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
8522 /* Vector by scalar shift of E by an immediate byte. */
/* Vector shift of E by an immediate byte (PSLLW $imm etc; the /2, /4
   and /6 forms of the 0F 71/72/73 groups).  The immediate is known at
   decode time, so the out-of-range handling is resolved statically
   rather than with a run-time Mux. */
8525 ULong dis_SSE_shiftE_imm ( Prefix pfx,
8526 Long delta, HChar* opname, IROp op )
8529 UChar rm = getUChar(delta);
8530 IRTemp e0 = newTemp(Ity_V128);   /* E before the shift */
8531 IRTemp e1 = newTemp(Ity_V128);   /* E after the shift */
/* These forms only ever have a register E operand. */
8533 vassert(epartIsReg(rm));
8534 vassert(gregLO3ofRM(rm) == 2
8535 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
8536 amt = getUChar(delta+1);
8538 DIP("%s $%d,%s\n", opname,
8540 nameXMMReg(eregOfRexRM(pfx,rm)) );
8541 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
8543 shl = shr = sar = False;
/* 'size' is the lane width of the shift op. */
8546 case Iop_ShlN16x8: shl = True; size = 16; break;
8547 case Iop_ShlN32x4: shl = True; size = 32; break;
8548 case Iop_ShlN64x2: shl = True; size = 64; break;
8549 case Iop_SarN16x8: sar = True; size = 16; break;
8550 case Iop_SarN32x4: sar = True; size = 32; break;
8551 case Iop_ShrN16x8: shr = True; size = 16; break;
8552 case Iop_ShrN32x4: shr = True; size = 32; break;
8553 case Iop_ShrN64x2: shr = True; size = 64; break;
8554 default: vassert(0);
/* Logical: an over-wide immediate yields zeroes (zero operand is in
   a gap of this extract). */
8558 assign( e1, amt >= size
8560 : binop(op, mkexpr(e0), mkU8(amt))
/* Arithmetic: an over-wide immediate is clamped to size-1. */
8564 assign( e1, amt >= size
8565 ? binop(op, mkexpr(e0), mkU8(size-1))
8566 : binop(op, mkexpr(e0), mkU8(amt))
8572 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
8577 /* Get the current SSE rounding mode. */
/* Read the guest SSE rounding mode.  Stored in guest state as an
   I64 at OFFB_SSEROUND; result is narrowed to Ity_I32 (the narrowing
   op sits in a gap of this extract -- see put_sse_roundingmode for
   the matching widen on write). */
8579 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
8584 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
/* Write the guest SSE rounding mode.  Takes an Ity_I32 expression
   and widens it to the I64 guest-state slot at OFFB_SSEROUND. */
8588 static void put_sse_roundingmode ( IRExpr* sseround )
8590 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
8591 stmt( IRStmt_Put( OFFB_SSEROUND,
8592 unop(Iop_32Uto64,sseround) ) );
8595 /* Break a 128-bit value up into four 32-bit ints. */
/* Split a 128-bit temp into four new 32-bit temps, *t3 being the most
   significant 32 bits and *t0 the least.  Each output pointer must
   arrive holding IRTemp_INVALID; fresh temps are allocated here. */
8597 static void breakup128to32s ( IRTemp t128,
8599 IRTemp* t3, IRTemp* t2,
8600 IRTemp* t1, IRTemp* t0 )
8602 IRTemp hi64 = newTemp(Ity_I64);
8603 IRTemp lo64 = newTemp(Ity_I64);
8604 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
8605 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
/* Insist the caller passed uninitialised output slots. */
8607 vassert(t0 && *t0 == IRTemp_INVALID);
8608 vassert(t1 && *t1 == IRTemp_INVALID);
8609 vassert(t2 && *t2 == IRTemp_INVALID);
8610 vassert(t3 && *t3 == IRTemp_INVALID);
8612 *t0 = newTemp(Ity_I32);
8613 *t1 = newTemp(Ity_I32);
8614 *t2 = newTemp(Ity_I32);
8615 *t3 = newTemp(Ity_I32);
8616 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
8617 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
8618 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
8619 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
8622 /* Construct a 128-bit value from four 32-bit ints. */
/* Reassemble four 32-bit temps into one 128-bit expression; t3
   becomes the most significant lane, t0 the least.  Inverse of
   breakup128to32s. */
8624 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
8625 IRTemp t1, IRTemp t0 )
8628 binop( Iop_64HLtoV128,
8629 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
8630 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
8634 /* Break a 64-bit value up into four 16-bit ints. */
/* Split a 64-bit temp into four new 16-bit temps, *t3 being the most
   significant 16 bits and *t0 the least.  Each output pointer must
   arrive holding IRTemp_INVALID; fresh temps are allocated here. */
8636 static void breakup64to16s ( IRTemp t64,
8638 IRTemp* t3, IRTemp* t2,
8639 IRTemp* t1, IRTemp* t0 )
8641 IRTemp hi32 = newTemp(Ity_I32);
8642 IRTemp lo32 = newTemp(Ity_I32);
8643 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
8644 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
/* Insist the caller passed uninitialised output slots. */
8646 vassert(t0 && *t0 == IRTemp_INVALID);
8647 vassert(t1 && *t1 == IRTemp_INVALID);
8648 vassert(t2 && *t2 == IRTemp_INVALID);
8649 vassert(t3 && *t3 == IRTemp_INVALID);
8651 *t0 = newTemp(Ity_I16);
8652 *t1 = newTemp(Ity_I16);
8653 *t2 = newTemp(Ity_I16);
8654 *t3 = newTemp(Ity_I16);
8655 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
8656 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
8657 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
8658 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
8661 /* Construct a 64-bit value from four 16-bit ints. */
/* Reassemble four 16-bit temps into one 64-bit expression; t3
   becomes the most significant lane, t0 the least.  Inverse of
   breakup64to16s. */
8663 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
8664 IRTemp t1, IRTemp t0 )
8667 binop( Iop_32HLto64,
8668 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
8669 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
8674 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
8675 values (aa,bb), computes, for each of the 4 16-bit lanes:
8677 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
/* SSSE3 PMULHRSW helper: per 16-bit lane of two 64-bit values,
   compute (((aa *s32 bb) >>u 14) + 1) >>u 1, i.e. a rounded signed
   high-half multiply.  Works by sign-widening each lane to 32 bits
   via self-interleave, doing 32x2 multiplies, rounding, and picking
   the even 16-bit lanes of the recombined halves.
   NOTE(review): fragmentary extract -- the shift/round steps between
   the visible multiplies are in gaps. */
8679 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
8681 IRTemp aa = newTemp(Ity_I64);
8682 IRTemp bb = newTemp(Ity_I64);
8683 IRTemp aahi32s = newTemp(Ity_I64);   /* aa lanes 3,2 widened to 32 bits */
8684 IRTemp aalo32s = newTemp(Ity_I64);   /* aa lanes 1,0 widened to 32 bits */
8685 IRTemp bbhi32s = newTemp(Ity_I64);
8686 IRTemp bblo32s = newTemp(Ity_I64);
8687 IRTemp rHi = newTemp(Ity_I64);
8688 IRTemp rLo = newTemp(Ity_I64);
8689 IRTemp one32x2 = newTemp(Ity_I64);   /* constant 1 in each 32-bit half */
/* InterleaveHI/LO16x4 of a value with itself duplicates each 16-bit
   lane, setting up a sign-extending arithmetic right shift (in gap). */
8694 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
8698 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
8702 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
8706 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
8708 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
8717 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
8733 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
/* Keep the even (low) 16 bits of each 32-bit result lane. */
8742 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
8745 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
8746 values (aa,bb), computes, for each lane:
8748 if aa_lane < 0 then - bb_lane
8749 else if aa_lane > 0 then bb_lane
/* SSSE3 PSIGN{B,W,D} helper: per lane of width laneszB bytes,
   produce -bb where aa < 0, bb where aa > 0, and 0 where aa == 0.
   Implemented branch-free with two compare-generated masks. */
8752 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
8754 IRTemp aa = newTemp(Ity_I64);
8755 IRTemp bb = newTemp(Ity_I64);
8756 IRTemp zero = newTemp(Ity_I64);
8757 IRTemp bbNeg = newTemp(Ity_I64);     /* lane-wise -bb */
8758 IRTemp negMask = newTemp(Ity_I64);   /* all-ones in lanes where aa < 0 */
8759 IRTemp posMask = newTemp(Ity_I64);   /* all-ones in lanes where aa > 0 */
8760 IROp opSub = Iop_INVALID;
8761 IROp opCmpGTS = Iop_INVALID;
/* Pick lane-width-specific subtract and signed-greater-than ops. */
8764 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
8765 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
8766 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
8767 default: vassert(0);
8772 assign( zero, mkU64(0) );
8773 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
8774 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
8775 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
/* Lanes where aa == 0 fall through both masks and yield zero. */
8779 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
8780 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
8784 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
8785 value aa, computes, for each lane
8787 if aa < 0 then -aa else aa
8789 Note that the result is interpreted as unsigned, so that the
8790 absolute value of the most negative signed input can be
/* SSSE3 PABS{B,W,D} helper: per lane of width laneszB bytes, compute
   the absolute value, interpreting the result as unsigned so the
   most negative input is representable.  The sign mask comes from an
   arithmetic right shift by (lane width - 1). */
8793 static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
8795 IRTemp aa = newTemp(Ity_I64);
8796 IRTemp zero = newTemp(Ity_I64);
8797 IRTemp aaNeg = newTemp(Ity_I64);     /* lane-wise -aa */
8798 IRTemp negMask = newTemp(Ity_I64);   /* all-ones in negative lanes */
8799 IRTemp posMask = newTemp(Ity_I64);   /* complement of negMask */
8800 IROp opSub = Iop_INVALID;
8801 IROp opSarN = Iop_INVALID;
/* Pick lane-width-specific subtract and arithmetic-shift ops. */
8804 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
8805 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
8806 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
8807 default: vassert(0);
/* SarN by (8*laneszB - 1) replicates each lane's sign bit. */
8811 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
8812 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
8813 assign( zero, mkU64(0) );
8814 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
8817 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
8818 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
/* PALIGNR helper: produce 64 bits taken from the concatenation
   hi64:lo64 shifted right by byteShift bytes (1..7), i.e. the bottom
   (8-byteShift) bytes of lo64's top end ORed under hi64's low bytes. */
8821 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
8822 IRTemp lo64, Long byteShift )
8824 vassert(byteShift >= 1 && byteShift <= 7)
8827 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
8828 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
8832 /* Generate a SIGSEGV followed by a restart of the current instruction
8833 if effective_addr is not 16-aligned. This is required behaviour
8834 for some SSE3 instructions and all 128-bit SSSE3 instructions.
8835 This assumes that guest_RIP_curr_instr is set correctly! */
/* Emit a guard that raises SIGSEGV and restarts the current insn when
   effective_addr is not 16-byte aligned (tested via addr & 0xF).
   Required for various SSE3/SSSE3 insns; relies on
   guest_RIP_curr_instr being set correctly (see comment above). */
8836 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
8841 binop(Iop_And64,mkexpr(effective_addr),mkU64(0xF)),
8844 IRConst_U64(guest_RIP_curr_instr)
8850 /* Helper for deciding whether a given insn (starting at the opcode
8851 byte) may validly be used with a LOCK prefix. The following insns
8852 may be used with LOCK when their destination operand is in memory.
8853 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
8855 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
8856 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
8857 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
8858 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
8859 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
8860 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
8861 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
8875 CMPXCHG 0F B0, 0F B1
8880 ------------------------------
8882 80 /0 = addb $imm8, rm8
8883 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
8884 82 /0 = addb $imm8, rm8
8885 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
8888 01 = addl r32, rm32 and addw r16, rm16
8890 Same for ADD OR ADC SBB AND SUB XOR
8893 FF /1 = dec rm32 and dec rm16
8896 FF /0 = inc rm32 and inc rm16
8899 F7 /3 = neg rm32 and neg rm16
8902 F7 /2 = not rm32 and not rm16
8904 0F BB = btcw r16, rm16 and btcl r32, rm32
8905 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
/* Decide whether the insn starting at opc[0] may legitimately carry a
   LOCK prefix: true only for the lockable opcodes enumerated in the
   big comment above AND only when the destination is memory (modrm
   E-part is not a register).  opc[1] is the modrm byte for 1-byte
   opcodes; opc[2] for the 0F two-byte ones.  The 'return' statements
   and the 0x0F dispatch sit in gaps of this extract. */
8909 static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
/* ADD/OR/ADC/SBB/AND/SUB/XOR, r->m forms. */
8912 case 0x00: case 0x01: case 0x08: case 0x09:
8913 case 0x10: case 0x11: case 0x18: case 0x19:
8914 case 0x20: case 0x21: case 0x28: case 0x29:
8915 case 0x30: case 0x31:
8916 if (!epartIsReg(opc[1]))
/* Group-1 imm forms; /7 (CMP) is not lockable, hence <= 6. */
8920 case 0x80: case 0x81: case 0x82: case 0x83:
8921 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
8922 && !epartIsReg(opc[1]))
/* INC (/0) and DEC (/1). */
8926 case 0xFE: case 0xFF:
8927 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
8928 && !epartIsReg(opc[1]))
/* NOT (/2) and NEG (/3). */
8932 case 0xF6: case 0xF7:
8933 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
8934 && !epartIsReg(opc[1]))
/* XCHG r,m -- implicitly locked on real hardware. */
8938 case 0x86: case 0x87:
8939 if (!epartIsReg(opc[1]))
/* Two-byte (0F xx) lockable forms: BTC/BTR/BTS ... */
8945 case 0xBB: case 0xB3: case 0xAB:
8946 if (!epartIsReg(opc[2]))
/* ... BT group imm forms /5../7 (BTS/BTR/BTC) ... */
8950 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
8951 && !epartIsReg(opc[2]))
/* ... CMPXCHG ... */
8954 case 0xB0: case 0xB1:
8955 if (!epartIsReg(opc[2]))
/* ... CMPXCHG8B/16B (/1) ... */
8959 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
/* ... and XADD. */
8962 case 0xC0: case 0xC1:
8963 if (!epartIsReg(opc[2]))
8968 } /* switch (opc[1]) */
8974 } /* switch (opc[0]) */
8980 /*------------------------------------------------------------*/
8981 /*--- Disassemble a single instruction ---*/
8982 /*------------------------------------------------------------*/
8984 /* Disassemble a single instruction into IR. The instruction is
8985 located in host memory at &guest_code[delta]. */
8988 DisResult disInstr_AMD64_WRK (
8989 /*OUT*/Bool* expect_CAS,
8991 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
8993 void* callback_opaque,
8995 VexArchInfo* archinfo,
9000 IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
9002 UChar opc, modrm, abyte, pre;
9005 Int am_sz, d_sz, n, n_prefixes;
9007 UChar* insn; /* used in SSE decoders */
9009 /* The running delta */
9010 Long delta = delta64;
9012 /* Holds eip at the start of the insn, so that we can print
9013 consistent error messages for unimplemented insns. */
9014 Long delta_start = delta;
9016 /* sz denotes the nominal data-op size of the insn; we change it to
9017 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
9018 conflict REX.W takes precedence. */
9021 /* pfx holds the summary of prefixes. */
9022 Prefix pfx = PFX_EMPTY;
9024 /* Set result defaults. */
9025 dres.whatNext = Dis_Continue;
9027 dres.continueAt = 0;
9029 *expect_CAS = False;
9031 vassert(guest_RIP_next_assumed == 0);
9032 vassert(guest_RIP_next_mustcheck == False);
9034 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
9036 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
9038 /* We may be asked to update the guest RIP before going further. */
9040 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr)) );
9042 /* Spot "Special" instructions (see comment at top of file). */
9044 UChar* code = (UChar*)(guest_code + delta);
9045 /* Spot the 16-byte preamble:
9046 48C1C703 rolq $3, %rdi
9047 48C1C70D rolq $13, %rdi
9048 48C1C73D rolq $61, %rdi
9049 48C1C733 rolq $51, %rdi
9051 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
9052 && code[ 3] == 0x03 &&
9053 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
9054 && code[ 7] == 0x0D &&
9055 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
9056 && code[11] == 0x3D &&
9057 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
9058 && code[15] == 0x33) {
9059 /* Got a "Special" instruction preamble. Which one is it? */
9060 if (code[16] == 0x48 && code[17] == 0x87
9061 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
9062 /* %RDX = client_request ( %RAX ) */
9063 DIP("%%rdx = client_request ( %%rax )\n");
9065 jmp_lit(Ijk_ClientReq, guest_RIP_bbstart+delta);
9066 dres.whatNext = Dis_StopHere;
9067 goto decode_success;
9070 if (code[16] == 0x48 && code[17] == 0x87
9071 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
9072 /* %RAX = guest_NRADDR */
9073 DIP("%%rax = guest_NRADDR\n");
9075 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
9076 goto decode_success;
9079 if (code[16] == 0x48 && code[17] == 0x87
9080 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
9081 /* call-noredir *%RAX */
9082 DIP("call-noredir *%%rax\n");
9084 t1 = newTemp(Ity_I64);
9085 assign(t1, getIRegRAX(8));
9086 t2 = newTemp(Ity_I64);
9087 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
9088 putIReg64(R_RSP, mkexpr(t2));
9089 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
9090 jmp_treg(Ijk_NoRedir,t1);
9091 dres.whatNext = Dis_StopHere;
9092 goto decode_success;
9094 /* We don't know what it is. */
9095 goto decode_failure;
9100 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
9101 as many invalid combinations as possible. */
9104 if (n_prefixes > 7) goto decode_failure;
9105 pre = getUChar(delta);
9107 case 0x66: pfx |= PFX_66; break;
9108 case 0x67: pfx |= PFX_ASO; break;
9109 case 0xF2: pfx |= PFX_F2; break;
9110 case 0xF3: pfx |= PFX_F3; break;
9111 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
9112 case 0x2E: pfx |= PFX_CS; break;
9113 case 0x3E: pfx |= PFX_DS; break;
9114 case 0x26: pfx |= PFX_ES; break;
9115 case 0x64: pfx |= PFX_FS; break;
9116 case 0x65: pfx |= PFX_GS; break;
9117 case 0x36: pfx |= PFX_SS; break;
9120 if (pre & (1<<3)) pfx |= PFX_REXW;
9121 if (pre & (1<<2)) pfx |= PFX_REXR;
9122 if (pre & (1<<1)) pfx |= PFX_REXX;
9123 if (pre & (1<<0)) pfx |= PFX_REXB;
9134 /* Dump invalid combinations */
9136 if (pfx & PFX_F2) n++;
9137 if (pfx & PFX_F3) n++;
9139 goto decode_failure; /* can't have both */
9142 if (pfx & PFX_CS) n++;
9143 if (pfx & PFX_DS) n++;
9144 if (pfx & PFX_ES) n++;
9145 if (pfx & PFX_FS) n++;
9146 if (pfx & PFX_GS) n++;
9147 if (pfx & PFX_SS) n++;
9149 goto decode_failure; /* multiple seg overrides == illegal */
9151 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
9152 that we should accept it. */
9153 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero)
9154 goto decode_failure;
9156 /* Ditto for %gs prefixes. */
9157 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60)
9158 goto decode_failure;
9162 if (pfx & PFX_66) sz = 2;
9163 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
9165 /* Now we should be looking at the primary opcode byte or the
9166 leading F2 or F3. Check that any LOCK prefix is actually
9169 if (pfx & PFX_LOCK) {
9170 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
9173 *expect_CAS = False;
9174 goto decode_failure;
9179 /* ---------------------------------------------------- */
9180 /* --- The SSE/SSE2 decoder. --- */
9181 /* ---------------------------------------------------- */
9183 /* What did I do to deserve SSE ? Perhaps I was really bad in a
9186 /* Note, this doesn't handle SSE3 right now. All amd64s support
9187 SSE2 as a minimum so there is no point distinguishing SSE1 vs
9190 insn = (UChar*)&guest_code[delta];
9192 /* FXSAVE is spuriously at the start here only because it is
9193 thusly placed in guest-x86/toIR.c. */
9195 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory.
9196 Note that the presence or absence of REX.W slightly affects the
9197 written format: whether the saved FPU IP and DP pointers are 64
9198 or 32 bits. But the helper function we call simply writes zero
9199 bits in the relevant fields (which are 64 bits regardless of
9200 what REX.W is) and so it's good enough (iow, equally broken) in
9202 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
9203 && insn[0] == 0x0F && insn[1] == 0xAE
9204 && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 0) {
9206 modrm = getUChar(delta+2);
9207 vassert(!epartIsReg(modrm));
9209 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9211 gen_SEGV_if_not_16_aligned(addr);
9213 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
9215 /* Uses dirty helper:
9216 void amd64g_do_FXSAVE ( VexGuestAMD64State*, ULong ) */
9217 d = unsafeIRDirty_0_N (
9219 "amd64g_dirtyhelper_FXSAVE",
9220 &amd64g_dirtyhelper_FXSAVE,
9221 mkIRExprVec_1( mkexpr(addr) )
9225 /* declare we're writing memory */
9227 d->mAddr = mkexpr(addr);
9230 /* declare we're reading guest state */
9233 d->fxState[0].fx = Ifx_Read;
9234 d->fxState[0].offset = OFFB_FTOP;
9235 d->fxState[0].size = sizeof(UInt);
9237 d->fxState[1].fx = Ifx_Read;
9238 d->fxState[1].offset = OFFB_FPREGS;
9239 d->fxState[1].size = 8 * sizeof(ULong);
9241 d->fxState[2].fx = Ifx_Read;
9242 d->fxState[2].offset = OFFB_FPTAGS;
9243 d->fxState[2].size = 8 * sizeof(UChar);
9245 d->fxState[3].fx = Ifx_Read;
9246 d->fxState[3].offset = OFFB_FPROUND;
9247 d->fxState[3].size = sizeof(ULong);
9249 d->fxState[4].fx = Ifx_Read;
9250 d->fxState[4].offset = OFFB_FC3210;
9251 d->fxState[4].size = sizeof(ULong);
9253 d->fxState[5].fx = Ifx_Read;
9254 d->fxState[5].offset = OFFB_XMM0;
9255 d->fxState[5].size = 16 * sizeof(U128);
9257 d->fxState[6].fx = Ifx_Read;
9258 d->fxState[6].offset = OFFB_SSEROUND;
9259 d->fxState[6].size = sizeof(ULong);
9261 /* Be paranoid ... this assertion tries to ensure the 16 %xmm
9262 images are packed back-to-back. If not, the value of
9263 d->fxState[5].size is wrong. */
9264 vassert(16 == sizeof(U128));
9265 vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16));
9267 stmt( IRStmt_Dirty(d) );
9269 goto decode_success;
9272 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory.
9273 As with FXSAVE above we ignore the value of REX.W since we're
9274 not bothering with the FPU DP and IP fields. */
9275 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
9276 && insn[0] == 0x0F && insn[1] == 0xAE
9277 && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 1) {
9279 modrm = getUChar(delta+2);
9280 vassert(!epartIsReg(modrm));
9282 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9284 gen_SEGV_if_not_16_aligned(addr);
9286 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
9288 /* Uses dirty helper:
9289 VexEmWarn amd64g_do_FXRSTOR ( VexGuestAMD64State*, ULong )
9291 the VexEmWarn value is simply ignored
9293 d = unsafeIRDirty_0_N (
9295 "amd64g_dirtyhelper_FXRSTOR",
9296 &amd64g_dirtyhelper_FXRSTOR,
9297 mkIRExprVec_1( mkexpr(addr) )
9301 /* declare we're reading memory */
9303 d->mAddr = mkexpr(addr);
9306 /* declare we're writing guest state */
9309 d->fxState[0].fx = Ifx_Write;
9310 d->fxState[0].offset = OFFB_FTOP;
9311 d->fxState[0].size = sizeof(UInt);
9313 d->fxState[1].fx = Ifx_Write;
9314 d->fxState[1].offset = OFFB_FPREGS;
9315 d->fxState[1].size = 8 * sizeof(ULong);
9317 d->fxState[2].fx = Ifx_Write;
9318 d->fxState[2].offset = OFFB_FPTAGS;
9319 d->fxState[2].size = 8 * sizeof(UChar);
9321 d->fxState[3].fx = Ifx_Write;
9322 d->fxState[3].offset = OFFB_FPROUND;
9323 d->fxState[3].size = sizeof(ULong);
9325 d->fxState[4].fx = Ifx_Write;
9326 d->fxState[4].offset = OFFB_FC3210;
9327 d->fxState[4].size = sizeof(ULong);
9329 d->fxState[5].fx = Ifx_Write;
9330 d->fxState[5].offset = OFFB_XMM0;
9331 d->fxState[5].size = 16 * sizeof(U128);
9333 d->fxState[6].fx = Ifx_Write;
9334 d->fxState[6].offset = OFFB_SSEROUND;
9335 d->fxState[6].size = sizeof(ULong);
9337 /* Be paranoid ... this assertion tries to ensure the 16 %xmm
9338 images are packed back-to-back. If not, the value of
9339 d->fxState[5].size is wrong. */
9340 vassert(16 == sizeof(U128));
9341 vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16));
9343 stmt( IRStmt_Dirty(d) );
9345 goto decode_success;
9348 /* ------ SSE decoder main ------ */
9350 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
9351 if (haveNo66noF2noF3(pfx) && sz == 4
9352 && insn[0] == 0x0F && insn[1] == 0x58) {
9353 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addps", Iop_Add32Fx4 );
9354 goto decode_success;
9357 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
9358 if (haveF3no66noF2(pfx) && sz == 4
9359 && insn[0] == 0x0F && insn[1] == 0x58) {
9360 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "addss", Iop_Add32F0x4 );
9361 goto decode_success;
9364 /* 0F 55 = ANDNPS -- G = (not G) and E */
9365 if (haveNo66noF2noF3(pfx) && sz == 4
9366 && insn[0] == 0x0F && insn[1] == 0x55) {
9367 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnps", Iop_AndV128 );
9368 goto decode_success;
9371 /* 0F 54 = ANDPS -- G = G and E */
9372 if (haveNo66noF2noF3(pfx) && sz == 4
9373 && insn[0] == 0x0F && insn[1] == 0x54) {
9374 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andps", Iop_AndV128 );
9375 goto decode_success;
9378 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
9379 if (haveNo66noF2noF3(pfx) && sz == 4
9380 && insn[0] == 0x0F && insn[1] == 0xC2) {
9381 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpps", True, 4 );
9382 goto decode_success;
9385 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
9386 if (haveF3no66noF2(pfx) && sz == 4
9387 && insn[0] == 0x0F && insn[1] == 0xC2) {
9388 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpss", False, 4 );
9389 goto decode_success;
9392 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
9393 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
9394 if (haveNo66noF2noF3(pfx) && sz == 4
9395 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
9396 IRTemp argL = newTemp(Ity_F32);
9397 IRTemp argR = newTemp(Ity_F32);
9398 modrm = getUChar(delta+2);
9399 if (epartIsReg(modrm)) {
9400 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
9401 0/*lowest lane*/ ) );
9403 DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "",
9404 nameXMMReg(eregOfRexRM(pfx,modrm)),
9405 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9407 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9408 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
9410 DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "",
9412 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9414 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
9415 0/*lowest lane*/ ) );
9417 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
9418 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
9424 unop(Iop_F32toF64,mkexpr(argL)),
9425 unop(Iop_F32toF64,mkexpr(argR)))),
9429 goto decode_success;
9432 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
9434 if (haveNo66noF2noF3(pfx) && sz == 4
9435 && insn[0] == 0x0F && insn[1] == 0x2A) {
9436 IRTemp arg64 = newTemp(Ity_I64);
9437 IRTemp rmode = newTemp(Ity_I32);
9439 modrm = getUChar(delta+2);
9441 if (epartIsReg(modrm)) {
9442 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
9444 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
9445 nameXMMReg(gregOfRexRM(pfx,modrm)));
9447 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9448 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9450 DIP("cvtpi2ps %s,%s\n", dis_buf,
9451 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9454 assign( rmode, get_sse_roundingmode() );
9457 gregOfRexRM(pfx,modrm), 0,
9461 unop(Iop_64to32, mkexpr(arg64)) )) );
9464 gregOfRexRM(pfx,modrm), 1,
9468 unop(Iop_64HIto32, mkexpr(arg64)) )) );
9470 goto decode_success;
9473 /* F3 0F 2A = CVTSI2SS
9474 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
9475 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
9476 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)
9477 && insn[0] == 0x0F && insn[1] == 0x2A) {
9479 IRTemp rmode = newTemp(Ity_I32);
9480 assign( rmode, get_sse_roundingmode() );
9481 modrm = getUChar(delta+2);
9484 IRTemp arg32 = newTemp(Ity_I32);
9485 if (epartIsReg(modrm)) {
9486 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
9488 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
9489 nameXMMReg(gregOfRexRM(pfx,modrm)));
9491 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9492 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
9494 DIP("cvtsi2ss %s,%s\n", dis_buf,
9495 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9498 gregOfRexRM(pfx,modrm), 0,
9501 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
9504 IRTemp arg64 = newTemp(Ity_I64);
9505 if (epartIsReg(modrm)) {
9506 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
9508 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
9509 nameXMMReg(gregOfRexRM(pfx,modrm)));
9511 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9512 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9514 DIP("cvtsi2ssq %s,%s\n", dis_buf,
9515 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9518 gregOfRexRM(pfx,modrm), 0,
9521 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
9524 goto decode_success;
9527 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
9528 I32 in mmx, according to prevailing SSE rounding mode */
9529 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
9530 I32 in mmx, rounding towards zero */
9531 if (haveNo66noF2noF3(pfx) && sz == 4
9532 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9533 IRTemp dst64 = newTemp(Ity_I64);
9534 IRTemp rmode = newTemp(Ity_I32);
9535 IRTemp f32lo = newTemp(Ity_F32);
9536 IRTemp f32hi = newTemp(Ity_F32);
9537 Bool r2zero = toBool(insn[1] == 0x2C);
9540 modrm = getUChar(delta+2);
9542 if (epartIsReg(modrm)) {
9544 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
9545 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
9546 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
9547 nameXMMReg(eregOfRexRM(pfx,modrm)),
9548 nameMMXReg(gregLO3ofRM(modrm)));
9550 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9551 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
9552 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
9556 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
9558 nameMMXReg(gregLO3ofRM(modrm)));
9562 assign(rmode, mkU32((UInt)Irrm_ZERO) );
9564 assign( rmode, get_sse_roundingmode() );
9569 binop( Iop_32HLto64,
9570 binop( Iop_F64toI32S,
9572 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
9573 binop( Iop_F64toI32S,
9575 unop( Iop_F32toF64, mkexpr(f32lo) ) )
9579 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
9580 goto decode_success;
9583 /* F3 0F 2D = CVTSS2SI
9584 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
9585 according to prevailing SSE rounding mode
9586 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
9587 according to prevailing SSE rounding mode
9589 /* F3 0F 2C = CVTTSS2SI
9590 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
9591 truncating towards zero
9592 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
9593 truncating towards zero
9595 if (haveF3no66noF2(pfx)
9597 && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9598 IRTemp rmode = newTemp(Ity_I32);
9599 IRTemp f32lo = newTemp(Ity_F32);
9600 Bool r2zero = toBool(insn[1] == 0x2C);
9601 vassert(sz == 4 || sz == 8);
9603 modrm = getUChar(delta+2);
9604 if (epartIsReg(modrm)) {
9606 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
9607 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
9608 nameXMMReg(eregOfRexRM(pfx,modrm)),
9609 nameIReg(sz, gregOfRexRM(pfx,modrm), False));
9611 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9612 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
9614 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
9616 nameIReg(sz, gregOfRexRM(pfx,modrm), False));
9620 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9622 assign( rmode, get_sse_roundingmode() );
9626 putIReg32( gregOfRexRM(pfx,modrm),
9627 binop( Iop_F64toI32S,
9629 unop(Iop_F32toF64, mkexpr(f32lo))) );
9631 putIReg64( gregOfRexRM(pfx,modrm),
9632 binop( Iop_F64toI64S,
9634 unop(Iop_F32toF64, mkexpr(f32lo))) );
9637 goto decode_success;
9640 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
9641 if (haveNo66noF2noF3(pfx) && sz == 4
9642 && insn[0] == 0x0F && insn[1] == 0x5E) {
9643 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divps", Iop_Div32Fx4 );
9644 goto decode_success;
9647 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
9648 if (haveF3no66noF2(pfx) && sz == 4
9649 && insn[0] == 0x0F && insn[1] == 0x5E) {
9650 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "divss", Iop_Div32F0x4 );
9651 goto decode_success;
9654 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
9655 if (insn[0] == 0x0F && insn[1] == 0xAE
9656 && haveNo66noF2noF3(pfx)
9657 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 2) {
9659 IRTemp t64 = newTemp(Ity_I64);
9660 IRTemp ew = newTemp(Ity_I32);
9663 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9665 DIP("ldmxcsr %s\n", dis_buf);
9667 /* The only thing we observe in %mxcsr is the rounding mode.
9668 Therefore, pass the 32-bit value (SSE native-format control
9669 word) to a clean helper, getting back a 64-bit value, the
9670 lower half of which is the SSEROUND value to store, and the
9671 upper half of which is the emulation-warning token which may
9674 /* ULong amd64g_check_ldmxcsr ( ULong ); */
9675 assign( t64, mkIRExprCCall(
9676 Ity_I64, 0/*regparms*/,
9677 "amd64g_check_ldmxcsr",
9678 &amd64g_check_ldmxcsr,
9681 loadLE(Ity_I32, mkexpr(addr))
9687 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
9688 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
9689 put_emwarn( mkexpr(ew) );
9690 /* Finally, if an emulation warning was reported, side-exit to
9691 the next insn, reporting the warning, so that Valgrind's
9692 dispatcher sees the warning. */
9695 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
9697 IRConst_U64(guest_RIP_bbstart+delta)
9700 goto decode_success;
9703 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9704 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
9705 if (haveNo66noF2noF3(pfx) && sz == 4
9706 && insn[0] == 0x0F && insn[1] == 0xF7) {
9708 delta = dis_MMX( &ok, vbi, pfx, sz, delta+1 );
9710 goto decode_failure;
9711 goto decode_success;
9714 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
9715 if (haveNo66noF2noF3(pfx) && sz == 4
9716 && insn[0] == 0x0F && insn[1] == 0x5F) {
9717 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxps", Iop_Max32Fx4 );
9718 goto decode_success;
9721 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
9722 if (haveF3no66noF2(pfx) && sz == 4
9723 && insn[0] == 0x0F && insn[1] == 0x5F) {
9724 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "maxss", Iop_Max32F0x4 );
9725 goto decode_success;
9728 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
9729 if (haveNo66noF2noF3(pfx) && sz == 4
9730 && insn[0] == 0x0F && insn[1] == 0x5D) {
9731 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minps", Iop_Min32Fx4 );
9732 goto decode_success;
9735 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
9736 if (haveF3no66noF2(pfx) && sz == 4
9737 && insn[0] == 0x0F && insn[1] == 0x5D) {
9738 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "minss", Iop_Min32F0x4 );
9739 goto decode_success;
9742 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
9743 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
9744 if (haveNo66noF2noF3(pfx)
9745 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9746 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
9747 modrm = getUChar(delta+2);
9748 if (epartIsReg(modrm)) {
9749 putXMMReg( gregOfRexRM(pfx,modrm),
9750 getXMMReg( eregOfRexRM(pfx,modrm) ));
9751 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9752 nameXMMReg(gregOfRexRM(pfx,modrm)));
9755 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9756 if (insn[1] == 0x28/*movaps*/)
9757 gen_SEGV_if_not_16_aligned( addr );
9758 putXMMReg( gregOfRexRM(pfx,modrm),
9759 loadLE(Ity_V128, mkexpr(addr)) );
9760 DIP("mov[ua]ps %s,%s\n", dis_buf,
9761 nameXMMReg(gregOfRexRM(pfx,modrm)));
9764 goto decode_success;
9767 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
9768 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
9769 if (haveNo66noF2noF3(pfx)
9770 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9771 && insn[0] == 0x0F && (insn[1] == 0x29 || insn[1] == 0x11)) {
9772 modrm = getUChar(delta+2);
9773 if (epartIsReg(modrm)) {
9774 /* fall through; awaiting test case */
9776 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9777 if (insn[1] == 0x29/*movaps*/)
9778 gen_SEGV_if_not_16_aligned( addr );
9779 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
9780 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
9783 goto decode_success;
9787 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
9788 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
9789 if (haveNo66noF2noF3(pfx)
9790 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9791 && insn[0] == 0x0F && insn[1] == 0x16) {
9792 modrm = getUChar(delta+2);
9793 if (epartIsReg(modrm)) {
9795 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
9796 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
9797 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9798 nameXMMReg(gregOfRexRM(pfx,modrm)));
9800 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9802 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
9803 loadLE(Ity_I64, mkexpr(addr)) );
9804 DIP("movhps %s,%s\n", dis_buf,
9805 nameXMMReg( gregOfRexRM(pfx,modrm) ));
9807 goto decode_success;
9810 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
9811 if (haveNo66noF2noF3(pfx)
9812 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9813 && insn[0] == 0x0F && insn[1] == 0x17) {
9814 if (!epartIsReg(insn[2])) {
9816 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9818 storeLE( mkexpr(addr),
9819 getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
9820 1/*upper lane*/ ) );
9821 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
9823 goto decode_success;
9825 /* else fall through */
9828 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
9829 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
9830 if (haveNo66noF2noF3(pfx)
9831 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9832 && insn[0] == 0x0F && insn[1] == 0x12) {
9833 modrm = getUChar(delta+2);
9834 if (epartIsReg(modrm)) {
9836 putXMMRegLane64( gregOfRexRM(pfx,modrm),
9838 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
9839 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9840 nameXMMReg(gregOfRexRM(pfx,modrm)));
9842 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9844 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
9845 loadLE(Ity_I64, mkexpr(addr)) );
9846 DIP("movlps %s, %s\n",
9847 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
9849 goto decode_success;
9852 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
9853 if (haveNo66noF2noF3(pfx)
9854 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9855 && insn[0] == 0x0F && insn[1] == 0x13) {
9856 if (!epartIsReg(insn[2])) {
9858 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9860 storeLE( mkexpr(addr),
9861 getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
9862 0/*lower lane*/ ) );
9863 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
9865 goto decode_success;
9867 /* else fall through */
9870 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
9871 to 4 lowest bits of ireg(G) */
9872 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
9873 && insn[0] == 0x0F && insn[1] == 0x50) {
9874 /* sz == 8 is a kludge to handle insns with REX.W redundantly
9875 set to 1, which has been known to happen:
9877 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
9879 20071106: Intel docs say that REX.W isn't redundant: when
9880 present, a 64-bit register is written; when not present, only
9881 the 32-bit half is written. However, testing on a Core2
9882 machine suggests the entire 64 bit register is written
9883 irrespective of the status of REX.W. That could be because
9884 of the default rule that says "if the lower half of a 32-bit
9885 register is written, the upper half is zeroed". By using
9886 putIReg32 here we inadvertantly produce the same behaviour as
9887 the Core2, for the same reason -- putIReg32 implements said
9890 AMD docs give no indication that REX.W is even valid for this
9892 modrm = getUChar(delta+2);
9893 if (epartIsReg(modrm)) {
9895 t0 = newTemp(Ity_I32);
9896 t1 = newTemp(Ity_I32);
9897 t2 = newTemp(Ity_I32);
9898 t3 = newTemp(Ity_I32);
9900 src = eregOfRexRM(pfx,modrm);
9901 assign( t0, binop( Iop_And32,
9902 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
9904 assign( t1, binop( Iop_And32,
9905 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
9907 assign( t2, binop( Iop_And32,
9908 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
9910 assign( t3, binop( Iop_And32,
9911 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
9913 putIReg32( gregOfRexRM(pfx,modrm),
9915 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
9916 binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
9919 DIP("movmskps %s,%s\n", nameXMMReg(src),
9920 nameIReg32(gregOfRexRM(pfx,modrm)));
9921 goto decode_success;
9923 /* else fall through */
9926 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
9927 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
9928 if ( ( (haveNo66noF2noF3(pfx) && sz == 4)
9929 || (have66noF2noF3(pfx) && sz == 2)
9931 && insn[0] == 0x0F && insn[1] == 0x2B) {
9932 modrm = getUChar(delta+2);
9933 if (!epartIsReg(modrm)) {
9934 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9935 gen_SEGV_if_not_16_aligned( addr );
9936 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
9937 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
9939 nameXMMReg(gregOfRexRM(pfx,modrm)));
9941 goto decode_success;
9943 /* else fall through */
9946 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9947 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
9948 Intel manual does not say anything about the usual business of
9949 the FP reg tags getting trashed whenever an MMX insn happens.
9950 So we just leave them alone.
9952 if (haveNo66noF2noF3(pfx) && sz == 4
9953 && insn[0] == 0x0F && insn[1] == 0xE7) {
9954 modrm = getUChar(delta+2);
9955 if (!epartIsReg(modrm)) {
9956 /* do_MMX_preamble(); Intel docs don't specify this */
9957 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9958 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
9959 DIP("movntq %s,%s\n", dis_buf,
9960 nameMMXReg(gregLO3ofRM(modrm)));
9962 goto decode_success;
9964 /* else fall through */
9967 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
9968 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
9969 if (haveF3no66noF2(pfx)
9970 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9971 && insn[0] == 0x0F && insn[1] == 0x10) {
9972 modrm = getUChar(delta+2);
9973 if (epartIsReg(modrm)) {
9974 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
9975 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
9976 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9977 nameXMMReg(gregOfRexRM(pfx,modrm)));
9980 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9981 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
9982 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
9983 loadLE(Ity_I32, mkexpr(addr)) );
9984 DIP("movss %s,%s\n", dis_buf,
9985 nameXMMReg(gregOfRexRM(pfx,modrm)));
9988 goto decode_success;
9991 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
9993 if (haveF3no66noF2(pfx) && sz == 4
9994 && insn[0] == 0x0F && insn[1] == 0x11) {
9995 modrm = getUChar(delta+2);
9996 if (epartIsReg(modrm)) {
9997 /* fall through, we don't yet have a test case */
9999 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10000 storeLE( mkexpr(addr),
10001 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
10002 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
10005 goto decode_success;
10009 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
10010 if (haveNo66noF2noF3(pfx) && sz == 4
10011 && insn[0] == 0x0F && insn[1] == 0x59) {
10012 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulps", Iop_Mul32Fx4 );
10013 goto decode_success;
10016 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
10017 if (haveF3no66noF2(pfx) && sz == 4
10018 && insn[0] == 0x0F && insn[1] == 0x59) {
10019 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "mulss", Iop_Mul32F0x4 );
10020 goto decode_success;
10023 /* 0F 56 = ORPS -- G = G or E */
10024 if (haveNo66noF2noF3(pfx) && sz == 4
10025 && insn[0] == 0x0F && insn[1] == 0x56) {
10026 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orps", Iop_OrV128 );
10027 goto decode_success;
10030 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10031 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
10032 if (haveNo66noF2noF3(pfx) && sz == 4
10033 && insn[0] == 0x0F && insn[1] == 0xE0) {
10035 delta = dis_MMXop_regmem_to_reg (
10036 vbi, pfx, delta+2, insn[1], "pavgb", False );
10037 goto decode_success;
10040 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10041 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
10042 if (haveNo66noF2noF3(pfx) && sz == 4
10043 && insn[0] == 0x0F && insn[1] == 0xE3) {
10045 delta = dis_MMXop_regmem_to_reg (
10046 vbi, pfx, delta+2, insn[1], "pavgw", False );
10047 goto decode_success;
10050 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10051 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
10052 zero-extend of it in ireg(G). */
10053 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
10054 && insn[0] == 0x0F && insn[1] == 0xC5) {
10056 if (epartIsReg(modrm)) {
10057 IRTemp sV = newTemp(Ity_I64);
10058 t5 = newTemp(Ity_I16);
10060 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
10061 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
10062 switch (insn[3] & 3) {
10063 case 0: assign(t5, mkexpr(t0)); break;
10064 case 1: assign(t5, mkexpr(t1)); break;
10065 case 2: assign(t5, mkexpr(t2)); break;
10066 case 3: assign(t5, mkexpr(t3)); break;
10067 default: vassert(0);
10070 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
10072 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
10073 DIP("pextrw $%d,%s,%s\n",
10074 (Int)insn[3], nameMMXReg(eregLO3ofRM(modrm)),
10075 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
10076 : nameIReg32(gregOfRexRM(pfx,modrm))
10079 goto decode_success;
10081 /* else fall through */
10082 /* note, for anyone filling in the mem case: this insn has one
10083 byte after the amode and therefore you must pass 1 as the
10084 last arg to disAMode */
10087 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10088 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
10089 put it into the specified lane of mmx(G). */
10090 if (haveNo66noF2noF3(pfx)
10091 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
10092 && insn[0] == 0x0F && insn[1] == 0xC4) {
10093 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
10094 mmx reg. t4 is the new lane value. t5 is the original
10095 mmx value. t6 is the new mmx value. */
10097 t4 = newTemp(Ity_I16);
10098 t5 = newTemp(Ity_I64);
10099 t6 = newTemp(Ity_I64);
10103 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
10104 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
10106 if (epartIsReg(modrm)) {
10107 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
10109 lane = insn[3+1-1];
10110 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
10111 nameIReg16(eregOfRexRM(pfx,modrm)),
10112 nameMMXReg(gregLO3ofRM(modrm)));
10114 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 );
10116 lane = insn[3+alen-1];
10117 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
10118 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
10120 nameMMXReg(gregLO3ofRM(modrm)));
10123 switch (lane & 3) {
10124 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
10125 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
10126 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
10127 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
10128 default: vassert(0);
10130 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
10131 goto decode_success;
10134 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10135 /* 0F EE = PMAXSW -- 16x4 signed max */
10136 if (haveNo66noF2noF3(pfx) && sz == 4
10137 && insn[0] == 0x0F && insn[1] == 0xEE) {
10139 delta = dis_MMXop_regmem_to_reg (
10140 vbi, pfx, delta+2, insn[1], "pmaxsw", False );
10141 goto decode_success;
10144 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10145 /* 0F DE = PMAXUB -- 8x8 unsigned max */
10146 if (haveNo66noF2noF3(pfx) && sz == 4
10147 && insn[0] == 0x0F && insn[1] == 0xDE) {
10149 delta = dis_MMXop_regmem_to_reg (
10150 vbi, pfx, delta+2, insn[1], "pmaxub", False );
10151 goto decode_success;
10154 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10155 /* 0F EA = PMINSW -- 16x4 signed min */
10156 if (haveNo66noF2noF3(pfx) && sz == 4
10157 && insn[0] == 0x0F && insn[1] == 0xEA) {
10159 delta = dis_MMXop_regmem_to_reg (
10160 vbi, pfx, delta+2, insn[1], "pminsw", False );
10161 goto decode_success;
10164 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10165 /* 0F DA = PMINUB -- 8x8 unsigned min */
10166 if (haveNo66noF2noF3(pfx) && sz == 4
10167 && insn[0] == 0x0F && insn[1] == 0xDA) {
10169 delta = dis_MMXop_regmem_to_reg (
10170 vbi, pfx, delta+2, insn[1], "pminub", False );
10171 goto decode_success;
10174 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10175 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
10176 mmx(E), turn them into a byte, and put zero-extend of it in
10178 if (haveNo66noF2noF3(pfx) && sz == 4
10179 && insn[0] == 0x0F && insn[1] == 0xD7) {
10181 if (epartIsReg(modrm)) {
10183 t0 = newTemp(Ity_I64);
10184 t1 = newTemp(Ity_I64);
10185 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
10186 assign(t1, mkIRExprCCall(
10187 Ity_I64, 0/*regparms*/,
10188 "amd64g_calculate_mmx_pmovmskb",
10189 &amd64g_calculate_mmx_pmovmskb,
10190 mkIRExprVec_1(mkexpr(t0))));
10191 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1)));
10192 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
10193 nameIReg32(gregOfRexRM(pfx,modrm)));
10195 goto decode_success;
10197 /* else fall through */
10200 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10201 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
10202 if (haveNo66noF2noF3(pfx) && sz == 4
10203 && insn[0] == 0x0F && insn[1] == 0xE4) {
10205 delta = dis_MMXop_regmem_to_reg (
10206 vbi, pfx, delta+2, insn[1], "pmuluh", False );
10207 goto decode_success;
10210 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
10211 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
10212 /* 0F 18 /2 = PREFETCHT1 */
10213 /* 0F 18 /3 = PREFETCHT2 */
10214 if (insn[0] == 0x0F && insn[1] == 0x18
10215 && haveNo66noF2noF3(pfx)
10216 && !epartIsReg(insn[2])
10217 && gregLO3ofRM(insn[2]) >= 0 && gregLO3ofRM(insn[2]) <= 3) {
10218 HChar* hintstr = "??";
10220 modrm = getUChar(delta+2);
10221 vassert(!epartIsReg(modrm));
10223 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10226 switch (gregLO3ofRM(modrm)) {
10227 case 0: hintstr = "nta"; break;
10228 case 1: hintstr = "t0"; break;
10229 case 2: hintstr = "t1"; break;
10230 case 3: hintstr = "t2"; break;
10231 default: vassert(0);
10234 DIP("prefetch%s %s\n", hintstr, dis_buf);
10235 goto decode_success;
10238 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10239 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
10240 if (haveNo66noF2noF3(pfx) && sz == 4
10241 && insn[0] == 0x0F && insn[1] == 0xF6) {
10243 delta = dis_MMXop_regmem_to_reg (
10244 vbi, pfx, delta+2, insn[1], "psadbw", False );
10245 goto decode_success;
10248 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10249 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
10250 if (haveNo66noF2noF3(pfx) && sz == 4
10251 && insn[0] == 0x0F && insn[1] == 0x70) {
10253 IRTemp sV, dV, s3, s2, s1, s0;
10254 s3 = s2 = s1 = s0 = IRTemp_INVALID;
10255 sV = newTemp(Ity_I64);
10256 dV = newTemp(Ity_I64);
10259 if (epartIsReg(modrm)) {
10260 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
10261 order = (Int)insn[3];
10263 DIP("pshufw $%d,%s,%s\n", order,
10264 nameMMXReg(eregLO3ofRM(modrm)),
10265 nameMMXReg(gregLO3ofRM(modrm)));
10267 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
10268 1/*extra byte after amode*/ );
10269 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
10270 order = (Int)insn[2+alen];
10272 DIP("pshufw $%d,%s,%s\n", order,
10274 nameMMXReg(gregLO3ofRM(modrm)));
10276 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
10278 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10280 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
10281 SEL((order>>2)&3), SEL((order>>0)&3) )
10283 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
10285 goto decode_success;
10288 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
10289 if (haveNo66noF2noF3(pfx) && sz == 4
10290 && insn[0] == 0x0F && insn[1] == 0x53) {
10291 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
10292 "rcpps", Iop_Recip32Fx4 );
10293 goto decode_success;
10296 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
10297 if (haveF3no66noF2(pfx) && sz == 4
10298 && insn[0] == 0x0F && insn[1] == 0x53) {
10299 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
10300 "rcpss", Iop_Recip32F0x4 );
10301 goto decode_success;
10304 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
10305 if (haveNo66noF2noF3(pfx) && sz == 4
10306 && insn[0] == 0x0F && insn[1] == 0x52) {
10307 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
10308 "rsqrtps", Iop_RSqrt32Fx4 );
10309 goto decode_success;
10312 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
10313 if (haveF3no66noF2(pfx) && sz == 4
10314 && insn[0] == 0x0F && insn[1] == 0x52) {
10315 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
10316 "rsqrtss", Iop_RSqrt32F0x4 );
10317 goto decode_success;
10320 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
10321 if (haveNo66noF2noF3(pfx)
10322 && insn[0] == 0x0F && insn[1] == 0xAE
10323 && epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7
10326 /* Insert a memory fence. It's sometimes important that these
10327 are carried through to the generated code. */
10328 stmt( IRStmt_MBE(Imbe_Fence) );
10330 goto decode_success;
10333 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
10334 if (haveNo66noF2noF3(pfx) && sz == 4
10335 && insn[0] == 0x0F && insn[1] == 0xC6) {
10338 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10339 sV = newTemp(Ity_V128);
10340 dV = newTemp(Ity_V128);
10341 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10343 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
10345 if (epartIsReg(modrm)) {
10346 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10347 select = (Int)insn[3];
10349 DIP("shufps $%d,%s,%s\n", select,
10350 nameXMMReg(eregOfRexRM(pfx,modrm)),
10351 nameXMMReg(gregOfRexRM(pfx,modrm)));
10353 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
10354 1/*byte at end of insn*/ );
10355 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10356 select = (Int)insn[2+alen];
10358 DIP("shufps $%d,%s,%s\n", select,
10360 nameXMMReg(gregOfRexRM(pfx,modrm)));
10363 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
10364 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
10366 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
10367 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10370 gregOfRexRM(pfx,modrm),
10371 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
10372 SELD((select>>2)&3), SELD((select>>0)&3) )
10378 goto decode_success;
10381 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
10382 if (haveNo66noF2noF3(pfx) && sz == 4
10383 && insn[0] == 0x0F && insn[1] == 0x51) {
10384 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
10385 "sqrtps", Iop_Sqrt32Fx4 );
10386 goto decode_success;
10389 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
10390 if (haveF3no66noF2(pfx) && sz == 4
10391 && insn[0] == 0x0F && insn[1] == 0x51) {
10392 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
10393 "sqrtss", Iop_Sqrt32F0x4 );
10394 goto decode_success;
10397 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
10398 if (insn[0] == 0x0F && insn[1] == 0xAE
10399 && haveNo66noF2noF3(pfx)
10400 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 3) {
10403 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10406 /* Fake up a native SSE mxcsr word. The only thing it depends
10407 on is SSEROUND[1:0], so call a clean helper to cook it up.
10409 /* ULong amd64g_create_mxcsr ( ULong sseround ) */
10410 DIP("stmxcsr %s\n", dis_buf);
10415 Ity_I64, 0/*regp*/,
10416 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
10417 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
10421 goto decode_success;
10424 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
10425 if (haveNo66noF2noF3(pfx) && sz == 4
10426 && insn[0] == 0x0F && insn[1] == 0x5C) {
10427 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subps", Iop_Sub32Fx4 );
10428 goto decode_success;
10431 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
10432 if (haveF3no66noF2(pfx) && sz == 4
10433 && insn[0] == 0x0F && insn[1] == 0x5C) {
10434 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "subss", Iop_Sub32F0x4 );
10435 goto decode_success;
10438 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
10439 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
10440 /* These just appear to be special cases of SHUFPS */
10441 if (haveNo66noF2noF3(pfx) && sz == 4
10442 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
10444 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10445 Bool hi = toBool(insn[1] == 0x15);
10446 sV = newTemp(Ity_V128);
10447 dV = newTemp(Ity_V128);
10448 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10450 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
10452 if (epartIsReg(modrm)) {
10453 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10455 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10456 nameXMMReg(eregOfRexRM(pfx,modrm)),
10457 nameXMMReg(gregOfRexRM(pfx,modrm)));
10459 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10460 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10462 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10464 nameXMMReg(gregOfRexRM(pfx,modrm)));
10467 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
10468 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
10471 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s3, d3, s2, d2 ) );
10473 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s1, d1, s0, d0 ) );
10476 goto decode_success;
10479 /* 0F 57 = XORPS -- G = G xor E */
10480 if (haveNo66noF2noF3(pfx) && sz == 4
10481 && insn[0] == 0x0F && insn[1] == 0x57) {
10482 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorps", Iop_XorV128 );
10483 goto decode_success;
10486 /* ---------------------------------------------------- */
10487 /* --- end of the SSE decoder. --- */
10488 /* ---------------------------------------------------- */
10490 /* ---------------------------------------------------- */
10491 /* --- start of the SSE2 decoder. --- */
10492 /* ---------------------------------------------------- */
10494 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
10495 if (have66noF2noF3(pfx)
10496 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
10497 && insn[0] == 0x0F && insn[1] == 0x58) {
10498 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addpd", Iop_Add64Fx2 );
10499 goto decode_success;
10502 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
10503 if (haveF2no66noF3(pfx)
10504 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
10505 && insn[0] == 0x0F && insn[1] == 0x58) {
10506 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "addsd", Iop_Add64F0x2 );
10507 goto decode_success;
10510 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
10511 if (have66noF2noF3(pfx) && sz == 2
10512 && insn[0] == 0x0F && insn[1] == 0x55) {
10513 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnpd", Iop_AndV128 );
10514 goto decode_success;
10517 /* 66 0F 54 = ANDPD -- G = G and E */
10518 if (have66noF2noF3(pfx) && sz == 2
10519 && insn[0] == 0x0F && insn[1] == 0x54) {
10520 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andpd", Iop_AndV128 );
10521 goto decode_success;
10524 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
10525 if (have66noF2noF3(pfx) && sz == 2
10526 && insn[0] == 0x0F && insn[1] == 0xC2) {
10527 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmppd", True, 8 );
10528 goto decode_success;
10531 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
10532 if (haveF2no66noF3(pfx) && sz == 4
10533 && insn[0] == 0x0F && insn[1] == 0xC2) {
10534 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpsd", False, 8 );
10535 goto decode_success;
10538 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
10539 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
10540 if (have66noF2noF3(pfx) && sz == 2
10541 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
10542 IRTemp argL = newTemp(Ity_F64);
10543 IRTemp argR = newTemp(Ity_F64);
10544 modrm = getUChar(delta+2);
10545 if (epartIsReg(modrm)) {
10546 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
10547 0/*lowest lane*/ ) );
10549 DIP("%scomisd %s,%s\n", insn[1]==0x2E ? "u" : "",
10550 nameXMMReg(eregOfRexRM(pfx,modrm)),
10551 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10553 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10554 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10556 DIP("%scomisd %s,%s\n", insn[1]==0x2E ? "u" : "",
10558 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10560 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
10561 0/*lowest lane*/ ) );
10563 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10564 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10569 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
10573 goto decode_success;
10576 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
10578 if (haveF3no66noF2(pfx) && insn[0] == 0x0F && insn[1] == 0xE6) {
10579 IRTemp arg64 = newTemp(Ity_I64);
10580 if (sz != 4) goto decode_failure;
10582 modrm = getUChar(delta+2);
10583 if (epartIsReg(modrm)) {
10584 assign( arg64, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0) );
10586 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10587 nameXMMReg(gregOfRexRM(pfx,modrm)));
10589 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10590 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
10592 DIP("cvtdq2pd %s,%s\n", dis_buf,
10593 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10597 gregOfRexRM(pfx,modrm), 0,
10598 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
10602 gregOfRexRM(pfx,modrm), 1,
10603 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
10606 goto decode_success;
10609 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
10611 if (haveNo66noF2noF3(pfx) && sz == 4
10612 && insn[0] == 0x0F && insn[1] == 0x5B) {
10613 IRTemp argV = newTemp(Ity_V128);
10614 IRTemp rmode = newTemp(Ity_I32);
10616 modrm = getUChar(delta+2);
10617 if (epartIsReg(modrm)) {
10618 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10620 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10621 nameXMMReg(gregOfRexRM(pfx,modrm)));
10623 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10624 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10626 DIP("cvtdq2ps %s,%s\n", dis_buf,
10627 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10630 assign( rmode, get_sse_roundingmode() );
10631 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10633 # define CVT(_t) binop( Iop_F64toF32, \
10635 unop(Iop_I32StoF64,mkexpr(_t)))
10637 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 3, CVT(t3) );
10638 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 2, CVT(t2) );
10639 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10640 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10644 goto decode_success;
10647 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10648 lo half xmm(G), and zero upper half, rounding towards zero */
10649 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10650 lo half xmm(G), according to prevailing rounding mode, and zero
10652 if ( ( (haveF2no66noF3(pfx) && sz == 4)
10653 || (have66noF2noF3(pfx) && sz == 2)
10655 && insn[0] == 0x0F && insn[1] == 0xE6) {
10656 IRTemp argV = newTemp(Ity_V128);
10657 IRTemp rmode = newTemp(Ity_I32);
10658 Bool r2zero = toBool(sz == 2);
10660 modrm = getUChar(delta+2);
10661 if (epartIsReg(modrm)) {
10662 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10664 DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
10665 nameXMMReg(eregOfRexRM(pfx,modrm)),
10666 nameXMMReg(gregOfRexRM(pfx,modrm)));
10668 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10669 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10671 DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
10673 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10677 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10679 assign( rmode, get_sse_roundingmode() );
10682 t0 = newTemp(Ity_F64);
10683 t1 = newTemp(Ity_F64);
10684 assign( t0, unop(Iop_ReinterpI64asF64,
10685 unop(Iop_V128to64, mkexpr(argV))) );
10686 assign( t1, unop(Iop_ReinterpI64asF64,
10687 unop(Iop_V128HIto64, mkexpr(argV))) );
10689 # define CVT(_t) binop( Iop_F64toI32S, \
10693 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) );
10694 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) );
10695 putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10696 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10700 goto decode_success;
10703 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
10704 I32 in mmx, according to prevailing SSE rounding mode */
10705 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
10706 I32 in mmx, rounding towards zero */
10707 if (have66noF2noF3(pfx) && sz == 2
10708 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
10709 IRTemp dst64 = newTemp(Ity_I64);
10710 IRTemp rmode = newTemp(Ity_I32);
10711 IRTemp f64lo = newTemp(Ity_F64);
10712 IRTemp f64hi = newTemp(Ity_F64);
10713 Bool r2zero = toBool(insn[1] == 0x2C);
10716 modrm = getUChar(delta+2);
10718 if (epartIsReg(modrm)) {
10720 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10721 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
10722 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
10723 nameXMMReg(eregOfRexRM(pfx,modrm)),
10724 nameMMXReg(gregLO3ofRM(modrm)));
10726 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10727 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10728 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
10732 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
10734 nameMMXReg(gregLO3ofRM(modrm)));
10738 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10740 assign( rmode, get_sse_roundingmode() );
10745 binop( Iop_32HLto64,
10746 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
10747 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
10751 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
10752 goto decode_success;
10755 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
10756 lo half xmm(G), rounding according to prevailing SSE rounding
10757 mode, and zero upper half */
10758 /* Note, this is practically identical to CVTPD2DQ. It would have
10759 been nicer to merge them together, but the insn[] offsets differ
10761 if (have66noF2noF3(pfx) && sz == 2
10762 && insn[0] == 0x0F && insn[1] == 0x5A) {
10763 IRTemp argV = newTemp(Ity_V128);
10764 IRTemp rmode = newTemp(Ity_I32);
10766 modrm = getUChar(delta+2);
10767 if (epartIsReg(modrm)) {
10768 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10770 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10771 nameXMMReg(gregOfRexRM(pfx,modrm)));
10773 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10774 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10776 DIP("cvtpd2ps %s,%s\n", dis_buf,
10777 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10780 assign( rmode, get_sse_roundingmode() );
10781 t0 = newTemp(Ity_F64);
10782 t1 = newTemp(Ity_F64);
10783 assign( t0, unop(Iop_ReinterpI64asF64,
10784 unop(Iop_V128to64, mkexpr(argV))) );
10785 assign( t1, unop(Iop_ReinterpI64asF64,
10786 unop(Iop_V128HIto64, mkexpr(argV))) );
10788 # define CVT(_t) binop( Iop_F64toF32, \
10792 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) );
10793 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) );
10794 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10795 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10799 goto decode_success;
10802 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
10804 if (have66noF2noF3(pfx) && sz == 2
10805 && insn[0] == 0x0F && insn[1] == 0x2A) {
10806 IRTemp arg64 = newTemp(Ity_I64);
10808 modrm = getUChar(delta+2);
10809 if (epartIsReg(modrm)) {
10810 /* Only switch to MMX mode if the source is a MMX register.
10811 This is inconsistent with all other instructions which
10812 convert between XMM and (M64 or MMX), which always switch
10813 to MMX mode even if 64-bit operand is M64 and not MMX. At
10814 least, that's what the Intel docs seem to me to say.
10817 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
10819 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
10820 nameXMMReg(gregOfRexRM(pfx,modrm)));
10822 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10823 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
10825 DIP("cvtpi2pd %s,%s\n", dis_buf,
10826 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10830 gregOfRexRM(pfx,modrm), 0,
10831 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
10835 gregOfRexRM(pfx,modrm), 1,
10836 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
10839 goto decode_success;
10842 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10843 xmm(G), rounding towards zero */
10844 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10845 xmm(G), as per the prevailing rounding mode */
10846 if ( ( (have66noF2noF3(pfx) && sz == 2)
10847 || (haveF3no66noF2(pfx) && sz == 4)
10849 && insn[0] == 0x0F && insn[1] == 0x5B) {
10850 IRTemp argV = newTemp(Ity_V128);
10851 IRTemp rmode = newTemp(Ity_I32);
10852 Bool r2zero = toBool(sz == 4);
10854 modrm = getUChar(delta+2);
10855 if (epartIsReg(modrm)) {
10856 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10858 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10859 nameXMMReg(gregOfRexRM(pfx,modrm)));
10861 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10862 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10864 DIP("cvtps2dq %s,%s\n", dis_buf,
10865 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10869 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10871 assign( rmode, get_sse_roundingmode() );
10874 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10876 /* This is less than ideal. If it turns out to be a performance
10877 bottleneck it can be improved. */
10879 binop( Iop_F64toI32S, \
10881 unop( Iop_F32toF64, \
10882 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10884 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, CVT(t3) );
10885 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, CVT(t2) );
10886 putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10887 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10891 goto decode_success;
10894 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
10896 if (haveNo66noF2noF3(pfx) && sz == 4
10897 && insn[0] == 0x0F && insn[1] == 0x5A) {
10898 IRTemp f32lo = newTemp(Ity_F32);
10899 IRTemp f32hi = newTemp(Ity_F32);
10901 modrm = getUChar(delta+2);
10902 if (epartIsReg(modrm)) {
10903 assign( f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0) );
10904 assign( f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1) );
10906 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10907 nameXMMReg(gregOfRexRM(pfx,modrm)));
10909 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10910 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
10911 assign( f32hi, loadLE(Ity_F32,
10912 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10914 DIP("cvtps2pd %s,%s\n", dis_buf,
10915 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10918 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 1,
10919 unop(Iop_F32toF64, mkexpr(f32hi)) );
10920 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
10921 unop(Iop_F32toF64, mkexpr(f32lo)) );
10923 goto decode_success;
10926 /* F2 0F 2D = CVTSD2SI
10927 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
10928 according to prevailing SSE rounding mode
10929 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
10930 according to prevailing SSE rounding mode
10932 /* F2 0F 2C = CVTTSD2SI
10933 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
10934 truncating towards zero
10935 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
10936 truncating towards zero
10938 if (haveF2no66noF3(pfx)
10940 && (insn[1] == 0x2D || insn[1] == 0x2C)) {
10941 IRTemp rmode = newTemp(Ity_I32);
10942 IRTemp f64lo = newTemp(Ity_F64);
10943 Bool r2zero = toBool(insn[1] == 0x2C);
10944 vassert(sz == 4 || sz == 8);
10946 modrm = getUChar(delta+2);
10947 if (epartIsReg(modrm)) {
10949 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10950 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
10951 nameXMMReg(eregOfRexRM(pfx,modrm)),
10952 nameIReg(sz, gregOfRexRM(pfx,modrm), False));
10954 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10955 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10957 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
10959 nameIReg(sz, gregOfRexRM(pfx,modrm), False));
10963 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10965 assign( rmode, get_sse_roundingmode() );
10969 putIReg32( gregOfRexRM(pfx,modrm),
10970 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10972 putIReg64( gregOfRexRM(pfx,modrm),
10973 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
10976 goto decode_success;
10979 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
10980 low 1/4 xmm(G), according to prevailing SSE rounding mode */
10981 if (haveF2no66noF3(pfx) && sz == 4
10982 && insn[0] == 0x0F && insn[1] == 0x5A) {
10983 IRTemp rmode = newTemp(Ity_I32);
10984 IRTemp f64lo = newTemp(Ity_F64);
10987 modrm = getUChar(delta+2);
10988 if (epartIsReg(modrm)) {
10990 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10991 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
10992 nameXMMReg(gregOfRexRM(pfx,modrm)));
10994 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10995 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10997 DIP("cvtsd2ss %s,%s\n", dis_buf,
10998 nameXMMReg(gregOfRexRM(pfx,modrm)));
11001 assign( rmode, get_sse_roundingmode() );
11003 gregOfRexRM(pfx,modrm), 0,
11004 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
11007 goto decode_success;
11010 /* F2 0F 2A = CVTSI2SD
11011 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
11012 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
11014 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)
11015 && insn[0] == 0x0F && insn[1] == 0x2A) {
11016 modrm = getUChar(delta+2);
11019 IRTemp arg32 = newTemp(Ity_I32);
11020 if (epartIsReg(modrm)) {
11021 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
11023 DIP("cvtsi2sd %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
11024 nameXMMReg(gregOfRexRM(pfx,modrm)));
11026 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11027 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
11029 DIP("cvtsi2sd %s,%s\n", dis_buf,
11030 nameXMMReg(gregOfRexRM(pfx,modrm)) );
11032 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
11033 unop(Iop_I32StoF64, mkexpr(arg32))
11037 IRTemp arg64 = newTemp(Ity_I64);
11038 if (epartIsReg(modrm)) {
11039 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
11041 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
11042 nameXMMReg(gregOfRexRM(pfx,modrm)));
11044 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11045 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11047 DIP("cvtsi2sdq %s,%s\n", dis_buf,
11048 nameXMMReg(gregOfRexRM(pfx,modrm)) );
11051 gregOfRexRM(pfx,modrm),
11053 binop( Iop_I64StoF64,
11054 get_sse_roundingmode(),
11061 goto decode_success;
11064 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
11066 if (haveF3no66noF2(pfx) && sz == 4
11067 && insn[0] == 0x0F && insn[1] == 0x5A) {
11068 IRTemp f32lo = newTemp(Ity_F32);
11070 modrm = getUChar(delta+2);
11071 if (epartIsReg(modrm)) {
11073 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
11074 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11075 nameXMMReg(gregOfRexRM(pfx,modrm)));
11077 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11078 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
11080 DIP("cvtss2sd %s,%s\n", dis_buf,
11081 nameXMMReg(gregOfRexRM(pfx,modrm)));
11084 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
11085 unop( Iop_F32toF64, mkexpr(f32lo) ) );
11087 goto decode_success;
11090 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
11091 if (have66noF2noF3(pfx) && sz == 2
11092 && insn[0] == 0x0F && insn[1] == 0x5E) {
11093 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divpd", Iop_Div64Fx2 );
11094 goto decode_success;
11097 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
11098 if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x5E) {
11100 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "divsd", Iop_Div64F0x2 );
11101 goto decode_success;
11104 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
11105 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
11106 if (haveNo66noF2noF3(pfx) && sz == 4
11107 && insn[0] == 0x0F && insn[1] == 0xAE
11108 && epartIsReg(insn[2])
11109 && (gregLO3ofRM(insn[2]) == 5 || gregLO3ofRM(insn[2]) == 6)) {
11111 /* Insert a memory fence. It's sometimes important that these
11112 are carried through to the generated code. */
11113 stmt( IRStmt_MBE(Imbe_Fence) );
11114 DIP("%sfence\n", gregLO3ofRM(insn[2])==5 ? "l" : "m");
11115 goto decode_success;
11118 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
11119 if (have66noF2noF3(pfx) && sz == 2
11120 && insn[0] == 0x0F && insn[1] == 0x5F) {
11121 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxpd", Iop_Max64Fx2 );
11122 goto decode_success;
11125 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
11126 if (haveF2no66noF3(pfx) && sz == 4
11127 && insn[0] == 0x0F && insn[1] == 0x5F) {
11128 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "maxsd", Iop_Max64F0x2 );
11129 goto decode_success;
11132 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
11133 if (have66noF2noF3(pfx) && sz == 2
11134 && insn[0] == 0x0F && insn[1] == 0x5D) {
11135 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minpd", Iop_Min64Fx2 );
11136 goto decode_success;
11139 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
11140 if (haveF2no66noF3(pfx) && sz == 4
11141 && insn[0] == 0x0F && insn[1] == 0x5D) {
11142 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "minsd", Iop_Min64F0x2 );
11143 goto decode_success;
11146 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
11147 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
11148 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
11149 if (have66noF2noF3(pfx)
11150 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11152 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
11153 HChar* wot = insn[1]==0x28 ? "apd" :
11154 insn[1]==0x10 ? "upd" : "dqa";
11155 modrm = getUChar(delta+2);
11156 if (epartIsReg(modrm)) {
11157 putXMMReg( gregOfRexRM(pfx,modrm),
11158 getXMMReg( eregOfRexRM(pfx,modrm) ));
11159 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRexRM(pfx,modrm)),
11160 nameXMMReg(gregOfRexRM(pfx,modrm)));
11163 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11164 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/)
11165 gen_SEGV_if_not_16_aligned( addr );
11166 putXMMReg( gregOfRexRM(pfx,modrm),
11167 loadLE(Ity_V128, mkexpr(addr)) );
11168 DIP("mov%s %s,%s\n", wot, dis_buf,
11169 nameXMMReg(gregOfRexRM(pfx,modrm)));
11172 goto decode_success;
11175 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
11176 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
11177 if (have66noF2noF3(pfx) && insn[0] == 0x0F
11178 && (insn[1] == 0x29 || insn[1] == 0x11)) {
11179 HChar* wot = insn[1]==0x29 ? "apd" : "upd";
11180 modrm = getUChar(delta+2);
11181 if (epartIsReg(modrm)) {
11182 putXMMReg( eregOfRexRM(pfx,modrm),
11183 getXMMReg( gregOfRexRM(pfx,modrm) ) );
11184 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRexRM(pfx,modrm)),
11185 nameXMMReg(eregOfRexRM(pfx,modrm)));
11188 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11189 if (insn[1] == 0x29/*movapd*/)
11190 gen_SEGV_if_not_16_aligned( addr );
11191 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11192 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRexRM(pfx,modrm)),
11196 goto decode_success;
11199 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, zeroing high 3/4 of xmm. */
11200 /* or from ireg64/m64 to xmm lo 1/2, zeroing high 1/2 of xmm. */
11201 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x6E) {
11202 vassert(sz == 2 || sz == 8);
11203 if (sz == 2) sz = 4;
11204 modrm = getUChar(delta+2);
11205 if (epartIsReg(modrm)) {
11209 gregOfRexRM(pfx,modrm),
11210 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
11212 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
11213 nameXMMReg(gregOfRexRM(pfx,modrm)));
11216 gregOfRexRM(pfx,modrm),
11217 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
11219 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
11220 nameXMMReg(gregOfRexRM(pfx,modrm)));
11223 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11226 gregOfRexRM(pfx,modrm),
11228 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
11229 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
11231 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
11232 nameXMMReg(gregOfRexRM(pfx,modrm)));
11234 goto decode_success;
11237 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
11238 /* or from xmm low 1/2 to ireg64 or m64. */
11239 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x7E) {
11240 if (sz == 2) sz = 4;
11241 vassert(sz == 4 || sz == 8);
11242 modrm = getUChar(delta+2);
11243 if (epartIsReg(modrm)) {
11246 putIReg32( eregOfRexRM(pfx,modrm),
11247 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
11248 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11249 nameIReg32(eregOfRexRM(pfx,modrm)));
11251 putIReg64( eregOfRexRM(pfx,modrm),
11252 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
11253 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11254 nameIReg64(eregOfRexRM(pfx,modrm)));
11257 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11259 storeLE( mkexpr(addr),
11261 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
11262 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
11263 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
11264 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
11266 goto decode_success;
11269 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
11270 if (have66noF2noF3(pfx) && sz == 2
11271 && insn[0] == 0x0F && insn[1] == 0x7F) {
11272 modrm = getUChar(delta+2);
11273 if (epartIsReg(modrm)) {
11275 putXMMReg( eregOfRexRM(pfx,modrm),
11276 getXMMReg(gregOfRexRM(pfx,modrm)) );
11277 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11278 nameXMMReg(eregOfRexRM(pfx,modrm)));
11280 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11281 gen_SEGV_if_not_16_aligned( addr );
11283 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11284 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
11286 goto decode_success;
11289 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
11290 if (haveF3no66noF2(pfx) && sz == 4
11291 && insn[0] == 0x0F && insn[1] == 0x6F) {
11292 modrm = getUChar(delta+2);
11293 if (epartIsReg(modrm)) {
11294 putXMMReg( gregOfRexRM(pfx,modrm),
11295 getXMMReg( eregOfRexRM(pfx,modrm) ));
11296 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11297 nameXMMReg(gregOfRexRM(pfx,modrm)));
11300 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11301 putXMMReg( gregOfRexRM(pfx,modrm),
11302 loadLE(Ity_V128, mkexpr(addr)) );
11303 DIP("movdqu %s,%s\n", dis_buf,
11304 nameXMMReg(gregOfRexRM(pfx,modrm)));
11307 goto decode_success;
11310 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
11311 if (haveF3no66noF2(pfx) && sz == 4
11312 && insn[0] == 0x0F && insn[1] == 0x7F) {
11313 modrm = getUChar(delta+2);
11314 if (epartIsReg(modrm)) {
11315 goto decode_failure; /* awaiting test case */
11317 putXMMReg( eregOfRexRM(pfx,modrm),
11318 getXMMReg(gregOfRexRM(pfx,modrm)) );
11319 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11320 nameXMMReg(eregOfRexRM(pfx,modrm)));
11322 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11324 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11325 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
11327 goto decode_success;
11330 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
11331 if (haveF2no66noF3(pfx) && sz == 4
11332 && insn[0] == 0x0F && insn[1] == 0xD6) {
11333 modrm = getUChar(delta+2);
11334 if (epartIsReg(modrm)) {
11336 putMMXReg( gregLO3ofRM(modrm),
11337 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
11338 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11339 nameMMXReg(gregLO3ofRM(modrm)));
11341 goto decode_success;
11343 /* apparently no mem case for this insn */
11344 goto decode_failure;
11348 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
11349 /* These seems identical to MOVHPS. This instruction encoding is
11350 completely crazy. */
11351 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x16) {
11352 modrm = getUChar(delta+2);
11353 if (epartIsReg(modrm)) {
11354 /* fall through; apparently reg-reg is not possible */
11356 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11358 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
11359 loadLE(Ity_I64, mkexpr(addr)) );
11360 DIP("movhpd %s,%s\n", dis_buf,
11361 nameXMMReg( gregOfRexRM(pfx,modrm) ));
11362 goto decode_success;
11366 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
11367 /* Again, this seems identical to MOVHPS. */
11368 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x17) {
11369 if (!epartIsReg(insn[2])) {
11371 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11373 storeLE( mkexpr(addr),
11374 getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
11375 1/*upper lane*/ ) );
11376 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
11378 goto decode_success;
11380 /* else fall through */
11383 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
11384 /* Identical to MOVLPS ? */
11385 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x12) {
11386 modrm = getUChar(delta+2);
11387 if (epartIsReg(modrm)) {
11388 /* fall through; apparently reg-reg is not possible */
11390 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11392 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11394 loadLE(Ity_I64, mkexpr(addr)) );
11395 DIP("movlpd %s, %s\n",
11396 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11397 goto decode_success;
11401 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
11402 /* Identical to MOVLPS ? */
11403 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x13) {
11404 modrm = getUChar(delta+2);
11405 if (!epartIsReg(modrm)) {
11406 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11408 storeLE( mkexpr(addr),
11409 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11410 0/*lower lane*/ ) );
11411 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11413 goto decode_success;
11415 /* else fall through */
11418 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
11419 2 lowest bits of ireg(G) */
11420 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)
11421 && insn[0] == 0x0F && insn[1] == 0x50) {
11422 /* sz == 8 is a kludge to handle insns with REX.W redundantly
11423 set to 1, which has been known to happen:
11424 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
11425 20071106: see further comments on MOVMSKPS implementation above.
11427 modrm = getUChar(delta+2);
11428 if (epartIsReg(modrm)) {
11430 t0 = newTemp(Ity_I32);
11431 t1 = newTemp(Ity_I32);
11433 src = eregOfRexRM(pfx,modrm);
11434 assign( t0, binop( Iop_And32,
11435 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
11437 assign( t1, binop( Iop_And32,
11438 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
11440 putIReg32( gregOfRexRM(pfx,modrm),
11441 binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
11443 DIP("movmskpd %s,%s\n", nameXMMReg(src),
11444 nameIReg32(gregOfRexRM(pfx,modrm)));
11445 goto decode_success;
11447 /* else fall through */
11448 goto decode_failure;
11451 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
11452 if (have66noF2noF3(pfx) && sz == 2
11453 && insn[0] == 0x0F && insn[1] == 0xF7) {
11454 modrm = getUChar(delta+2);
11455 if (epartIsReg(modrm)) {
11456 IRTemp regD = newTemp(Ity_V128);
11457 IRTemp mask = newTemp(Ity_V128);
11458 IRTemp olddata = newTemp(Ity_V128);
11459 IRTemp newdata = newTemp(Ity_V128);
11460 addr = newTemp(Ity_I64);
11462 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
11463 assign( regD, getXMMReg( gregOfRexRM(pfx,modrm) ));
11465 /* Unfortunately can't do the obvious thing with SarN8x16
11466 here since that can't be re-emitted as SSE2 code - no such
11470 binop(Iop_64HLtoV128,
11472 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
11475 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
11477 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
11485 unop(Iop_NotV128, mkexpr(mask)))) );
11486 storeLE( mkexpr(addr), mkexpr(newdata) );
11489 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRexRM(pfx,modrm) ),
11490 nameXMMReg( gregOfRexRM(pfx,modrm) ) );
11491 goto decode_success;
11493 /* else fall through */
11496 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
11497 if (have66noF2noF3(pfx) && sz == 2
11498 && insn[0] == 0x0F && insn[1] == 0xE7) {
11499 modrm = getUChar(delta+2);
11500 if (!epartIsReg(modrm)) {
11501 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11502 gen_SEGV_if_not_16_aligned( addr );
11503 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11504 DIP("movntdq %s,%s\n", dis_buf,
11505 nameXMMReg(gregOfRexRM(pfx,modrm)));
11507 goto decode_success;
11509 /* else fall through */
11510 goto decode_failure;
11513 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
11514 if (haveNo66noF2noF3(pfx) &&
11515 insn[0] == 0x0F && insn[1] == 0xC3) {
11516 vassert(sz == 4 || sz == 8);
11517 modrm = getUChar(delta+2);
11518 if (!epartIsReg(modrm)) {
11519 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11520 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
11521 DIP("movnti %s,%s\n", dis_buf,
11522 nameIRegG(sz, pfx, modrm));
11524 goto decode_success;
11526 /* else fall through */
11529 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
11530 or lo half xmm). */
11531 if (have66noF2noF3(pfx)
11532 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11533 && insn[0] == 0x0F && insn[1] == 0xD6) {
11534 modrm = getUChar(delta+2);
11535 if (epartIsReg(modrm)) {
11536 /* fall through, awaiting test case */
11537 /* dst: lo half copied, hi half zeroed */
11539 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11540 storeLE( mkexpr(addr),
11541 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
11542 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
11544 goto decode_success;
11548 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
11550 if (haveF3no66noF2(pfx) && sz == 4
11551 && insn[0] == 0x0F && insn[1] == 0xD6) {
11552 modrm = getUChar(delta+2);
11553 if (epartIsReg(modrm)) {
11555 putXMMReg( gregOfRexRM(pfx,modrm),
11556 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
11557 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
11558 nameXMMReg(gregOfRexRM(pfx,modrm)));
11560 goto decode_success;
11562 /* apparently no mem case for this insn */
11563 goto decode_failure;
11567 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
11568 G (lo half xmm). Upper half of G is zeroed out. */
11569 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
11570 G (lo half xmm). If E is mem, upper half of G is zeroed out.
11571 If E is reg, upper half of G is unchanged. */
11572 if ( (haveF2no66noF3(pfx)
11573 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11574 && insn[0] == 0x0F && insn[1] == 0x10)
11576 (haveF3no66noF2(pfx)
11577 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11578 && insn[0] == 0x0F && insn[1] == 0x7E)
11580 modrm = getUChar(delta+2);
11581 if (epartIsReg(modrm)) {
11582 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11583 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
11584 if (insn[1] == 0x7E/*MOVQ*/) {
11585 /* zero bits 127:64 */
11586 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
11588 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11589 nameXMMReg(gregOfRexRM(pfx,modrm)));
11592 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11593 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
11594 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11595 loadLE(Ity_I64, mkexpr(addr)) );
11596 DIP("movsd %s,%s\n", dis_buf,
11597 nameXMMReg(gregOfRexRM(pfx,modrm)));
11600 goto decode_success;
11603 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
11604 or lo half xmm). */
11605 if (haveF2no66noF3(pfx)
11606 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11607 && insn[0] == 0x0F && insn[1] == 0x11) {
11608 modrm = getUChar(delta+2);
11609 if (epartIsReg(modrm)) {
11610 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
11611 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
11612 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11613 nameXMMReg(eregOfRexRM(pfx,modrm)));
11616 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11617 storeLE( mkexpr(addr),
11618 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
11619 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11623 goto decode_success;
11626 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
11627 if (have66noF2noF3(pfx)
11628 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11629 && insn[0] == 0x0F && insn[1] == 0x59) {
11630 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulpd", Iop_Mul64Fx2 );
11631 goto decode_success;
11634 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
11635 if (haveF2no66noF3(pfx)
11636 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11637 && insn[0] == 0x0F && insn[1] == 0x59) {
11638 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "mulsd", Iop_Mul64F0x2 );
11639 goto decode_success;
11642 /* 66 0F 56 = ORPD -- G = G and E */
11643 if (have66noF2noF3(pfx) && sz == 2
11644 && insn[0] == 0x0F && insn[1] == 0x56) {
11645 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orpd", Iop_OrV128 );
11646 goto decode_success;
11649 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
11650 if (have66noF2noF3(pfx) && sz == 2
11651 && insn[0] == 0x0F && insn[1] == 0xC6) {
11653 IRTemp sV = newTemp(Ity_V128);
11654 IRTemp dV = newTemp(Ity_V128);
11655 IRTemp s1 = newTemp(Ity_I64);
11656 IRTemp s0 = newTemp(Ity_I64);
11657 IRTemp d1 = newTemp(Ity_I64);
11658 IRTemp d0 = newTemp(Ity_I64);
11661 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
11663 if (epartIsReg(modrm)) {
11664 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
11665 select = (Int)insn[3];
11667 DIP("shufpd $%d,%s,%s\n", select,
11668 nameXMMReg(eregOfRexRM(pfx,modrm)),
11669 nameXMMReg(gregOfRexRM(pfx,modrm)));
11671 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 );
11672 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11673 select = (Int)insn[2+alen];
11675 DIP("shufpd $%d,%s,%s\n", select,
11677 nameXMMReg(gregOfRexRM(pfx,modrm)));
11680 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11681 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11682 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11683 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11685 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
11686 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
11689 gregOfRexRM(pfx,modrm),
11690 binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
11696 goto decode_success;
11699 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
11700 if (have66noF2noF3(pfx) && sz == 2
11701 && insn[0] == 0x0F && insn[1] == 0x51) {
11702 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
11703 "sqrtpd", Iop_Sqrt64Fx2 );
11704 goto decode_success;
11707 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
11708 if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x51) {
11710 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta+2,
11711 "sqrtsd", Iop_Sqrt64F0x2 );
11712 goto decode_success;
11715 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
11716 if (have66noF2noF3(pfx) && sz == 2
11717 && insn[0] == 0x0F && insn[1] == 0x5C) {
11718 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subpd", Iop_Sub64Fx2 );
11719 goto decode_success;
11722 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
11723 if (haveF2no66noF3(pfx)
11724 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
11725 && insn[0] == 0x0F && insn[1] == 0x5C) {
11726 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "subsd", Iop_Sub64F0x2 );
11727 goto decode_success;
11730 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
11731 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
11732 /* These just appear to be special cases of SHUFPS */
11733 if (have66noF2noF3(pfx)
11734 && sz == 2 /* could be 8 if rex also present */
11735 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
11736 IRTemp s1 = newTemp(Ity_I64);
11737 IRTemp s0 = newTemp(Ity_I64);
11738 IRTemp d1 = newTemp(Ity_I64);
11739 IRTemp d0 = newTemp(Ity_I64);
11740 IRTemp sV = newTemp(Ity_V128);
11741 IRTemp dV = newTemp(Ity_V128);
11742 Bool hi = toBool(insn[1] == 0x15);
11745 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
11747 if (epartIsReg(modrm)) {
11748 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
11750 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
11751 nameXMMReg(eregOfRexRM(pfx,modrm)),
11752 nameXMMReg(gregOfRexRM(pfx,modrm)));
11754 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
11755 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11757 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
11759 nameXMMReg(gregOfRexRM(pfx,modrm)));
11762 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11763 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11764 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11765 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11768 putXMMReg( gregOfRexRM(pfx,modrm),
11769 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
11771 putXMMReg( gregOfRexRM(pfx,modrm),
11772 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
11775 goto decode_success;
11778 /* 66 0F 57 = XORPD -- G = G xor E */
11779 if (have66noF2noF3(pfx) && sz == 2
11780 && insn[0] == 0x0F && insn[1] == 0x57) {
11781 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorpd", Iop_XorV128 );
11782 goto decode_success;
11785 /* 66 0F 6B = PACKSSDW */
11786 if (have66noF2noF3(pfx) && sz == 2
11787 && insn[0] == 0x0F && insn[1] == 0x6B) {
11788 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11789 "packssdw", Iop_QNarrow32Sx4, True );
11790 goto decode_success;
11793 /* 66 0F 63 = PACKSSWB */
11794 if (have66noF2noF3(pfx) && sz == 2
11795 && insn[0] == 0x0F && insn[1] == 0x63) {
11796 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11797 "packsswb", Iop_QNarrow16Sx8, True );
11798 goto decode_success;
11801 /* 66 0F 67 = PACKUSWB */
11802 if (have66noF2noF3(pfx) && sz == 2
11803 && insn[0] == 0x0F && insn[1] == 0x67) {
11804 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11805 "packuswb", Iop_QNarrow16Ux8, True );
11806 goto decode_success;
11809 /* 66 0F FC = PADDB */
11810 if (have66noF2noF3(pfx) && sz == 2
11811 && insn[0] == 0x0F && insn[1] == 0xFC) {
11812 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11813 "paddb", Iop_Add8x16, False );
11814 goto decode_success;
11817 /* 66 0F FE = PADDD */
11818 if (have66noF2noF3(pfx) && sz == 2
11819 && insn[0] == 0x0F && insn[1] == 0xFE) {
11820 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11821 "paddd", Iop_Add32x4, False );
11822 goto decode_success;
11825 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11826 /* 0F D4 = PADDQ -- add 64x1 */
11827 if (haveNo66noF2noF3(pfx) && sz == 4
11828 && insn[0] == 0x0F && insn[1] == 0xD4) {
11830 delta = dis_MMXop_regmem_to_reg (
11831 vbi, pfx, delta+2, insn[1], "paddq", False );
11832 goto decode_success;
11835 /* 66 0F D4 = PADDQ */
11836 if (have66noF2noF3(pfx) && sz == 2
11837 && insn[0] == 0x0F && insn[1] == 0xD4) {
11838 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11839 "paddq", Iop_Add64x2, False );
11840 goto decode_success;
11843 /* 66 0F FD = PADDW */
11844 if (have66noF2noF3(pfx) && sz == 2
11845 && insn[0] == 0x0F && insn[1] == 0xFD) {
11846 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11847 "paddw", Iop_Add16x8, False );
11848 goto decode_success;
11851 /* 66 0F EC = PADDSB */
11852 if (have66noF2noF3(pfx) && sz == 2
11853 && insn[0] == 0x0F && insn[1] == 0xEC) {
11854 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11855 "paddsb", Iop_QAdd8Sx16, False );
11856 goto decode_success;
11859 /* 66 0F ED = PADDSW */
11860 if (have66noF2noF3(pfx) && sz == 2
11861 && insn[0] == 0x0F && insn[1] == 0xED) {
11862 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11863 "paddsw", Iop_QAdd16Sx8, False );
11864 goto decode_success;
11867 /* 66 0F DC = PADDUSB */
11868 if (have66noF2noF3(pfx) && sz == 2
11869 && insn[0] == 0x0F && insn[1] == 0xDC) {
11870 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11871 "paddusb", Iop_QAdd8Ux16, False );
11872 goto decode_success;
11875 /* 66 0F DD = PADDUSW */
11876 if (have66noF2noF3(pfx) && sz == 2
11877 && insn[0] == 0x0F && insn[1] == 0xDD) {
11878 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11879 "paddusw", Iop_QAdd16Ux8, False );
11880 goto decode_success;
11883 /* 66 0F DB = PAND */
11884 if (have66noF2noF3(pfx) && sz == 2
11885 && insn[0] == 0x0F && insn[1] == 0xDB) {
11886 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pand", Iop_AndV128 );
11887 goto decode_success;
11890 /* 66 0F DF = PANDN */
11891 if (have66noF2noF3(pfx) && sz == 2
11892 && insn[0] == 0x0F && insn[1] == 0xDF) {
11893 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "pandn", Iop_AndV128 );
11894 goto decode_success;
11897 /* 66 0F E0 = PAVGB */
11898 if (have66noF2noF3(pfx) && sz == 2
11899 && insn[0] == 0x0F && insn[1] == 0xE0) {
11900 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11901 "pavgb", Iop_Avg8Ux16, False );
11902 goto decode_success;
11905 /* 66 0F E3 = PAVGW */
11906 if (have66noF2noF3(pfx) && sz == 2
11907 && insn[0] == 0x0F && insn[1] == 0xE3) {
11908 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11909 "pavgw", Iop_Avg16Ux8, False );
11910 goto decode_success;
11913 /* 66 0F 74 = PCMPEQB */
11914 if (have66noF2noF3(pfx) && sz == 2
11915 && insn[0] == 0x0F && insn[1] == 0x74) {
11916 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11917 "pcmpeqb", Iop_CmpEQ8x16, False );
11918 goto decode_success;
11921 /* 66 0F 76 = PCMPEQD */
11922 if (have66noF2noF3(pfx) && sz == 2
11923 && insn[0] == 0x0F && insn[1] == 0x76) {
11924 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11925 "pcmpeqd", Iop_CmpEQ32x4, False );
11926 goto decode_success;
11929 /* 66 0F 75 = PCMPEQW */
11930 if (have66noF2noF3(pfx) && sz == 2
11931 && insn[0] == 0x0F && insn[1] == 0x75) {
11932 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11933 "pcmpeqw", Iop_CmpEQ16x8, False );
11934 goto decode_success;
11937 /* 66 0F 64 = PCMPGTB */
11938 if (have66noF2noF3(pfx) && sz == 2
11939 && insn[0] == 0x0F && insn[1] == 0x64) {
11940 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11941 "pcmpgtb", Iop_CmpGT8Sx16, False );
11942 goto decode_success;
11945 /* 66 0F 66 = PCMPGTD */
11946 if (have66noF2noF3(pfx) && sz == 2
11947 && insn[0] == 0x0F && insn[1] == 0x66) {
11948 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11949 "pcmpgtd", Iop_CmpGT32Sx4, False );
11950 goto decode_success;
11953 /* 66 0F 65 = PCMPGTW */
11954 if (have66noF2noF3(pfx) && sz == 2
11955 && insn[0] == 0x0F && insn[1] == 0x65) {
11956 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
11957 "pcmpgtw", Iop_CmpGT16Sx8, False );
11958 goto decode_success;
11961 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
11962 zero-extend of it in ireg(G). */
11963 if (have66noF2noF3(pfx)
11964 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11965 && insn[0] == 0x0F && insn[1] == 0xC5) {
11967 if (epartIsReg(modrm)) {
11968 t5 = newTemp(Ity_V128);
11969 t4 = newTemp(Ity_I16);
11970 assign(t5, getXMMReg(eregOfRexRM(pfx,modrm)));
11971 breakup128to32s( t5, &t3, &t2, &t1, &t0 );
11972 switch (insn[3] & 7) {
11973 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
11974 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
11975 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
11976 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
11977 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
11978 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
11979 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
11980 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
11981 default: vassert(0);
11983 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t4)));
11984 DIP("pextrw $%d,%s,%s\n",
11985 (Int)insn[3], nameXMMReg(eregOfRexRM(pfx,modrm)),
11986 nameIReg32(gregOfRexRM(pfx,modrm)));
11988 goto decode_success;
11990 /* else fall through */
11991 /* note, if memory case is ever filled in, there is 1 byte after
11995 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
11996 put it into the specified lane of xmm(G). */
11997 if (have66noF2noF3(pfx)
11998 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
11999 && insn[0] == 0x0F && insn[1] == 0xC4) {
12001 t4 = newTemp(Ity_I16);
12004 if (epartIsReg(modrm)) {
12005 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
12007 lane = insn[3+1-1];
12008 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
12009 nameIReg16(eregOfRexRM(pfx,modrm)),
12010 nameXMMReg(gregOfRexRM(pfx,modrm)));
12012 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
12013 1/*byte after the amode*/ );
12015 lane = insn[3+alen-1];
12016 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
12017 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
12019 nameXMMReg(gregOfRexRM(pfx,modrm)));
12022 putXMMRegLane16( gregOfRexRM(pfx,modrm), lane & 7, mkexpr(t4) );
12023 goto decode_success;
12026 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
12027 E(xmm or mem) to G(xmm) */
12028 if (have66noF2noF3(pfx) && sz == 2
12029 && insn[0] == 0x0F && insn[1] == 0xF5) {
12030 IRTemp s1V = newTemp(Ity_V128);
12031 IRTemp s2V = newTemp(Ity_V128);
12032 IRTemp dV = newTemp(Ity_V128);
12033 IRTemp s1Hi = newTemp(Ity_I64);
12034 IRTemp s1Lo = newTemp(Ity_I64);
12035 IRTemp s2Hi = newTemp(Ity_I64);
12036 IRTemp s2Lo = newTemp(Ity_I64);
12037 IRTemp dHi = newTemp(Ity_I64);
12038 IRTemp dLo = newTemp(Ity_I64);
12040 if (epartIsReg(modrm)) {
12041 assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) );
12043 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12044 nameXMMReg(gregOfRexRM(pfx,modrm)));
12046 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12047 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
12049 DIP("pmaddwd %s,%s\n", dis_buf,
12050 nameXMMReg(gregOfRexRM(pfx,modrm)));
12052 assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) );
12053 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
12054 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
12055 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
12056 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
12057 assign( dHi, mkIRExprCCall(
12058 Ity_I64, 0/*regparms*/,
12059 "amd64g_calculate_mmx_pmaddwd",
12060 &amd64g_calculate_mmx_pmaddwd,
12061 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
12063 assign( dLo, mkIRExprCCall(
12064 Ity_I64, 0/*regparms*/,
12065 "amd64g_calculate_mmx_pmaddwd",
12066 &amd64g_calculate_mmx_pmaddwd,
12067 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
12069 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
12070 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
12071 goto decode_success;
12074 /* 66 0F EE = PMAXSW -- 16x8 signed max */
12075 if (have66noF2noF3(pfx) && sz == 2
12076 && insn[0] == 0x0F && insn[1] == 0xEE) {
12077 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12078 "pmaxsw", Iop_Max16Sx8, False );
12079 goto decode_success;
12082 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
12083 if (have66noF2noF3(pfx) && sz == 2
12084 && insn[0] == 0x0F && insn[1] == 0xDE) {
12085 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12086 "pmaxub", Iop_Max8Ux16, False );
12087 goto decode_success;
12090 /* 66 0F EA = PMINSW -- 16x8 signed min */
12091 if (have66noF2noF3(pfx) && sz == 2
12092 && insn[0] == 0x0F && insn[1] == 0xEA) {
12093 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12094 "pminsw", Iop_Min16Sx8, False );
12095 goto decode_success;
12098 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
12099 if (have66noF2noF3(pfx) && sz == 2
12100 && insn[0] == 0x0F && insn[1] == 0xDA) {
12101 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12102 "pminub", Iop_Min8Ux16, False );
12103 goto decode_success;
12106 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in
12107 xmm(E), turn them into a byte, and put zero-extend of it in
12108 ireg(G). Doing this directly is just too cumbersome; give up
12109 therefore and call a helper. */
12110 /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
12111 if (have66noF2noF3(pfx)
12112 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
12113 && insn[0] == 0x0F && insn[1] == 0xD7) {
12115 if (epartIsReg(modrm)) {
12116 t0 = newTemp(Ity_I64);
12117 t1 = newTemp(Ity_I64);
12118 assign(t0, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0));
12119 assign(t1, getXMMRegLane64(eregOfRexRM(pfx,modrm), 1));
12120 t5 = newTemp(Ity_I64);
12121 assign(t5, mkIRExprCCall(
12122 Ity_I64, 0/*regparms*/,
12123 "amd64g_calculate_sse_pmovmskb",
12124 &amd64g_calculate_sse_pmovmskb,
12125 mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
12126 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t5)));
12127 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12128 nameIReg32(gregOfRexRM(pfx,modrm)));
12130 goto decode_success;
12132 /* else fall through */
12135 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
12136 if (have66noF2noF3(pfx) && sz == 2
12137 && insn[0] == 0x0F && insn[1] == 0xE4) {
12138 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12139 "pmulhuw", Iop_MulHi16Ux8, False );
12140 goto decode_success;
12143 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
12144 if (have66noF2noF3(pfx) && sz == 2
12145 && insn[0] == 0x0F && insn[1] == 0xE5) {
12146 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12147 "pmulhw", Iop_MulHi16Sx8, False );
12148 goto decode_success;
12151 /* 66 0F D5 = PMULHL -- 16x8 multiply */
12152 if (have66noF2noF3(pfx) && sz == 2
12153 && insn[0] == 0x0F && insn[1] == 0xD5) {
12154 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12155 "pmullw", Iop_Mul16x8, False );
12156 goto decode_success;
12159 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
12160 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
12161 0 to form 64-bit result */
12162 if (haveNo66noF2noF3(pfx) && sz == 4
12163 && insn[0] == 0x0F && insn[1] == 0xF4) {
12164 IRTemp sV = newTemp(Ity_I64);
12165 IRTemp dV = newTemp(Ity_I64);
12166 t1 = newTemp(Ity_I32);
12167 t0 = newTemp(Ity_I32);
12171 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
12173 if (epartIsReg(modrm)) {
12174 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
12176 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
12177 nameMMXReg(gregLO3ofRM(modrm)));
12179 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12180 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12182 DIP("pmuludq %s,%s\n", dis_buf,
12183 nameMMXReg(gregLO3ofRM(modrm)));
12186 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
12187 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
12188 putMMXReg( gregLO3ofRM(modrm),
12189 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
12190 goto decode_success;
12193 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
12194 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
12196 /* This is a really poor translation -- could be improved if
12197 performance critical */
12198 if (have66noF2noF3(pfx) && sz == 2
12199 && insn[0] == 0x0F && insn[1] == 0xF4) {
12201 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
12202 sV = newTemp(Ity_V128);
12203 dV = newTemp(Ity_V128);
12204 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
12205 t1 = newTemp(Ity_I64);
12206 t0 = newTemp(Ity_I64);
12208 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12210 if (epartIsReg(modrm)) {
12211 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
12213 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12214 nameXMMReg(gregOfRexRM(pfx,modrm)));
12216 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12217 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12219 DIP("pmuludq %s,%s\n", dis_buf,
12220 nameXMMReg(gregOfRexRM(pfx,modrm)));
12223 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
12224 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
12226 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
12227 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) );
12228 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
12229 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkexpr(t1) );
12230 goto decode_success;
12233 /* 66 0F EB = POR */
12234 if (have66noF2noF3(pfx) && sz == 2
12235 && insn[0] == 0x0F && insn[1] == 0xEB) {
12236 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "por", Iop_OrV128 );
12237 goto decode_success;
12240 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
12241 from E(xmm or mem) to G(xmm) */
12242 if (have66noF2noF3(pfx) && sz == 2
12243 && insn[0] == 0x0F && insn[1] == 0xF6) {
12244 IRTemp s1V = newTemp(Ity_V128);
12245 IRTemp s2V = newTemp(Ity_V128);
12246 IRTemp dV = newTemp(Ity_V128);
12247 IRTemp s1Hi = newTemp(Ity_I64);
12248 IRTemp s1Lo = newTemp(Ity_I64);
12249 IRTemp s2Hi = newTemp(Ity_I64);
12250 IRTemp s2Lo = newTemp(Ity_I64);
12251 IRTemp dHi = newTemp(Ity_I64);
12252 IRTemp dLo = newTemp(Ity_I64);
12254 if (epartIsReg(modrm)) {
12255 assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) );
12257 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12258 nameXMMReg(gregOfRexRM(pfx,modrm)));
12260 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12261 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
12263 DIP("psadbw %s,%s\n", dis_buf,
12264 nameXMMReg(gregOfRexRM(pfx,modrm)));
12266 assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) );
12267 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
12268 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
12269 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
12270 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
12271 assign( dHi, mkIRExprCCall(
12272 Ity_I64, 0/*regparms*/,
12273 "amd64g_calculate_mmx_psadbw",
12274 &amd64g_calculate_mmx_psadbw,
12275 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
12277 assign( dLo, mkIRExprCCall(
12278 Ity_I64, 0/*regparms*/,
12279 "amd64g_calculate_mmx_psadbw",
12280 &amd64g_calculate_mmx_psadbw,
12281 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
12283 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
12284 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
12285 goto decode_success;
12288 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
12289 if (have66noF2noF3(pfx) && sz == 2
12290 && insn[0] == 0x0F && insn[1] == 0x70) {
12292 IRTemp sV, dV, s3, s2, s1, s0;
12293 s3 = s2 = s1 = s0 = IRTemp_INVALID;
12294 sV = newTemp(Ity_V128);
12295 dV = newTemp(Ity_V128);
12297 if (epartIsReg(modrm)) {
12298 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
12299 order = (Int)insn[3];
12301 DIP("pshufd $%d,%s,%s\n", order,
12302 nameXMMReg(eregOfRexRM(pfx,modrm)),
12303 nameXMMReg(gregOfRexRM(pfx,modrm)));
12305 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
12306 1/*byte after the amode*/ );
12307 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12308 order = (Int)insn[2+alen];
12310 DIP("pshufd $%d,%s,%s\n", order,
12312 nameXMMReg(gregOfRexRM(pfx,modrm)));
12314 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
12317 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
12319 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
12320 SEL((order>>2)&3), SEL((order>>0)&3) )
12322 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
12324 goto decode_success;
12327 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
12328 mem) to G(xmm), and copy lower half */
12329 if (haveF3no66noF2(pfx) && sz == 4
12330 && insn[0] == 0x0F && insn[1] == 0x70) {
12332 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
12333 s3 = s2 = s1 = s0 = IRTemp_INVALID;
12334 sV = newTemp(Ity_V128);
12335 dV = newTemp(Ity_V128);
12336 sVhi = newTemp(Ity_I64);
12337 dVhi = newTemp(Ity_I64);
12339 if (epartIsReg(modrm)) {
12340 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
12341 order = (Int)insn[3];
12343 DIP("pshufhw $%d,%s,%s\n", order,
12344 nameXMMReg(eregOfRexRM(pfx,modrm)),
12345 nameXMMReg(gregOfRexRM(pfx,modrm)));
12347 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
12348 1/*byte after the amode*/ );
12349 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12350 order = (Int)insn[2+alen];
12352 DIP("pshufhw $%d,%s,%s\n", order,
12354 nameXMMReg(gregOfRexRM(pfx,modrm)));
12356 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
12357 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
12360 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
12362 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
12363 SEL((order>>2)&3), SEL((order>>0)&3) )
12365 assign(dV, binop( Iop_64HLtoV128,
12367 unop(Iop_V128to64, mkexpr(sV))) );
12368 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
12370 goto decode_success;
12373 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
12374 mem) to G(xmm), and copy upper half */
12375 if (haveF2no66noF3(pfx) && sz == 4
12376 && insn[0] == 0x0F && insn[1] == 0x70) {
12378 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
12379 s3 = s2 = s1 = s0 = IRTemp_INVALID;
12380 sV = newTemp(Ity_V128);
12381 dV = newTemp(Ity_V128);
12382 sVlo = newTemp(Ity_I64);
12383 dVlo = newTemp(Ity_I64);
12385 if (epartIsReg(modrm)) {
12386 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
12387 order = (Int)insn[3];
12389 DIP("pshuflw $%d,%s,%s\n", order,
12390 nameXMMReg(eregOfRexRM(pfx,modrm)),
12391 nameXMMReg(gregOfRexRM(pfx,modrm)));
12393 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
12394 1/*byte after the amode*/ );
12395 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12396 order = (Int)insn[2+alen];
12398 DIP("pshuflw $%d,%s,%s\n", order,
12400 nameXMMReg(gregOfRexRM(pfx,modrm)));
12402 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
12403 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
12406 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
12408 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
12409 SEL((order>>2)&3), SEL((order>>0)&3) )
12411 assign(dV, binop( Iop_64HLtoV128,
12412 unop(Iop_V128HIto64, mkexpr(sV)),
12414 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
12416 goto decode_success;
12419 /* 66 0F 72 /6 ib = PSLLD by immediate */
12420 if (have66noF2noF3(pfx) && sz == 2
12421 && insn[0] == 0x0F && insn[1] == 0x72
12422 && epartIsReg(insn[2])
12423 && gregLO3ofRM(insn[2]) == 6) {
12424 delta = dis_SSE_shiftE_imm( pfx, delta+2, "pslld", Iop_ShlN32x4 );
12425 goto decode_success;
12428 /* 66 0F F2 = PSLLD by E */
12429 if (have66noF2noF3(pfx) && sz == 2
12430 && insn[0] == 0x0F && insn[1] == 0xF2) {
12431 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "pslld", Iop_ShlN32x4 );
12432 goto decode_success;
12435 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
12436 /* note, if mem case ever filled in, 1 byte after amode */
12437 if (have66noF2noF3(pfx) && sz == 2
12438 && insn[0] == 0x0F && insn[1] == 0x73
12439 && epartIsReg(insn[2])
12440 && gregLO3ofRM(insn[2]) == 7) {
12441 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
12442 Int imm = (Int)insn[3];
12443 Int reg = eregOfRexRM(pfx,insn[2]);
12444 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
12445 vassert(imm >= 0 && imm <= 255);
12448 sV = newTemp(Ity_V128);
12449 dV = newTemp(Ity_V128);
12450 hi64 = newTemp(Ity_I64);
12451 lo64 = newTemp(Ity_I64);
12452 hi64r = newTemp(Ity_I64);
12453 lo64r = newTemp(Ity_I64);
12456 putXMMReg(reg, mkV128(0x0000));
12457 goto decode_success;
12460 assign( sV, getXMMReg(reg) );
12461 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
12462 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
12465 assign( lo64r, mkexpr(lo64) );
12466 assign( hi64r, mkexpr(hi64) );
12470 assign( lo64r, mkU64(0) );
12471 assign( hi64r, mkexpr(lo64) );
12475 assign( lo64r, mkU64(0) );
12476 assign( hi64r, binop( Iop_Shl64,
12478 mkU8( 8*(imm-8) ) ));
12480 assign( lo64r, binop( Iop_Shl64,
12485 binop(Iop_Shl64, mkexpr(hi64),
12487 binop(Iop_Shr64, mkexpr(lo64),
12488 mkU8(8 * (8 - imm)) )
12492 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
12493 putXMMReg(reg, mkexpr(dV));
12494 goto decode_success;
12497 /* 66 0F 73 /6 ib = PSLLQ by immediate */
12498 if (have66noF2noF3(pfx) && sz == 2
12499 && insn[0] == 0x0F && insn[1] == 0x73
12500 && epartIsReg(insn[2])
12501 && gregLO3ofRM(insn[2]) == 6) {
12502 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllq", Iop_ShlN64x2 );
12503 goto decode_success;
12506 /* 66 0F F3 = PSLLQ by E */
12507 if (have66noF2noF3(pfx) && sz == 2
12508 && insn[0] == 0x0F && insn[1] == 0xF3) {
12509 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllq", Iop_ShlN64x2 );
12510 goto decode_success;
12513 /* 66 0F 71 /6 ib = PSLLW by immediate */
12514 if (have66noF2noF3(pfx) && sz == 2
12515 && insn[0] == 0x0F && insn[1] == 0x71
12516 && epartIsReg(insn[2])
12517 && gregLO3ofRM(insn[2]) == 6) {
12518 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllw", Iop_ShlN16x8 );
12519 goto decode_success;
12522 /* 66 0F F1 = PSLLW by E */
12523 if (have66noF2noF3(pfx) && sz == 2
12524 && insn[0] == 0x0F && insn[1] == 0xF1) {
12525 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllw", Iop_ShlN16x8 );
12526 goto decode_success;
12529 /* 66 0F 72 /4 ib = PSRAD by immediate */
12530 if (have66noF2noF3(pfx) && sz == 2
12531 && insn[0] == 0x0F && insn[1] == 0x72
12532 && epartIsReg(insn[2])
12533 && gregLO3ofRM(insn[2]) == 4) {
12534 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrad", Iop_SarN32x4 );
12535 goto decode_success;
12538 /* 66 0F E2 = PSRAD by E */
12539 if (have66noF2noF3(pfx) && sz == 2
12540 && insn[0] == 0x0F && insn[1] == 0xE2) {
12541 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrad", Iop_SarN32x4 );
12542 goto decode_success;
12545 /* 66 0F 71 /4 ib = PSRAW by immediate */
12546 if (have66noF2noF3(pfx) && sz == 2
12547 && insn[0] == 0x0F && insn[1] == 0x71
12548 && epartIsReg(insn[2])
12549 && gregLO3ofRM(insn[2]) == 4) {
12550 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psraw", Iop_SarN16x8 );
12551 goto decode_success;
12554 /* 66 0F E1 = PSRAW by E */
12555 if (have66noF2noF3(pfx) && sz == 2
12556 && insn[0] == 0x0F && insn[1] == 0xE1) {
12557 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psraw", Iop_SarN16x8 );
12558 goto decode_success;
12561 /* 66 0F 72 /2 ib = PSRLD by immediate */
12562 if (have66noF2noF3(pfx) && sz == 2
12563 && insn[0] == 0x0F && insn[1] == 0x72
12564 && epartIsReg(insn[2])
12565 && gregLO3ofRM(insn[2]) == 2) {
12566 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrld", Iop_ShrN32x4 );
12567 goto decode_success;
12570 /* 66 0F D2 = PSRLD by E */
12571 if (have66noF2noF3(pfx) && sz == 2
12572 && insn[0] == 0x0F && insn[1] == 0xD2) {
12573 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrld", Iop_ShrN32x4 );
12574 goto decode_success;
12577 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
12578 /* note, if mem case ever filled in, 1 byte after amode */
12579 if (have66noF2noF3(pfx) && sz == 2
12580 && insn[0] == 0x0F && insn[1] == 0x73
12581 && epartIsReg(insn[2])
12582 && gregLO3ofRM(insn[2]) == 3) {
12583 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
12584 Int imm = (Int)insn[3];
12585 Int reg = eregOfRexRM(pfx,insn[2]);
12586 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
12587 vassert(imm >= 0 && imm <= 255);
12590 sV = newTemp(Ity_V128);
12591 dV = newTemp(Ity_V128);
12592 hi64 = newTemp(Ity_I64);
12593 lo64 = newTemp(Ity_I64);
12594 hi64r = newTemp(Ity_I64);
12595 lo64r = newTemp(Ity_I64);
12598 putXMMReg(reg, mkV128(0x0000));
12599 goto decode_success;
12602 assign( sV, getXMMReg(reg) );
12603 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
12604 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
12607 assign( lo64r, mkexpr(lo64) );
12608 assign( hi64r, mkexpr(hi64) );
12612 assign( hi64r, mkU64(0) );
12613 assign( lo64r, mkexpr(hi64) );
12617 assign( hi64r, mkU64(0) );
12618 assign( lo64r, binop( Iop_Shr64,
12620 mkU8( 8*(imm-8) ) ));
12622 assign( hi64r, binop( Iop_Shr64,
12627 binop(Iop_Shr64, mkexpr(lo64),
12629 binop(Iop_Shl64, mkexpr(hi64),
12630 mkU8(8 * (8 - imm)) )
12635 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
12636 putXMMReg(reg, mkexpr(dV));
12637 goto decode_success;
12640 /* 66 0F 73 /2 ib = PSRLQ by immediate */
12641 if (have66noF2noF3(pfx) && sz == 2
12642 && insn[0] == 0x0F && insn[1] == 0x73
12643 && epartIsReg(insn[2])
12644 && gregLO3ofRM(insn[2]) == 2) {
12645 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlq", Iop_ShrN64x2 );
12646 goto decode_success;
12649 /* 66 0F D3 = PSRLQ by E */
12650 if (have66noF2noF3(pfx) && sz == 2
12651 && insn[0] == 0x0F && insn[1] == 0xD3) {
12652 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlq", Iop_ShrN64x2 );
12653 goto decode_success;
12656 /* 66 0F 71 /2 ib = PSRLW by immediate */
12657 if (have66noF2noF3(pfx) && sz == 2
12658 && insn[0] == 0x0F && insn[1] == 0x71
12659 && epartIsReg(insn[2])
12660 && gregLO3ofRM(insn[2]) == 2) {
12661 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlw", Iop_ShrN16x8 );
12662 goto decode_success;
12665 /* 66 0F D1 = PSRLW by E */
12666 if (have66noF2noF3(pfx) && sz == 2
12667 && insn[0] == 0x0F && insn[1] == 0xD1) {
12668 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlw", Iop_ShrN16x8 );
12669 goto decode_success;
12672 /* 66 0F F8 = PSUBB */
12673 if (have66noF2noF3(pfx) && sz == 2
12674 && insn[0] == 0x0F && insn[1] == 0xF8) {
12675 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12676 "psubb", Iop_Sub8x16, False );
12677 goto decode_success;
12680 /* 66 0F FA = PSUBD */
12681 if (have66noF2noF3(pfx) && sz == 2
12682 && insn[0] == 0x0F && insn[1] == 0xFA) {
12683 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12684 "psubd", Iop_Sub32x4, False );
12685 goto decode_success;
12688 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
12689 /* 0F FB = PSUBQ -- sub 64x1 */
12690 if (haveNo66noF2noF3(pfx) && sz == 4
12691 && insn[0] == 0x0F && insn[1] == 0xFB) {
12693 delta = dis_MMXop_regmem_to_reg (
12694 vbi, pfx, delta+2, insn[1], "psubq", False );
12695 goto decode_success;
12698 /* 66 0F FB = PSUBQ */
12699 if (have66noF2noF3(pfx) && sz == 2
12700 && insn[0] == 0x0F && insn[1] == 0xFB) {
12701 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12702 "psubq", Iop_Sub64x2, False );
12703 goto decode_success;
12706 /* 66 0F F9 = PSUBW */
12707 if (have66noF2noF3(pfx) && sz == 2
12708 && insn[0] == 0x0F && insn[1] == 0xF9) {
12709 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12710 "psubw", Iop_Sub16x8, False );
12711 goto decode_success;
12714 /* 66 0F E8 = PSUBSB */
12715 if (have66noF2noF3(pfx) && sz == 2
12716 && insn[0] == 0x0F && insn[1] == 0xE8) {
12717 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12718 "psubsb", Iop_QSub8Sx16, False );
12719 goto decode_success;
12722 /* 66 0F E9 = PSUBSW */
12723 if (have66noF2noF3(pfx) && sz == 2
12724 && insn[0] == 0x0F && insn[1] == 0xE9) {
12725 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12726 "psubsw", Iop_QSub16Sx8, False );
12727 goto decode_success;
12730 /* 66 0F D8 = PSUBUSB */
12731 if (have66noF2noF3(pfx) && sz == 2
12732 && insn[0] == 0x0F && insn[1] == 0xD8) {
12733 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12734 "psubusb", Iop_QSub8Ux16, False );
12735 goto decode_success;
12738 /* 66 0F D9 = PSUBUSW */
12739 if (have66noF2noF3(pfx) && sz == 2
12740 && insn[0] == 0x0F && insn[1] == 0xD9) {
12741 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12742 "psubusw", Iop_QSub16Ux8, False );
12743 goto decode_success;
12746 /* 66 0F 68 = PUNPCKHBW */
12747 if (have66noF2noF3(pfx) && sz == 2
12748 && insn[0] == 0x0F && insn[1] == 0x68) {
12749 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12751 Iop_InterleaveHI8x16, True );
12752 goto decode_success;
12755 /* 66 0F 6A = PUNPCKHDQ */
12756 if (have66noF2noF3(pfx) && sz == 2
12757 && insn[0] == 0x0F && insn[1] == 0x6A) {
12758 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12760 Iop_InterleaveHI32x4, True );
12761 goto decode_success;
12764 /* 66 0F 6D = PUNPCKHQDQ */
12765 if (have66noF2noF3(pfx) && sz == 2
12766 && insn[0] == 0x0F && insn[1] == 0x6D) {
12767 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12769 Iop_InterleaveHI64x2, True );
12770 goto decode_success;
12773 /* 66 0F 69 = PUNPCKHWD */
12774 if (have66noF2noF3(pfx) && sz == 2
12775 && insn[0] == 0x0F && insn[1] == 0x69) {
12776 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12778 Iop_InterleaveHI16x8, True );
12779 goto decode_success;
12782 /* 66 0F 60 = PUNPCKLBW */
12783 if (have66noF2noF3(pfx) && sz == 2
12784 && insn[0] == 0x0F && insn[1] == 0x60) {
12785 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12787 Iop_InterleaveLO8x16, True );
12788 goto decode_success;
12791 /* 66 0F 62 = PUNPCKLDQ */
12792 if (have66noF2noF3(pfx) && sz == 2
12793 && insn[0] == 0x0F && insn[1] == 0x62) {
12794 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12796 Iop_InterleaveLO32x4, True );
12797 goto decode_success;
12800 /* 66 0F 6C = PUNPCKLQDQ */
12801 if (have66noF2noF3(pfx) && sz == 2
12802 && insn[0] == 0x0F && insn[1] == 0x6C) {
12803 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12805 Iop_InterleaveLO64x2, True );
12806 goto decode_success;
12809 /* 66 0F 61 = PUNPCKLWD */
12810 if (have66noF2noF3(pfx) && sz == 2
12811 && insn[0] == 0x0F && insn[1] == 0x61) {
12812 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
12814 Iop_InterleaveLO16x8, True );
12815 goto decode_success;
12818 /* 66 0F EF = PXOR */
12819 if (have66noF2noF3(pfx) && sz == 2
12820 && insn[0] == 0x0F && insn[1] == 0xEF) {
12821 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pxor", Iop_XorV128 );
12822 goto decode_success;
12825 //.. //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
12826 //.. //-- if (insn[0] == 0x0F && insn[1] == 0xAE
12827 //.. //-- && (!epartIsReg(insn[2]))
12828 //.. //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
12829 //.. //-- Bool store = gregOfRM(insn[2]) == 0;
12830 //.. //-- vg_assert(sz == 4);
12831 //.. //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
12832 //.. //-- t1 = LOW24(pair);
12833 //.. //-- eip += 2+HI8(pair);
12834 //.. //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
12835 //.. //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
12836 //.. //-- Lit16, (UShort)insn[2],
12837 //.. //-- TempReg, t1 );
12838 //.. //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
12839 //.. //-- goto decode_success;
12842 /* 0F AE /7 = CLFLUSH -- flush cache line */
12843 if (haveNo66noF2noF3(pfx) && sz == 4
12844 && insn[0] == 0x0F && insn[1] == 0xAE
12845 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7) {
12847 /* This is something of a hack. We need to know the size of the
12848 cache line containing addr. Since we don't (easily), assume
12849 256 on the basis that no real cache would have a line that
12850 big. It's safe to invalidate more stuff than we need, just
12852 ULong lineszB = 256ULL;
12854 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12857 /* Round addr down to the start of the containing block. */
12862 mkU64( ~(lineszB-1) ))) );
12864 stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) );
12866 irsb->jumpkind = Ijk_TInval;
12867 irsb->next = mkU64(guest_RIP_bbstart+delta);
12868 dres.whatNext = Dis_StopHere;
12870 DIP("clflush %s\n", dis_buf);
12871 goto decode_success;
12874 /* ---------------------------------------------------- */
12875 /* --- end of the SSE/SSE2 decoder. --- */
12876 /* ---------------------------------------------------- */
12878 /* ---------------------------------------------------- */
12879 /* --- start of the SSE3 decoder. --- */
12880 /* ---------------------------------------------------- */
12882 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
12883 duplicating some lanes (2:2:0:0). */
12884 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
12885 duplicating some lanes (3:3:1:1). */
12886 if (haveF3no66noF2(pfx) && sz == 4
12887 && insn[0] == 0x0F && (insn[1] == 0x12 || insn[1] == 0x16)) {
12888 IRTemp s3, s2, s1, s0;
12889 IRTemp sV = newTemp(Ity_V128);
12890 Bool isH = insn[1] == 0x16;
12891 s3 = s2 = s1 = s0 = IRTemp_INVALID;
12894 if (epartIsReg(modrm)) {
12895 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12896 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
12897 nameXMMReg(eregOfRexRM(pfx,modrm)),
12898 nameXMMReg(gregOfRexRM(pfx,modrm)));
12901 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12902 gen_SEGV_if_not_16_aligned( addr );
12903 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12904 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
12906 nameXMMReg(gregOfRexRM(pfx,modrm)));
12910 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
12911 putXMMReg( gregOfRexRM(pfx,modrm),
12912 isH ? mk128from32s( s3, s3, s1, s1 )
12913 : mk128from32s( s2, s2, s0, s0 ) );
12914 goto decode_success;
12917 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
12918 duplicating some lanes (0:1:0:1). */
12919 if (haveF2no66noF3(pfx)
12920 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
12921 && insn[0] == 0x0F && insn[1] == 0x12) {
12922 IRTemp sV = newTemp(Ity_V128);
12923 IRTemp d0 = newTemp(Ity_I64);
12926 if (epartIsReg(modrm)) {
12927 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12928 DIP("movddup %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12929 nameXMMReg(gregOfRexRM(pfx,modrm)));
12931 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
12933 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12934 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
12935 DIP("movddup %s,%s\n", dis_buf,
12936 nameXMMReg(gregOfRexRM(pfx,modrm)));
12940 putXMMReg( gregOfRexRM(pfx,modrm),
12941 binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
12942 goto decode_success;
12945 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
12946 if (haveF2no66noF3(pfx) && sz == 4
12947 && insn[0] == 0x0F && insn[1] == 0xD0) {
12948 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
12949 IRTemp eV = newTemp(Ity_V128);
12950 IRTemp gV = newTemp(Ity_V128);
12951 IRTemp addV = newTemp(Ity_V128);
12952 IRTemp subV = newTemp(Ity_V128);
12953 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
12956 if (epartIsReg(modrm)) {
12957 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12958 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12959 nameXMMReg(gregOfRexRM(pfx,modrm)));
12962 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12963 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12964 DIP("addsubps %s,%s\n", dis_buf,
12965 nameXMMReg(gregOfRexRM(pfx,modrm)));
12969 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12971 assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
12972 assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
12974 breakup128to32s( addV, &a3, &a2, &a1, &a0 );
12975 breakup128to32s( subV, &s3, &s2, &s1, &s0 );
12977 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( a3, s2, a1, s0 ));
12978 goto decode_success;
12981 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
12982 if (have66noF2noF3(pfx) && sz == 2
12983 && insn[0] == 0x0F && insn[1] == 0xD0) {
12984 IRTemp eV = newTemp(Ity_V128);
12985 IRTemp gV = newTemp(Ity_V128);
12986 IRTemp addV = newTemp(Ity_V128);
12987 IRTemp subV = newTemp(Ity_V128);
12988 IRTemp a1 = newTemp(Ity_I64);
12989 IRTemp s0 = newTemp(Ity_I64);
12992 if (epartIsReg(modrm)) {
12993 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12994 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12995 nameXMMReg(gregOfRexRM(pfx,modrm)));
12998 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
12999 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
13000 DIP("addsubpd %s,%s\n", dis_buf,
13001 nameXMMReg(gregOfRexRM(pfx,modrm)));
13005 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13007 assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
13008 assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
13010 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
13011 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
13013 putXMMReg( gregOfRexRM(pfx,modrm),
13014 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
13015 goto decode_success;
13018 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
13019 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
13020 if (haveF2no66noF3(pfx) && sz == 4
13021 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
13022 IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
13023 IRTemp eV = newTemp(Ity_V128);
13024 IRTemp gV = newTemp(Ity_V128);
13025 IRTemp leftV = newTemp(Ity_V128);
13026 IRTemp rightV = newTemp(Ity_V128);
13027 Bool isAdd = insn[1] == 0x7C;
13028 HChar* str = isAdd ? "add" : "sub";
13029 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
13032 if (epartIsReg(modrm)) {
13033 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
13034 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
13035 nameXMMReg(gregOfRexRM(pfx,modrm)));
13038 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
13039 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
13040 DIP("h%sps %s,%s\n", str, dis_buf,
13041 nameXMMReg(gregOfRexRM(pfx,modrm)));
13045 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13047 breakup128to32s( eV, &e3, &e2, &e1, &e0 );
13048 breakup128to32s( gV, &g3, &g2, &g1, &g0 );
13050 assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
13051 assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
13053 putXMMReg( gregOfRexRM(pfx,modrm),
13054 binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
13055 mkexpr(leftV), mkexpr(rightV) ) );
13056 goto decode_success;
13059 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
13060 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
13061 if (have66noF2noF3(pfx) && sz == 2
13062 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
13063 IRTemp e1 = newTemp(Ity_I64);
13064 IRTemp e0 = newTemp(Ity_I64);
13065 IRTemp g1 = newTemp(Ity_I64);
13066 IRTemp g0 = newTemp(Ity_I64);
13067 IRTemp eV = newTemp(Ity_V128);
13068 IRTemp gV = newTemp(Ity_V128);
13069 IRTemp leftV = newTemp(Ity_V128);
13070 IRTemp rightV = newTemp(Ity_V128);
13071 Bool isAdd = insn[1] == 0x7C;
13072 HChar* str = isAdd ? "add" : "sub";
13075 if (epartIsReg(modrm)) {
13076 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
13077 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
13078 nameXMMReg(gregOfRexRM(pfx,modrm)));
13081 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
13082 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
13083 DIP("h%spd %s,%s\n", str, dis_buf,
13084 nameXMMReg(gregOfRexRM(pfx,modrm)));
13088 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13090 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
13091 assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
13092 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
13093 assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
13095 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
13096 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
13098 putXMMReg( gregOfRexRM(pfx,modrm),
13099 binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
13100 mkexpr(leftV), mkexpr(rightV) ) );
13101 goto decode_success;
13104 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
13105 if (haveF2no66noF3(pfx) && sz == 4
13106 && insn[0] == 0x0F && insn[1] == 0xF0) {
13108 if (epartIsReg(modrm)) {
13109 goto decode_failure;
13111 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
13112 putXMMReg( gregOfRexRM(pfx,modrm),
13113 loadLE(Ity_V128, mkexpr(addr)) );
13114 DIP("lddqu %s,%s\n", dis_buf,
13115 nameXMMReg(gregOfRexRM(pfx,modrm)));
13118 goto decode_success;
13121 /* ---------------------------------------------------- */
13122 /* --- end of the SSE3 decoder. --- */
13123 /* ---------------------------------------------------- */
13125 /* ---------------------------------------------------- */
13126 /* --- start of the SSSE3 decoder. --- */
13127 /* ---------------------------------------------------- */
13129 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
13130 Unsigned Bytes (MMX) */
13131 if (haveNo66noF2noF3(pfx)
13133 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
13134 IRTemp sV = newTemp(Ity_I64);
13135 IRTemp dV = newTemp(Ity_I64);
13136 IRTemp sVoddsSX = newTemp(Ity_I64);
13137 IRTemp sVevensSX = newTemp(Ity_I64);
13138 IRTemp dVoddsZX = newTemp(Ity_I64);
13139 IRTemp dVevensZX = newTemp(Ity_I64);
13143 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13145 if (epartIsReg(modrm)) {
13146 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13148 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13149 nameMMXReg(gregLO3ofRM(modrm)));
13151 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13152 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13154 DIP("pmaddubsw %s,%s\n", dis_buf,
13155 nameMMXReg(gregLO3ofRM(modrm)));
13158 /* compute dV unsigned x sV signed */
13160 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
13162 binop(Iop_SarN16x4,
13163 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
13166 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
13168 binop(Iop_ShrN16x4,
13169 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
13173 gregLO3ofRM(modrm),
13174 binop(Iop_QAdd16Sx4,
13175 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
13176 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
13179 goto decode_success;
13182 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
13183 Unsigned Bytes (XMM) */
13184 if (have66noF2noF3(pfx)
13185 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13186 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
13187 IRTemp sV = newTemp(Ity_V128);
13188 IRTemp dV = newTemp(Ity_V128);
13189 IRTemp sVoddsSX = newTemp(Ity_V128);
13190 IRTemp sVevensSX = newTemp(Ity_V128);
13191 IRTemp dVoddsZX = newTemp(Ity_V128);
13192 IRTemp dVevensZX = newTemp(Ity_V128);
13195 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13197 if (epartIsReg(modrm)) {
13198 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13200 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13201 nameXMMReg(gregOfRexRM(pfx,modrm)));
13203 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13204 gen_SEGV_if_not_16_aligned( addr );
13205 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13207 DIP("pmaddubsw %s,%s\n", dis_buf,
13208 nameXMMReg(gregOfRexRM(pfx,modrm)));
13211 /* compute dV unsigned x sV signed */
13213 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
13215 binop(Iop_SarN16x8,
13216 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
13219 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
13221 binop(Iop_ShrN16x8,
13222 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
13226 gregOfRexRM(pfx,modrm),
13227 binop(Iop_QAdd16Sx8,
13228 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
13229 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
13232 goto decode_success;
13235 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
13236 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
13237 mmx) and G to G (mmx). */
13238 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
13239 mmx) and G to G (mmx). */
13240 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
13242 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
13244 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
13246 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
13249 if (haveNo66noF2noF3(pfx)
13251 && insn[0] == 0x0F && insn[1] == 0x38
13252 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
13253 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
13254 HChar* str = "???";
13255 IROp opV64 = Iop_INVALID;
13256 IROp opCatO = Iop_CatOddLanes16x4;
13257 IROp opCatE = Iop_CatEvenLanes16x4;
13258 IRTemp sV = newTemp(Ity_I64);
13259 IRTemp dV = newTemp(Ity_I64);
13264 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
13265 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
13266 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
13267 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
13268 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
13269 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
13270 default: vassert(0);
13272 if (insn[2] == 0x02 || insn[2] == 0x06) {
13273 opCatO = Iop_InterleaveHI32x2;
13274 opCatE = Iop_InterleaveLO32x2;
13278 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13280 if (epartIsReg(modrm)) {
13281 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13283 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
13284 nameMMXReg(gregLO3ofRM(modrm)));
13286 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13287 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13289 DIP("ph%s %s,%s\n", str, dis_buf,
13290 nameMMXReg(gregLO3ofRM(modrm)));
13294 gregLO3ofRM(modrm),
13296 binop(opCatE,mkexpr(sV),mkexpr(dV)),
13297 binop(opCatO,mkexpr(sV),mkexpr(dV))
13300 goto decode_success;
13303 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
13304 xmm) and G to G (xmm). */
13305 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
13306 xmm) and G to G (xmm). */
13307 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
13309 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
13311 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
13313 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
13316 if (have66noF2noF3(pfx)
13317 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13318 && insn[0] == 0x0F && insn[1] == 0x38
13319 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
13320 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
13321 HChar* str = "???";
13322 IROp opV64 = Iop_INVALID;
13323 IROp opCatO = Iop_CatOddLanes16x4;
13324 IROp opCatE = Iop_CatEvenLanes16x4;
13325 IRTemp sV = newTemp(Ity_V128);
13326 IRTemp dV = newTemp(Ity_V128);
13327 IRTemp sHi = newTemp(Ity_I64);
13328 IRTemp sLo = newTemp(Ity_I64);
13329 IRTemp dHi = newTemp(Ity_I64);
13330 IRTemp dLo = newTemp(Ity_I64);
13335 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
13336 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
13337 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
13338 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
13339 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
13340 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
13341 default: vassert(0);
13343 if (insn[2] == 0x02 || insn[2] == 0x06) {
13344 opCatO = Iop_InterleaveHI32x2;
13345 opCatE = Iop_InterleaveLO32x2;
13348 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13350 if (epartIsReg(modrm)) {
13351 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
13352 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
13353 nameXMMReg(gregOfRexRM(pfx,modrm)));
13356 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13357 gen_SEGV_if_not_16_aligned( addr );
13358 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13359 DIP("ph%s %s,%s\n", str, dis_buf,
13360 nameXMMReg(gregOfRexRM(pfx,modrm)));
13364 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13365 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13366 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13367 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13369 /* This isn't a particularly efficient way to compute the
13370 result, but at least it avoids a proliferation of IROps,
13371 hence avoids complication all the backends. */
13373 gregOfRexRM(pfx,modrm),
13374 binop(Iop_64HLtoV128,
13376 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
13377 binop(opCatO,mkexpr(sHi),mkexpr(sLo))
13380 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
13381 binop(opCatO,mkexpr(dHi),mkexpr(dLo))
13385 goto decode_success;
13388 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
13390 if (haveNo66noF2noF3(pfx)
13392 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
13393 IRTemp sV = newTemp(Ity_I64);
13394 IRTemp dV = newTemp(Ity_I64);
13398 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13400 if (epartIsReg(modrm)) {
13401 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13403 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13404 nameMMXReg(gregLO3ofRM(modrm)));
13406 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13407 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13409 DIP("pmulhrsw %s,%s\n", dis_buf,
13410 nameMMXReg(gregLO3ofRM(modrm)));
13414 gregLO3ofRM(modrm),
13415 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
13417 goto decode_success;
13420 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
13422 if (have66noF2noF3(pfx)
13423 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13424 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
13425 IRTemp sV = newTemp(Ity_V128);
13426 IRTemp dV = newTemp(Ity_V128);
13427 IRTemp sHi = newTemp(Ity_I64);
13428 IRTemp sLo = newTemp(Ity_I64);
13429 IRTemp dHi = newTemp(Ity_I64);
13430 IRTemp dLo = newTemp(Ity_I64);
13433 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13435 if (epartIsReg(modrm)) {
13436 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13438 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13439 nameXMMReg(gregOfRexRM(pfx,modrm)));
13441 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13442 gen_SEGV_if_not_16_aligned( addr );
13443 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13445 DIP("pmulhrsw %s,%s\n", dis_buf,
13446 nameXMMReg(gregOfRexRM(pfx,modrm)));
13449 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13450 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13451 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13452 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13455 gregOfRexRM(pfx,modrm),
13456 binop(Iop_64HLtoV128,
13457 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
13458 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
13461 goto decode_success;
13464 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
13465 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
13466 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
13467 if (haveNo66noF2noF3(pfx)
13469 && insn[0] == 0x0F && insn[1] == 0x38
13470 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
13471 IRTemp sV = newTemp(Ity_I64);
13472 IRTemp dV = newTemp(Ity_I64);
13473 HChar* str = "???";
13477 case 0x08: laneszB = 1; str = "b"; break;
13478 case 0x09: laneszB = 2; str = "w"; break;
13479 case 0x0A: laneszB = 4; str = "d"; break;
13480 default: vassert(0);
13485 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13487 if (epartIsReg(modrm)) {
13488 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13490 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
13491 nameMMXReg(gregLO3ofRM(modrm)));
13493 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13494 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13496 DIP("psign%s %s,%s\n", str, dis_buf,
13497 nameMMXReg(gregLO3ofRM(modrm)));
13501 gregLO3ofRM(modrm),
13502 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
13504 goto decode_success;
13507 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
13508 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
13509 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
13510 if (have66noF2noF3(pfx)
13511 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13512 && insn[0] == 0x0F && insn[1] == 0x38
13513 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
13514 IRTemp sV = newTemp(Ity_V128);
13515 IRTemp dV = newTemp(Ity_V128);
13516 IRTemp sHi = newTemp(Ity_I64);
13517 IRTemp sLo = newTemp(Ity_I64);
13518 IRTemp dHi = newTemp(Ity_I64);
13519 IRTemp dLo = newTemp(Ity_I64);
13520 HChar* str = "???";
13524 case 0x08: laneszB = 1; str = "b"; break;
13525 case 0x09: laneszB = 2; str = "w"; break;
13526 case 0x0A: laneszB = 4; str = "d"; break;
13527 default: vassert(0);
13531 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13533 if (epartIsReg(modrm)) {
13534 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13536 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
13537 nameXMMReg(gregOfRexRM(pfx,modrm)));
13539 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13540 gen_SEGV_if_not_16_aligned( addr );
13541 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13543 DIP("psign%s %s,%s\n", str, dis_buf,
13544 nameXMMReg(gregOfRexRM(pfx,modrm)));
13547 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13548 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13549 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13550 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13553 gregOfRexRM(pfx,modrm),
13554 binop(Iop_64HLtoV128,
13555 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
13556 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
13559 goto decode_success;
13562 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
13563 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
13564 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
13565 if (haveNo66noF2noF3(pfx)
13567 && insn[0] == 0x0F && insn[1] == 0x38
13568 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
13569 IRTemp sV = newTemp(Ity_I64);
13570 HChar* str = "???";
13574 case 0x1C: laneszB = 1; str = "b"; break;
13575 case 0x1D: laneszB = 2; str = "w"; break;
13576 case 0x1E: laneszB = 4; str = "d"; break;
13577 default: vassert(0);
13583 if (epartIsReg(modrm)) {
13584 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13586 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
13587 nameMMXReg(gregLO3ofRM(modrm)));
13589 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13590 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13592 DIP("pabs%s %s,%s\n", str, dis_buf,
13593 nameMMXReg(gregLO3ofRM(modrm)));
13597 gregLO3ofRM(modrm),
13598 dis_PABS_helper( mkexpr(sV), laneszB )
13600 goto decode_success;
13603 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
13604 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
13605 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
13606 if (have66noF2noF3(pfx)
13607 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13608 && insn[0] == 0x0F && insn[1] == 0x38
13609 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
13610 IRTemp sV = newTemp(Ity_V128);
13611 IRTemp sHi = newTemp(Ity_I64);
13612 IRTemp sLo = newTemp(Ity_I64);
13613 HChar* str = "???";
13617 case 0x1C: laneszB = 1; str = "b"; break;
13618 case 0x1D: laneszB = 2; str = "w"; break;
13619 case 0x1E: laneszB = 4; str = "d"; break;
13620 default: vassert(0);
13625 if (epartIsReg(modrm)) {
13626 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13628 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
13629 nameXMMReg(gregOfRexRM(pfx,modrm)));
13631 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13632 gen_SEGV_if_not_16_aligned( addr );
13633 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13635 DIP("pabs%s %s,%s\n", str, dis_buf,
13636 nameXMMReg(gregOfRexRM(pfx,modrm)));
13639 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13640 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13643 gregOfRexRM(pfx,modrm),
13644 binop(Iop_64HLtoV128,
13645 dis_PABS_helper( mkexpr(sHi), laneszB ),
13646 dis_PABS_helper( mkexpr(sLo), laneszB )
13649 goto decode_success;
13652 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
13653 if (haveNo66noF2noF3(pfx) && sz == 4
13654 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
13655 IRTemp sV = newTemp(Ity_I64);
13656 IRTemp dV = newTemp(Ity_I64);
13657 IRTemp res = newTemp(Ity_I64);
13661 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13663 if (epartIsReg(modrm)) {
13664 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13665 d64 = (Long)insn[3+1];
13667 DIP("palignr $%d,%s,%s\n", (Int)d64,
13668 nameMMXReg(eregLO3ofRM(modrm)),
13669 nameMMXReg(gregLO3ofRM(modrm)));
13671 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 );
13672 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13673 d64 = (Long)insn[3+alen];
13675 DIP("palignr $%d%s,%s\n", (Int)d64,
13677 nameMMXReg(gregLO3ofRM(modrm)));
13681 assign( res, mkexpr(sV) );
13683 else if (d64 >= 1 && d64 <= 7) {
13686 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
13687 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
13690 else if (d64 == 8) {
13691 assign( res, mkexpr(dV) );
13693 else if (d64 >= 9 && d64 <= 15) {
13694 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
13696 else if (d64 >= 16 && d64 <= 255) {
13697 assign( res, mkU64(0) );
13702 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
13703 goto decode_success;
13706 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
13707 if (have66noF2noF3(pfx)
13708 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13709 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
13710 IRTemp sV = newTemp(Ity_V128);
13711 IRTemp dV = newTemp(Ity_V128);
13712 IRTemp sHi = newTemp(Ity_I64);
13713 IRTemp sLo = newTemp(Ity_I64);
13714 IRTemp dHi = newTemp(Ity_I64);
13715 IRTemp dLo = newTemp(Ity_I64);
13716 IRTemp rHi = newTemp(Ity_I64);
13717 IRTemp rLo = newTemp(Ity_I64);
13720 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13722 if (epartIsReg(modrm)) {
13723 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13724 d64 = (Long)insn[3+1];
13726 DIP("palignr $%d,%s,%s\n", (Int)d64,
13727 nameXMMReg(eregOfRexRM(pfx,modrm)),
13728 nameXMMReg(gregOfRexRM(pfx,modrm)));
13730 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 );
13731 gen_SEGV_if_not_16_aligned( addr );
13732 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13733 d64 = (Long)insn[3+alen];
13735 DIP("palignr $%d,%s,%s\n", (Int)d64,
13737 nameXMMReg(gregOfRexRM(pfx,modrm)));
13740 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13741 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13742 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13743 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13746 assign( rHi, mkexpr(sHi) );
13747 assign( rLo, mkexpr(sLo) );
13749 else if (d64 >= 1 && d64 <= 7) {
13750 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d64) );
13751 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d64) );
13753 else if (d64 == 8) {
13754 assign( rHi, mkexpr(dLo) );
13755 assign( rLo, mkexpr(sHi) );
13757 else if (d64 >= 9 && d64 <= 15) {
13758 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d64-8) );
13759 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d64-8) );
13761 else if (d64 == 16) {
13762 assign( rHi, mkexpr(dHi) );
13763 assign( rLo, mkexpr(dLo) );
13765 else if (d64 >= 17 && d64 <= 23) {
13766 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-16))) );
13767 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d64-16) );
13769 else if (d64 == 24) {
13770 assign( rHi, mkU64(0) );
13771 assign( rLo, mkexpr(dHi) );
13773 else if (d64 >= 25 && d64 <= 31) {
13774 assign( rHi, mkU64(0) );
13775 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-24))) );
13777 else if (d64 >= 32 && d64 <= 255) {
13778 assign( rHi, mkU64(0) );
13779 assign( rLo, mkU64(0) );
13785 gregOfRexRM(pfx,modrm),
13786 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
13788 goto decode_success;
13791 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
13792 if (haveNo66noF2noF3(pfx)
13794 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
13795 IRTemp sV = newTemp(Ity_I64);
13796 IRTemp dV = newTemp(Ity_I64);
13800 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13802 if (epartIsReg(modrm)) {
13803 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13805 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13806 nameMMXReg(gregLO3ofRM(modrm)));
13808 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13809 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13811 DIP("pshufb %s,%s\n", dis_buf,
13812 nameMMXReg(gregLO3ofRM(modrm)));
13816 gregLO3ofRM(modrm),
13819 /* permute the lanes */
13823 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
13825 /* mask off lanes which have (index & 0x80) == 0x80 */
13826 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
13829 goto decode_success;
13832 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
13833 if (have66noF2noF3(pfx)
13834 && (sz == 2 || /*redundant REX.W*/ sz == 8)
13835 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
13836 IRTemp sV = newTemp(Ity_V128);
13837 IRTemp dV = newTemp(Ity_V128);
13838 IRTemp sHi = newTemp(Ity_I64);
13839 IRTemp sLo = newTemp(Ity_I64);
13840 IRTemp dHi = newTemp(Ity_I64);
13841 IRTemp dLo = newTemp(Ity_I64);
13842 IRTemp rHi = newTemp(Ity_I64);
13843 IRTemp rLo = newTemp(Ity_I64);
13844 IRTemp sevens = newTemp(Ity_I64);
13845 IRTemp mask0x80hi = newTemp(Ity_I64);
13846 IRTemp mask0x80lo = newTemp(Ity_I64);
13847 IRTemp maskBit3hi = newTemp(Ity_I64);
13848 IRTemp maskBit3lo = newTemp(Ity_I64);
13849 IRTemp sAnd7hi = newTemp(Ity_I64);
13850 IRTemp sAnd7lo = newTemp(Ity_I64);
13851 IRTemp permdHi = newTemp(Ity_I64);
13852 IRTemp permdLo = newTemp(Ity_I64);
13855 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13857 if (epartIsReg(modrm)) {
13858 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13860 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13861 nameXMMReg(gregOfRexRM(pfx,modrm)));
13863 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
13864 gen_SEGV_if_not_16_aligned( addr );
13865 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13867 DIP("pshufb %s,%s\n", dis_buf,
13868 nameXMMReg(gregOfRexRM(pfx,modrm)));
13871 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
13872 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
13873 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
13874 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
13876 assign( sevens, mkU64(0x0707070707070707ULL) );
13879 mask0x80hi = Not(SarN8x8(sHi,7))
13880 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
13881 sAnd7hi = And(sHi,sevens)
13882 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
13883 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
13884 rHi = And(permdHi,mask0x80hi)
13888 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
13893 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
13896 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
13903 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
13904 mkexpr(maskBit3hi)),
13906 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
13907 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
13909 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
13911 /* And the same for the lower half of the result. What fun. */
13915 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
13920 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
13923 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
13930 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
13931 mkexpr(maskBit3lo)),
13933 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
13934 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
13936 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
13939 gregOfRexRM(pfx,modrm),
13940 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
13942 goto decode_success;
13945 /* ---------------------------------------------------- */
13946 /* --- end of the SSSE3 decoder. --- */
13947 /* ---------------------------------------------------- */
13949 /* ---------------------------------------------------- */
13950 /* --- start of the SSE4 decoder --- */
13951 /* ---------------------------------------------------- */
13953 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
13954 Blend Packed Double Precision Floating-Point Values (XMM) */
13955 if ( have66noF2noF3( pfx )
13957 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0D ) {
13960 UShort imm8_mask_16;
13962 IRTemp dst_vec = newTemp(Ity_V128);
13963 IRTemp src_vec = newTemp(Ity_V128);
13964 IRTemp imm8_mask = newTemp(Ity_V128);
13967 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
13969 if ( epartIsReg( modrm ) ) {
13970 imm8 = (Int)insn[4];
13971 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
13973 DIP( "blendpd $%d, %s,%s\n", imm8,
13974 nameXMMReg( eregOfRexRM(pfx, modrm) ),
13975 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
13977 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
13978 1/* imm8 is 1 byte after the amode */ );
13979 gen_SEGV_if_not_16_aligned( addr );
13980 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
13981 imm8 = (Int)insn[2+alen+1];
13983 DIP( "blendpd $%d, %s,%s\n",
13984 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
13987 switch( imm8 & 3 ) {
13988 case 0: imm8_mask_16 = 0x0000; break;
13989 case 1: imm8_mask_16 = 0x00FF; break;
13990 case 2: imm8_mask_16 = 0xFF00; break;
13991 case 3: imm8_mask_16 = 0xFFFF; break;
13992 default: vassert(0); break;
13994 assign( imm8_mask, mkV128( imm8_mask_16 ) );
13996 putXMMReg( gregOfRexRM(pfx, modrm),
13998 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ),
13999 binop( Iop_AndV128, mkexpr(dst_vec),
14000 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
14002 goto decode_success;
14006 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
14007 Blend Packed Single Precision Floating-Point Values (XMM) */
14008 if ( have66noF2noF3( pfx )
14010 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0C ) {
14013 IRTemp dst_vec = newTemp(Ity_V128);
14014 IRTemp src_vec = newTemp(Ity_V128);
14018 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
14020 if ( epartIsReg( modrm ) ) {
14021 imm8 = (Int)insn[3+1];
14022 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14024 DIP( "blendps $%d, %s,%s\n", imm8,
14025 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14026 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14028 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
14029 1/* imm8 is 1 byte after the amode */ );
14030 gen_SEGV_if_not_16_aligned( addr );
14031 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
14032 imm8 = (Int)insn[3+alen];
14034 DIP( "blendpd $%d, %s,%s\n",
14035 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14038 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 0x0F0F,
14039 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 0xF0F0, 0xF0FF,
14040 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF };
14041 IRTemp imm8_mask = newTemp(Ity_V128);
14042 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
14044 putXMMReg( gregOfRexRM(pfx, modrm),
14046 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ),
14047 binop( Iop_AndV128, mkexpr(dst_vec),
14048 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
14050 goto decode_success;
14054 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
14055 Blend Packed Words (XMM) */
14056 if ( have66noF2noF3( pfx )
14058 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0E ) {
14061 IRTemp dst_vec = newTemp(Ity_V128);
14062 IRTemp src_vec = newTemp(Ity_V128);
14066 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
14068 if ( epartIsReg( modrm ) ) {
14069 imm8 = (Int)insn[3+1];
14070 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14072 DIP( "pblendw $%d, %s,%s\n", imm8,
14073 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14074 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14076 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
14077 1/* imm8 is 1 byte after the amode */ );
14078 gen_SEGV_if_not_16_aligned( addr );
14079 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
14080 imm8 = (Int)insn[3+alen];
14082 DIP( "pblendw $%d, %s,%s\n",
14083 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14086 /* Make w be a 16-bit version of imm8, formed by duplicating each
14090 for (i = 0; i < 8; i++) {
14091 if (imm8 & (1 << i))
14092 imm16 |= (3 << (2*i));
14094 IRTemp imm16_mask = newTemp(Ity_V128);
14095 assign( imm16_mask, mkV128( imm16 ));
14097 putXMMReg( gregOfRexRM(pfx, modrm),
14099 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm16_mask) ),
14100 binop( Iop_AndV128, mkexpr(dst_vec),
14101 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
14103 goto decode_success;
14107 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
14108 * Carry-less multiplication of selected XMM quadwords into XMM
14109 * registers (a.k.a multiplication of polynomials over GF(2))
14111 if ( have66noF2noF3( pfx )
14113 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x44 ) {
14116 IRTemp svec = newTemp(Ity_V128);
14117 IRTemp dvec = newTemp(Ity_V128);
14121 assign( dvec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
14123 if ( epartIsReg( modrm ) ) {
14124 imm8 = (Int)insn[4];
14125 assign( svec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14127 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
14128 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14129 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14131 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
14132 1/* imm8 is 1 byte after the amode */ );
14133 gen_SEGV_if_not_16_aligned( addr );
14134 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
14135 imm8 = (Int)insn[2+alen+1];
14137 DIP( "pclmulqdq $%d, %s,%s\n",
14138 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14141 t0 = newTemp(Ity_I64);
14142 t1 = newTemp(Ity_I64);
14143 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64, mkexpr(dvec)));
14144 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64, mkexpr(svec)));
14146 t2 = newTemp(Ity_I64);
14147 t3 = newTemp(Ity_I64);
14151 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
14153 mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
14154 &amd64g_calculate_pclmul, args));
14155 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
14157 mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
14158 &amd64g_calculate_pclmul, args));
14160 IRTemp res = newTemp(Ity_V128);
14161 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
14162 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14164 goto decode_success;
14167 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
14168 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
14169 if ( have66noF2noF3( pfx )
14171 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x41 ) {
14174 IRTemp src_vec = newTemp(Ity_V128);
14175 IRTemp dst_vec = newTemp(Ity_V128);
14176 IRTemp and_vec = newTemp(Ity_V128);
14177 IRTemp sum_vec = newTemp(Ity_V128);
14181 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
14183 if ( epartIsReg( modrm ) ) {
14184 imm8 = (Int)insn[4];
14185 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14187 DIP( "dppd $%d, %s,%s\n", imm8,
14188 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14189 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14191 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
14192 1/* imm8 is 1 byte after the amode */ );
14193 gen_SEGV_if_not_16_aligned( addr );
14194 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
14195 imm8 = (Int)insn[2+alen+1];
14197 DIP( "dppd $%d, %s,%s\n",
14198 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14201 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
14203 assign( and_vec, binop( Iop_AndV128,
14204 binop( Iop_Mul64Fx2,
14205 mkexpr(dst_vec), mkexpr(src_vec) ),
14206 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
14208 assign( sum_vec, binop( Iop_Add64F0x2,
14209 binop( Iop_InterleaveHI64x2,
14210 mkexpr(and_vec), mkexpr(and_vec) ),
14211 binop( Iop_InterleaveLO64x2,
14212 mkexpr(and_vec), mkexpr(and_vec) ) ) );
14214 putXMMReg( gregOfRexRM( pfx, modrm ),
14215 binop( Iop_AndV128,
14216 binop( Iop_InterleaveLO64x2,
14217 mkexpr(sum_vec), mkexpr(sum_vec) ),
14218 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
14220 goto decode_success;
14224 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
14225 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
14226 if ( have66noF2noF3( pfx )
14230 && insn[2] == 0x40 ) {
14233 IRTemp xmm1_vec = newTemp(Ity_V128);
14234 IRTemp xmm2_vec = newTemp(Ity_V128);
14235 IRTemp tmp_prod_vec = newTemp(Ity_V128);
14236 IRTemp prod_vec = newTemp(Ity_V128);
14237 IRTemp sum_vec = newTemp(Ity_V128);
14238 IRTemp v3, v2, v1, v0;
14239 v3 = v2 = v1 = v0 = IRTemp_INVALID;
14243 assign( xmm1_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
14245 if ( epartIsReg( modrm ) ) {
14246 imm8 = (Int)insn[4];
14247 assign( xmm2_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14249 DIP( "dpps $%d, %s,%s\n", imm8,
14250 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14251 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14253 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
14254 1/* imm8 is 1 byte after the amode */ );
14255 gen_SEGV_if_not_16_aligned( addr );
14256 assign( xmm2_vec, loadLE( Ity_V128, mkexpr(addr) ) );
14257 imm8 = (Int)insn[2+alen+1];
14259 DIP( "dpps $%d, %s,%s\n",
14260 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14263 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
14264 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
14265 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF };
14267 assign( tmp_prod_vec,
14268 binop( Iop_AndV128,
14269 binop( Iop_Mul32Fx4, mkexpr(xmm1_vec), mkexpr(xmm2_vec) ),
14270 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
14271 breakup128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
14272 assign( prod_vec, mk128from32s( v3, v1, v2, v0 ) );
14274 assign( sum_vec, binop( Iop_Add32Fx4,
14275 binop( Iop_InterleaveHI32x4,
14276 mkexpr(prod_vec), mkexpr(prod_vec) ),
14277 binop( Iop_InterleaveLO32x4,
14278 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
14280 putXMMReg( gregOfRexRM(pfx, modrm),
14281 binop( Iop_AndV128,
14282 binop( Iop_Add32Fx4,
14283 binop( Iop_InterleaveHI32x4,
14284 mkexpr(sum_vec), mkexpr(sum_vec) ),
14285 binop( Iop_InterleaveLO32x4,
14286 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
14287 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
14289 goto decode_success;
14293 /* 66 0F 3A 21 /r ib = INSERTPS xmm1, xmm2/m32, imm8
14294 Insert Packed Single Precision Floating-Point Value (XMM) */
14295 if ( have66noF2noF3( pfx )
14297 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x21 ) {
14303 IRTemp dstVec = newTemp(Ity_V128);
14304 IRTemp srcDWord = newTemp(Ity_I32);
14308 assign( dstVec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
14310 if ( epartIsReg( modrm ) ) {
14311 IRTemp src_vec = newTemp(Ity_V128);
14312 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14314 IRTemp src_lane_0 = IRTemp_INVALID;
14315 IRTemp src_lane_1 = IRTemp_INVALID;
14316 IRTemp src_lane_2 = IRTemp_INVALID;
14317 IRTemp src_lane_3 = IRTemp_INVALID;
14318 breakup128to32s( src_vec,
14319 &src_lane_3, &src_lane_2, &src_lane_1, &src_lane_0 );
14321 imm8 = (Int)insn[4];
14322 imm8_count_s = ((imm8 >> 6) & 3);
14323 switch( imm8_count_s ) {
14324 case 0: assign( srcDWord, mkexpr(src_lane_0) ); break;
14325 case 1: assign( srcDWord, mkexpr(src_lane_1) ); break;
14326 case 2: assign( srcDWord, mkexpr(src_lane_2) ); break;
14327 case 3: assign( srcDWord, mkexpr(src_lane_3) ); break;
14328 default: vassert(0); break;
14332 DIP( "insertps $%d, %s,%s\n", imm8,
14333 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14334 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14336 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
14337 1/* const imm8 is 1 byte after the amode */ );
14338 assign( srcDWord, loadLE( Ity_I32, mkexpr(addr) ) );
14339 imm8 = (Int)insn[2+alen+1];
14342 DIP( "insertps $%d, %s,%s\n",
14343 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14346 IRTemp dst_lane_0 = IRTemp_INVALID;
14347 IRTemp dst_lane_1 = IRTemp_INVALID;
14348 IRTemp dst_lane_2 = IRTemp_INVALID;
14349 IRTemp dst_lane_3 = IRTemp_INVALID;
14350 breakup128to32s( dstVec,
14351 &dst_lane_3, &dst_lane_2, &dst_lane_1, &dst_lane_0 );
14353 imm8_count_d = ((imm8 >> 4) & 3);
14354 switch( imm8_count_d ) {
14355 case 0: dst_lane_0 = srcDWord; break;
14356 case 1: dst_lane_1 = srcDWord; break;
14357 case 2: dst_lane_2 = srcDWord; break;
14358 case 3: dst_lane_3 = srcDWord; break;
14359 default: vassert(0); break;
14362 imm8_zmask = (imm8 & 15);
14363 IRTemp zero_32 = newTemp(Ity_I32);
14364 assign( zero_32, mkU32(0) );
14366 IRExpr* ire_vec_128 = mk128from32s(
14367 ((imm8_zmask & 8) == 8) ? zero_32 : dst_lane_3,
14368 ((imm8_zmask & 4) == 4) ? zero_32 : dst_lane_2,
14369 ((imm8_zmask & 2) == 2) ? zero_32 : dst_lane_1,
14370 ((imm8_zmask & 1) == 1) ? zero_32 : dst_lane_0 );
14372 putXMMReg( gregOfRexRM(pfx, modrm), ire_vec_128 );
14374 goto decode_success;
14378 /* 66 0F 3A 14 /r ib = PEXTRB reg/m8, xmm, imm8
14379 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg. (XMM) */
14380 if ( have66noF2noF3( pfx )
14382 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x14 ) {
14385 IRTemp xmm_vec = newTemp(Ity_V128);
14386 IRTemp sel_lane = newTemp(Ity_I32);
14387 IRTemp shr_lane = newTemp(Ity_I32);
14390 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
14391 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
14393 if ( epartIsReg( modrm ) ) {
14394 imm8 = (Int)insn[3+1];
14396 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14397 imm8 = (Int)insn[3+alen];
14399 switch( (imm8 >> 2) & 3 ) {
14400 case 0: assign( sel_lane, mkexpr(t0) ); break;
14401 case 1: assign( sel_lane, mkexpr(t1) ); break;
14402 case 2: assign( sel_lane, mkexpr(t2) ); break;
14403 case 3: assign( sel_lane, mkexpr(t3) ); break;
14404 default: vassert(0);
14407 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
14409 if ( epartIsReg( modrm ) ) {
14410 putIReg64( eregOfRexRM(pfx,modrm),
14412 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
14415 DIP( "pextrb $%d, %s,%s\n", imm8,
14416 nameXMMReg( gregOfRexRM(pfx, modrm) ),
14417 nameIReg64( eregOfRexRM(pfx, modrm) ) );
14419 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
14421 DIP( "$%d, pextrb %s,%s\n",
14422 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
14425 goto decode_success;
14429 /* 66 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
14430 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
14431 Note that this insn has the same opcodes as PEXTRQ, but
14432 here the REX.W bit is _not_ present */
14433 if ( have66noF2noF3( pfx )
14434 && sz == 2 /* REX.W is _not_ present */
14435 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) {
14438 IRTemp xmm_vec = newTemp(Ity_V128);
14439 IRTemp src_dword = newTemp(Ity_I32);
14442 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
14443 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
14445 if ( epartIsReg( modrm ) ) {
14446 imm8_10 = (Int)(insn[3+1] & 3);
14448 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14449 imm8_10 = (Int)(insn[3+alen] & 3);
14452 switch ( imm8_10 ) {
14453 case 0: assign( src_dword, mkexpr(t0) ); break;
14454 case 1: assign( src_dword, mkexpr(t1) ); break;
14455 case 2: assign( src_dword, mkexpr(t2) ); break;
14456 case 3: assign( src_dword, mkexpr(t3) ); break;
14457 default: vassert(0);
14460 if ( epartIsReg( modrm ) ) {
14461 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
14463 DIP( "pextrd $%d, %s,%s\n", imm8_10,
14464 nameXMMReg( gregOfRexRM(pfx, modrm) ),
14465 nameIReg32( eregOfRexRM(pfx, modrm) ) );
14467 storeLE( mkexpr(addr), mkexpr(src_dword) );
14469 DIP( "pextrd $%d, %s,%s\n",
14470 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
14473 goto decode_success;
14477 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
14478 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
14479 Note that this insn has the same opcodes as PEXTRD, but
14480 here the REX.W bit is present */
14481 if ( have66noF2noF3( pfx )
14482 && sz == 8 /* REX.W is present */
14483 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) {
14486 IRTemp xmm_vec = newTemp(Ity_V128);
14487 IRTemp src_qword = newTemp(Ity_I64);
14490 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
14492 if ( epartIsReg( modrm ) ) {
14493 imm8_0 = (Int)(insn[3+1] & 1);
14495 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14496 imm8_0 = (Int)(insn[3+alen] & 1);
14498 switch ( imm8_0 ) {
14499 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) ); break;
14500 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) ); break;
14501 default: vassert(0);
14504 if ( epartIsReg( modrm ) ) {
14505 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
14507 DIP( "pextrq $%d, %s,%s\n", imm8_0,
14508 nameXMMReg( gregOfRexRM(pfx, modrm) ),
14509 nameIReg64( eregOfRexRM(pfx, modrm) ) );
14511 storeLE( mkexpr(addr), mkexpr(src_qword) );
14513 DIP( "pextrq $%d, %s,%s\n",
14514 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
14517 goto decode_success;
14521 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
14522 Extract Word from xmm, store in mem or zero-extend + store in gen.reg. (XMM) */
14523 if ( have66noF2noF3( pfx )
14525 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x15 ) {
14528 IRTemp xmm_vec = newTemp(Ity_V128);
14529 IRTemp src_word = newTemp(Ity_I16);
14532 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
14533 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
14535 if ( epartIsReg( modrm ) ) {
14536 imm8_20 = (Int)(insn[3+1] & 7);
14538 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14539 imm8_20 = (Int)(insn[3+alen] & 7);
14542 switch ( imm8_20 ) {
14543 case 0: assign( src_word, unop(Iop_32to16, mkexpr(t0)) ); break;
14544 case 1: assign( src_word, unop(Iop_32HIto16, mkexpr(t0)) ); break;
14545 case 2: assign( src_word, unop(Iop_32to16, mkexpr(t1)) ); break;
14546 case 3: assign( src_word, unop(Iop_32HIto16, mkexpr(t1)) ); break;
14547 case 4: assign( src_word, unop(Iop_32to16, mkexpr(t2)) ); break;
14548 case 5: assign( src_word, unop(Iop_32HIto16, mkexpr(t2)) ); break;
14549 case 6: assign( src_word, unop(Iop_32to16, mkexpr(t3)) ); break;
14550 case 7: assign( src_word, unop(Iop_32HIto16, mkexpr(t3)) ); break;
14551 default: vassert(0);
14554 if ( epartIsReg( modrm ) ) {
14555 putIReg64( eregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(src_word)) );
14557 DIP( "pextrw $%d, %s,%s\n", imm8_20,
14558 nameXMMReg( gregOfRexRM(pfx, modrm) ),
14559 nameIReg64( eregOfRexRM(pfx, modrm) ) );
14561 storeLE( mkexpr(addr), mkexpr(src_word) );
14563 DIP( "pextrw $%d, %s,%s\n",
14564 imm8_20, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
14567 goto decode_success;
14571 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
14572 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
14573 if ( have66noF2noF3( pfx )
14574 && sz == 8 /* REX.W is present */
14575 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x22 ) {
14578 IRTemp src_elems = newTemp(Ity_I64);
14579 IRTemp src_vec = newTemp(Ity_V128);
14583 if ( epartIsReg( modrm ) ) {
14584 imm8_0 = (Int)(insn[3+1] & 1);
14585 assign( src_elems, getIReg64( eregOfRexRM(pfx,modrm) ) );
14587 DIP( "pinsrq $%d, %s,%s\n", imm8_0,
14588 nameIReg64( eregOfRexRM(pfx, modrm) ),
14589 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14591 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14592 imm8_0 = (Int)(insn[3+alen] & 1);
14593 assign( src_elems, loadLE( Ity_I64, mkexpr(addr) ) );
14595 DIP( "pinsrq $%d, %s,%s\n",
14596 imm8_0, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14600 if ( imm8_0 == 0 ) {
14602 assign( src_vec, binop( Iop_64HLtoV128, mkU64(0), mkexpr(src_elems) ) );
14605 assign( src_vec, binop( Iop_64HLtoV128, mkexpr(src_elems), mkU64(0) ) );
14608 putXMMReg( gregOfRexRM(pfx, modrm),
14609 binop( Iop_OrV128, mkexpr(src_vec),
14610 binop( Iop_AndV128,
14611 getXMMReg( gregOfRexRM(pfx, modrm) ),
14612 mkV128(mask) ) ) );
14614 goto decode_success;
14618 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
14619 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
14620 if ( have66noF2noF3( pfx )
14621 && sz == 2 /* REX.W is NOT present */
14622 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x22 ) {
14625 IRTemp src_elems = newTemp(Ity_I32);
14626 IRTemp src_vec = newTemp(Ity_V128);
14627 IRTemp z32 = newTemp(Ity_I32);
14631 if ( epartIsReg( modrm ) ) {
14632 imm8_10 = (Int)(insn[3+1] & 3);
14633 assign( src_elems, getIReg32( eregOfRexRM(pfx,modrm) ) );
14635 DIP( "pinsrd $%d, %s,%s\n", imm8_10,
14636 nameIReg32( eregOfRexRM(pfx, modrm) ),
14637 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14639 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14640 imm8_10 = (Int)(insn[3+alen] & 3);
14641 assign( src_elems, loadLE( Ity_I32, mkexpr(addr) ) );
14643 DIP( "pinsrd $%d, %s,%s\n",
14644 imm8_10, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14647 assign(z32, mkU32(0));
14651 case 3: mask = 0x0FFF;
14652 assign(src_vec, mk128from32s(src_elems, z32, z32, z32));
14654 case 2: mask = 0xF0FF;
14655 assign(src_vec, mk128from32s(z32, src_elems, z32, z32));
14657 case 1: mask = 0xFF0F;
14658 assign(src_vec, mk128from32s(z32, z32, src_elems, z32));
14660 case 0: mask = 0xFFF0;
14661 assign(src_vec, mk128from32s(z32, z32, z32, src_elems));
14663 default: vassert(0);
14666 putXMMReg( gregOfRexRM(pfx, modrm),
14667 binop( Iop_OrV128, mkexpr(src_vec),
14668 binop( Iop_AndV128,
14669 getXMMReg( gregOfRexRM(pfx, modrm) ),
14670 mkV128(mask) ) ) );
14672 goto decode_success;
14675 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
14676 Extract byte from r32/m8 and insert into xmm1 */
14677 if ( have66noF2noF3( pfx )
14679 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x20 ) {
14682 IRTemp new8 = newTemp(Ity_I64);
14686 if ( epartIsReg( modrm ) ) {
14687 imm8 = (Int)(insn[3+1] & 0xF);
14688 assign( new8, binop(Iop_And64,
14690 getIReg32(eregOfRexRM(pfx,modrm))),
14693 DIP( "pinsrb $%d,%s,%s\n", imm8,
14694 nameIReg32( eregOfRexRM(pfx, modrm) ),
14695 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14697 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14698 imm8 = (Int)(insn[3+alen] & 0xF);
14699 assign( new8, unop(Iop_8Uto64, loadLE( Ity_I8, mkexpr(addr) )));
14701 DIP( "pinsrb $%d,%s,%s\n",
14702 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14705 // Create a V128 value which has the selected byte in the
14706 // specified lane, and zeroes everywhere else.
14707 IRTemp tmp128 = newTemp(Ity_V128);
14708 IRTemp halfshift = newTemp(Ity_I64);
14709 assign(halfshift, binop(Iop_Shl64,
14710 mkexpr(new8), mkU8(8 * (imm8 & 7))));
14711 vassert(imm8 >= 0 && imm8 <= 15);
14713 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
14715 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
14718 UShort mask = ~(1 << imm8);
14720 putXMMReg( gregOfRexRM(pfx, modrm),
14723 binop( Iop_AndV128,
14724 getXMMReg( gregOfRexRM(pfx, modrm) ),
14725 mkV128(mask) ) ) );
14727 goto decode_success;
14731 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
14732 float from xmm reg and store in gen.reg or mem. This is
14733 identical to PEXTRD, except that REX.W appears to be ignored.
14735 if ( have66noF2noF3( pfx )
14736 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
14737 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x17 ) {
14740 IRTemp xmm_vec = newTemp(Ity_V128);
14741 IRTemp src_dword = newTemp(Ity_I32);
14744 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
14745 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
14747 if ( epartIsReg( modrm ) ) {
14748 imm8_10 = (Int)(insn[3+1] & 3);
14750 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
14751 imm8_10 = (Int)(insn[3+alen] & 3);
14754 switch ( imm8_10 ) {
14755 case 0: assign( src_dword, mkexpr(t0) ); break;
14756 case 1: assign( src_dword, mkexpr(t1) ); break;
14757 case 2: assign( src_dword, mkexpr(t2) ); break;
14758 case 3: assign( src_dword, mkexpr(t3) ); break;
14759 default: vassert(0);
14762 if ( epartIsReg( modrm ) ) {
14763 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
14765 DIP( "extractps $%d, %s,%s\n", imm8_10,
14766 nameXMMReg( gregOfRexRM(pfx, modrm) ),
14767 nameIReg32( eregOfRexRM(pfx, modrm) ) );
14769 storeLE( mkexpr(addr), mkexpr(src_dword) );
14771 DIP( "extractps $%d, %s,%s\n",
14772 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
14775 goto decode_success;
14779 /* 66 0F 38 37 = PCMPGTQ
14780 64x2 comparison (signed, presumably; the Intel docs don't say :-)
14782 if ( have66noF2noF3( pfx ) && sz == 2
14783 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x37) {
14784 /* FIXME: this needs an alignment check */
14785 delta = dis_SSEint_E_to_G( vbi, pfx, delta+3,
14786 "pcmpgtq", Iop_CmpGT64Sx2, False );
14787 goto decode_success;
14790 /* 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
14791 Maximum of Packed Signed Double Word Integers (XMM)
14792 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
14793 Minimum of Packed Signed Double Word Integers (XMM) */
14794 if ( have66noF2noF3( pfx ) && sz == 2
14795 && insn[0] == 0x0F && insn[1] == 0x38
14796 && (insn[2] == 0x3D || insn[2] == 0x39)) {
14797 /* FIXME: this needs an alignment check */
14798 Bool isMAX = insn[2] == 0x3D;
14799 delta = dis_SSEint_E_to_G(
14801 isMAX ? "pmaxsd" : "pminsd",
14802 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
14805 goto decode_success;
14808 /* 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
14809 Maximum of Packed Unsigned Doubleword Integers (XMM)
14810 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
14811 Minimum of Packed Unsigned Doubleword Integers (XMM) */
14812 if ( have66noF2noF3( pfx ) && sz == 2
14813 && insn[0] == 0x0F && insn[1] == 0x38
14814 && (insn[2] == 0x3F || insn[2] == 0x3B)) {
14815 /* FIXME: this needs an alignment check */
14816 Bool isMAX = insn[2] == 0x3F;
14817 delta = dis_SSEint_E_to_G(
14819 isMAX ? "pmaxud" : "pminud",
14820 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
14823 goto decode_success;
14826 /* 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
14827 Maximum of Packed Unsigned Word Integers (XMM)
14828 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
14829 Minimum of Packed Unsigned Word Integers (XMM)
14831 if ( have66noF2noF3( pfx ) && sz == 2
14832 && insn[0] == 0x0F && insn[1] == 0x38
14833 && (insn[2] == 0x3E || insn[2] == 0x3A)) {
14834 /* FIXME: this needs an alignment check */
14835 Bool isMAX = insn[2] == 0x3E;
14836 delta = dis_SSEint_E_to_G(
14838 isMAX ? "pmaxuw" : "pminuw",
14839 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
14842 goto decode_success;
14845 /* 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128
14847 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128
14850 if ( have66noF2noF3( pfx ) && sz == 2
14851 && insn[0] == 0x0F && insn[1] == 0x38
14852 && (insn[2] == 0x3C || insn[2] == 0x38)) {
14853 /* FIXME: this needs an alignment check */
14854 Bool isMAX = insn[2] == 0x3C;
14855 delta = dis_SSEint_E_to_G(
14857 isMAX ? "pmaxsb" : "pminsb",
14858 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
14861 goto decode_success;
14864 /* 66 0f 38 20 /r = PMOVSXBW xmm1, xmm2/m64
14865 Packed Move with Sign Extend from Byte to Word (XMM) */
14866 if ( have66noF2noF3( pfx )
14868 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x20 ) {
14872 IRTemp srcVec = newTemp(Ity_V128);
14874 if ( epartIsReg( modrm ) ) {
14875 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14877 DIP( "pmovsxbw %s,%s\n",
14878 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14879 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14881 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14883 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
14885 DIP( "pmovsxbw %s,%s\n",
14886 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14889 putXMMReg( gregOfRexRM(pfx, modrm),
14890 binop( Iop_SarN16x8,
14891 binop( Iop_ShlN16x8,
14892 binop( Iop_InterleaveLO8x16,
14893 IRExpr_Const( IRConst_V128(0) ),
14898 goto decode_success;
14902 /* 66 0f 38 21 /r = PMOVSXBD xmm1, xmm2/m32
14903 Packed Move with Sign Extend from Byte to DWord (XMM) */
14904 if ( have66noF2noF3( pfx )
14906 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x21 ) {
14910 IRTemp srcVec = newTemp(Ity_V128);
14912 if ( epartIsReg( modrm ) ) {
14913 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14915 DIP( "pmovsxbd %s,%s\n",
14916 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14917 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14919 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14921 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
14923 DIP( "pmovsxbd %s,%s\n",
14924 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14927 IRTemp zeroVec = newTemp(Ity_V128);
14928 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
14930 putXMMReg( gregOfRexRM(pfx, modrm),
14931 binop( Iop_SarN32x4,
14932 binop( Iop_ShlN32x4,
14933 binop( Iop_InterleaveLO8x16,
14935 binop( Iop_InterleaveLO8x16,
14937 mkexpr(srcVec) ) ),
14938 mkU8(24) ), mkU8(24) ) );
14940 goto decode_success;
14944 /* 66 0f 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
14945 Packed Move with Sign Extend from Byte to QWord (XMM) */
14946 if ( have66noF2noF3(pfx)
14948 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x22 ) {
14952 IRTemp srcBytes = newTemp(Ity_I16);
14954 if ( epartIsReg(modrm) ) {
14955 assign( srcBytes, getXMMRegLane16( eregOfRexRM(pfx, modrm), 0 ) );
14957 DIP( "pmovsxbq %s,%s\n",
14958 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14959 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14961 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14962 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
14964 DIP( "pmovsxbq %s,%s\n",
14965 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14968 putXMMReg( gregOfRexRM( pfx, modrm ),
14969 binop( Iop_64HLtoV128,
14972 mkexpr(srcBytes) ) ),
14974 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
14976 goto decode_success;
14980 /* 66 0f 38 23 /r = PMOVSXWD xmm1, xmm2/m64
14981 Packed Move with Sign Extend from Word to DWord (XMM) */
14982 if ( have66noF2noF3( pfx )
14984 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x23 ) {
14988 IRTemp srcVec = newTemp(Ity_V128);
14990 if ( epartIsReg(modrm) ) {
14991 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
14993 DIP( "pmovsxwd %s,%s\n",
14994 nameXMMReg( eregOfRexRM(pfx, modrm) ),
14995 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
14997 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
14999 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
15001 DIP( "pmovsxwd %s,%s\n",
15002 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15005 putXMMReg( gregOfRexRM(pfx, modrm),
15006 binop( Iop_SarN32x4,
15007 binop( Iop_ShlN32x4,
15008 binop( Iop_InterleaveLO16x8,
15009 IRExpr_Const( IRConst_V128(0) ),
15014 goto decode_success;
15018 /* 66 0f 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
15019 Packed Move with Sign Extend from Word to QWord (XMM) */
15020 if ( have66noF2noF3( pfx )
15022 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x24 ) {
15026 IRTemp srcBytes = newTemp(Ity_I32);
15028 if ( epartIsReg( modrm ) ) {
15029 assign( srcBytes, getXMMRegLane32( eregOfRexRM(pfx, modrm), 0 ) );
15031 DIP( "pmovsxwq %s,%s\n",
15032 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15033 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15035 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15036 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
15038 DIP( "pmovsxwq %s,%s\n",
15039 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15042 putXMMReg( gregOfRexRM( pfx, modrm ),
15043 binop( Iop_64HLtoV128,
15045 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
15047 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
15049 goto decode_success;
15053 /* 66 0f 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
15054 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
15055 if ( have66noF2noF3( pfx )
15057 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x25 ) {
15061 IRTemp srcBytes = newTemp(Ity_I64);
15063 if ( epartIsReg(modrm) ) {
15064 assign( srcBytes, getXMMRegLane64( eregOfRexRM(pfx, modrm), 0 ) );
15066 DIP( "pmovsxdq %s,%s\n",
15067 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15068 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15070 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15071 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
15073 DIP( "pmovsxdq %s,%s\n",
15074 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15077 putXMMReg( gregOfRexRM(pfx, modrm),
15078 binop( Iop_64HLtoV128,
15080 unop( Iop_64HIto32, mkexpr(srcBytes) ) ),
15082 unop( Iop_64to32, mkexpr(srcBytes) ) ) ) );
15084 goto decode_success;
15088 /* 66 0f 38 30 /r = PMOVZXBW xmm1, xmm2/m64
15089 Packed Move with Zero Extend from Byte to Word (XMM) */
15090 if ( have66noF2noF3(pfx)
15092 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x30 ) {
15096 IRTemp srcVec = newTemp(Ity_V128);
15098 if ( epartIsReg(modrm) ) {
15099 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
15101 DIP( "pmovzxbw %s,%s\n",
15102 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15103 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15105 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15107 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
15109 DIP( "pmovzxbw %s,%s\n",
15110 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15113 putXMMReg( gregOfRexRM(pfx, modrm),
15114 binop( Iop_InterleaveLO8x16,
15115 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
15117 goto decode_success;
15121 /* 66 0f 38 31 /r = PMOVZXBD xmm1, xmm2/m32
15122 Packed Move with Zero Extend from Byte to DWord (XMM) */
15123 if ( have66noF2noF3( pfx )
15125 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x31 ) {
15129 IRTemp srcVec = newTemp(Ity_V128);
15131 if ( epartIsReg(modrm) ) {
15132 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
15134 DIP( "pmovzxbd %s,%s\n",
15135 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15136 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15138 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15140 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
15142 DIP( "pmovzxbd %s,%s\n",
15143 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15146 IRTemp zeroVec = newTemp(Ity_V128);
15147 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
15149 putXMMReg( gregOfRexRM( pfx, modrm ),
15150 binop( Iop_InterleaveLO8x16,
15152 binop( Iop_InterleaveLO8x16,
15153 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
15155 goto decode_success;
15159 /* 66 0f 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
15160 Packed Move with Zero Extend from Byte to QWord (XMM) */
15161 if ( have66noF2noF3( pfx )
15163 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x32 ) {
15167 IRTemp srcVec = newTemp(Ity_V128);
15169 if ( epartIsReg(modrm) ) {
15170 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
15172 DIP( "pmovzxbq %s,%s\n",
15173 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15174 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15176 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15178 unop( Iop_32UtoV128,
15179 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ) ) ) );
15181 DIP( "pmovzxbq %s,%s\n",
15182 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15185 IRTemp zeroVec = newTemp(Ity_V128);
15186 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
15188 putXMMReg( gregOfRexRM( pfx, modrm ),
15189 binop( Iop_InterleaveLO8x16,
15191 binop( Iop_InterleaveLO8x16,
15193 binop( Iop_InterleaveLO8x16,
15194 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
15196 goto decode_success;
15200 /* 66 0f 38 33 /r = PMOVZXWD xmm1, xmm2/m64
15201 Packed Move with Zero Extend from Word to DWord (XMM) */
15202 if ( have66noF2noF3( pfx )
15204 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x33 ) {
15208 IRTemp srcVec = newTemp(Ity_V128);
15210 if ( epartIsReg(modrm) ) {
15211 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
15213 DIP( "pmovzxwd %s,%s\n",
15214 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15215 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15217 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15219 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
15221 DIP( "pmovzxwd %s,%s\n",
15222 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15225 putXMMReg( gregOfRexRM(pfx, modrm),
15226 binop( Iop_InterleaveLO16x8,
15227 IRExpr_Const( IRConst_V128(0) ),
15228 mkexpr(srcVec) ) );
15230 goto decode_success;
15234 /* 66 0f 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
15235 Packed Move with Zero Extend from Word to QWord (XMM) */
15236 if ( have66noF2noF3( pfx )
15238 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x34 ) {
15242 IRTemp srcVec = newTemp(Ity_V128);
15244 if ( epartIsReg( modrm ) ) {
15245 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
15247 DIP( "pmovzxwq %s,%s\n",
15248 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15249 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15251 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15253 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
15255 DIP( "pmovzxwq %s,%s\n",
15256 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15259 IRTemp zeroVec = newTemp( Ity_V128 );
15260 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
15262 putXMMReg( gregOfRexRM( pfx, modrm ),
15263 binop( Iop_InterleaveLO16x8,
15265 binop( Iop_InterleaveLO16x8,
15266 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
15268 goto decode_success;
15272 /* 66 0f 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
15273 Packed Move with Zero Extend from DWord to QWord (XMM) */
15274 if ( have66noF2noF3( pfx )
15276 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x35 ) {
15280 IRTemp srcVec = newTemp(Ity_V128);
15282 if ( epartIsReg(modrm) ) {
15283 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
15285 DIP( "pmovzxdq %s,%s\n",
15286 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15287 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15289 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15291 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
15293 DIP( "pmovzxdq %s,%s\n",
15294 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15297 putXMMReg( gregOfRexRM(pfx, modrm),
15298 binop( Iop_InterleaveLO32x4,
15299 IRExpr_Const( IRConst_V128(0) ),
15300 mkexpr(srcVec) ) );
15302 goto decode_success;
15306 /* 66 0f 38 40 /r = PMULLD xmm1, xmm2/m128
15307 32x4 integer multiply from xmm2/m128 to xmm1 */
15308 if ( have66noF2noF3( pfx )
15310 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x40 ) {
15314 IRTemp argL = newTemp(Ity_V128);
15315 IRTemp argR = newTemp(Ity_V128);
15317 if ( epartIsReg(modrm) ) {
15318 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
15320 DIP( "pmulld %s,%s\n",
15321 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15322 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15324 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15325 gen_SEGV_if_not_16_aligned( addr );
15326 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
15328 DIP( "pmulld %s,%s\n",
15329 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15332 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
15334 putXMMReg( gregOfRexRM(pfx, modrm),
15335 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
15337 goto decode_success;
15341 /* F3 0F B8 = POPCNT{W,L,Q}
15342 Count the number of 1 bits in a register
15344 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
15345 && insn[0] == 0x0F && insn[1] == 0xB8) {
15346 vassert(sz == 2 || sz == 4 || sz == 8);
15347 /*IRType*/ ty = szToITy(sz);
15348 IRTemp src = newTemp(ty);
15350 if (epartIsReg(modrm)) {
15351 assign(src, getIRegE(sz, pfx, modrm));
15353 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
15354 nameIRegG(sz, pfx, modrm));
15356 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0);
15357 assign(src, loadLE(ty, mkexpr(addr)));
15359 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
15360 nameIRegG(sz, pfx, modrm));
15363 IRTemp result = gen_POPCOUNT(ty, src);
15364 putIRegG(sz, pfx, modrm, mkexpr(result));
15366 // Update flags. This is pretty lame .. perhaps can do better
15367 // if this turns out to be performance critical.
15368 // O S A C P are cleared. Z is set if SRC == 0.
15369 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15370 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15371 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
15372 stmt( IRStmt_Put( OFFB_CC_DEP1,
15376 widenUto64(mkexpr(src)),
15378 mkU8(AMD64G_CC_SHIFT_Z))));
15380 goto decode_success;
15384 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
15385 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
15387 if (have66noF2noF3(pfx)
15389 && insn[0] == 0x0F && insn[1] == 0x3A
15390 && (insn[2] == 0x0B || insn[2] == 0x0A)) {
15392 Bool isD = insn[2] == 0x0B;
15393 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
15394 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
15399 if (epartIsReg(modrm)) {
15401 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
15402 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
15404 if (imm & ~7) goto decode_failure;
15406 DIP( "rounds%c $%d,%s,%s\n",
15408 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
15409 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15411 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15412 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
15413 imm = insn[3+alen];
15414 if (imm & ~7) goto decode_failure;
15416 DIP( "rounds%c $%d,%s,%s\n",
15418 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15421 /* (imm & 3) contains an Intel-encoded rounding mode. Because
15422 that encoding is the same as the encoding for IRRoundingMode,
15423 we can use that value directly in the IR as a rounding
15425 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
15426 (imm & 4) ? get_sse_roundingmode()
15431 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
15433 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
15435 goto decode_success;
15439 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
15440 if (have66noF2noF3(pfx)
15442 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x09) {
15444 IRTemp src0 = newTemp(Ity_F64);
15445 IRTemp src1 = newTemp(Ity_F64);
15446 IRTemp res0 = newTemp(Ity_F64);
15447 IRTemp res1 = newTemp(Ity_F64);
15448 IRTemp rm = newTemp(Ity_I32);
15453 if (epartIsReg(modrm)) {
15455 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
15457 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
15459 if (imm & ~7) goto decode_failure;
15461 DIP( "roundpd $%d,%s,%s\n",
15462 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
15463 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15465 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15466 gen_SEGV_if_not_16_aligned(addr);
15467 assign( src0, loadLE(Ity_F64,
15468 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
15469 assign( src1, loadLE(Ity_F64,
15470 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
15471 imm = insn[3+alen];
15472 if (imm & ~7) goto decode_failure;
15474 DIP( "roundpd $%d,%s,%s\n",
15475 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15478 /* (imm & 3) contains an Intel-encoded rounding mode. Because
15479 that encoding is the same as the encoding for IRRoundingMode,
15480 we can use that value directly in the IR as a rounding
15482 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
15484 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
15485 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );
15487 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
15488 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
15490 goto decode_success;
15494 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
15495 if (have66noF2noF3(pfx)
15497 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x08) {
15499 IRTemp src0 = newTemp(Ity_F32);
15500 IRTemp src1 = newTemp(Ity_F32);
15501 IRTemp src2 = newTemp(Ity_F32);
15502 IRTemp src3 = newTemp(Ity_F32);
15503 IRTemp res0 = newTemp(Ity_F32);
15504 IRTemp res1 = newTemp(Ity_F32);
15505 IRTemp res2 = newTemp(Ity_F32);
15506 IRTemp res3 = newTemp(Ity_F32);
15507 IRTemp rm = newTemp(Ity_I32);
15512 if (epartIsReg(modrm)) {
15514 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
15516 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
15518 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
15520 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
15522 if (imm & ~7) goto decode_failure;
15524 DIP( "roundps $%d,%s,%s\n",
15525 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
15526 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15528 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15529 gen_SEGV_if_not_16_aligned(addr);
15530 assign( src0, loadLE(Ity_F32,
15531 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
15532 assign( src1, loadLE(Ity_F32,
15533 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
15534 assign( src2, loadLE(Ity_F32,
15535 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
15536 assign( src3, loadLE(Ity_F32,
15537 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
15538 imm = insn[3+alen];
15539 if (imm & ~7) goto decode_failure;
15541 DIP( "roundps $%d,%s,%s\n",
15542 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15545 /* (imm & 3) contains an Intel-encoded rounding mode. Because
15546 that encoding is the same as the encoding for IRRoundingMode,
15547 we can use that value directly in the IR as a rounding
15549 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
15551 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
15552 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
15553 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
15554 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );
15556 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
15557 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
15558 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
15559 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
15561 goto decode_success;
15565 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension,
15566 which we can only decode if we're sure this is an AMD cpu that
15567 supports LZCNT, since otherwise it's BSR, which behaves
15569 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
15570 && insn[0] == 0x0F && insn[1] == 0xBD
15571 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
15572 vassert(sz == 2 || sz == 4 || sz == 8);
15573 /*IRType*/ ty = szToITy(sz);
15574 IRTemp src = newTemp(ty);
15576 if (epartIsReg(modrm)) {
15577 assign(src, getIRegE(sz, pfx, modrm));
15579 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
15580 nameIRegG(sz, pfx, modrm));
15582 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0);
15583 assign(src, loadLE(ty, mkexpr(addr)));
15585 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
15586 nameIRegG(sz, pfx, modrm));
15589 IRTemp res = gen_LZCNT(ty, src);
15590 putIRegG(sz, pfx, modrm, mkexpr(res));
15592 // Update flags. This is pretty lame .. perhaps can do better
15593 // if this turns out to be performance critical.
15594 // O S A P are cleared. Z is set if RESULT == 0.
15595 // C is set if SRC is zero.
15596 IRTemp src64 = newTemp(Ity_I64);
15597 IRTemp res64 = newTemp(Ity_I64);
15598 assign(src64, widenUto64(mkexpr(src)));
15599 assign(res64, widenUto64(mkexpr(res)));
15601 IRTemp oszacp = newTemp(Ity_I64);
15607 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
15608 mkU8(AMD64G_CC_SHIFT_Z)),
15611 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
15612 mkU8(AMD64G_CC_SHIFT_C))
15616 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15617 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15618 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
15619 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
15621 goto decode_success;
15624 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
15625 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
15626 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
15627 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
15628 (selected special cases that actually occur in glibc,
15629 not by any means a complete implementation.)
15631 if (have66noF2noF3(pfx)
15633 && insn[0] == 0x0F && insn[1] == 0x3A
15634 && (insn[2] >= 0x60 && insn[2] <= 0x63)) {
15636 UInt isISTRx = insn[2] & 2;
15637 UInt isxSTRM = (insn[2] & 1) ^ 1;
15642 /* This is a nasty kludge. We need to pass 2 x V128 to the
15643 helper (which is clean). Since we can't do that, use a dirty
15644 helper to compute the results directly from the XMM regs in
15645 the guest state. That means for the memory case, we need to
15646 move the left operand into a pseudo-register (XMM16, let's
15649 if (epartIsReg(modrm)) {
15650 regNoL = eregOfRexRM(pfx, modrm);
15651 regNoR = gregOfRexRM(pfx, modrm);
15655 regNoL = 16; /* use XMM16 as an intermediary */
15656 regNoR = gregOfRexRM(pfx, modrm);
15657 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15658 /* No alignment check; I guess that makes sense, given that
15659 these insns are for dealing with C style strings. */
15660 stmt( IRStmt_Put( OFFB_XMM16, loadLE(Ity_V128, mkexpr(addr)) ));
15661 imm = insn[3+alen];
15665 /* Now we know the XMM reg numbers for the operands, and the
15666 immediate byte. Is it one we can actually handle? Throw out
15667 any cases for which the helper function has not been
15671 case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
15672 case 0x1A: case 0x3A: case 0x44: case 0x4A:
15675 goto decode_failure;
15678 /* Who ya gonna call? Presumably not Ghostbusters. */
15679 void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
15680 HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
15682 /* Round up the arguments. Note that this is a kludge -- the
15683 use of mkU64 rather than mkIRExpr_HWord implies the
15684 assumption that the host's word size is 64-bit. */
15685 UInt gstOffL = regNoL == 16 ? OFFB_XMM16 : xmmGuestRegOffset(regNoL);
15686 UInt gstOffR = xmmGuestRegOffset(regNoR);
15688 IRExpr* opc4_and_imm = mkU64((insn[2] << 8) | (imm & 0xFF));
15689 IRExpr* gstOffLe = mkU64(gstOffL);
15690 IRExpr* gstOffRe = mkU64(gstOffR);
15691 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
15692 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
15694 = mkIRExprVec_5( opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
15696 IRTemp resT = newTemp(Ity_I64);
15697 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
15698 /* It's not really a dirty call, but we can't use the clean
15699 helper mechanism here for the very lame reason that we can't
15700 pass 2 x V128s by value to a helper, nor get one back. Hence
15701 this roundabout scheme. */
15702 d->needsBBP = True;
15704 d->fxState[0].fx = Ifx_Read;
15705 d->fxState[0].offset = gstOffL;
15706 d->fxState[0].size = sizeof(U128);
15707 d->fxState[1].fx = Ifx_Read;
15708 d->fxState[1].offset = gstOffR;
15709 d->fxState[1].size = sizeof(U128);
15711 /* Declare that the helper writes XMM0. */
15713 d->fxState[2].fx = Ifx_Write;
15714 d->fxState[2].offset = xmmGuestRegOffset(0);
15715 d->fxState[2].size = sizeof(U128);
15718 stmt( IRStmt_Dirty(d) );
15720 /* Now resT[15:0] holds the new OSZACP values, so the condition
15721 codes must be updated. And for a xSTRI case, resT[31:16]
15722 holds the new ECX value, so stash that too. */
15724 putIReg64(R_RCX, binop(Iop_And64,
15725 binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
15731 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
15733 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15734 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15735 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
15737 if (regNoL == 16) {
15738 DIP("pcmp%cstr%c $%x,%s,%s\n",
15739 isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
15740 (UInt)imm, dis_buf, nameXMMReg(regNoR));
15742 DIP("pcmp%cstr%c $%x,%s,%s\n",
15743 isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
15744 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
15747 goto decode_success;
15751 /* 66 0f 38 17 /r = PTEST xmm1, xmm2/m128
15752 Logical compare (set ZF and CF from AND/ANDN of the operands) */
15753 if (have66noF2noF3( pfx ) && sz == 2
15754 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x17) {
15756 IRTemp vecE = newTemp(Ity_V128);
15757 IRTemp vecG = newTemp(Ity_V128);
15759 if ( epartIsReg(modrm) ) {
15760 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
15762 DIP( "ptest %s,%s\n",
15763 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15764 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15766 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15767 gen_SEGV_if_not_16_aligned( addr );
15768 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
15770 DIP( "ptest %s,%s\n",
15771 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15774 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
15776 /* Set Z=1 iff (vecE & vecG) == 0
15777 Set C=1 iff (vecE & not vecG) == 0
15780 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
15781 IRTemp andV = newTemp(Ity_V128);
15782 IRTemp andnV = newTemp(Ity_V128);
15783 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
15784 assign(andnV, binop(Iop_AndV128,
15786 binop(Iop_XorV128, mkexpr(vecG),
15789 /* The same, but reduced to 64-bit values, by or-ing the top
15790 and bottom 64-bits together. It relies on this trick:
15792 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
15794 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
15795 InterleaveHI64x2([a,b],[a,b]) == [a,a]
15797 and so the OR of the above 2 exprs produces
15798 [a OR b, a OR b], from which we simply take the lower half.
15800 IRTemp and64 = newTemp(Ity_I64);
15801 IRTemp andn64 = newTemp(Ity_I64);
15807 binop(Iop_InterleaveLO64x2, mkexpr(andV), mkexpr(andV)),
15808 binop(Iop_InterleaveHI64x2, mkexpr(andV), mkexpr(andV))
15817 binop(Iop_InterleaveLO64x2, mkexpr(andnV), mkexpr(andnV)),
15818 binop(Iop_InterleaveHI64x2, mkexpr(andnV), mkexpr(andnV))
15823 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
15824 slice out the Z and C bits conveniently. We use the standard
15825 trick all-zeroes -> all-zeroes, anything-else -> all-ones
15826 done by "(x | -x) >>s (word-size - 1)".
15828 IRTemp z64 = newTemp(Ity_I64);
15829 IRTemp c64 = newTemp(Ity_I64);
15834 binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
15844 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
15850 /* And finally, slice out the Z and C flags and set the flags
15851 thunk to COPY for them. OSAP are set to zero. */
15852 IRTemp newOSZACP = newTemp(Ity_I64);
15855 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
15856 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))
15860 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
15861 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15862 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15863 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
15865 goto decode_success;
15868 /* 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
15869 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
15870 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
15871 Blend at various granularities, with XMM0 (implicit operand)
15872 providing the controlling mask.
15874 if (have66noF2noF3(pfx) && sz == 2
15875 && insn[0] == 0x0F && insn[1] == 0x38
15876 && (insn[2] == 0x15 || insn[2] == 0x14 || insn[2] == 0x10)) {
15881 IROp opSAR = Iop_INVALID;
15884 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
15887 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
15890 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
15895 IRTemp vecE = newTemp(Ity_V128);
15896 IRTemp vecG = newTemp(Ity_V128);
15897 IRTemp vec0 = newTemp(Ity_V128);
15899 if ( epartIsReg(modrm) ) {
15900 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
15902 DIP( "%s %s,%s\n", nm,
15903 nameXMMReg( eregOfRexRM(pfx, modrm) ),
15904 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15906 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15907 gen_SEGV_if_not_16_aligned( addr );
15908 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
15910 DIP( "%s %s,%s\n", nm,
15911 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
15914 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
15915 assign(vec0, getXMMReg(0));
15917 /* Now the tricky bit is to convert vec0 into a suitable mask,
15918 by copying the most significant bit of each lane into all
15919 positions in the lane. */
15920 IRTemp sh = newTemp(Ity_I8);
15921 assign(sh, mkU8(8 * gran - 1));
15923 IRTemp mask = newTemp(Ity_V128);
15924 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
15926 IRTemp notmask = newTemp(Ity_V128);
15927 assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
15929 IRExpr* res = binop(Iop_OrV128,
15930 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
15931 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask)));
15932 putXMMReg(gregOfRexRM(pfx, modrm), res);
15934 goto decode_success;
15937 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
15938 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
15939 The decoding on this is a bit unusual.
15941 if (haveF2noF3(pfx)
15942 && insn[0] == 0x0F && insn[1] == 0x38
15943 && (insn[2] == 0xF1
15944 || (insn[2] == 0xF0 && !have66(pfx)))) {
15947 if (insn[2] == 0xF0)
15950 vassert(sz == 2 || sz == 4 || sz == 8);
15952 IRType tyE = szToITy(sz);
15953 IRTemp valE = newTemp(tyE);
15955 if (epartIsReg(modrm)) {
15956 assign(valE, getIRegE(sz, pfx, modrm));
15958 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
15959 nameIRegG(1==getRexW(pfx) ? 8 : 4 ,pfx, modrm));
15961 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
15962 assign(valE, loadLE(tyE, mkexpr(addr)));
15964 DIP("crc32b %s,%s\n", dis_buf,
15965 nameIRegG(1==getRexW(pfx) ? 8 : 4 ,pfx, modrm));
15968 /* Somewhat funny getting/putting of the crc32 value, in order
15969 to ensure that it turns into 64-bit gets and puts. However,
15970 mask off the upper 32 bits so as to not get memcheck false
15971 +ves around the helper call. */
15972 IRTemp valG0 = newTemp(Ity_I64);
15973 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
15974 mkU64(0xFFFFFFFF)));
15979 case 1: nm = "amd64g_calc_crc32b";
15980 fn = &amd64g_calc_crc32b; break;
15981 case 2: nm = "amd64g_calc_crc32w";
15982 fn = &amd64g_calc_crc32w; break;
15983 case 4: nm = "amd64g_calc_crc32l";
15984 fn = &amd64g_calc_crc32l; break;
15985 case 8: nm = "amd64g_calc_crc32q";
15986 fn = &amd64g_calc_crc32q; break;
15989 IRTemp valG1 = newTemp(Ity_I64);
15991 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
15992 mkIRExprVec_2(mkexpr(valG0),
15993 widenUto64(mkexpr(valE)))));
15995 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
15996 goto decode_success;
15999 /* ---------------------------------------------------- */
16000 /* --- end of the SSE4 decoder --- */
16001 /* ---------------------------------------------------- */
16003 /*after_sse_decoders:*/
16005 /* Get the primary opcode. */
16006 opc = getUChar(delta); delta++;
16008 /* We get here if the current insn isn't SSE, or this CPU doesn't
16013 /* ------------------------ Control flow --------------- */
16015 case 0xC2: /* RET imm16 */
16016 if (have66orF2orF3(pfx)) goto decode_failure;
16017 d64 = getUDisp16(delta);
16020 dres.whatNext = Dis_StopHere;
16021 DIP("ret %lld\n", d64);
16024 case 0xC3: /* RET */
16025 if (have66orF2(pfx)) goto decode_failure;
16026 /* F3 is acceptable on AMD. */
16028 dres.whatNext = Dis_StopHere;
16029 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
16032 case 0xE8: /* CALL J4 */
16033 if (haveF2orF3(pfx)) goto decode_failure;
16034 d64 = getSDisp32(delta); delta += 4;
16035 d64 += (guest_RIP_bbstart+delta);
16036 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
16037 t1 = newTemp(Ity_I64);
16038 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
16039 putIReg64(R_RSP, mkexpr(t1));
16040 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
16041 t2 = newTemp(Ity_I64);
16042 assign(t2, mkU64((Addr64)d64));
16043 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
16044 if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
16045 /* follow into the call target. */
16046 dres.whatNext = Dis_ResteerU;
16047 dres.continueAt = d64;
16049 jmp_lit(Ijk_Call,d64);
16050 dres.whatNext = Dis_StopHere;
16052 DIP("call 0x%llx\n",d64);
16055 //.. //-- case 0xC8: /* ENTER */
16056 //.. //-- d32 = getUDisp16(eip); eip += 2;
16057 //.. //-- abyte = getUChar(delta); delta++;
16059 //.. //-- vg_assert(sz == 4);
16060 //.. //-- vg_assert(abyte == 0);
16062 //.. //-- t1 = newTemp(cb); t2 = newTemp(cb);
16063 //.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
16064 //.. //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
16065 //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
16066 //.. //-- uLiteral(cb, sz);
16067 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
16068 //.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
16069 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
16070 //.. //-- if (d32) {
16071 //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
16072 //.. //-- uLiteral(cb, d32);
16073 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
16075 //.. //-- DIP("enter 0x%x, 0x%x", d32, abyte);
16078 case 0xC9: /* LEAVE */
16079 /* In 64-bit mode this defaults to a 64-bit operand size. There
16080 is no way to encode a 32-bit variant. Hence sz==4 but we do
16083 goto decode_failure;
16084 t1 = newTemp(Ity_I64);
16085 t2 = newTemp(Ity_I64);
16086 assign(t1, getIReg64(R_RBP));
16087 /* First PUT RSP looks redundant, but need it because RSP must
16088 always be up-to-date for Memcheck to work... */
16089 putIReg64(R_RSP, mkexpr(t1));
16090 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
16091 putIReg64(R_RBP, mkexpr(t2));
16092 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
16096 //.. //-- /* ---------------- Misc weird-ass insns --------------- */
16098 //.. //-- case 0x27: /* DAA */
16099 //.. //-- case 0x2F: /* DAS */
16100 //.. //-- t1 = newTemp(cb);
16101 //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t1);
16102 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */
16103 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
16104 //.. //-- uWiden(cb, 1, False);
16105 //.. //-- uInstr0(cb, CALLM_S, 0);
16106 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1);
16107 //.. //-- uInstr1(cb, CALLM, 0, Lit16,
16108 //.. //-- opc == 0x27 ? VGOFF_(helper_DAA) : VGOFF_(helper_DAS) );
16109 //.. //-- uFlagsRWU(cb, FlagsAC, FlagsSZACP, FlagO);
16110 //.. //-- uInstr1(cb, POP, 4, TempReg, t1);
16111 //.. //-- uInstr0(cb, CALLM_E, 0);
16112 //.. //-- uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, R_AL);
16113 //.. //-- DIP(opc == 0x27 ? "daa\n" : "das\n");
16116 //.. //-- case 0x37: /* AAA */
16117 //.. //-- case 0x3F: /* AAS */
16118 //.. //-- t1 = newTemp(cb);
16119 //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1);
16120 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */
16121 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
16122 //.. //-- uWiden(cb, 2, False);
16123 //.. //-- uInstr0(cb, CALLM_S, 0);
16124 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1);
16125 //.. //-- uInstr1(cb, CALLM, 0, Lit16,
16126 //.. //-- opc == 0x37 ? VGOFF_(helper_AAA) : VGOFF_(helper_AAS) );
16127 //.. //-- uFlagsRWU(cb, FlagA, FlagsAC, FlagsEmpty);
16128 //.. //-- uInstr1(cb, POP, 4, TempReg, t1);
16129 //.. //-- uInstr0(cb, CALLM_E, 0);
16130 //.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
16131 //.. //-- DIP(opc == 0x37 ? "aaa\n" : "aas\n");
16134 //.. //-- case 0xD4: /* AAM */
16135 //.. //-- case 0xD5: /* AAD */
16136 //.. //-- d32 = getUChar(delta); delta++;
16137 //.. //-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !");
16138 //.. //-- t1 = newTemp(cb);
16139 //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1);
16140 //.. //-- /* Widen %AX to 32 bits, so it's all defined when we push it. */
16141 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
16142 //.. //-- uWiden(cb, 2, False);
16143 //.. //-- uInstr0(cb, CALLM_S, 0);
16144 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1);
16145 //.. //-- uInstr1(cb, CALLM, 0, Lit16,
16146 //.. //-- opc == 0xD4 ? VGOFF_(helper_AAM) : VGOFF_(helper_AAD) );
16147 //.. //-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty);
16148 //.. //-- uInstr1(cb, POP, 4, TempReg, t1);
16149 //.. //-- uInstr0(cb, CALLM_E, 0);
16150 //.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
16151 //.. //-- DIP(opc == 0xD4 ? "aam\n" : "aad\n");
16154 /* ------------------------ CWD/CDQ -------------------- */
16156 case 0x98: /* CBW / CWDE / CDQE, depending on operand size */
16157 if (haveF2orF3(pfx)) goto decode_failure;
16159 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
16160 DIP(/*"cdqe\n"*/"cltq");
16164 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
16169 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
16173 goto decode_failure;
16175 case 0x99: /* CWD/CDQ/CQO */
16176 if (haveF2orF3(pfx)) goto decode_failure;
16177 vassert(sz == 2 || sz == 4 || sz == 8);
16180 binop(mkSizedOp(ty,Iop_Sar8),
16182 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
16183 DIP(sz == 2 ? "cwd\n"
16184 : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
16188 /* ------------------------ FPU ops -------------------- */
16190 case 0x9E: /* SAHF */
16195 case 0x9F: /* LAHF */
16200 case 0x9B: /* FWAIT */
16213 Bool redundantREXWok = False;
16215 if (haveF2orF3(pfx))
16216 goto decode_failure;
16218 /* kludge to tolerate redundant rex.w prefixes (should do this
16219 properly one day) */
16220 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
16221 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
16222 redundantREXWok = True;
16225 || (sz == 8 && redundantREXWok))
16226 && haveNo66noF2noF3(pfx)) {
16227 Long delta0 = delta;
16228 Bool decode_OK = False;
16229 delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
16232 goto decode_failure;
16236 goto decode_failure;
16240 /* ------------------------ INT ------------------------ */
16242 case 0xCC: /* INT 3 */
16243 jmp_lit(Ijk_SigTRAP, guest_RIP_bbstart + delta);
16244 dres.whatNext = Dis_StopHere;
16248 case 0xCD: { /* INT imm8 */
16249 IRJumpKind jk = Ijk_Boring;
16250 if (have66orF2orF3(pfx)) goto decode_failure;
16251 d64 = getUChar(delta); delta++;
16253 case 32: jk = Ijk_Sys_int32; break;
16254 default: goto decode_failure;
16256 guest_RIP_next_mustcheck = True;
16257 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
16258 jmp_lit(jk, guest_RIP_next_assumed);
16259 /* It's important that all ArchRegs carry their up-to-date value
16260 at this point. So we declare an end-of-block here, which
16261 forces any TempRegs caching ArchRegs to be flushed. */
16262 dres.whatNext = Dis_StopHere;
16263 DIP("int $0x%02x\n", (UInt)d64);
16267 /* ------------------------ Jcond, byte offset --------- */
16269 case 0xEB: /* Jb (jump, byte offset) */
16270 if (haveF2orF3(pfx)) goto decode_failure;
16272 goto decode_failure; /* JRS added 2004 July 11 */
16273 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
16275 if (resteerOkFn(callback_opaque,d64)) {
16276 dres.whatNext = Dis_ResteerU;
16277 dres.continueAt = d64;
16279 jmp_lit(Ijk_Boring,d64);
16280 dres.whatNext = Dis_StopHere;
16282 DIP("jmp-8 0x%llx\n", d64);
16285 case 0xE9: /* Jv (jump, 16/32 offset) */
16286 if (haveF2orF3(pfx)) goto decode_failure;
16288 goto decode_failure; /* JRS added 2004 July 11 */
16289 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
16291 if (resteerOkFn(callback_opaque,d64)) {
16292 dres.whatNext = Dis_ResteerU;
16293 dres.continueAt = d64;
16295 jmp_lit(Ijk_Boring,d64);
16296 dres.whatNext = Dis_StopHere;
16298 DIP("jmp 0x%llx\n", d64);
16303 case 0x72: /* JBb/JNAEb (jump below) */
16304 case 0x73: /* JNBb/JAEb (jump not below) */
16305 case 0x74: /* JZb/JEb (jump zero) */
16306 case 0x75: /* JNZb/JNEb (jump not zero) */
16307 case 0x76: /* JBEb/JNAb (jump below or equal) */
16308 case 0x77: /* JNBEb/JAb (jump not below or equal) */
16309 case 0x78: /* JSb (jump negative) */
16310 case 0x79: /* JNSb (jump not negative) */
16311 case 0x7A: /* JP (jump parity even) */
16312 case 0x7B: /* JNP/JPO (jump parity odd) */
16313 case 0x7C: /* JLb/JNGEb (jump less) */
16314 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
16315 case 0x7E: /* JLEb/JNGb (jump less or equal) */
16316 case 0x7F: /* JGb/JNLEb (jump greater) */
16318 HChar* comment = "";
16319 if (haveF2orF3(pfx)) goto decode_failure;
16320 jmpDelta = getSDisp8(delta);
16321 vassert(-128 <= jmpDelta && jmpDelta < 128);
16322 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
16325 && vex_control.guest_chase_cond
16326 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
16328 && resteerOkFn( callback_opaque, d64) ) {
16329 /* Speculation: assume this backward branch is taken. So we
16330 need to emit a side-exit to the insn following this one,
16331 on the negation of the condition, and continue at the
16332 branch target address (d64). If we wind up back at the
16333 first instruction of the trace, just stop; it's better to
16334 let the IR loop unroller handle that case. */
16336 mk_amd64g_calculate_condition(
16337 (AMD64Condcode)(1 ^ (opc - 0x70))),
16339 IRConst_U64(guest_RIP_bbstart+delta) ) );
16340 dres.whatNext = Dis_ResteerC;
16341 dres.continueAt = d64;
16342 comment = "(assumed taken)";
16346 && vex_control.guest_chase_cond
16347 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
16349 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
16350 /* Speculation: assume this forward branch is not taken. So
16351 we need to emit a side-exit to d64 (the dest) and continue
16352 disassembling at the insn immediately following this
16355 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
16357 IRConst_U64(d64) ) );
16358 dres.whatNext = Dis_ResteerC;
16359 dres.continueAt = guest_RIP_bbstart+delta;
16360 comment = "(assumed not taken)";
16363 /* Conservative default translation - end the block at this
16365 jcc_01( (AMD64Condcode)(opc - 0x70),
16366 guest_RIP_bbstart+delta,
16368 dres.whatNext = Dis_StopHere;
16370 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment);
16375 /* JRCXZ or JECXZ, depending on the address size override. */
16376 if (have66orF2orF3(pfx)) goto decode_failure;
16377 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
16379 if (haveASO(pfx)) {
16381 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
16382 unop(Iop_32Uto64, getIReg32(R_RCX)),
16387 DIP("jecxz 0x%llx\n", d64);
16390 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
16396 DIP("jrcxz 0x%llx\n", d64);
16400 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
16401 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
16402 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
16403 { /* The docs say this uses rCX as a count depending on the
16404 address size override, not the operand one. */
16405 IRExpr* zbit = NULL;
16406 IRExpr* count = NULL;
16407 IRExpr* cond = NULL;
16408 HChar* xtra = NULL;
16410 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
16411 /* So at this point we've rejected any variants which appear to
16412 be governed by the usual operand-size modifiers. Hence only
16413 the address size prefix can have an effect. It changes the
16414 size from 64 (default) to 32. */
16415 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
16417 if (haveASO(pfx)) {
16418 /* 64to32 of 64-bit get is merely a get-put improvement
16420 putIReg32(R_RCX, binop(Iop_Sub32,
16421 unop(Iop_64to32, getIReg64(R_RCX)),
16424 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
16427 /* This is correct, both for 32- and 64-bit versions. If we're
16428 doing a 32-bit dec and the result is zero then the default
16429 zero extension rule will cause the upper 32 bits to be zero
16430 too. Hence a 64-bit check against zero is OK. */
16431 count = getIReg64(R_RCX);
16432 cond = binop(Iop_CmpNE64, count, mkU64(0));
16439 zbit = mk_amd64g_calculate_condition( AMD64CondZ );
16440 cond = mkAnd1(cond, zbit);
16444 zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
16445 cond = mkAnd1(cond, zbit);
16450 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64)) );
16452 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64);
16456 /* ------------------------ IMUL ----------------------- */
16458 case 0x69: /* IMUL Iv, Ev, Gv */
16459 if (haveF2orF3(pfx)) goto decode_failure;
16460 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
16462 case 0x6B: /* IMUL Ib, Ev, Gv */
16463 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
16466 /* ------------------------ MOV ------------------------ */
16468 case 0x88: /* MOV Gb,Eb */
16469 if (haveF2orF3(pfx)) goto decode_failure;
16470 delta = dis_mov_G_E(vbi, pfx, 1, delta);
16473 case 0x89: /* MOV Gv,Ev */
16474 if (haveF2orF3(pfx)) goto decode_failure;
16475 delta = dis_mov_G_E(vbi, pfx, sz, delta);
16478 case 0x8A: /* MOV Eb,Gb */
16479 if (haveF2orF3(pfx)) goto decode_failure;
16480 delta = dis_mov_E_G(vbi, pfx, 1, delta);
16483 case 0x8B: /* MOV Ev,Gv */
16484 if (haveF2orF3(pfx)) goto decode_failure;
16485 delta = dis_mov_E_G(vbi, pfx, sz, delta);
16488 case 0x8D: /* LEA M,Gv */
16489 if (haveF2orF3(pfx)) goto decode_failure;
16490 if (sz != 4 && sz != 8)
16491 goto decode_failure;
16492 modrm = getUChar(delta);
16493 if (epartIsReg(modrm))
16494 goto decode_failure;
16495 /* NOTE! this is the one place where a segment override prefix
16496 has no effect on the address calculation. Therefore we clear
16497 any segment override bits in pfx. */
16498 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
16500 /* This is a hack. But it isn't clear that really doing the
16501 calculation at 32 bits is really worth it. Hence for leal,
16502 do the full 64-bit calculation and then truncate it. */
16503 putIRegG( sz, pfx, modrm,
16505 ? unop(Iop_64to32, mkexpr(addr))
16508 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
16509 nameIRegG(sz,pfx,modrm));
16512 //.. case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
16513 //.. delta = dis_mov_Sw_Ew(sorb, sz, delta);
16516 //.. case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
16517 //.. delta = dis_mov_Ew_Sw(sorb, delta);
16520 case 0xA0: /* MOV Ob,AL */
16521 if (have66orF2orF3(pfx)) goto decode_failure;
16523 /* Fall through ... */
16524 case 0xA1: /* MOV Ov,eAX */
16525 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
16526 goto decode_failure;
16527 d64 = getDisp64(delta);
16530 addr = newTemp(Ity_I64);
16531 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
16532 putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
16533 DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
16534 segRegTxt(pfx), d64,
16538 case 0xA2: /* MOV AL,Ob */
16539 if (have66orF2orF3(pfx)) goto decode_failure;
16541 /* Fall through ... */
16542 case 0xA3: /* MOV eAX,Ov */
16543 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
16544 goto decode_failure;
16545 d64 = getDisp64(delta);
16548 addr = newTemp(Ity_I64);
16549 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
16550 storeLE( mkexpr(addr), getIRegRAX(sz) );
16551 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
16552 segRegTxt(pfx), d64);
16555 /* XXXX be careful here with moves to AH/BH/CH/DH */
16556 case 0xB0: /* MOV imm,AL */
16557 case 0xB1: /* MOV imm,CL */
16558 case 0xB2: /* MOV imm,DL */
16559 case 0xB3: /* MOV imm,BL */
16560 case 0xB4: /* MOV imm,AH */
16561 case 0xB5: /* MOV imm,CH */
16562 case 0xB6: /* MOV imm,DH */
16563 case 0xB7: /* MOV imm,BH */
16564 if (haveF2orF3(pfx)) goto decode_failure;
16565 d64 = getUChar(delta);
16567 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
16568 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
16571 case 0xB8: /* MOV imm,eAX */
16572 case 0xB9: /* MOV imm,eCX */
16573 case 0xBA: /* MOV imm,eDX */
16574 case 0xBB: /* MOV imm,eBX */
16575 case 0xBC: /* MOV imm,eSP */
16576 case 0xBD: /* MOV imm,eBP */
16577 case 0xBE: /* MOV imm,eSI */
16578 case 0xBF: /* MOV imm,eDI */
16579 /* This is the one-and-only place where 64-bit literals are
16580 allowed in the instruction stream. */
16581 if (haveF2orF3(pfx)) goto decode_failure;
16583 d64 = getDisp64(delta);
16585 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
16586 DIP("movabsq $%lld,%s\n", (Long)d64,
16587 nameIRegRexB(8,pfx,opc-0xB8));
16589 d64 = getSDisp(imin(4,sz),delta);
16590 delta += imin(4,sz);
16591 putIRegRexB(sz, pfx, opc-0xB8,
16592 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
16593 DIP("mov%c $%lld,%s\n", nameISize(sz),
16595 nameIRegRexB(sz,pfx,opc-0xB8));
16599 case 0xC6: /* MOV Ib,Eb */
16602 case 0xC7: /* MOV Iv,Ev */
16606 if (haveF2orF3(pfx)) goto decode_failure;
16607 modrm = getUChar(delta);
16608 if (epartIsReg(modrm)) {
16609 delta++; /* mod/rm byte */
16610 d64 = getSDisp(imin(4,sz),delta);
16611 delta += imin(4,sz);
16612 putIRegE(sz, pfx, modrm,
16613 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
16614 DIP("mov%c $%lld, %s\n", nameISize(sz),
16616 nameIRegE(sz,pfx,modrm));
16618 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
16619 /*xtra*/imin(4,sz) );
16621 d64 = getSDisp(imin(4,sz),delta);
16622 delta += imin(4,sz);
16623 storeLE(mkexpr(addr),
16624 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
16625 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
16629 /* ------------------------ MOVx ------------------------ */
16631 case 0x63: /* MOVSX */
16632 if (haveF2orF3(pfx)) goto decode_failure;
16633 if (haveREX(pfx) && 1==getRexW(pfx)) {
16635 /* movsx r/m32 to r64 */
16636 modrm = getUChar(delta);
16637 if (epartIsReg(modrm)) {
16639 putIRegG(8, pfx, modrm,
16641 getIRegE(4, pfx, modrm)));
16642 DIP("movslq %s,%s\n",
16643 nameIRegE(4, pfx, modrm),
16644 nameIRegG(8, pfx, modrm));
16647 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16649 putIRegG(8, pfx, modrm,
16651 loadLE(Ity_I32, mkexpr(addr))));
16652 DIP("movslq %s,%s\n", dis_buf,
16653 nameIRegG(8, pfx, modrm));
16657 goto decode_failure;
16660 /* ------------------------ opl imm, A ----------------- */
16662 case 0x04: /* ADD Ib, AL */
16663 if (haveF2orF3(pfx)) goto decode_failure;
16664 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
16666 case 0x05: /* ADD Iv, eAX */
16667 if (haveF2orF3(pfx)) goto decode_failure;
16668 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
16671 case 0x0C: /* OR Ib, AL */
16672 if (haveF2orF3(pfx)) goto decode_failure;
16673 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
16675 case 0x0D: /* OR Iv, eAX */
16676 if (haveF2orF3(pfx)) goto decode_failure;
16677 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
16680 case 0x14: /* ADC Ib, AL */
16681 if (haveF2orF3(pfx)) goto decode_failure;
16682 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
16684 case 0x15: /* ADC Iv, eAX */
16685 if (haveF2orF3(pfx)) goto decode_failure;
16686 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
16689 case 0x1C: /* SBB Ib, AL */
16690 if (haveF2orF3(pfx)) goto decode_failure;
16691 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
16693 case 0x1D: /* SBB Iv, eAX */
16694 if (haveF2orF3(pfx)) goto decode_failure;
16695 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
16698 case 0x24: /* AND Ib, AL */
16699 if (haveF2orF3(pfx)) goto decode_failure;
16700 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
16702 case 0x25: /* AND Iv, eAX */
16703 if (haveF2orF3(pfx)) goto decode_failure;
16704 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
16707 case 0x2C: /* SUB Ib, AL */
16708 if (haveF2orF3(pfx)) goto decode_failure;
16709 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
16711 case 0x2D: /* SUB Iv, eAX */
16712 if (haveF2orF3(pfx)) goto decode_failure;
16713 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
16716 case 0x34: /* XOR Ib, AL */
16717 if (haveF2orF3(pfx)) goto decode_failure;
16718 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
16720 case 0x35: /* XOR Iv, eAX */
16721 if (haveF2orF3(pfx)) goto decode_failure;
16722 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
16725 case 0x3C: /* CMP Ib, AL */
16726 if (haveF2orF3(pfx)) goto decode_failure;
16727 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
16729 case 0x3D: /* CMP Iv, eAX */
16730 if (haveF2orF3(pfx)) goto decode_failure;
16731 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
16734 case 0xA8: /* TEST Ib, AL */
16735 if (haveF2orF3(pfx)) goto decode_failure;
16736 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
16738 case 0xA9: /* TEST Iv, eAX */
16739 if (haveF2orF3(pfx)) goto decode_failure;
16740 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
16743 /* ------------------------ opl Ev, Gv ----------------- */
16745 case 0x02: /* ADD Eb,Gb */
16746 if (haveF2orF3(pfx)) goto decode_failure;
16747 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
16749 case 0x03: /* ADD Ev,Gv */
16750 if (haveF2orF3(pfx)) goto decode_failure;
16751 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
16754 case 0x0A: /* OR Eb,Gb */
16755 if (haveF2orF3(pfx)) goto decode_failure;
16756 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
16758 case 0x0B: /* OR Ev,Gv */
16759 if (haveF2orF3(pfx)) goto decode_failure;
16760 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
16763 case 0x12: /* ADC Eb,Gb */
16764 if (haveF2orF3(pfx)) goto decode_failure;
16765 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
16767 case 0x13: /* ADC Ev,Gv */
16768 if (haveF2orF3(pfx)) goto decode_failure;
16769 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
16772 case 0x1A: /* SBB Eb,Gb */
16773 if (haveF2orF3(pfx)) goto decode_failure;
16774 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
16776 case 0x1B: /* SBB Ev,Gv */
16777 if (haveF2orF3(pfx)) goto decode_failure;
16778 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
16781 case 0x22: /* AND Eb,Gb */
16782 if (haveF2orF3(pfx)) goto decode_failure;
16783 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
16785 case 0x23: /* AND Ev,Gv */
16786 if (haveF2orF3(pfx)) goto decode_failure;
16787 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
16790 case 0x2A: /* SUB Eb,Gb */
16791 if (haveF2orF3(pfx)) goto decode_failure;
16792 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
16794 case 0x2B: /* SUB Ev,Gv */
16795 if (haveF2orF3(pfx)) goto decode_failure;
16796 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
16799 case 0x32: /* XOR Eb,Gb */
16800 if (haveF2orF3(pfx)) goto decode_failure;
16801 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
16803 case 0x33: /* XOR Ev,Gv */
16804 if (haveF2orF3(pfx)) goto decode_failure;
16805 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
16808 case 0x3A: /* CMP Eb,Gb */
16809 if (haveF2orF3(pfx)) goto decode_failure;
16810 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
16812 case 0x3B: /* CMP Ev,Gv */
16813 if (haveF2orF3(pfx)) goto decode_failure;
16814 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
16817 case 0x84: /* TEST Eb,Gb */
16818 if (haveF2orF3(pfx)) goto decode_failure;
16819 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" );
16821 case 0x85: /* TEST Ev,Gv */
16822 if (haveF2orF3(pfx)) goto decode_failure;
16823 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" );
16826 /* ------------------------ opl Gv, Ev ----------------- */
16828 case 0x00: /* ADD Gb,Eb */
16829 if (haveF2orF3(pfx)) goto decode_failure;
16830 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
16832 case 0x01: /* ADD Gv,Ev */
16833 if (haveF2orF3(pfx)) goto decode_failure;
16834 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
16837 case 0x08: /* OR Gb,Eb */
16838 if (haveF2orF3(pfx)) goto decode_failure;
16839 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
16841 case 0x09: /* OR Gv,Ev */
16842 if (haveF2orF3(pfx)) goto decode_failure;
16843 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
16846 case 0x10: /* ADC Gb,Eb */
16847 if (haveF2orF3(pfx)) goto decode_failure;
16848 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
16850 case 0x11: /* ADC Gv,Ev */
16851 if (haveF2orF3(pfx)) goto decode_failure;
16852 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
16855 case 0x18: /* SBB Gb,Eb */
16856 if (haveF2orF3(pfx)) goto decode_failure;
16857 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
16859 case 0x19: /* SBB Gv,Ev */
16860 if (haveF2orF3(pfx)) goto decode_failure;
16861 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
16864 case 0x20: /* AND Gb,Eb */
16865 if (haveF2orF3(pfx)) goto decode_failure;
16866 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
16868 case 0x21: /* AND Gv,Ev */
16869 if (haveF2orF3(pfx)) goto decode_failure;
16870 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
16873 case 0x28: /* SUB Gb,Eb */
16874 if (haveF2orF3(pfx)) goto decode_failure;
16875 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
16877 case 0x29: /* SUB Gv,Ev */
16878 if (haveF2orF3(pfx)) goto decode_failure;
16879 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
16882 case 0x30: /* XOR Gb,Eb */
16883 if (haveF2orF3(pfx)) goto decode_failure;
16884 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
16886 case 0x31: /* XOR Gv,Ev */
16887 if (haveF2orF3(pfx)) goto decode_failure;
16888 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
16891 case 0x38: /* CMP Gb,Eb */
16892 if (haveF2orF3(pfx)) goto decode_failure;
16893 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
16895 case 0x39: /* CMP Gv,Ev */
16896 if (haveF2orF3(pfx)) goto decode_failure;
16897 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
16900 /* ------------------------ POP ------------------------ */
16902 case 0x58: /* POP eAX */
16903 case 0x59: /* POP eCX */
16904 case 0x5A: /* POP eDX */
16905 case 0x5B: /* POP eBX */
16906 case 0x5D: /* POP eBP */
16907 case 0x5E: /* POP eSI */
16908 case 0x5F: /* POP eDI */
16909 case 0x5C: /* POP eSP */
16910 if (haveF2orF3(pfx)) goto decode_failure;
16911 vassert(sz == 2 || sz == 4 || sz == 8);
16913 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
16914 t1 = newTemp(szToITy(sz));
16915 t2 = newTemp(Ity_I64);
16916 assign(t2, getIReg64(R_RSP));
16917 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
16918 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
16919 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
16920 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
16923 case 0x9D: /* POPF */
16924 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
16925 So sz==4 actually means sz==8. */
16926 if (haveF2orF3(pfx)) goto decode_failure;
16927 vassert(sz == 2 || sz == 4);
16928 if (sz == 4) sz = 8;
16929 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
16930 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
16931 assign(t2, getIReg64(R_RSP));
16932 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
16933 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
16934 /* t1 is the flag word. Mask out everything except OSZACP and
16935 set the flags thunk to AMD64G_CC_OP_COPY. */
16936 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16937 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16938 stmt( IRStmt_Put( OFFB_CC_DEP1,
16941 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
16942 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
16943 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
16948 /* Also need to set the D flag, which is held in bit 10 of t1.
16949 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
16956 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
16959 mkU64(0xFFFFFFFFFFFFFFFFULL)))
16962 /* And set the ID flag */
16969 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
16975 /* And set the AC flag too */
16982 binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
16988 DIP("popf%c\n", nameISize(sz));
16991 //.. case 0x61: /* POPA */
16992 //.. /* This is almost certainly wrong for sz==2. So ... */
16993 //.. if (sz != 4) goto decode_failure;
16995 //.. /* t5 is the old %ESP value. */
16996 //.. t5 = newTemp(Ity_I32);
16997 //.. assign( t5, getIReg(4, R_ESP) );
16999 //.. /* Reload all the registers, except %esp. */
17000 //.. putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
17001 //.. putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
17002 //.. putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
17003 //.. putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
17004 //.. /* ignore saved %ESP */
17005 //.. putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
17006 //.. putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
17007 //.. putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));
17009 //.. /* and move %ESP back up */
17010 //.. putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );
17012 //..    DIP("popa%c\n", nameISize(sz));
17015 case 0x8F: { /* POPQ m64 / POPW m16 */
17018 /* There is no encoding for 32-bit pop in 64-bit mode.
17019 So sz==4 actually means sz==8. */
17020 if (haveF2orF3(pfx)) goto decode_failure;
17021 vassert(sz == 2 || sz == 4
17022 || /* tolerate redundant REX.W, see #210481 */ sz == 8);
17023 if (sz == 4) sz = 8;
17024 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
17026 rm = getUChar(delta);
17028 /* make sure this instruction is correct POP */
17029 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
17030 goto decode_failure;
17031 /* and has correct size */
17034 t1 = newTemp(Ity_I64);
17035 t3 = newTemp(Ity_I64);
17036 assign( t1, getIReg64(R_RSP) );
17037 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
17039 /* Increase RSP; must be done before the STORE. Intel manual
17040 says: If the RSP register is used as a base register for
17041 addressing a destination operand in memory, the POP
17042 instruction computes the effective address of the operand
17043 after it increments the RSP register. */
17044 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
17046 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
17047 storeLE( mkexpr(addr), mkexpr(t3) );
17049 DIP("popl %s\n", dis_buf);
17055 //.. //-- case 0x1F: /* POP %DS */
17056 //.. //-- dis_pop_segreg( cb, R_DS, sz ); break;
17057 //.. //-- case 0x07: /* POP %ES */
17058 //.. //-- dis_pop_segreg( cb, R_ES, sz ); break;
17059 //.. //-- case 0x17: /* POP %SS */
17060 //.. //-- dis_pop_segreg( cb, R_SS, sz ); break;
17062 /* ------------------------ PUSH ----------------------- */
17064 case 0x50: /* PUSH eAX */
17065 case 0x51: /* PUSH eCX */
17066 case 0x52: /* PUSH eDX */
17067 case 0x53: /* PUSH eBX */
17068 case 0x55: /* PUSH eBP */
17069 case 0x56: /* PUSH eSI */
17070 case 0x57: /* PUSH eDI */
17071 case 0x54: /* PUSH eSP */
17072 /* This is the Right Way, in that the value to be pushed is
17073 established before %rsp is changed, so that pushq %rsp
17074 correctly pushes the old value. */
17075 if (haveF2orF3(pfx)) goto decode_failure;
17076 vassert(sz == 2 || sz == 4 || sz == 8);
17078 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
17079 ty = sz==2 ? Ity_I16 : Ity_I64;
17081 t2 = newTemp(Ity_I64);
17082 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
17083 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
17084 putIReg64(R_RSP, mkexpr(t2) );
17085 storeLE(mkexpr(t2),mkexpr(t1));
17086 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
17089 case 0x68: /* PUSH Iv */
17090 if (haveF2orF3(pfx)) goto decode_failure;
17091 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
17092 if (sz == 4) sz = 8;
17093 d64 = getSDisp(imin(4,sz),delta);
17094 delta += imin(4,sz);
17096 case 0x6A: /* PUSH Ib, sign-extended to sz */
17097 if (haveF2orF3(pfx)) goto decode_failure;
17098 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
17099 if (sz == 4) sz = 8;
17100 d64 = getSDisp8(delta); delta += 1;
17104 t1 = newTemp(Ity_I64);
17106 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
17107 putIReg64(R_RSP, mkexpr(t1) );
17108       /* stop mkU16 asserting if d64 is a negative 16-bit number
17112 storeLE( mkexpr(t1), mkU(ty,d64) );
17113 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
17116 case 0x9C: /* PUSHF */ {
17117 /* Note. There is no encoding for a 32-bit pushf in 64-bit
17118 mode. So sz==4 actually means sz==8. */
17119 /* 24 July 06: has also been seen with a redundant REX prefix,
17120 so must also allow sz==8. */
17121 if (haveF2orF3(pfx)) goto decode_failure;
17122 vassert(sz == 2 || sz == 4 || sz == 8);
17123 if (sz == 4) sz = 8;
17124 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
17126 t1 = newTemp(Ity_I64);
17127 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
17128 putIReg64(R_RSP, mkexpr(t1) );
17130 t2 = newTemp(Ity_I64);
17131 assign( t2, mk_amd64g_calculate_rflags_all() );
17133 /* Patch in the D flag. This can simply be a copy of bit 10 of
17134 baseBlock[OFFB_DFLAG]. */
17135 t3 = newTemp(Ity_I64);
17136 assign( t3, binop(Iop_Or64,
17139 IRExpr_Get(OFFB_DFLAG,Ity_I64),
17143 /* And patch in the ID flag. */
17144 t4 = newTemp(Ity_I64);
17145 assign( t4, binop(Iop_Or64,
17148 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
17153 /* And patch in the AC flag too. */
17154 t5 = newTemp(Ity_I64);
17155 assign( t5, binop(Iop_Or64,
17158 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
17163 /* if sz==2, the stored value needs to be narrowed. */
17165 storeLE( mkexpr(t1), unop(Iop_32to16,
17166 unop(Iop_64to32,mkexpr(t5))) );
17168 storeLE( mkexpr(t1), mkexpr(t5) );
17170 DIP("pushf%c\n", nameISize(sz));
17174 //.. case 0x60: /* PUSHA */
17175 //.. /* This is almost certainly wrong for sz==2. So ... */
17176 //.. if (sz != 4) goto decode_failure;
17178 //.. /* This is the Right Way, in that the value to be pushed is
17179 //.. established before %esp is changed, so that pusha
17180 //.. correctly pushes the old %esp value. New value of %esp is
17181 //.. pushed at start. */
17182 //.. /* t0 is the %ESP value we're going to push. */
17183 //.. t0 = newTemp(Ity_I32);
17184 //.. assign( t0, getIReg(4, R_ESP) );
17186 //.. /* t5 will be the new %ESP value. */
17187 //.. t5 = newTemp(Ity_I32);
17188 //.. assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );
17190 //.. /* Update guest state before prodding memory. */
17191 //.. putIReg(4, R_ESP, mkexpr(t5));
17193 //.. /* Dump all the registers. */
17194 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
17195 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
17196 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
17197 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
17198 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
17199 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
17200 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
17201 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );
17203 //.. DIP("pusha%c\n", nameISize(sz));
17207 //.. //-- case 0x0E: /* PUSH %CS */
17208 //.. //-- dis_push_segreg( cb, R_CS, sz ); break;
17209 //.. //-- case 0x1E: /* PUSH %DS */
17210 //.. //-- dis_push_segreg( cb, R_DS, sz ); break;
17211 //.. //-- case 0x06: /* PUSH %ES */
17212 //.. //-- dis_push_segreg( cb, R_ES, sz ); break;
17213 //.. //-- case 0x16: /* PUSH %SS */
17214 //.. //-- dis_push_segreg( cb, R_SS, sz ); break;
17216 //.. /* ------------------------ SCAS et al ----------------- */
17218 //.. case 0xA4: /* MOVS, no REP prefix */
17220 //.. dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
17223 //.. case 0xA6: /* CMPSb, no REP prefix */
17224 //.. //-- case 0xA7:
17225 //.. dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
17229 case 0xAC: /* LODS, no REP prefix */
17231 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
17234 //.. case 0xAE: /* SCAS, no REP prefix */
17236 //.. dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
17240 case 0xFC: /* CLD */
17241 if (haveF2orF3(pfx)) goto decode_failure;
17242 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
17246 case 0xFD: /* STD */
17247 if (haveF2orF3(pfx)) goto decode_failure;
17248 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
17252 case 0xF8: /* CLC */
17253 case 0xF9: /* STC */
17254 case 0xF5: /* CMC */
17255 t0 = newTemp(Ity_I64);
17256 t1 = newTemp(Ity_I64);
17257 assign( t0, mk_amd64g_calculate_rflags_all() );
17260 assign( t1, binop(Iop_And64, mkexpr(t0),
17261 mkU64(~AMD64G_CC_MASK_C)));
17265 assign( t1, binop(Iop_Or64, mkexpr(t0),
17266 mkU64(AMD64G_CC_MASK_C)));
17270 assign( t1, binop(Iop_Xor64, mkexpr(t0),
17271 mkU64(AMD64G_CC_MASK_C)));
17275 vpanic("disInstr(x64)(clc/stc/cmc)");
17277 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
17278 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
17279 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
17280 /* Set NDEP even though it isn't used. This makes redundant-PUT
17281 elimination of previous stores to this field work better. */
17282 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
17285 //.. /* REPNE prefix insn */
17287 //.. Addr32 eip_orig = guest_eip_bbstart + delta - 1;
17288 //.. vassert(sorb == 0);
17289 //.. abyte = getUChar(delta); delta++;
17291 //.. if (abyte == 0x66) { sz = 2; abyte = getUChar(delta); delta++; }
17292 //.. whatNext = Dis_StopHere;
17294 //.. switch (abyte) {
17295 //.. /* According to the Intel manual, "repne movs" should never occur, but
17296 //.. * in practice it has happened, so allow for it here... */
17297 //.. case 0xA4: sz = 1; /* REPNE MOVS<sz> */
17298 //.. goto decode_failure;
17299 //.. //-- case 0xA5:
17300 //.. // dis_REP_op ( CondNZ, dis_MOVS, sz, eip_orig,
17301 //.. // guest_eip_bbstart+delta, "repne movs" );
17304 //.. //-- case 0xA6: sz = 1; /* REPNE CMPS<sz> */
17305 //.. //-- case 0xA7:
17306 //.. //-- dis_REP_op ( cb, CondNZ, dis_CMPS, sz, eip_orig, eip, "repne cmps" );
17309 //.. case 0xAE: sz = 1; /* REPNE SCAS<sz> */
17311 //.. dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig,
17312 //.. guest_eip_bbstart+delta, "repne scas" );
17316 //.. goto decode_failure;
17321 /* ------ AE: SCAS variants ------ */
17324 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
17326 goto decode_failure;
17327 if (haveF2(pfx) && !haveF3(pfx)) {
17330 dis_REP_op ( AMD64CondNZ, dis_SCAS, sz,
17331 guest_RIP_curr_instr,
17332 guest_RIP_bbstart+delta, "repne scas", pfx );
17333 dres.whatNext = Dis_StopHere;
17336 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
17338 goto decode_failure;
17339 if (!haveF2(pfx) && haveF3(pfx)) {
17342 dis_REP_op ( AMD64CondZ, dis_SCAS, sz,
17343 guest_RIP_curr_instr,
17344 guest_RIP_bbstart+delta, "repe scas", pfx );
17345 dres.whatNext = Dis_StopHere;
17348 /* AE/AF: scasb/scas{w,l,q} */
17349 if (!haveF2(pfx) && !haveF3(pfx)) {
17352 dis_string_op( dis_SCAS, sz, "scas", pfx );
17355 goto decode_failure;
17357 /* ------ A6, A7: CMPS variants ------ */
17360 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
17362 goto decode_failure;
17363 if (haveF3(pfx) && !haveF2(pfx)) {
17366 dis_REP_op ( AMD64CondZ, dis_CMPS, sz,
17367 guest_RIP_curr_instr,
17368 guest_RIP_bbstart+delta, "repe cmps", pfx );
17369 dres.whatNext = Dis_StopHere;
17372 goto decode_failure;
17374 /* ------ AA, AB: STOS variants ------ */
17377 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
17379 goto decode_failure;
17380 if (haveF3(pfx) && !haveF2(pfx)) {
17383 dis_REP_op ( AMD64CondAlways, dis_STOS, sz,
17384 guest_RIP_curr_instr,
17385 guest_RIP_bbstart+delta, "rep stos", pfx );
17386 dres.whatNext = Dis_StopHere;
17389 /* AA/AB: stosb/stos{w,l,q} */
17390 if (!haveF3(pfx) && !haveF2(pfx)) {
17393 dis_string_op( dis_STOS, sz, "stos", pfx );
17396 goto decode_failure;
17398 /* ------ A4, A5: MOVS variants ------ */
17401 /* F3 A4: rep movsb */
17403 goto decode_failure;
17404 if (haveF3(pfx) && !haveF2(pfx)) {
17407 dis_REP_op ( AMD64CondAlways, dis_MOVS, sz,
17408 guest_RIP_curr_instr,
17409 guest_RIP_bbstart+delta, "rep movs", pfx );
17410 dres.whatNext = Dis_StopHere;
17414 if (!haveF3(pfx) && !haveF2(pfx)) {
17417 dis_string_op( dis_MOVS, sz, "movs", pfx );
17420 goto decode_failure;
17423 /* ------------------------ XCHG ----------------------- */
17425 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
17426 prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock)
17427 and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is
17428 used with an explicit LOCK prefix, we don't want to end up with
17429 two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by
17430 the generic LOCK logic at the top of disInstr. */
17431 case 0x86: /* XCHG Gb,Eb */
17433 /* Fall through ... */
17434 case 0x87: /* XCHG Gv,Ev */
17435 if (haveF2orF3(pfx)) goto decode_failure;
17436 modrm = getUChar(delta);
17438 t1 = newTemp(ty); t2 = newTemp(ty);
17439 if (epartIsReg(modrm)) {
17440 assign(t1, getIRegE(sz, pfx, modrm));
17441 assign(t2, getIRegG(sz, pfx, modrm));
17442 putIRegG(sz, pfx, modrm, mkexpr(t1));
17443 putIRegE(sz, pfx, modrm, mkexpr(t2));
17445 DIP("xchg%c %s, %s\n",
17446 nameISize(sz), nameIRegG(sz, pfx, modrm),
17447 nameIRegE(sz, pfx, modrm));
17449 *expect_CAS = True;
17450 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17451 assign( t1, loadLE(ty, mkexpr(addr)) );
17452 assign( t2, getIRegG(sz, pfx, modrm) );
17453 casLE( mkexpr(addr),
17454 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
17455 putIRegG( sz, pfx, modrm, mkexpr(t1) );
17457 DIP("xchg%c %s, %s\n", nameISize(sz),
17458 nameIRegG(sz, pfx, modrm), dis_buf);
17462 case 0x90: /* XCHG eAX,eAX */
17463 /* detect and handle F3 90 (rep nop) specially */
17464 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
17465 DIP("rep nop (P4 pause)\n");
17466 /* "observe" the hint. The Vex client needs to be careful not
17467 to cause very long delays as a result, though. */
17468 jmp_lit(Ijk_Yield, guest_RIP_bbstart+delta);
17469 dres.whatNext = Dis_StopHere;
17472 /* detect and handle NOPs specially */
17473 if (/* F2/F3 probably change meaning completely */
17475 /* If REX.B is 1, we're not exchanging rAX with itself */
17476 && getRexB(pfx)==0 ) {
17480 /* else fall through to normal case. */
17481 case 0x91: /* XCHG rAX,rCX */
17482 case 0x92: /* XCHG rAX,rDX */
17483 case 0x93: /* XCHG rAX,rBX */
17484 case 0x94: /* XCHG rAX,rSP */
17485 case 0x95: /* XCHG rAX,rBP */
17486 case 0x96: /* XCHG rAX,rSI */
17487 case 0x97: /* XCHG rAX,rDI */
17489 /* guard against mutancy */
17490 if (haveF2orF3(pfx)) goto decode_failure;
17492 /* sz == 2 could legitimately happen, but we don't handle it yet */
17493 if (sz == 2) goto decode_failure; /* awaiting test case */
17495 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
17498 //.. //-- /* ------------------------ XLAT ----------------------- */
17500 //.. //-- case 0xD7: /* XLAT */
17501 //.. //-- t1 = newTemp(cb); t2 = newTemp(cb);
17502 //.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBX, TempReg, t1); /* get eBX */
17503 //.. //-- handleAddrOverrides( cb, sorb, t1 ); /* make t1 DS:eBX */
17504 //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t2); /* get AL */
17505 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we add it. */
17506 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t2);
17507 //.. //-- uWiden(cb, 1, False);
17508 //.. //-- uInstr2(cb, ADD, sz, TempReg, t2, TempReg, t1); /* add AL to eBX */
17509 //.. //-- uInstr2(cb, LOAD, 1, TempReg, t1, TempReg, t2); /* get byte at t1 into t2 */
17510 //.. //-- uInstr2(cb, PUT, 1, TempReg, t2, ArchReg, R_AL); /* put byte into AL */
17512 //.. //-- DIP("xlat%c [ebx]\n", nameISize(sz));
17515 /* ------------------------ IN / OUT ----------------------- */
17517 case 0xE4: /* IN imm8, AL */
17519 t1 = newTemp(Ity_I64);
17520 abyte = getUChar(delta); delta++;
17521 assign(t1, mkU64( abyte & 0xFF ));
17522 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
17524 case 0xE5: /* IN imm8, eAX */
17525 if (!(sz == 2 || sz == 4)) goto decode_failure;
17526 t1 = newTemp(Ity_I64);
17527 abyte = getUChar(delta); delta++;
17528 assign(t1, mkU64( abyte & 0xFF ));
17529 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
17531 case 0xEC: /* IN %DX, AL */
17533 t1 = newTemp(Ity_I64);
17534 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
17535 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
17538 case 0xED: /* IN %DX, eAX */
17539 if (!(sz == 2 || sz == 4)) goto decode_failure;
17540 t1 = newTemp(Ity_I64);
17541 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
17542 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
17546 /* At this point, sz indicates the width, and t1 is a 64-bit
17547 value giving port number. */
17549 if (haveF2orF3(pfx)) goto decode_failure;
17550 vassert(sz == 1 || sz == 2 || sz == 4);
17552 t2 = newTemp(Ity_I64);
17553 d = unsafeIRDirty_1_N(
17556 "amd64g_dirtyhelper_IN",
17557 &amd64g_dirtyhelper_IN,
17558 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
17560 /* do the call, dumping the result in t2. */
17561 stmt( IRStmt_Dirty(d) );
17562 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
17566 case 0xE6: /* OUT AL, imm8 */
17568 t1 = newTemp(Ity_I64);
17569 abyte = getUChar(delta); delta++;
17570 assign( t1, mkU64( abyte & 0xFF ) );
17571 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
17573 case 0xE7: /* OUT eAX, imm8 */
17574 if (!(sz == 2 || sz == 4)) goto decode_failure;
17575 t1 = newTemp(Ity_I64);
17576 abyte = getUChar(delta); delta++;
17577 assign( t1, mkU64( abyte & 0xFF ) );
17578 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
17580 case 0xEE: /* OUT AL, %DX */
17582 t1 = newTemp(Ity_I64);
17583 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
17584 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
17587 case 0xEF: /* OUT eAX, %DX */
17588 if (!(sz == 2 || sz == 4)) goto decode_failure;
17589 t1 = newTemp(Ity_I64);
17590 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
17591 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
17595 /* At this point, sz indicates the width, and t1 is a 64-bit
17596 value giving port number. */
17598 if (haveF2orF3(pfx)) goto decode_failure;
17599 vassert(sz == 1 || sz == 2 || sz == 4);
17601 d = unsafeIRDirty_0_N(
17603 "amd64g_dirtyhelper_OUT",
17604 &amd64g_dirtyhelper_OUT,
17605 mkIRExprVec_3( mkexpr(t1),
17606 widenUto64( getIRegRAX(sz) ),
17609 stmt( IRStmt_Dirty(d) );
17613 /* ------------------------ (Grp1 extensions) ---------- */
17615 case 0x80: /* Grp1 Ib,Eb */
17616 if (haveF2orF3(pfx)) goto decode_failure;
17617 modrm = getUChar(delta);
17618 am_sz = lengthAMode(pfx,delta);
17621 d64 = getSDisp8(delta + am_sz);
17622 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
17625 case 0x81: /* Grp1 Iv,Ev */
17626 if (haveF2orF3(pfx)) goto decode_failure;
17627 modrm = getUChar(delta);
17628 am_sz = lengthAMode(pfx,delta);
17630 d64 = getSDisp(d_sz, delta + am_sz);
17631 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
17634 case 0x83: /* Grp1 Ib,Ev */
17635 if (haveF2orF3(pfx)) goto decode_failure;
17636 modrm = getUChar(delta);
17637 am_sz = lengthAMode(pfx,delta);
17639 d64 = getSDisp8(delta + am_sz);
17640 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
17643 /* ------------------------ (Grp2 extensions) ---------- */
17645 case 0xC0: { /* Grp2 Ib,Eb */
17646 Bool decode_OK = True;
17647 if (haveF2orF3(pfx)) goto decode_failure;
17648 modrm = getUChar(delta);
17649 am_sz = lengthAMode(pfx,delta);
17651 d64 = getUChar(delta + am_sz);
17653 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
17654 mkU8(d64 & 0xFF), NULL, &decode_OK );
17655 if (!decode_OK) goto decode_failure;
17658 case 0xC1: { /* Grp2 Ib,Ev */
17659 Bool decode_OK = True;
17660 if (haveF2orF3(pfx)) goto decode_failure;
17661 modrm = getUChar(delta);
17662 am_sz = lengthAMode(pfx,delta);
17664 d64 = getUChar(delta + am_sz);
17665 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
17666 mkU8(d64 & 0xFF), NULL, &decode_OK );
17667 if (!decode_OK) goto decode_failure;
17670 case 0xD0: { /* Grp2 1,Eb */
17671 Bool decode_OK = True;
17672 if (haveF2orF3(pfx)) goto decode_failure;
17673 modrm = getUChar(delta);
17674 am_sz = lengthAMode(pfx,delta);
17678 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
17679 mkU8(d64), NULL, &decode_OK );
17680 if (!decode_OK) goto decode_failure;
17683 case 0xD1: { /* Grp2 1,Ev */
17684 Bool decode_OK = True;
17685 if (haveF2orF3(pfx)) goto decode_failure;
17686 modrm = getUChar(delta);
17687 am_sz = lengthAMode(pfx,delta);
17690 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
17691 mkU8(d64), NULL, &decode_OK );
17692 if (!decode_OK) goto decode_failure;
17695 case 0xD2: { /* Grp2 CL,Eb */
17696 Bool decode_OK = True;
17697 if (haveF2orF3(pfx)) goto decode_failure;
17698 modrm = getUChar(delta);
17699 am_sz = lengthAMode(pfx,delta);
17702 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
17703 getIRegCL(), "%cl", &decode_OK );
17704 if (!decode_OK) goto decode_failure;
17707 case 0xD3: { /* Grp2 CL,Ev */
17708 Bool decode_OK = True;
17709 if (haveF2orF3(pfx)) goto decode_failure;
17710 modrm = getUChar(delta);
17711 am_sz = lengthAMode(pfx,delta);
17713 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
17714 getIRegCL(), "%cl", &decode_OK );
17715 if (!decode_OK) goto decode_failure;
17719 /* ------------------------ (Grp3 extensions) ---------- */
17721 case 0xF6: { /* Grp3 Eb */
17722 Bool decode_OK = True;
17723 if (haveF2orF3(pfx)) goto decode_failure;
17724 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
17725 if (!decode_OK) goto decode_failure;
17728 case 0xF7: { /* Grp3 Ev */
17729 Bool decode_OK = True;
17730 if (haveF2orF3(pfx)) goto decode_failure;
17731 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
17732 if (!decode_OK) goto decode_failure;
17736 /* ------------------------ (Grp4 extensions) ---------- */
17738 case 0xFE: { /* Grp4 Eb */
17739 Bool decode_OK = True;
17740 if (haveF2orF3(pfx)) goto decode_failure;
17741 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
17742 if (!decode_OK) goto decode_failure;
17746 /* ------------------------ (Grp5 extensions) ---------- */
17748 case 0xFF: { /* Grp5 Ev */
17749 Bool decode_OK = True;
17750 if (haveF2orF3(pfx)) goto decode_failure;
17751 delta = dis_Grp5 ( vbi, pfx, sz, delta, &dres, &decode_OK );
17752 if (!decode_OK) goto decode_failure;
17756 /* ------------------------ Escapes to 2-byte opcodes -- */
17759 opc = getUChar(delta); delta++;
17762 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
17764 case 0xBA: { /* Grp8 Ib,Ev */
17765 Bool decode_OK = False;
17766 if (haveF2orF3(pfx)) goto decode_failure;
17767 modrm = getUChar(delta);
17768 am_sz = lengthAMode(pfx,delta);
17769 d64 = getSDisp8(delta + am_sz);
17770 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
17773 goto decode_failure;
17777 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
17779 case 0xBC: /* BSF Gv,Ev */
17780 if (haveF2orF3(pfx)) goto decode_failure;
17781 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
17783 case 0xBD: /* BSR Gv,Ev */
17784 if (haveF2orF3(pfx)) goto decode_failure;
17785 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
17788 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
17790 case 0xC8: /* BSWAP %eax */
17797 case 0xCF: /* BSWAP %edi */
17798 if (haveF2orF3(pfx)) goto decode_failure;
17799 /* According to the AMD64 docs, this insn can have size 4 or
17802 t1 = newTemp(Ity_I32);
17803 t2 = newTemp(Ity_I32);
17804 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
17807 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
17809 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
17810 mkU32(0x00FF0000)),
17812 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
17813 mkU32(0x0000FF00)),
17814 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
17815 mkU32(0x000000FF) )
17818 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
17819 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
17822 else if (sz == 8) {
17823 IRTemp m8 = newTemp(Ity_I64);
17824 IRTemp s8 = newTemp(Ity_I64);
17825 IRTemp m16 = newTemp(Ity_I64);
17826 IRTemp s16 = newTemp(Ity_I64);
17827 IRTemp m32 = newTemp(Ity_I64);
17828 t1 = newTemp(Ity_I64);
17829 t2 = newTemp(Ity_I64);
17830 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
17832 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
17836 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
17839 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
17844 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
17848 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
17851 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
17856 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
17860 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
17863 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
17868 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
17869 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
17872 goto decode_failure;
17875 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
17877 /* All of these are possible at sizes 2, 4 and 8, but until a
17878 size 2 test case shows up, only handle sizes 4 and 8. */
17880 case 0xA3: /* BT Gv,Ev */
17881 if (haveF2orF3(pfx)) goto decode_failure;
17882 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
17883 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone );
17885 case 0xB3: /* BTR Gv,Ev */
17886 if (haveF2orF3(pfx)) goto decode_failure;
17887 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
17888 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset );
17890 case 0xAB: /* BTS Gv,Ev */
17891 if (haveF2orF3(pfx)) goto decode_failure;
17892 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
17893 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet );
17895 case 0xBB: /* BTC Gv,Ev */
17896 if (haveF2orF3(pfx)) goto decode_failure;
17897 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
17898 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp );
17901 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
17905 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
17906 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
17907 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
17908 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
17909 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
17910 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
17911 case 0x48: /* CMOVSb (cmov negative) */
17912 case 0x49: /* CMOVSb (cmov not negative) */
17913 case 0x4A: /* CMOVP (cmov parity even) */
17914 case 0x4B: /* CMOVNP (cmov parity odd) */
17915 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
17916 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
17917 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
17918 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
17919 if (haveF2orF3(pfx)) goto decode_failure;
17920 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
17923 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
17925 case 0xB0: { /* CMPXCHG Gb,Eb */
17927 if (haveF2orF3(pfx)) goto decode_failure;
17928 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
17929 if (!ok) goto decode_failure;
17932 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
17934 if (haveF2orF3(pfx)) goto decode_failure;
17935 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
17936 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
17937 if (!ok) goto decode_failure;
17941 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
17942 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
17943 IRTemp expdHi = newTemp(elemTy);
17944 IRTemp expdLo = newTemp(elemTy);
17945 IRTemp dataHi = newTemp(elemTy);
17946 IRTemp dataLo = newTemp(elemTy);
17947 IRTemp oldHi = newTemp(elemTy);
17948 IRTemp oldLo = newTemp(elemTy);
17949 IRTemp flags_old = newTemp(Ity_I64);
17950 IRTemp flags_new = newTemp(Ity_I64);
17951 IRTemp success = newTemp(Ity_I1);
17952 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
17953 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
17954 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
17955 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
17956 IRTemp expdHi64 = newTemp(Ity_I64);
17957 IRTemp expdLo64 = newTemp(Ity_I64);
17959 /* Translate this using a DCAS, even if there is no LOCK
17960 prefix. Life is too short to bother with generating two
17961 different translations for the with/without-LOCK-prefix
17963 *expect_CAS = True;
17965 /* Decode, and generate address. */
17966 if (have66orF2orF3(pfx)) goto decode_failure;
17967 if (sz != 4 && sz != 8) goto decode_failure;
17968 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
17969 goto decode_failure;
17970 modrm = getUChar(delta);
17971 if (epartIsReg(modrm)) goto decode_failure;
17972 if (gregLO3ofRM(modrm) != 1) goto decode_failure;
17973 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17976 /* cmpxchg16b requires an alignment check. */
17978 gen_SEGV_if_not_16_aligned( addr );
17980 /* Get the expected and new values. */
17981 assign( expdHi64, getIReg64(R_RDX) );
17982 assign( expdLo64, getIReg64(R_RAX) );
17984 /* These are the correctly-sized expected and new values.
17985 However, we also get expdHi64/expdLo64 above as 64-bits
17986 regardless, because we will need them later in the 32-bit
17987 case (paradoxically). */
17988 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
17989 : mkexpr(expdHi64) );
17990 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
17991 : mkexpr(expdLo64) );
17992 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
17993 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
17997 mkIRCAS( oldHi, oldLo,
17998 Iend_LE, mkexpr(addr),
17999 mkexpr(expdHi), mkexpr(expdLo),
18000 mkexpr(dataHi), mkexpr(dataLo)
18003 /* success when oldHi:oldLo == expdHi:expdLo */
18007 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
18008 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
18013 /* If the DCAS is successful, that is to say oldHi:oldLo ==
18014 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
18015 which is where they came from originally. Both the actual
18016 contents of these two regs, and any shadow values, are
18017 unchanged. If the DCAS fails then we're putting into
18018 RDX:RAX the value seen in memory. */
18019 /* Now of course there's a complication in the 32-bit case
18020 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
18021 unchanged; but if we use the same scheme as in the 64-bit
18022 case, we get hit by the standard rule that a write to the
18023 bottom 32 bits of an integer register zeros the upper 32
18024 bits. And so the upper halves of RDX and RAX mysteriously
18025 become zero. So we have to stuff back in the original
18026 64-bit values which we previously stashed in
18027 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
18028 /* It's just _so_ much fun ... */
18030 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
18031 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
18036 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
18037 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
18042 /* Copy the success bit into the Z flag and leave the others
18044 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
18048 binop(Iop_And64, mkexpr(flags_old),
18049 mkU64(~AMD64G_CC_MASK_Z)),
18052 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
18053 mkU8(AMD64G_CC_SHIFT_Z)) ));
18055 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
18056 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
18057 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
18058 /* Set NDEP even though it isn't used. This makes
18059 redundant-PUT elimination of previous stores to this field
18061 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
18063 /* Sheesh. Aren't you glad it was me and not you that had to
18064 write and validate all this grunge? */
18066 DIP("cmpxchg8b %s\n", dis_buf);
18071 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
18073 case 0xA2: { /* CPUID */
18074 /* Uses dirty helper:
18075 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
18076 declared to mod rax, wr rbx, rcx, rdx
18079 HChar* fName = NULL;
18080 void* fAddr = NULL;
18081 if (haveF2orF3(pfx)) goto decode_failure;
18082 if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
18083 |VEX_HWCAPS_AMD64_CX16)) {
18084 //fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
18085 //fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16;
18086 /* This is a Core-2-like machine */
18087 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
18088 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
18089 /* This is a Core-i5-like machine */
18092 /* Give a CPUID for at least a baseline machine, SSE2
18093 only, and no CX16 */
18094 fName = "amd64g_dirtyhelper_CPUID_baseline";
18095 fAddr = &amd64g_dirtyhelper_CPUID_baseline;
18098 vassert(fName); vassert(fAddr);
18099 d = unsafeIRDirty_0_N ( 0/*regparms*/,
18100 fName, fAddr, mkIRExprVec_0() );
18101 /* declare guest state effects */
18102 d->needsBBP = True;
18104 d->fxState[0].fx = Ifx_Modify;
18105 d->fxState[0].offset = OFFB_RAX;
18106 d->fxState[0].size = 8;
18107 d->fxState[1].fx = Ifx_Write;
18108 d->fxState[1].offset = OFFB_RBX;
18109 d->fxState[1].size = 8;
18110 d->fxState[2].fx = Ifx_Modify;
18111 d->fxState[2].offset = OFFB_RCX;
18112 d->fxState[2].size = 8;
18113 d->fxState[3].fx = Ifx_Write;
18114 d->fxState[3].offset = OFFB_RDX;
18115 d->fxState[3].size = 8;
18116 /* execute the dirty call, side-effecting guest state */
18117 stmt( IRStmt_Dirty(d) );
18118 /* CPUID is a serialising insn. So, just in case someone is
18119 using it as a memory fence ... */
18120 stmt( IRStmt_MBE(Imbe_Fence) );
18125 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
18127 case 0xB6: /* MOVZXb Eb,Gv */
18128 if (haveF2orF3(pfx)) goto decode_failure;
18129 if (sz != 2 && sz != 4 && sz != 8)
18130 goto decode_failure;
18131 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
18133 case 0xB7: /* MOVZXw Ew,Gv */
18134 if (haveF2orF3(pfx)) goto decode_failure;
18135 if (sz != 4 && sz != 8)
18136 goto decode_failure;
18137 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
18140 case 0xBE: /* MOVSXb Eb,Gv */
18141 if (haveF2orF3(pfx)) goto decode_failure;
18142 if (sz != 2 && sz != 4 && sz != 8)
18143 goto decode_failure;
18144 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
18146 case 0xBF: /* MOVSXw Ew,Gv */
18147 if (haveF2orF3(pfx)) goto decode_failure;
18148 if (sz != 4 && sz != 8)
18149 goto decode_failure;
18150 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
18153 //.. //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
18155 //.. //-- case 0xC3: /* MOVNTI Gv,Ev */
18156 //.. //-- vg_assert(sz == 4);
18157 //.. //-- modrm = getUChar(eip);
18158 //.. //-- vg_assert(!epartIsReg(modrm));
18159 //.. //-- t1 = newTemp(cb);
18160 //.. //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
18161 //.. //-- pair = disAMode ( cb, sorb, eip, dis_buf );
18162 //.. //-- t2 = LOW24(pair);
18163 //.. //-- eip += HI8(pair);
18164 //.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
18165 //.. //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
18168 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
18170 case 0xAF: /* IMUL Ev, Gv */
18171 if (haveF2orF3(pfx)) goto decode_failure;
18172 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
18175 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
18178 if (haveF2orF3(pfx)) goto decode_failure;
18179 modrm = getUChar(delta);
18180 if (epartIsReg(modrm)) goto decode_failure;
18181 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18183 DIP("nop%c %s\n", nameISize(sz), dis_buf);
18186 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
18189 case 0x82: /* JBb/JNAEb (jump below) */
18190 case 0x83: /* JNBb/JAEb (jump not below) */
18191 case 0x84: /* JZb/JEb (jump zero) */
18192 case 0x85: /* JNZb/JNEb (jump not zero) */
18193 case 0x86: /* JBEb/JNAb (jump below or equal) */
18194 case 0x87: /* JNBEb/JAb (jump not below or equal) */
18195 case 0x88: /* JSb (jump negative) */
18196 case 0x89: /* JSb (jump not negative) */
18197 case 0x8A: /* JP (jump parity even) */
18198 case 0x8B: /* JNP/JPO (jump parity odd) */
18199 case 0x8C: /* JLb/JNGEb (jump less) */
18200 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
18201 case 0x8E: /* JLEb/JNGb (jump less or equal) */
18202 case 0x8F: /* JGb/JNLEb (jump greater) */
18204 HChar* comment = "";
18205 if (haveF2orF3(pfx)) goto decode_failure;
18206 jmpDelta = getSDisp32(delta);
18207 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
18210 && vex_control.guest_chase_cond
18211 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
18213 && resteerOkFn( callback_opaque, d64) ) {
18214 /* Speculation: assume this backward branch is taken. So
18215 we need to emit a side-exit to the insn following this
18216 one, on the negation of the condition, and continue at
18217 the branch target address (d64). If we wind up back at
18218 the first instruction of the trace, just stop; it's
18219 better to let the IR loop unroller handle that case. */
18221 mk_amd64g_calculate_condition(
18222 (AMD64Condcode)(1 ^ (opc - 0x80))),
18224 IRConst_U64(guest_RIP_bbstart+delta) ) );
18225 dres.whatNext = Dis_ResteerC;
18226 dres.continueAt = d64;
18227 comment = "(assumed taken)";
18231 && vex_control.guest_chase_cond
18232 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
18234 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
18235 /* Speculation: assume this forward branch is not taken.
18236 So we need to emit a side-exit to d64 (the dest) and
18237 continue disassembling at the insn immediately
18238 following this one. */
18240 mk_amd64g_calculate_condition((AMD64Condcode)
18243 IRConst_U64(d64) ) );
18244 dres.whatNext = Dis_ResteerC;
18245 dres.continueAt = guest_RIP_bbstart+delta;
18246 comment = "(assumed not taken)";
18249 /* Conservative default translation - end the block at
18251 jcc_01( (AMD64Condcode)(opc - 0x80),
18252 guest_RIP_bbstart+delta,
18254 dres.whatNext = Dis_StopHere;
18256 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment);
18260 /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */
18261 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
18262 /* 0F 0D /1 -- prefetchw mem8 */
18263 if (have66orF2orF3(pfx)) goto decode_failure;
18264 modrm = getUChar(delta);
18265 if (epartIsReg(modrm)) goto decode_failure;
18266 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
18267 goto decode_failure;
18269 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18272 switch (gregLO3ofRM(modrm)) {
18273 case 0: DIP("prefetch %s\n", dis_buf); break;
18274 case 1: DIP("prefetchw %s\n", dis_buf); break;
18275 default: vassert(0); /*NOTREACHED*/
18279 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
18280 case 0x31: { /* RDTSC */
18281 IRTemp val = newTemp(Ity_I64);
18282 IRExpr** args = mkIRExprVec_0();
18283 IRDirty* d = unsafeIRDirty_1_N (
18286 "amd64g_dirtyhelper_RDTSC",
18287 &amd64g_dirtyhelper_RDTSC,
18290 if (have66orF2orF3(pfx)) goto decode_failure;
18291 /* execute the dirty call, dumping the result in val. */
18292 stmt( IRStmt_Dirty(d) );
18293 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
18294 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
18299 //.. /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
18301 //.. case 0xA1: /* POP %FS */
18302 //.. dis_pop_segreg( R_FS, sz ); break;
18303 //.. case 0xA9: /* POP %GS */
18304 //.. dis_pop_segreg( R_GS, sz ); break;
18306 //.. case 0xA0: /* PUSH %FS */
18307 //.. dis_push_segreg( R_FS, sz ); break;
18308 //.. case 0xA8: /* PUSH %GS */
18309 //.. dis_push_segreg( R_GS, sz ); break;
18311 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
18314 case 0x92: /* set-Bb/set-NAEb (set if below) */
18315 case 0x93: /* set-NBb/set-AEb (set if not below) */
18316 case 0x94: /* set-Zb/set-Eb (set if zero) */
18317 case 0x95: /* set-NZb/set-NEb (set if not zero) */
18318 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
18319 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
18320 case 0x98: /* set-Sb (set if negative) */
18321 case 0x99: /* set-Sb (set if not negative) */
18322 case 0x9A: /* set-P (set if parity even) */
18323 case 0x9B: /* set-NP (set if parity odd) */
18324 case 0x9C: /* set-Lb/set-NGEb (set if less) */
18325 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
18326 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
18327 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
18328 if (haveF2orF3(pfx)) goto decode_failure;
18329 t1 = newTemp(Ity_I8);
18330 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
18331 modrm = getUChar(delta);
18332 if (epartIsReg(modrm)) {
18334 putIRegE(1, pfx, modrm, mkexpr(t1));
18335 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
18336 nameIRegE(1,pfx,modrm));
18338 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18340 storeLE( mkexpr(addr), mkexpr(t1) );
18341 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
18345 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
18347 case 0xA4: /* SHLDv imm8,Gv,Ev */
18348 modrm = getUChar(delta);
18349 d64 = delta + lengthAMode(pfx, delta);
18350 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
18351 delta = dis_SHLRD_Gv_Ev (
18352 vbi, pfx, delta, modrm, sz,
18353 mkU8(getUChar(d64)), True, /* literal */
18354 dis_buf, True /* left */ );
18356 case 0xA5: /* SHLDv %cl,Gv,Ev */
18357 modrm = getUChar(delta);
18358 delta = dis_SHLRD_Gv_Ev (
18359 vbi, pfx, delta, modrm, sz,
18360 getIRegCL(), False, /* not literal */
18361 "%cl", True /* left */ );
18364 case 0xAC: /* SHRDv imm8,Gv,Ev */
18365 modrm = getUChar(delta);
18366 d64 = delta + lengthAMode(pfx, delta);
18367 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
18368 delta = dis_SHLRD_Gv_Ev (
18369 vbi, pfx, delta, modrm, sz,
18370 mkU8(getUChar(d64)), True, /* literal */
18371 dis_buf, False /* right */ );
18373 case 0xAD: /* SHRDv %cl,Gv,Ev */
18374 modrm = getUChar(delta);
18375 delta = dis_SHLRD_Gv_Ev (
18376 vbi, pfx, delta, modrm, sz,
18377 getIRegCL(), False, /* not literal */
18378 "%cl", False /* right */);
18381 /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */
18382 case 0x05: /* SYSCALL */
18383 guest_RIP_next_mustcheck = True;
18384 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
18385 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
18386 /* It's important that all guest state is up-to-date
18387 at this point. So we declare an end-of-block here, which
18388 forces any cached guest state to be flushed. */
18389 jmp_lit(Ijk_Sys_syscall, guest_RIP_next_assumed);
18390 dres.whatNext = Dis_StopHere;
18394 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
18396 case 0xC0: { /* XADD Gb,Eb */
18397 Bool decode_OK = False;
18398 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
18400 goto decode_failure;
18403 case 0xC1: { /* XADD Gv,Ev */
18404 Bool decode_OK = False;
18405 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
18407 goto decode_failure;
18411 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
18415 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
18417 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
18418 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
18419 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
18420 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
18424 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
18427 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
18430 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
18434 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
18437 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
18440 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
18442 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
18443 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
18445 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
18449 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
18453 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
18455 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
18456 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
18457 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
18461 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
18465 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
18467 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
18468 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
18469 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
18470 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
18472 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
18476 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
18480 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
18483 Long delta0 = delta-1;
18484 Bool decode_OK = False;
18486 /* If sz==2 this is SSE, and we assume sse idec has
18487 already spotted those cases by now. */
18488 if (sz != 4 && sz != 8)
18489 goto decode_failure;
18490 if (have66orF2orF3(pfx))
18491 goto decode_failure;
18493 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, delta-1 );
18496 goto decode_failure;
18501 case 0x0E: /* FEMMS */
18502 case 0x77: /* EMMS */
18504 goto decode_failure;
18505 do_EMMS_preamble();
18509 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
18510 case 0x01: /* 0F 01 /0 -- SGDT */
18511 /* 0F 01 /1 -- SIDT */
18513 /* This is really revolting, but ... since each processor
18514 (core) only has one IDT and one GDT, just let the guest
18515 see it (pass-through semantics). I can't see any way to
18516 construct a faked-up value, so don't bother to try. */
18517 modrm = getUChar(delta);
18518 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18520 if (epartIsReg(modrm)) goto decode_failure;
18521 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
18522 goto decode_failure;
18523 switch (gregLO3ofRM(modrm)) {
18524 case 0: DIP("sgdt %s\n", dis_buf); break;
18525 case 1: DIP("sidt %s\n", dis_buf); break;
18526 default: vassert(0); /*NOTREACHED*/
18529 IRDirty* d = unsafeIRDirty_0_N (
18531 "amd64g_dirtyhelper_SxDT",
18532 &amd64g_dirtyhelper_SxDT,
18533 mkIRExprVec_2( mkexpr(addr),
18534 mkU64(gregLO3ofRM(modrm)) )
18536 /* declare we're writing memory */
18537 d->mFx = Ifx_Write;
18538 d->mAddr = mkexpr(addr);
18540 stmt( IRStmt_Dirty(d) );
18544 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
18547 goto decode_failure;
18548 } /* switch (opc) for the 2-byte opcodes */
18549 goto decode_success;
18550 } /* case 0x0F: of primary opcode */
18552 /* ------------------------ ??? ------------------------ */
18556 /* All decode failures end up here. */
18557 vex_printf("vex amd64->IR: unhandled instruction bytes: "
18558 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
18559 (Int)getUChar(delta_start+0),
18560 (Int)getUChar(delta_start+1),
18561 (Int)getUChar(delta_start+2),
18562 (Int)getUChar(delta_start+3),
18563 (Int)getUChar(delta_start+4),
18564 (Int)getUChar(delta_start+5),
18565 (Int)getUChar(delta_start+6),
18566 (Int)getUChar(delta_start+7) );
18568 /* Tell the dispatcher that this insn cannot be decoded, and so has
18569 not been executed, and (is currently) the next to be executed.
18570 RIP should be up-to-date since it made so at the start of each
18571 insn, but nevertheless be paranoid and update it again right
18573 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
18574 jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
18575 dres.whatNext = Dis_StopHere;
18577 /* We also need to say that a CAS is not expected now, regardless
18578 of what it might have been set to at the start of the function,
18579 since the IR that we've emitted just above (to synthesis a
18580 SIGILL) does not involve any CAS, and presumably no other IR has
18581 been emitted for this (non-decoded) insn. */
18582 *expect_CAS = False;
18585 } /* switch (opc) for the main (primary) opcode switch. */
18588 /* All decode successes end up here. */
18590 dres.len = (Int)toUInt(delta - delta_start);
18598 /*------------------------------------------------------------*/
18599 /*--- Top-level fn ---*/
18600 /*------------------------------------------------------------*/
18602 /* Disassemble a single instruction into IR. The instruction
18603 is located in host memory at &guest_code[delta]. */
/* Top-level entry point: disassemble a single guest instruction,
   located in host memory at &guest_code_IN[delta], into IR appended to
   irsb_IN, and cross-check two invariants of the worker
   (disInstr_AMD64_WRK): the "assumed next RIP" and the presence/absence
   of an IRCAS statement (LOCK-prefix handling).

   NOTE(review): this chunk of the file is elided -- some parameter
   lines (e.g. put_IP, the guest IP/delta arguments), the local
   declarations (dres, x1, x2, i, has_CAS) and the trailing
   "return dres;" fall outside the visible text.  The comments below
   describe only what the visible statements demonstrably do. */
DisResult disInstr_AMD64 ( IRSB* irsb_IN,
Bool (*resteerOkFn) ( void*, Addr64 ),
void* callback_opaque,
UChar* guest_code_IN,
VexArch guest_arch,
VexArchInfo* archinfo,
VexAbiInfo* abiinfo,
Bool host_bigendian_IN )
/* expect_CAS: set by the worker when the decoded insn is translated
   with an IRCAS; has_CAS: whether an Ist_CAS actually appears in the
   emitted statements.  The two must agree (checked below). */
Bool expect_CAS, has_CAS;
/* Set globals (see top of this file) */
vassert(guest_arch == VexArchAMD64);
guest_code = guest_code_IN;
host_is_bigendian = host_bigendian_IN;
/* guest_IP is the address of the instruction being decoded;
   guest_RIP_bbstart is the address of the start of the superblock
   (guest_IP minus the byte offset 'delta' into it). */
guest_RIP_curr_instr = guest_IP;
guest_RIP_bbstart = guest_IP - delta;
/* We'll consult these after doing disInstr_AMD64_WRK. */
guest_RIP_next_assumed = 0;
guest_RIP_next_mustcheck = False;
/* Remember how many statements the superblock had before the worker
   ran, so we can inspect exactly the statements it added. */
x1 = irsb_IN->stmts_used;
expect_CAS = False;
dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
delta, archinfo, abiinfo );
x2 = irsb_IN->stmts_used;
/* If disInstr_AMD64_WRK tried to figure out the next rip, check it
   got it right.  Failure of this assertion is serious and denotes
   a bug in disInstr. */
if (guest_RIP_next_mustcheck
&& guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
vex_printf("assumed next %%rip = 0x%llx\n",
guest_RIP_next_assumed );
vex_printf(" actual next %%rip = 0x%llx\n",
guest_RIP_curr_instr + dres.len );
vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
/* See comment at the top of disInstr_AMD64_WRK for meaning of
   expect_CAS.  Here, we (sanity-)check for the presence/absence of
   IRCAS as directed by the returned expect_CAS value.  Scan only the
   statements added by this insn (indices x1 .. x2-1). */
for (i = x1; i < x2; i++) {
if (irsb_IN->stmts[i]->tag == Ist_CAS)
if (expect_CAS != has_CAS) {
/* inconsistency detected.  re-disassemble the instruction so as
   to generate a useful error message; then assert.  Turning on
   VEX_TRACE_FE makes the second run print what it decodes. */
vex_traceflags |= VEX_TRACE_FE;
dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
delta, archinfo, abiinfo );
for (i = x1; i < x2; i++) {
vex_printf("\t\t");
ppIRStmt(irsb_IN->stmts[i]);
/* Failure of this assertion is serious and denotes a bug in
   the front end's LOCK-prefix/CAS bookkeeping. */
vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
18687 /*------------------------------------------------------------*/
18688 /*--- Unused stuff ---*/
18689 /*------------------------------------------------------------*/
18691 // A potentially more Memcheck-friendly version of gen_LZCNT, if
18692 // this should ever be needed.
18694 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
18696 // /* Scheme is simple: propagate the most significant 1-bit into all
18697 // lower positions in the word. This gives a word of the form
18698 // 0---01---1. Now invert it, giving a word of the form
18699 // 1---10---0, then do a population-count idiom (to count the 1s,
18700 // which is the number of leading zeroes, or the word size if the
18701 // original word was 0.
18705 // for (i = 0; i < 7; i++) {
18706 // t[i] = newTemp(ty);
18708 // if (ty == Ity_I64) {
18709 // assign(t[0], binop(Iop_Or64, mkexpr(src),
18710 // binop(Iop_Shr64, mkexpr(src), mkU8(1))));
18711 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
18712 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
18713 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
18714 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
18715 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
18716 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
18717 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
18718 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
18719 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
18720 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
18721 // assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
18722 // return gen_POPCOUNT(ty, t[6]);
18724 // if (ty == Ity_I32) {
18725 // assign(t[0], binop(Iop_Or32, mkexpr(src),
18726 // binop(Iop_Shr32, mkexpr(src), mkU8(1))));
18727 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
18728 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
18729 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
18730 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
18731 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
18732 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
18733 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
18734 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
18735 // assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
18736 // return gen_POPCOUNT(ty, t[5]);
18738 // if (ty == Ity_I16) {
18739 // assign(t[0], binop(Iop_Or16, mkexpr(src),
18740 // binop(Iop_Shr16, mkexpr(src), mkU8(1))));
18741 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
18742 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
18743 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
18744 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
18745 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
18746 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
18747 // assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
18748 // return gen_POPCOUNT(ty, t[4]);
18754 /*--------------------------------------------------------------------*/
18755 /*--- end guest_amd64_toIR.c ---*/
18756 /*--------------------------------------------------------------------*/