/*---------------------------------------------------------------*/
/*--- begin                             guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emwarn.h"
#include "libvex_guest_amd64.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_amd64_defs.h"
#include "guest_generic_x87.h"
/* This file contains helper functions for amd64 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-amd64/toIR.c.

   The convention used is that all functions called from generated
   code are named amd64g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/

/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_RFLAGS 0
/*---------------------------------------------------------------*/
/*--- %rflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
   after imulq/mulq. */

static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
{
   ULong u0, v0, w0;
   Long  u1, v1, w1, w2, t;
   u0   = u & 0xFFFFFFFFULL;
   u1   = u >> 32;
   v0   = v & 0xFFFFFFFFULL;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & 0xFFFFFFFFULL;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}

static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
{
   ULong u0, v0, w0;
   ULong u1, v1, w1, w2, t;
   u0   = u & 0xFFFFFFFFULL;
   u1   = u >> 32;
   v0   = v & 0xFFFFFFFFULL;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & 0xFFFFFFFFULL;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}
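/* Illustrative check (not compiled in): on hosts whose compiler
   provides a 128-bit integer type, the grade-school 32-bit-halves
   decomposition above can be cross-checked directly.  This assumes
   the GCC/clang __int128 extension, which the rest of this file does
   not require. */
#if 0
static void selftest_mullU64 ( void )
{
   ULong hi, lo;
   ULong u = 0xDEADBEEFCAFEBABEULL, v = 0x123456789ABCDEF0ULL;
   unsigned __int128 ref = (unsigned __int128)u * (unsigned __int128)v;
   mullU64(u, v, &hi, &lo);
   vassert(lo == (ULong)ref);
   vassert(hi == (ULong)(ref >> 64));
}
#endif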
static const UChar parity_table[256] = {
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
};
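/* Illustrative check (not compiled in): parity_table[b] is
   AMD64G_CC_MASK_P exactly when b has an even number of 1 bits,
   mirroring how the hardware sets PF from the low 8 bits of a result.
   A popcount-based cross-check, assuming GCC/clang's
   __builtin_popcount: */
#if 0
static void selftest_parity ( void )
{
   UInt b;
   for (b = 0; b < 256; b++) {
      UChar expected = (__builtin_popcount(b) & 1) ? 0 : AMD64G_CC_MASK_P;
      vassert(parity_table[b] == expected);
   }
}
#endif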
/* generalised left-shifter */
static inline Long lshift ( Long x, Int n )
{
   if (n >= 0)
      return x << n;
   else
      return x >> (-n);
}

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}
#define PREAMBLE(__data_bits)                                   \
   /* const */ ULong DATA_MASK                                  \
      = __data_bits==8                                          \
           ? 0xFFULL                                            \
           : (__data_bits==16                                   \
                ? 0xFFFFULL                                     \
                : (__data_bits==32                              \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   /* Four bogus assignments, which hopefully gcc can     */    \
   /* optimise away, and which stop it complaining about  */    \
   /* unused variables.                                   */    \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;
/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
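/* Worked example (not compiled in): for an 8-bit add of 0x7F + 0x01,
   res = 0x80.  Overflow occurs because argL and argR agree in sign
   but res differs: (argL ^ argR ^ -1) & (argL ^ res) has bit 7 set,
   and lshift(..., 12 - 8) moves it to bit 11, which is
   AMD64G_CC_MASK_O.  There is no unsigned carry: (UChar)res is not
   less than (UChar)argL. */
#if 0
static void example_add8_flags ( void )
{
   ULong fl = amd64g_calculate_rflags_all_WRK(AMD64G_CC_OP_ADDB,
                                              0x7F, 0x01, 0);
   vassert(fl & AMD64G_CC_MASK_O);    /* signed overflow */
   vassert(!(fl & AMD64G_CC_MASK_C)); /* no unsigned carry */
   vassert(fl & AMD64G_CC_MASK_S);    /* result is negative */
}
#endif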
/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, oldC, res;                                \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, oldC, res;                                \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     Long argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((ULong)SIGN_MASK - 1)) << 11;                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;      \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
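/* Note on the SHL/SHR thunk convention (an observation added here,
   not part of the original commentary): the translator stores the
   final result in CC_DEP1 and the value as shifted by one bit less in
   CC_DEP2, so the last bit shifted out -- the carry -- is the top bit
   of CC_DEP2 for SHL and bit 0 of CC_DEP2 for SHR, which is exactly
   what the cf computations above read.  Illustrative check, not
   compiled in: */
#if 0
static void example_shl8_flags ( void )
{
   /* 8-bit "shl $1" of 0x80: result (CC_DEP1) is 0x00; the value
      shifted one bit less (CC_DEP2) is 0x80, whose top bit is CF. */
   ULong fl = amd64g_calculate_rflags_all_WRK(AMD64G_CC_OP_SHLB,
                                              0x00, 0x80, 0);
   vassert(fl & AMD64G_CC_MASK_C);
   vassert(fl & AMD64G_CC_MASK_Z);
}
#endif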
/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & CC_DEP1)                        \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))     \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
                     * ((DATA_STYPE)CC_DEP2) );                 \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { Long cf, pf, af, zf, sf, of;                               \
     ULong lo, hi;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { Long cf, pf, af, zf, sf, of;                               \
     Long lo, hi;                                               \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
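/* Worked example (not compiled in): after a widening multiply, CF and
   OF are set together iff the high half is not just the sign/zero
   extension of the low half, i.e. iff the full result does not fit in
   the narrow type. */
#if 0
static void example_mul_flags ( void )
{
   /* 64x64 signed: 2^32 * 2^32 overflows 64 bits, so CF=OF=1. */
   ULong fl = amd64g_calculate_rflags_all_WRK(AMD64G_CC_OP_SMULQ,
                                              1ULL << 32, 1ULL << 32, 0);
   vassert(fl & AMD64G_CC_MASK_C);
   vassert(fl & AMD64G_CC_MASK_O);
}
#endif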
#if PROFILE_RFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 4 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 4 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 4 == 2)
         ch = 'L';
      if (op > 0 && (op-1) % 4 == 3)
         ch = 'Q';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else {
            vex_printf(" %3d ", n );
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}
static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_RFLAGS */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
                                        ULong cc_dep1_formal,
                                        ULong cc_dep2_formal,
                                        ULong cc_ndep_formal )
{
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return cc_dep1_formal
                & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
                   | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
      case AMD64G_CC_OP_ADDB:   ACTIONS_ADD(  8, UChar  );
      case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
      case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );

      case AMD64G_CC_OP_ADCB:   ACTIONS_ADC(  8, UChar  );
      case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
      case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );

      case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
      case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );

      case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
      case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );

      case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
      case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );

      case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
      case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );

      case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
      case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );

      case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
      case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );

      case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
      case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );

      case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
      case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );

      case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
      case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );

      case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                  UShort, toUShort );
      case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                  UInt,   toUInt );
      case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                  ULong,  idULong );

      case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;

      case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                  Short,  toUShort );
      case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                  Int,    toUInt );
      case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                  Long,   idULong );

      case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
                    "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
   }
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_all ( ULong cc_op,
                                    ULong cc_dep1,
                                    ULong cc_dep2,
                                    ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_c ( ULong cc_op,
                                  ULong cc_dep1,
                                  ULong cc_dep2,
                                  ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
      case AMD64G_CC_OP_LOGICQ:
      case AMD64G_CC_OP_LOGICL:
      case AMD64G_CC_OP_LOGICW:
      case AMD64G_CC_OP_LOGICB:
         return 0;
      //      case AMD64G_CC_OP_SUBL:
      //         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
      //                   ? AMD64G_CC_MASK_C : 0;
      //      case AMD64G_CC_OP_SUBW:
      //         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
      //                   ? AMD64G_CC_MASK_C : 0;
      //      case AMD64G_CC_OP_SUBB:
      //         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
      //                   ? AMD64G_CC_MASK_C : 0;
      //      case AMD64G_CC_OP_INCL:
      //      case AMD64G_CC_OP_DECL:
      //         return cc_ndep & AMD64G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_RFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & AMD64G_CC_MASK_C;
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
                                   ULong cc_op,
                                   ULong cc_dep1,
                                   ULong cc_dep2,
                                   ULong cc_ndep )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
                                                  cc_dep2, cc_ndep);
   ULong of,sf,zf,cf,pf;
   ULong inv = cond & 1;

#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case AMD64CondNO:
      case AMD64CondO: /* OF == 1 */
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case AMD64CondNZ:
      case AMD64CondZ: /* ZF == 1 */
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case AMD64CondNB:
      case AMD64CondB: /* CF == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         return 1 & (inv ^ cf);

      case AMD64CondNBE:
      case AMD64CondBE: /* (CF or ZF) == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));

      case AMD64CondNS:
      case AMD64CondS: /* SF == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case AMD64CondNP:
      case AMD64CondP: /* PF == 1 */
         pf = rflags >> AMD64G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case AMD64CondNL:
      case AMD64CondL: /* (SF xor OF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));

      case AMD64CondNLE:
      case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_condition"
                    "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("amd64g_calculate_condition");
   }
}
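/* Illustrative note (not compiled in): the x86 condition-code
   encoding pairs each predicate with its negation in adjacent values
   (O/NO, B/NB, Z/NZ, ...), so "inv = cond & 1" selects the negated
   form and each case above handles both members of a pair. */
#if 0
static void example_condition ( void )
{
   /* cmpq with equal operands: Z holds, NZ does not. */
   vassert(1 == amd64g_calculate_condition(AMD64CondZ,
                                           AMD64G_CC_OP_SUBQ, 42, 42, 0));
   vassert(0 == amd64g_calculate_condition(AMD64CondNZ,
                                           AMD64G_CC_OP_SUBQ, 42, 42, 0));
}
#endif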
/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   Long dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == -1);
   if (dflag == -1)
      rflags |= (1<<10);
   if (vex_state->guest_IDFLAG == 1)
      rflags |= (1<<21);
   return rflags;
}
/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
                                /*MOD*/VexGuestAMD64State* vex_state )
{
   ULong oszacp = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   if (new_carry_flag & 1) {
      oszacp |= AMD64G_CC_MASK_C;
   } else {
      oszacp &= ~AMD64G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
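/* Usage sketch (not compiled in): a LibVEX client holding a
   VexGuestAMD64State can read the full %rflags view of the guest, or
   force the carry flag, with the two functions above.  `st' is
   assumed to be the client's guest state pointer. */
#if 0
void client_example ( VexGuestAMD64State* st )
{
   ULong rflags = LibVEX_GuestAMD64_get_rflags(st);
   if (!(rflags & AMD64G_CC_MASK_C))
      LibVEX_GuestAMD64_put_rflag_c(1, st);
}
#endif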
/*---------------------------------------------------------------*/
/*--- %rflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %rflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static Bool isU64 ( IRExpr* e, ULong n )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == n );
}
IRExpr* guest_amd64_spechelper ( HChar* function_name,
                                 IRExpr** args )
{
#  define unop(_op,_a1)         IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2)    IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU64(_n)             IRExpr_Const(IRConst_U64(_n))
#  define mkU8(_n)              IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif
   /* --------- specialising "amd64g_calculate_condition" --------- */

   if (vex_streq(function_name, "amd64g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
         /* long long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Add64, cc_dep1, cc_dep2),
                           mkU64(0)));
      }
      /*---------------- SUBQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
         /* long long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
         /* long long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,cc_dep1,cc_dep2));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
         /* long long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
         /* long long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
         /* long long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
         /* long long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
      }
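      /* Illustrative note (not from the original commentary): these
         rules mean that for a sequence like "cmpq %rsi,%rdi ; jz ..."
         iropt never needs the full flags thunk -- the call
            amd64g_calculate_condition(AMD64CondZ, AMD64G_CC_OP_SUBQ,
                                       rdi, rsi, ndep)
         is folded at translation time to 1Uto64(CmpEQ64(rdi,rsi)). */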
      /*---------------- SUBL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
         /* long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           binop(Iop_Shl64,cc_dep2,mkU8(32)),
                           binop(Iop_Shl64,cc_dep1,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_Shl64,cc_dep2,mkU8(32)),
                           binop(Iop_Shl64,cc_dep1,mkU8(32))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           binop(Iop_Sub64,
                                 binop(Iop_Shl64, cc_dep1, mkU8(32)),
                                 binop(Iop_Shl64, cc_dep2, mkU8(32))),
                           mkU64(0)));
      }
      /*---------------- SUBW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
         /* word sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           binop(Iop_Shl64,cc_dep2,mkU8(48))));
      }
      /*---------------- SUBB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (ULong)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU64(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U64s. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
            --> test !(dst <s 0)
            --> (ULong) !dst[7]
         */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }
      /*---------------- LOGICQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
         /* long long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
         /* long long and/or/xor, then L
            LOGIC sets SF and ZF according to the
            result and makes OF be zero.  L computes SF ^ OF, but
            OF is zero, so this reduces to SF -- which will be 1 iff
            the result is < signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           cc_dep1,
                           mkU64(0)));
      }
      /*---------------- LOGICL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }
      /*---------------- LOGICB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      /*---------------- INCB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
         /* 8-bit inc, then LE --> sign bit of the arg */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, mkU64(1)),
                            mkU8(7)),
                      mkU64(1));
      }

      /*---------------- INCW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
         /* 16-bit inc, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }

      /*---------------- DECL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           mkU64(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
         /* 16-bit dec, then NZ --> test dst != 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }
      /*---------------- COPY ----------------*/
      /* This can happen, as a result of amd64 FP compares: "comisd ... ;
         jbe" for example. */

      if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
          (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test (C
            or Z == 1). */
         /* COPY, then NBE --> extract C and Z from dep1, and test (C
            or Z == 0). */
         ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpEQ64,
                  binop(
                     Iop_And64,
                     binop(
                        Iop_Or64,
                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
                     ),
                     mkU64(1)
                  ),
                  mkU64(nnn)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpNE64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                     mkU64(1)
                  ),
                  mkU64(0)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY)
          && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpEQ64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
                     mkU64(1)
                  ),
                  mkU64(nnn)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpNE64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
                     mkU64(1)
                  ),
                  mkU64(0)
               )
            );
      }

      return NULL;
   }
   /* --------- specialising "amd64g_calculate_rflags_c" --------- */

   if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
      /* specialise calls to above "calculate_rflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           cc_dep1,
                           cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_And64,cc_dep1,mkU64(0xFF)),
                           binop(Iop_And64,cc_dep2,mkU64(0xFF))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
          || isU64(cc_op, AMD64G_CC_OP_LOGICL)
          || isU64(cc_op, AMD64G_CC_OP_LOGICW)
          || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU64(0);
      }
      if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
          || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return binop(Iop_And64, cc_ndep, mkU64(1));
      }

#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU64
#  undef mkU8

   return NULL;
}
/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}
/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
                               | AMD64G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return AMD64G_FC_MASK_C3 | 0
             | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
             | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
               | AMD64G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0. */
   /* vex_printf("normal\n"); */
   return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
}
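/* Summary (restating the cases above): FXAM reports the operand's
   class in C3,C2,C0, with C1 always carrying the sign bit:
      empty     -> C3=1 C2=0 C0=1
      zero      -> C3=1 C2=0 C0=0
      denormal  -> C3=1 C2=1 C0=0
      infinity  -> C3=0 C2=1 C0=1
      NaN       -> C3=0 C2=0 C0=1
      normal    -> C3=0 C2=1 C0=0
*/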
/* Create an x87 FPU state from the guest state, as close as
   we can approximate it. */
static
void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
                  /*OUT*/UChar* x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   UInt       c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87->env[i] = 0;

   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   x87->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87->env[FP_ENV_CTRL]
      = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      }
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
/* NOTE: only handles 32-bit format (no REX.W on the insn) */
void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
{
   /* Derived from values obtained from
      vendor_id  : AuthenticAMD
      model name : AMD Athlon(tm) 64 Processor 3200+
   */
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   /* FOP: faulting fpu opcode.  From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[3]  = 0; /* BOGUS */

   /* RIP (Last x87 instruction pointer).  From experimentation, the
      real CPU does not write this field. (?!) */
   addrS[4]  = 0; /* BOGUS */
   addrS[5]  = 0; /* BOGUS */
   addrS[6]  = 0; /* BOGUS */
   addrS[7]  = 0; /* BOGUS */

   /* RDP (Last x87 data pointer).  From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[8]  = 0; /* BOGUS */
   addrS[9]  = 0; /* BOGUS */
   addrS[10] = 0; /* BOGUS */
   addrS[11] = 0; /* BOGUS */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
   addrS[15] = 0x0000; /* MXCSR mask (hi16) */

   /* Copy in the FP registers, in ST order. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm15 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0],  gst->guest_XMM0 );
   COPY_U128( xmm[1],  gst->guest_XMM1 );
   COPY_U128( xmm[2],  gst->guest_XMM2 );
   COPY_U128( xmm[3],  gst->guest_XMM3 );
   COPY_U128( xmm[4],  gst->guest_XMM4 );
   COPY_U128( xmm[5],  gst->guest_XMM5 );
   COPY_U128( xmm[6],  gst->guest_XMM6 );
   COPY_U128( xmm[7],  gst->guest_XMM7 );
   COPY_U128( xmm[8],  gst->guest_XMM8 );
   COPY_U128( xmm[9],  gst->guest_XMM9 );
   COPY_U128( xmm[10], gst->guest_XMM10 );
   COPY_U128( xmm[11], gst->guest_XMM11 );
   COPY_U128( xmm[12], gst->guest_XMM12 );
   COPY_U128( xmm[13], gst->guest_XMM13 );
   COPY_U128( xmm[14], gst->guest_XMM14 );
   COPY_U128( xmm[15], gst->guest_XMM15 );

#  undef COPY_U128
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (ULong)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
}
/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* mxcsr[15:0] contains an SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong amd64g_check_ldmxcsr ( ULong mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   ULong rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmWarn ew = EmWarn_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* Given sseround as an IRRoundingMode value, create a suitable SSE
   native format MXCSR value. */
ULong amd64g_create_mxcsr ( ULong sseround )
{
   sseround &= 3;
   return 0x1F80 | (sseround << 13);
}
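/* Worked example (not compiled in): with the default "round to
   nearest" mode (Irrm_NEAREST == 0), amd64g_create_mxcsr yields
   0x1F80 -- all six exception classes masked, rounding-control field
   zero -- and amd64g_check_ldmxcsr(0x1F80) round-trips to rmode 0
   with no emulation warning. */
#if 0
static void example_mxcsr_roundtrip ( void )
{
   ULong mxcsr = amd64g_create_mxcsr( (ULong)Irrm_NEAREST );
   ULong pair  = amd64g_check_ldmxcsr( mxcsr );
   vassert(mxcsr == 0x1F80);
   vassert((pair & 0xFFFFFFFFULL) == (ULong)Irrm_NEAREST);
   vassert((pair >> 32) == (ULong)EmWarn_NONE);
}
#endif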
/* CLEAN HELPER */
/* fpucw[15:0] contains an x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong amd64g_check_fldcw ( ULong fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   ULong rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmWarn ew = EmWarn_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word. */
ULong amd64g_create_fpucw ( ULong fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}
/* This is used to implement 'fldenv'.
   Reads 28 bytes at x87_state[0 .. 27]. */
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
                                      /*IN*/HWord x87_state)
{
   Int        stno, preg;
   UInt       tag;
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87->env[FP_ENV_TAG];
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   ULong      c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   VexEmWarn  ew;
   ULong      fpround;
   ULong      pair;

   /* Copy the tags over. */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = amd64g_check_fldcw ( (ULong)fpucw );
   fpround = pair & 0xFFFFFFFFULL;
   ew      = (VexEmWarn)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
/* Create an x87 FPU env from the guest state, as close as we can
   approximate it.  Writes 28 bytes at x87_state[0..27]. */
void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
                                 /*OUT*/HWord x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   ULong      c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87->env[i] = 0;

   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   x87->env[FP_ENV_STAT]
      = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
   x87->env[FP_ENV_CTRL]
      = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));

   /* Compute the x87 tag word. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
      }
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);

   /* We don't dump the x87 registers, tho. */
}
/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID.      ---*/
/*---------------------------------------------------------------*/

/* Claim to be the following CPU, which is probably representative of
   the lowliest (earliest) amd64 offerings.  It can do neither sse3
   nor cx16.

   vendor_id       : AuthenticAMD
   model name      : AMD Opteron (tm) Processor 848
   cache size      : 1024 KB
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush mmx fxsr
                     sse sse2 syscall nx mmxext lm 3dnowext 3dnow
   TLB size        : 1088 4K pages
   cache_alignment : 64
   address sizes   : 40 bits physical, 48 bits virtual
   power management: ts fid vid ttp
*/
void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)                \
      do { st->guest_RAX = (ULong)(_a);        \
           st->guest_RBX = (ULong)(_b);        \
           st->guest_RCX = (ULong)(_c);        \
           st->guest_RDX = (ULong)(_d);        \
      } while (0)

   switch (0xFFFFFFFF & st->guest_RAX) {
      case 0x00000000:
         SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
         break;
      case 0x00000001:
         SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
         break;
      case 0x80000000:
         SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
         break;
      case 0x80000001:
         SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
         break;
      case 0x80000002:
         SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
         break;
      case 0x80000003:
         SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
         break;
      case 0x80000004:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000005:
         SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
         break;
      case 0x80000008:
         SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
   }
#  undef SET_ABCD
}
/* Claim to be the following CPU (2 x ...), which is sse3 and cx16
   capable.

   vendor_id       : GenuineIntel
   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   cache size      : 4096 KB
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush dts acpi
                     mmx fxsr sse sse2 ss ht tm syscall nx lm
                     constant_tsc pni monitor ds_cpl vmx est tm2
   cache_alignment : 64
   address sizes   : 36 bits physical, 48 bits virtual
*/
void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)                \
      do { st->guest_RAX = (ULong)(_a);        \
           st->guest_RBX = (ULong)(_b);        \
           st->guest_RCX = (ULong)(_c);        \
           st->guest_RDX = (ULong)(_d);        \
      } while (0)

   switch (0xFFFFFFFF & st->guest_RAX) {
      case 0x00000000:
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         switch (0xFFFFFFFF & st->guest_RCX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
         break;
      case 0x80000002:
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
#  undef SET_ABCD
}
ULong amd64g_calculate_RCR ( ULong arg,
                             ULong rot_amt,
                             ULong rflags_in,
                             Long  szIN )
{
   Bool  wantRflags = toBool(szIN < 0);
   ULong sz         = wantRflags ? (-szIN) : szIN;
   ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
   ULong cf=0, of=0, tempcf;

   switch (sz) {
      case 8:
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         of = ((arg >> 63) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 63);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 4:
         while (tempCOUNT >= 33) tempCOUNT -= 33;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         of = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         of = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         of = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FULL) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR(amd64g): invalid size");
   }

   cf &= 1;
   of &= 1;
   rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
   rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);

   /* caller can ask to have back either the resulting flags or
      resulting value, but not both */
   return wantRflags ? rflags_in : arg;
}
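/* Worked example (not compiled in): an 8-bit RCR by 1 of 0x01 with CF
   set rotates the old carry into the top bit and the old bit 0 into
   CF, giving value 0x80 and CF=1.  Passing szIN = -1 asks for the
   updated rflags instead of the rotated value. */
#if 0
static void example_rcr8 ( void )
{
   ULong val = amd64g_calculate_RCR(0x01, 1, AMD64G_CC_MASK_C, 1);
   ULong fl  = amd64g_calculate_RCR(0x01, 1, AMD64G_CC_MASK_C, -1);
   vassert(val == 0x80);
   vassert(fl & AMD64G_CC_MASK_C);
}
#endif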
ULong amd64g_calculate_RCL ( ULong arg,
                             ULong rot_amt,
                             ULong rflags_in,
                             Long  szIN )
{
   Bool  wantRflags = toBool(szIN < 0);
   ULong sz         = wantRflags ? (-szIN) : szIN;
   ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
   ULong cf=0, of=0, tempcf;

   switch (sz) {
      case 8:
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 63) & 1;
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 63) ^ cf) & 1;
         break;
      case 4:
         while (tempCOUNT >= 33) tempCOUNT -= 33;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;
            arg    = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFFULL & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFFULL & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL(amd64g): invalid size");
   }

   cf &= 1;
   of &= 1;
   rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
   rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);

   return wantRflags ? rflags_in : arg;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-amd64 platforms, return 1. */
ULong amd64g_dirtyhelper_RDTSC ( void )
{
#  if defined(__x86_64__)
   UInt eax, edx;
   __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
   return (((ULong)edx) << 32) | ((ULong)eax);
#  else
   return 1ULL;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-amd64 platforms, return 0. */
ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
{
#  if defined(__x86_64__)
   ULong r = 0;
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break; /* note: no 64-bit version of insn exists */
   }
   return r;
#  else
   return 0;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-amd64 platforms, do nothing. */
void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
{
#  if defined(__x86_64__)
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break; /* note: no 64-bit version of insn exists */
   }
#  else
   /* do nothing */
#  endif
}

/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2.                               ---*/
/*---------------------------------------------------------------*/

static inline UChar abdU8 ( UChar xx, UChar yy ) {
   return toUChar(xx>yy ? xx-yy : yy-xx);
}

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 0);
}
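
/* Worked example (illustrative, name hypothetical): how the selectors
   decompose a 64-bit value into 16-bit and 8-bit lanes. */
#if 0
static void example_lane_select ( void )
{
   ULong w = 0x1122334455667788ULL;
   vassert(sel16x4_3(w) == 0x1122);   /* highest 16-bit lane */
   vassert(sel16x4_0(w) == 0x7788);   /* lowest 16-bit lane */
   vassert(sel8x8_7(w)  == 0x11);     /* highest byte lane */
   vassert(sel8x8_0(w)  == 0x88);     /* lowest byte lane */
}
#endif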

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   return
      mk32x2(
         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
      );
}
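
/* Worked example (illustrative, name hypothetical): with every 16-bit
   lane of xx equal to 2 and every lane of yy equal to 3, each pair of
   adjacent products contributes 2*3 + 2*3 = 12 to its 32-bit result
   lane. */
#if 0
static void example_pmaddwd ( void )
{
   ULong r = amd64g_calculate_mmx_pmaddwd(0x0002000200020002ULL,
                                          0x0003000300030003ULL);
   vassert(r == 0x0000000C0000000CULL);
}
#endif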

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_pmovmskb ( ULong xx )
{
   ULong r = 0;
   if (xx & (1ULL << (64-1))) r |= (1<<7);
   if (xx & (1ULL << (56-1))) r |= (1<<6);
   if (xx & (1ULL << (48-1))) r |= (1<<5);
   if (xx & (1ULL << (40-1))) r |= (1<<4);
   if (xx & (1ULL << (32-1))) r |= (1<<3);
   if (xx & (1ULL << (24-1))) r |= (1<<2);
   if (xx & (1ULL << (16-1))) r |= (1<<1);
   if (xx & (1ULL << ( 8-1))) r |= (1<<0);
   return r;
}
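
/* Worked example (illustrative, name hypothetical): bytes 7, 5, 3
   and 1 of the operand have their top bit set, so the collected byte
   mask is 10101010b = 0xAA. */
#if 0
static void example_pmovmskb ( void )
{
   vassert(amd64g_calculate_mmx_pmovmskb(0x8000800080008000ULL) == 0xAA);
}
#endif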

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   t &= 0xFFFF;
   return (ULong)t;
}
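
/* Worked example (illustrative, name hypothetical): the sum of
   absolute byte differences of 0x0102030405060708 against zero is
   1+2+3+4+5+6+7+8 = 36. */
#if 0
static void example_psadbw ( void )
{
   vassert(amd64g_calculate_mmx_psadbw(0x0102030405060708ULL, 0) == 36);
}
#endif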

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
{
   ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi );
   ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo );
   return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
}

/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/

/* Initialise the entire amd64 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
{
   vex_state->guest_RAX = 0;
   vex_state->guest_RCX = 0;
   vex_state->guest_RDX = 0;
   vex_state->guest_RBX = 0;
   vex_state->guest_RSP = 0;
   vex_state->guest_RBP = 0;
   vex_state->guest_RSI = 0;
   vex_state->guest_RDI = 0;
   vex_state->guest_R8  = 0;
   vex_state->guest_R9  = 0;
   vex_state->guest_R10 = 0;
   vex_state->guest_R11 = 0;
   vex_state->guest_R12 = 0;
   vex_state->guest_R13 = 0;
   vex_state->guest_R14 = 0;
   vex_state->guest_R15 = 0;

   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;

   vex_state->guest_DFLAG  = 1; /* forwards */
   vex_state->guest_IDFLAG = 0;

   /* HACK: represent the offset associated with %fs==0. This
      assumes that %fs is only ever zero. */
   vex_state->guest_FS_ZERO = 0;

   vex_state->guest_RIP = 0;

   /* Initialise the simulated FPU */
   amd64g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;

   vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);
   SSEZERO(vex_state->guest_XMM8);
   SSEZERO(vex_state->guest_XMM9);
   SSEZERO(vex_state->guest_XMM10);
   SSEZERO(vex_state->guest_XMM11);
   SSEZERO(vex_state->guest_XMM12);
   SSEZERO(vex_state->guest_XMM13);
   SSEZERO(vex_state->guest_XMM14);
   SSEZERO(vex_state->guest_XMM15);

#  undef SSEZERO

   vex_state->guest_EMWARN = EmWarn_NONE;

   /* These should not ever be either read or written, but we
      initialise them anyway. */
   vex_state->guest_TISTART = 0;
   vex_state->guest_TILEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_GS_0x60  = 0;

   vex_state->guest_IP_AT_SYSCALL = 0;
   /* vex_state->padding = 0; */
}
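
/* Sketch of typical client usage (illustrative; the function name and
   addresses are made up): zero the whole state, then set just the
   registers the client cares about before running translations. */
#if 0
static void example_client_setup ( VexGuestAMD64State* st )
{
   LibVEX_GuestAMD64_initialise(st);
   st->guest_RSP = 0x7FF000000000ULL;  /* initial stack pointer */
   st->guest_RIP = 0x400000ULL;        /* entry point */
}
#endif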

/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %RSP, %RBP and %RIP
   only.  These are the minimum needed to extract correct stack
   backtraces from amd64 code.
*/
Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff,
                                                   Int maxoff )
{
   Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
   Int rbp_max = rbp_min + 8 - 1;
   Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
   Int rsp_max = rsp_min + 8 - 1;
   Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
   Int rip_max = rip_min + 8 - 1;

   if (maxoff < rbp_min || minoff > rbp_max) {
      /* no overlap with rbp */
   } else {
      return True;
   }

   if (maxoff < rsp_min || minoff > rsp_max) {
      /* no overlap with rsp */
   } else {
      return True;
   }

   if (maxoff < rip_min || minoff > rip_max) {
      /* no overlap with rip */
   } else {
      return True;
   }

   return False;
}
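
/* Illustration (not part of the original file; the function name is
   hypothetical): a write covering only guest_RAX misses all three
   ranges and needs no precise exns, while one covering guest_RSP
   does. */
#if 0
static void example_precise_exns ( void )
{
   Int rax = offsetof(VexGuestAMD64State, guest_RAX);
   Int rsp = offsetof(VexGuestAMD64State, guest_RSP);
   vassert(!guest_amd64_state_requires_precise_mem_exns(rax, rax+7));
   vassert( guest_amd64_state_requires_precise_mem_exns(rsp, rsp+7));
}
#endif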

#define ALWAYSDEFD(field)                            \
    { offsetof(VexGuestAMD64State, field),           \
      (sizeof ((VexGuestAMD64State*)0)->field) }
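
/* For instance (illustrative expansion): ALWAYSDEFD(guest_CC_OP)
   yields the (offset, size) pair
      { offsetof(VexGuestAMD64State, guest_CC_OP), 8 }
   since guest_CC_OP is a ULong.  Such pairs populate the
   .alwaysDefd table below. */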

VexGuestLayout
   amd64guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestAMD64State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
          .sizeof_SP = 8,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
          .sizeof_FP = 8,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
          .sizeof_IP = 8,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 16,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_RIP),
                 /*  5 */ ALWAYSDEFD(guest_FS_ZERO),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 // /*    */ ALWAYSDEFD(guest_CS),
                 // /*    */ ALWAYSDEFD(guest_DS),
                 // /*    */ ALWAYSDEFD(guest_ES),
                 // /*    */ ALWAYSDEFD(guest_FS),
                 // /*    */ ALWAYSDEFD(guest_GS),
                 // /*    */ ALWAYSDEFD(guest_SS),
                 // /*    */ ALWAYSDEFD(guest_LDT),
                 // /*    */ ALWAYSDEFD(guest_GDT),
                 /* 10 */ ALWAYSDEFD(guest_EMWARN),
                 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 12 */ ALWAYSDEFD(guest_TISTART),
                 /* 13 */ ALWAYSDEFD(guest_TILEN),
                 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };

/*---------------------------------------------------------------*/
/*--- end                                 guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/