/*---------------------------------------------------------------*/
/*--- begin                                 guest_amd64_defs.h ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Only to be used within the guest-amd64 directory. */

#ifndef __VEX_GUEST_AMD64_DEFS_H
#define __VEX_GUEST_AMD64_DEFS_H

/*---------------------------------------------------------*/
/*--- amd64 to IR conversion                            ---*/
/*---------------------------------------------------------*/

/* Convert one amd64 insn to IR.  See the type DisOneInstrFn in
   bb_to_IR.h. */
extern
DisResult disInstr_AMD64 ( IRSB*        irbb,
                           Bool         put_IP,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code,
                           Long         delta,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           Bool         host_bigendian );

/* Used by the optimiser to specialise calls to helpers. */
extern
IRExpr* guest_amd64_spechelper ( HChar*   function_name,
                                 IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int      n_precedingStmts );

/* Describes to the optimiser which parts of the guest state require
   precise memory exceptions.  This is logically part of the guest
   state description. */
extern
Bool guest_amd64_state_requires_precise_mem_exns ( Int, Int );

extern
VexGuestLayout amd64guest_layout;

/*---------------------------------------------------------*/
/*--- amd64 guest helpers                               ---*/
/*---------------------------------------------------------*/

/* --- CLEAN HELPERS --- */

extern ULong amd64g_calculate_rflags_all ( 
                ULong cc_op, 
                ULong cc_dep1, ULong cc_dep2, ULong cc_ndep 
             );

extern ULong amd64g_calculate_rflags_c ( 
                ULong cc_op, 
                ULong cc_dep1, ULong cc_dep2, ULong cc_ndep 
             );

extern ULong amd64g_calculate_condition ( 
                ULong/*AMD64Condcode*/ cond, 
                ULong cc_op, 
                ULong cc_dep1, ULong cc_dep2, ULong cc_ndep 
             );
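
/* Illustrative only, not part of the API: a plausible use of the
   thunk evaluators above, assuming the thunk encoding described in
   the "Condition code stuff" section below.  This computes the OSZACP
   flags for an 8-bit ADD of 0x7F and 0x01:

      ULong op     = AMD64G_CC_OP_ADDB;   // 8-bit add
      ULong dep1   = 0x7F;                // first arg
      ULong dep2   = 0x01;                // second arg
      ULong ndep   = 0;                   // unused for add
      ULong rflags = amd64g_calculate_rflags_all(op, dep1, dep2, ndep);
      // 0x7F + 0x01 = 0x80, so expect O=1, S=1, Z=0, A=1, C=0, P=0
      // in the corresponding AMD64G_CC_MASK_* bit positions.
*/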

extern ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl );

extern ULong amd64g_calculate_RCR ( 
                ULong arg, ULong rot_amt, ULong rflags_in, Long sz 
             );

extern ULong amd64g_calculate_RCL ( 
                ULong arg, ULong rot_amt, ULong rflags_in, Long sz 
             );

extern ULong amd64g_check_fldcw ( ULong fpucw );

extern ULong amd64g_create_fpucw ( ULong fpround );

extern ULong amd64g_check_ldmxcsr ( ULong mxcsr );

extern ULong amd64g_create_mxcsr ( ULong sseround );
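
/* Illustrative only: architecturally, the x87 control word keeps its
   rounding-mode field in bits [11:10] and MXCSR keeps its in bits
   [14:13].  Assuming these helpers use those encodings, a round trip
   might look like:

      ULong fpround = 2;                            // round towards +inf
      ULong fpucw   = amd64g_create_fpucw(fpround); // ((fpucw >> 10) & 3) == 2
      ULong mxcsr   = amd64g_create_mxcsr(fpround); // ((mxcsr >> 13) & 3) == 2
      // The check_ helpers go the other way: they validate a control
      // word loaded by the guest and recover the rounding mode, plus
      // any emulation warning for unsupported settings.
*/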

extern VexEmWarn amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord );

extern void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord );

/* Translate a guest virtual_addr into a guest linear address by
   consulting the supplied LDT/GDT structures.  Their representation
   must be as specified in pub/libvex_guest_amd64.h.  To indicate a
   translation failure, 1<<32 is returned.  On success, the lower 32
   bits of the returned result indicate the linear address.
*/
//extern
//ULong amd64g_use_seg_selector ( HWord ldt, HWord gdt,
//                                UInt seg_selector, UInt virtual_addr );

extern ULong amd64g_calculate_mmx_pmaddwd  ( ULong, ULong );
extern ULong amd64g_calculate_mmx_psadbw   ( ULong, ULong );
extern ULong amd64g_calculate_mmx_pmovmskb ( ULong );
extern ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo );
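
/* Illustrative only: architecturally, pmovmskb collects the most
   significant bit of each byte of its vector operand into a small
   integer.  A reference version of the 8-byte MMX case, stated here
   for clarity rather than as a promise about the helper's internals,
   could be written as:

      static ULong ref_mmx_pmovmskb ( ULong xx )
      {
         ULong r = 0;
         Int   i;
         for (i = 0; i < 8; i++)               // one bit per byte lane
            r |= ((xx >> (8*i + 7)) & 1) << i; // take bit 7 of lane i
         return r;
      }
*/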

/* --- DIRTY HELPERS --- */

extern ULong amd64g_dirtyhelper_loadF80le  ( ULong/*addr*/ );

extern void  amd64g_dirtyhelper_storeF80le ( ULong/*addr*/, ULong/*data*/ );

extern void  amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st );
extern void  amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st );
extern void  amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st );

extern void  amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );

extern void  amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord );

extern ULong amd64g_dirtyhelper_RDTSC ( void );

extern ULong amd64g_dirtyhelper_IN  ( ULong portno, ULong sz/*1,2 or 4*/ );
extern void  amd64g_dirtyhelper_OUT ( ULong portno, ULong data, 
                                      ULong sz/*1,2 or 4*/ );

extern void  amd64g_dirtyhelper_SxDT ( void* address,
                                       ULong op /* 0 or 1 */ );

/* Helps with PCMP{I,E}STR{I,M}.

   CALLED FROM GENERATED CODE: DIRTY HELPER(s).  (But not really,
   actually it could be a clean helper, but for the fact that we can't
   pass by value 2 x V128 to a clean helper, nor have one returned.)
   Reads guest state, writes to guest state for the xSTRM cases, no
   accesses of memory, is a pure function.

   opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
   the callee knows which I/E and I/M variant it is dealing with and
   what the specific operation is.  4th byte of opcode is in the range
   0x60 to 0x63:
       istri  66 0F 3A 63
       istrm  66 0F 3A 62
       estri  66 0F 3A 61
       estrm  66 0F 3A 60

   gstOffL and gstOffR are the guest state offsets for the two XMM
   register inputs.  We never have to deal with the memory case since
   that is handled by pre-loading the relevant value into the fake
   XMM16 register.

   For ESTRx variants, edxIN and eaxIN hold the values of those two
   registers.

   In all cases, the bottom 16 bits of the result contain the new
   OSZACP %rflags values.  For xSTRI variants, bits[31:16] of the
   result hold the new %ecx value.  For xSTRM variants, the helper
   writes the result directly to the guest XMM0.

   Declarable side effects: in all cases, reads guest state at
   [gstOffL, +16) and [gstOffR, +16).  For xSTRM variants, also writes
   guest state at [OFFB_XMM0, +16).

   Is expected to be called with opc_and_imm combinations which have
   actually been validated, and will assert if otherwise.  The front
   end should ensure we're only called with verified values.
*/
extern ULong amd64g_dirtyhelper_PCMPxSTRx ( 
          VexGuestAMD64State*,
          HWord opc4_and_imm,
          HWord gstOffL, HWord gstOffR,
          HWord edxIN, HWord eaxIN
       );
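
/* Illustrative only, and assuming the opcode assignments listed in
   the comment above: unpacking opc_and_imm into its two components
   (hypothetical local names):

      UInt opc4 = (opc_and_imm >> 8) & 0xFF;  // 4th opcode byte, 0x60..0x63
      UInt imm8 = opc_and_imm & 0xFF;         // the imm8 control byte
      Bool isI  = (opc4 & 2) != 0;            // ISTRx (implicit-length) variant?
      Bool isM  = (opc4 & 1) == 0;            // xSTRM (mask-producing) variant?
*/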

//extern void  amd64g_dirtyhelper_CPUID_sse0 ( VexGuestAMD64State* );
//extern void  amd64g_dirtyhelper_CPUID_sse1 ( VexGuestAMD64State* );
//extern void  amd64g_dirtyhelper_CPUID_sse2 ( VexGuestAMD64State* );

//extern void  amd64g_dirtyhelper_FSAVE ( VexGuestAMD64State*, HWord );

//extern VexEmWarn
//             amd64g_dirtyhelper_FRSTOR ( VexGuestAMD64State*, HWord );

//extern void  amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord );

//extern VexEmWarn
//             amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord );

/*---------------------------------------------------------*/
/*--- Condition code stuff                              ---*/
/*---------------------------------------------------------*/

#define AMD64G_CC_SHIFT_O   11
#define AMD64G_CC_SHIFT_S   7
#define AMD64G_CC_SHIFT_Z   6
#define AMD64G_CC_SHIFT_A   4
#define AMD64G_CC_SHIFT_C   0
#define AMD64G_CC_SHIFT_P   2

#define AMD64G_CC_MASK_O    (1ULL << AMD64G_CC_SHIFT_O)
#define AMD64G_CC_MASK_S    (1ULL << AMD64G_CC_SHIFT_S)
#define AMD64G_CC_MASK_Z    (1ULL << AMD64G_CC_SHIFT_Z)
#define AMD64G_CC_MASK_A    (1ULL << AMD64G_CC_SHIFT_A)
#define AMD64G_CC_MASK_C    (1ULL << AMD64G_CC_SHIFT_C)
#define AMD64G_CC_MASK_P    (1ULL << AMD64G_CC_SHIFT_P)
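
/* Illustrative only: the masks above select individual OSZACP bits
   out of a value in %rflags layout, e.g. one returned by
   amd64g_calculate_rflags_all (hypothetical variable names):

      ULong rflags = amd64g_calculate_rflags_all(op, dep1, dep2, ndep);
      Bool  zf     = (rflags & AMD64G_CC_MASK_Z) != 0;   // zero flag
      Bool  cf     = (rflags & AMD64G_CC_MASK_C) != 0;   // carry flag
*/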

#define AMD64G_FC_SHIFT_C3   14
#define AMD64G_FC_SHIFT_C2   10
#define AMD64G_FC_SHIFT_C1   9
#define AMD64G_FC_SHIFT_C0   8

#define AMD64G_FC_MASK_C3    (1ULL << AMD64G_FC_SHIFT_C3)
#define AMD64G_FC_MASK_C2    (1ULL << AMD64G_FC_SHIFT_C2)
#define AMD64G_FC_MASK_C1    (1ULL << AMD64G_FC_SHIFT_C1)
#define AMD64G_FC_MASK_C0    (1ULL << AMD64G_FC_SHIFT_C0)

/* %RFLAGS thunk descriptors.  A four-word thunk is used to record
   details of the most recent flag-setting operation, so the flags can
   be computed later if needed.  It is possible to do this a little
   more efficiently using a 3-word thunk, but that makes it impossible
   to describe the flag data dependencies sufficiently accurately for
   Memcheck.  Hence 4 words are used, with minimal loss of efficiency.

   The four words are:

      CC_OP, which describes the operation.

      CC_DEP1 and CC_DEP2.  These are arguments to the operation.
      We want Memcheck to believe that the resulting flags are
      data-dependent on both CC_DEP1 and CC_DEP2, hence the
      name DEP.

      CC_NDEP.  This is a 3rd argument to the operation which is
      sometimes needed.  We arrange things so that Memcheck does
      not believe the resulting flags are data-dependent on CC_NDEP
      ("not dependent").

   To make Memcheck believe that (the definedness of) the encoded
   flags depends only on (the definedness of) CC_DEP1 and CC_DEP2
   requires two things:

   (1) In the guest state layout info (amd64guest_layout), CC_OP and
       CC_NDEP are marked as always defined.

   (2) When passing the thunk components to an evaluation function
       (calculate_condition, calculate_rflags_all, calculate_rflags_c)
       the IRCallee's mcx_mask must be set so as to exclude from
       consideration all passed args except CC_DEP1 and CC_DEP2.

   Strictly speaking only (2) is necessary for correctness.  However,
   (1) helps efficiency in that since (2) means we never ask about the
   definedness of CC_OP or CC_NDEP, we may as well not even bother to
   track their definedness.

   When building the thunk, it is always necessary to write words into
   CC_DEP1 and CC_DEP2, even if those args are not used given the
   CC_OP field (eg, CC_DEP2 is not used if CC_OP is CC_LOGICB/W/L/Q).
   This is important because otherwise Memcheck could give false
   positives as it does not understand the relationship between the
   CC_OP field and CC_DEP1 and CC_DEP2, and so believes that the
   definedness of the stored flags always depends on both CC_DEP1 and
   CC_DEP2.

   However, it is only necessary to set CC_NDEP when the CC_OP value
   requires it, because Memcheck ignores CC_NDEP, and the evaluation
   functions do understand the CC_OP fields and will only examine
   CC_NDEP for suitable values of CC_OP.

   A summary of the field usages is:

   Operation          DEP1               DEP2               NDEP
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   add/sub/mul        first arg          second arg         unused

   adc/sbb            first arg          (second arg)
                                         XOR old_carry      old_carry

   and/or/xor         result             zero               unused

   inc/dec            result             zero               old_carry

   shl/shr/sar        result             subshifted-        unused
                                         result

   rol/ror            result             zero               old_flags

   copy               old_flags          zero               unused.

   Therefore Memcheck will believe the following:

   * add/sub/mul -- definedness of result flags depends on definedness
     of both args.

   * adc/sbb -- definedness of result flags depends on definedness of
     both args and definedness of the old C flag.  Because only two
     DEP fields are available, the old C flag is XOR'd into the second
     arg so that Memcheck sees the data dependency on it.  That means
     the NDEP field must contain a second copy of the old C flag
     so that the evaluation functions can correctly recover the second
     arg.

   * and/or/xor are straightforward -- definedness of result flags
     depends on definedness of result value.

   * inc/dec -- definedness of result flags depends only on
     definedness of result.  This isn't really true -- it also depends
     on the old C flag.  However, we don't want Memcheck to see that,
     and so the old C flag must be passed in NDEP and not in DEP2.
     It's inconceivable that a compiler would generate code that puts
     the C flag in an undefined state, then does an inc/dec, which
     leaves C unchanged, and then makes a conditional jump/move based
     on C.  So our fiction seems a good approximation.

   * shl/shr/sar -- straightforward, again, definedness of result
     flags depends on definedness of result value.  The subshifted
     value (value shifted one less) is also needed, but its
     definedness is the same as the definedness of the shifted value.

   * rol/ror -- these only set O and C, and leave S Z A P alone.
     However it seems prudent (as per inc/dec) to say the definedness
     of all resulting flags depends on the definedness of the result,
     hence the old flags must go in as NDEP and not DEP2.

   * rcl/rcr are too difficult to do in-line, and so are done by a
     helper function.  They are not part of this scheme.  The helper
     function takes the value to be rotated, the rotate amount and the
     old flags, and returns the new flags and the rotated value.
     Since the helper's mcx_mask does not have any set bits, Memcheck
     will lazily propagate undefinedness from any of the 3 args into
     both results (flags and actual value).
*/
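
/* Illustrative only: the adc/sbb convention above can be expressed
   directly in code.  A hypothetical front end building the thunk for
   "adc dst, src" would store (hypothetical variable names):

      ULong oldC = rflags_in & AMD64G_CC_MASK_C;  // 0 or 1
      ULong dep1 = argL;                          // first arg
      ULong dep2 = argR ^ oldC;                   // carry XOR'd in, so
                                                  // Memcheck sees the dependency
      ULong ndep = oldC;                          // second copy of old carry

   and an evaluation function recovers the real second argument as

      ULong argR_back = dep2 ^ ndep;              // undoes the XOR
*/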

enum {
    AMD64G_CC_OP_COPY=0,  /* DEP1 = current flags, DEP2 = 0, NDEP = unused */
                          /* just copy DEP1 to output */

    AMD64G_CC_OP_ADDB,    /* 1 */
    AMD64G_CC_OP_ADDW,    /* 2 DEP1 = argL, DEP2 = argR, NDEP = unused */
    AMD64G_CC_OP_ADDL,    /* 3 */
    AMD64G_CC_OP_ADDQ,    /* 4 */

    AMD64G_CC_OP_SUBB,    /* 5 */
    AMD64G_CC_OP_SUBW,    /* 6 DEP1 = argL, DEP2 = argR, NDEP = unused */
    AMD64G_CC_OP_SUBL,    /* 7 */
    AMD64G_CC_OP_SUBQ,    /* 8 */

    AMD64G_CC_OP_ADCB,    /* 9 */
    AMD64G_CC_OP_ADCW,    /* 10 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */
    AMD64G_CC_OP_ADCL,    /* 11 */
    AMD64G_CC_OP_ADCQ,    /* 12 */

    AMD64G_CC_OP_SBBB,    /* 13 */
    AMD64G_CC_OP_SBBW,    /* 14 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */
    AMD64G_CC_OP_SBBL,    /* 15 */
    AMD64G_CC_OP_SBBQ,    /* 16 */

    AMD64G_CC_OP_LOGICB,  /* 17 */
    AMD64G_CC_OP_LOGICW,  /* 18 DEP1 = result, DEP2 = 0, NDEP = unused */
    AMD64G_CC_OP_LOGICL,  /* 19 */
    AMD64G_CC_OP_LOGICQ,  /* 20 */

    AMD64G_CC_OP_INCB,    /* 21 */
    AMD64G_CC_OP_INCW,    /* 22 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */
    AMD64G_CC_OP_INCL,    /* 23 */
    AMD64G_CC_OP_INCQ,    /* 24 */

    AMD64G_CC_OP_DECB,    /* 25 */
    AMD64G_CC_OP_DECW,    /* 26 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */
    AMD64G_CC_OP_DECL,    /* 27 */
    AMD64G_CC_OP_DECQ,    /* 28 */

    AMD64G_CC_OP_SHLB,    /* 29 DEP1 = res, DEP2 = res', NDEP = unused */
    AMD64G_CC_OP_SHLW,    /* 30 where res' is like res but shifted one bit less */
    AMD64G_CC_OP_SHLL,    /* 31 */
    AMD64G_CC_OP_SHLQ,    /* 32 */

    AMD64G_CC_OP_SHRB,    /* 33 DEP1 = res, DEP2 = res', NDEP = unused */
    AMD64G_CC_OP_SHRW,    /* 34 where res' is like res but shifted one bit less */
    AMD64G_CC_OP_SHRL,    /* 35 */
    AMD64G_CC_OP_SHRQ,    /* 36 */

    AMD64G_CC_OP_ROLB,    /* 37 */
    AMD64G_CC_OP_ROLW,    /* 38 DEP1 = res, DEP2 = 0, NDEP = old flags */
    AMD64G_CC_OP_ROLL,    /* 39 */
    AMD64G_CC_OP_ROLQ,    /* 40 */

    AMD64G_CC_OP_RORB,    /* 41 */
    AMD64G_CC_OP_RORW,    /* 42 DEP1 = res, DEP2 = 0, NDEP = old flags */
    AMD64G_CC_OP_RORL,    /* 43 */
    AMD64G_CC_OP_RORQ,    /* 44 */

    AMD64G_CC_OP_UMULB,   /* 45 */
    AMD64G_CC_OP_UMULW,   /* 46 DEP1 = argL, DEP2 = argR, NDEP = unused */
    AMD64G_CC_OP_UMULL,   /* 47 */
    AMD64G_CC_OP_UMULQ,   /* 48 */

    AMD64G_CC_OP_SMULB,   /* 49 */
    AMD64G_CC_OP_SMULW,   /* 50 DEP1 = argL, DEP2 = argR, NDEP = unused */
    AMD64G_CC_OP_SMULL,   /* 51 */
    AMD64G_CC_OP_SMULQ,   /* 52 */

    AMD64G_CC_OP_NUMBER
};

typedef
   enum {
      AMD64CondO      = 0,  /* overflow           */
      AMD64CondNO     = 1,  /* no overflow        */

      AMD64CondB      = 2,  /* below              */
      AMD64CondNB     = 3,  /* not below          */

      AMD64CondZ      = 4,  /* zero               */
      AMD64CondNZ     = 5,  /* not zero           */

      AMD64CondBE     = 6,  /* below or equal     */
      AMD64CondNBE    = 7,  /* not below or equal */

      AMD64CondS      = 8,  /* negative           */
      AMD64CondNS     = 9,  /* not negative       */

      AMD64CondP      = 10, /* parity even        */
      AMD64CondNP     = 11, /* not parity even    */

      AMD64CondL      = 12, /* less               */
      AMD64CondNL     = 13, /* not less           */

      AMD64CondLE     = 14, /* less or equal      */
      AMD64CondNLE    = 15, /* not less or equal  */

      AMD64CondAlways = 16  /* HACK */
   }
   AMD64Condcode;
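
/* Illustrative only: a condition code is evaluated against the thunk
   by amd64g_calculate_condition.  E.g. to ask whether a conditional
   jump on "zero" would be taken after a 64-bit SUB recorded in a
   thunk (hypothetical variable names):

      ULong taken = amd64g_calculate_condition(
                       AMD64CondZ,            // condition to test
                       AMD64G_CC_OP_SUBQ,     // 64-bit subtract
                       argL, argR,            // DEP1, DEP2
                       0 );                   // NDEP, unused for sub
      // taken is 1 if argL == argR, 0 otherwise.
*/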

#endif /* ndef __VEX_GUEST_AMD64_DEFS_H */

/*---------------------------------------------------------------*/
/*--- end                                   guest_amd64_defs.h ---*/
/*---------------------------------------------------------------*/