2 /*---------------------------------------------------------------*/
3 /*--- begin guest_generic_x87.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2010 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 /* This file contains functions for doing some x87-specific
37 operations. Both the amd64 and x86 front ends (guests) indirectly
38 call these functions via guest helper calls. By putting them here,
39 code duplication is avoided. Some of these functions are tricky
40 and hard to verify, so there is much to be said for only having one copy.
44 #include "libvex_basictypes.h"
46 #include "main_util.h"
47 #include "guest_generic_x87.h"
50 /* 80 and 64-bit floating point formats:
55 S 0 0X------X denormals
56 S 1-7FFE 1X------X normals (all normals have leading 1)
57 S 7FFF 10------0 infinity
61 S is the sign bit. For runs X----X, at least one of the Xs must be
62 nonzero. Exponent is 15 bits, fractional part is 63 bits, and
63 there is an explicitly represented leading 1, and a sign bit, giving 80 in total.
66 64-bit avoids the confusion of an explicitly represented leading 1
70 S 0 X------X denormals
72 S 7FF 0------0 infinity
76 Exponent is 11 bits, fractional part is 52 bits, and there is a
77 sign bit, giving 64 in total.
/* Bit-array read helper: loads the byte of `arr` that contains bit
   number `n` (byte index n >> 3).
   Callers in this file use the result as a single bit: it is tested
   for truth, compared against 1, or passed as the bit argument of
   write_bit_array.
   NOTE(review): only the signature and the byte load are visible in
   this view; the extraction of bit (n & 7) and the return statement
   are presumably on lines not shown -- confirm against the full file. */
81 static inline UInt read_bit_array ( UChar* arr, UInt n )
83 UChar c = arr[n >> 3];
/* Bit-array write helper: places the low bit of `b` at bit number `n`
   of `arr`.  The byte at index n >> 3 is loaded, bit (n & 7) of it is
   cleared, and (b & 1) is OR-ed into that position; bits of `b` above
   bit 0 are ignored.
   NOTE(review): the store of `c` back into arr[n >> 3] is not visible
   in this view -- presumably on a line not shown; confirm. */
88 static inline void write_bit_array ( UChar* arr, UInt n, UInt b )
90 UChar c = arr[n >> 3];
91 c = toUChar( c & ~(1 << (n&7)) );
92 c = toUChar( c | ((b&1) << (n&7)) );
96 /* Convert an IEEE754 double (64-bit) into an x87 extended double
97 (80-bit), mimicking the hardware fairly closely. Both numbers are
98 stored little-endian. Limitations, all of which could be fixed,
99 given some level of hassle:
101 * Identity of NaNs is not preserved.
103 See comments in the code for more details.
/* convert_f64le_to_f80le: widen the 8-byte little-endian double at
   `f64` into the 10-byte little-endian x87 extended value at `f80`.

   Parameters:
     f64  input, bytes f64[0..7] are read.
     f80  output, bytes f80[0..9] are written.

   Visible cases, keyed on the 11-bit biased exponent `bexp`:
     - bexp == 0: a signed zero is written first.  Despite the in-body
       remark about forcing denormals to zero, the visible code that
       follows handles a non-zero mantissa by scanning down from bit 51
       for the first set bit, copying the remaining mantissa bits into
       f80, and storing a rebiased exponent.
     - bexp == 0x7FF: an all-zero mantissa produces a signed infinity;
       otherwise a NaN is produced whose low seven bytes are all 0xFF,
       so the input NaN's payload is not carried over.
     - otherwise (normal number): the exponent is rebiased by
       (16383 - 1023), bit 7 of f80[7] is set as the explicit leading 1,
       and the 52 mantissa bits are placed directly below it (each
       source byte shifted left by 3 into the next position).  The low
       three bits of f80[1] and all of f80[0] are zero.

   NOTE(review): this view of the function has gaps -- declarations,
   braces, several conditions and some comment terminators are on lines
   not shown.  Statement grouping described above is inferred from the
   visible lines and the in-body comments; confirm against the full
   file. */
105 void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
108 Int bexp, i, j, shift;
111 sign = toUChar( (f64[7] >> 7) & 1 );
/* NOTE(review): as written, this expression also carries the sign bit
   of f64[7] into bit 11 of bexp, yet the comparisons below are against
   0 and 0x7FF.  A masking step is presumably on a line not shown --
   confirm. */
112 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
115 mantissaIsZero = False;
116 if (bexp == 0 || bexp == 0x7FF) {
117 /* We'll need to know whether or not the mantissa (bits 51:0) is
118 all zeroes in order to handle these cases. So figure it
123 && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
124 && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
128 /* If the exponent is zero, either we have a zero or a denormal.
129 Produce a zero. This is a hack in that it forces denormals to
130 zero. Could do better. */
132 f80[9] = toUChar( sign << 7 );
133 f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
134 = f80[3] = f80[2] = f80[1] = f80[0] = 0;
137 /* It really is zero, so that's all we can do. */
140 /* There is at least one 1-bit in the mantissa. So it's a
141 potentially denormalised double -- but we can produce a
142 normalised long double. Count the leading zeroes in the
143 mantissa so as to decide how much to bump the exponent down
144 by. Note, this is SLOW. */
146 for (i = 51; i >= 0; i--) {
147 if (read_bit_array(f64, i))
152 /* and copy into place as many bits as we can get our hands on. */
154 for (i = 51 - shift; i >= 0; i--) {
155 write_bit_array( f80, j,
156 read_bit_array( f64, i ) );
160 /* Set the exponent appropriately, and we're done. */
162 bexp += (16383 - 1023);
163 f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
164 f80[8] = toUChar( bexp & 0xFF );
168 /* If the exponent is 7FF, this is either an Infinity, a SNaN or
169 QNaN, as determined by examining bits 51:0, thus:
173 where at least one of the Xs is not zero.
176 if (mantissaIsZero) {
177 /* Produce an appropriately signed infinity:
178 S 1--1 (15) 1 0--0 (63)
180 f80[9] = toUChar( (sign << 7) | 0x7F );
183 f80[6] = f80[5] = f80[4] = f80[3]
184 = f80[2] = f80[1] = f80[0] = 0;
187 /* So it's either a QNaN or SNaN. Distinguish by considering
188 bit 51. Note, this destroys all the trailing bits
189 (identity?) of the NaN. IEEE754 doesn't require preserving
190 these (it only requires that there be one QNaN value and one
191 SNaN value), but x87 does seem to have some ability to
192 preserve them. Anyway, here, the NaN's identity is
193 destroyed. Could be improved. */
195 /* QNaN. Make a QNaN:
196 S 1--1 (15) 1 1--1 (63)
198 f80[9] = toUChar( (sign << 7) | 0x7F );
201 f80[6] = f80[5] = f80[4] = f80[3]
202 = f80[2] = f80[1] = f80[0] = 0xFF;
204 /* SNaN. Make a SNaN:
205 S 1--1 (15) 0 1--1 (63)
207 f80[9] = toUChar( (sign << 7) | 0x7F );
210 f80[6] = f80[5] = f80[4] = f80[3]
211 = f80[2] = f80[1] = f80[0] = 0xFF;
216 /* It's not a zero, denormal, infinity or nan. So it must be a
217 normalised number. Rebias the exponent and build the new
219 bexp += (16383 - 1023);
221 f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
222 f80[8] = toUChar( bexp & 0xFF );
223 f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
224 | ((f64[5] >> 5) & 7) );
225 f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
226 f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
227 f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
228 f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
229 f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
230 f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
231 f80[0] = toUChar( 0 );
235 /* Convert an x87 extended double (80-bit) into an IEEE 754 double
236 (64-bit), mimicking the hardware fairly closely. Both numbers are
237 stored little-endian. Limitations, both of which could be fixed,
238 given some level of hassle:
240 * Rounding following truncation could be a bit better.
242 * Identity of NaNs is not preserved.
244 See comments in the code for more details.
/* convert_f80le_to_f64le: narrow the 10-byte little-endian x87 extended
   value at `f80` into the 8-byte little-endian double at `f64`.

   Parameters:
     f80  input, bytes f80[0..9] are read.
     f64  output, bytes f64[0..7] are written.

   Visible cases, keyed on the biased exponent `bexp`:
     - zero exponent: a signed zero is produced (extended denormals are
       flushed to zero, per the in-body comment).
     - bexp == 0x7FFF: a signed infinity, or a NaN whose low six bytes
       are all 0xFF (QNaN/SNaN), so the input NaN's payload is not
       carried over.
     - integer bit clear (f80[7] & 0x80 == 0) for any other exponent:
       a QNaN with the sign bit forced to 1 and zero low bytes.
     - after rebiasing by (16383 - 1023): values too large become a
       signed infinity; values too small start as a signed zero and may
       be filled in bit by bit as a denormal, followed by a rounding
       check; everything else is a normal double whose mantissa is the
       top 52 fraction bits of f80 (byte-wise shift right by 3) and
       whose exponent is packed into f64[6] and f64[7].
     - rounding of normal results: bit 10 of f80 (f80[1] & 4) decides
       whether to round; the exact half-way pattern is special-cased.
       Per the in-body comment, the increment is a kludge: the visible
       tests only carry through f64[0], f64[1] and f64[2], and when all
       three are 0xFF no rounding is performed.

   NOTE(review): the visible `bexp` expression includes bit 7 of f80[9]
   (the sign) as bit 15, yet bexp is compared against 0x7FFF; a masking
   step is presumably on a line not shown -- confirm.
   NOTE(review): both a bit-by-bit loop and byte-wise shifts appear for
   the normal-case mantissa copy; presumably one of them is
   conditionally compiled out on lines not shown -- confirm.
   NOTE(review): this view of the function has gaps -- declarations,
   braces, several conditions and some comment terminators are on lines
   not shown; confirm details against the full file. */
246 void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
252 sign = toUChar((f80[9] >> 7) & 1);
253 bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
256 /* If the exponent is zero, either we have a zero or a denormal.
257 But an extended precision denormal becomes a double precision
258 zero, so in either case, just produce the appropriately signed
261 f64[7] = toUChar(sign << 7);
262 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
266 /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
267 QNaN, as determined by examining bits 62:0, thus:
271 where at least one of the Xs is not zero.
273 if (bexp == 0x7FFF) {
276 && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
277 && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
281 if (0 == (f80[7] & 0x80))
283 /* Produce an appropriately signed infinity:
284 S 1--1 (11) 0--0 (52)
286 f64[7] = toUChar((sign << 7) | 0x7F);
288 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
291 /* So it's either a QNaN or SNaN. Distinguish by considering
292 bit 62. Note, this destroys all the trailing bits
293 (identity?) of the NaN. IEEE754 doesn't require preserving
294 these (it only requires that there be one QNaN value and one
295 SNaN value), but x87 does seem to have some ability to
296 preserve them. Anyway, here, the NaN's identity is
297 destroyed. Could be improved. */
299 /* QNaN. Make a QNaN:
300 S 1--1 (11) 1 1--1 (51)
302 f64[7] = toUChar((sign << 7) | 0x7F);
304 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
306 /* SNaN. Make a SNaN:
307 S 1--1 (11) 0 1--1 (51)
309 f64[7] = toUChar((sign << 7) | 0x7F);
311 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
316 /* If it's not a Zero, NaN or Inf, and the integer part (bit 63) is
317 zero, the x87 FPU appears to consider the number denormalised
318 and converts it to a QNaN. */
319 if (0 == (f80[7] & 0x80)) {
321 /* Strange hardware QNaN:
322 S 1--1 (11) 1 0--0 (51)
324 /* On a PIII, these QNaNs always appear with sign==1. I have
326 f64[7] = (1 /*sign*/ << 7) | 0x7F;
328 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
332 /* It's not a zero, denormal, infinity or nan. So it must be a
333 normalised number. Rebias the exponent and consider. */
334 bexp -= (16383 - 1023);
336 /* It's too big for a double. Construct an infinity. */
337 f64[7] = toUChar((sign << 7) | 0x7F);
339 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
344 /* It's too small for a normalised double. First construct a
345 zero and then see if it can be improved into a denormal. */
346 f64[7] = toUChar(sign << 7);
347 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
350 /* Too small even for a denormal. */
353 /* Ok, let's make a denormal. Note, this is SLOW. */
354 /* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
355 indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
356 /* bexp is in range -52 .. 0 inclusive */
357 for (i = 63; i >= 0; i--) {
360 /* We shouldn't really call vassert from generated code. */
361 vassert(j >= 0 && j < 52);
362 write_bit_array ( f64,
364 read_bit_array ( f80, i ) );
366 /* and now we might have to round ... */
367 if (read_bit_array(f80, 10+1 - bexp) == 1)
373 /* Ok, it's a normalised number which is representable as a double.
374 Copy the exponent and mantissa into place. */
376 for (i = 0; i < 52; i++)
377 write_bit_array ( f64,
379 read_bit_array ( f80, i+11 ) );
381 f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
382 f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
383 f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
384 f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
385 f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
386 f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
388 f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
390 f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
392 /* Now consider any rounding that needs to happen as a result of
393 truncating the mantissa. */
394 if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
396 /* If the bottom bits of f80 are "100 0000 0000", then the
397 infinitely precise value is deemed to be mid-way between the
398 two closest representable values. Since we're doing
399 round-to-nearest (the default mode), in that case it is the
400 bit immediately above which indicates whether we should round
401 upwards or not -- if 0, we don't. All that is encapsulated
402 in the following simple test. */
403 if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
407 /* Round upwards. This is a kludge. Once in every 2^24
408 roundings (statistically) the bottom three bytes are all 0xFF
409 and so we don't round at all. Could be improved. */
410 if (f64[0] != 0xFF) {
414 if (f64[0] == 0xFF && f64[1] != 0xFF) {
419 if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
424 /* else we don't round, but we should. */
429 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
430 /* Extract the signed significand or exponent component as per
431 fxtract. Arg and result are doubles travelling under the guise of
432 ULongs. Returns significand when getExp is zero and exponent otherwise.
/* x86amd64g_calculate_FXTRACT: split a double, passed as its 64-bit
   pattern in `arg`, into significand and exponent parts, each returned
   as the bit pattern of a double.

   Parameters:
     arg     bit pattern of the input double.
     getExp  selector: non-zero returns the exponent result (uExp),
             zero returns the significand result (uSig).

   Visible steps:
     - Special cases return early with fixed patterns built from the
       constants below (infinities, zeroes; a NaN test on the exponent
       field is also visible).
     - Otherwise the sign (bit 63), the 52-bit significand field and the
       11-bit exponent field are separated.
     - The significand result has exponent field 0x3FF OR-ed in, which
       per the in-body comment corresponds to a value in the range 1.0
       to 2.0-epsilon for normalised args.
     - The exponent result is built from the magnitude of sExp, gets its
       own exponent field from `expExp`, and has its sign bit flipped
       (XOR with the sign-bit mask negZero) when sExp is negative.

   NOTE(review): the conditions guarding most of the special-case
   returns, the NaN return value, the bodies of both normalisation
   loops, and the adjustment of sExp before its magnitude is taken are
   on lines not shown in this view -- confirm against the full file. */
434 ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp )
442 S 7FF 0------0 infinity
446 const ULong posInf = 0x7FF0000000000000ULL;
447 const ULong negInf = 0xFFF0000000000000ULL;
448 const ULong nanMask = 0x7FF0000000000000ULL;
449 const ULong qNan = 0x7FF8000000000000ULL;
450 const ULong posZero = 0x0000000000000000ULL;
451 const ULong negZero = 0x8000000000000000ULL;
452 const ULong bit51 = 1ULL << 51;
453 const ULong bit52 = 1ULL << 52;
454 const ULong sigMask = bit52 - 1;
456 /* Mimic PIII behaviour for special cases. */
458 return getExp ? posInf : posInf;
460 return getExp ? posInf : negInf;
461 if ((arg & nanMask) == nanMask)
464 return getExp ? negInf : posZero;
466 return getExp ? negInf : negZero;
468 /* Split into sign, exponent and significand. */
469 sign = ((UInt)(arg >> 63)) & 1;
471 /* Mask off exponent & sign. uSig is in range 0 .. 2^52-1. */
472 uSig = arg & sigMask;
474 /* Get the exponent. */
475 sExp = ((Int)(arg >> 52)) & 0x7FF;
477 /* Deal with denormals: if the exponent is zero, then the
478 significand cannot possibly be zero (negZero/posZero are handled
479 above). Shift the significand left until bit 51 of it becomes
480 1, and decrease the exponent accordingly.
483 for (i = 0; i < 52; i++) {
491 /* Add the implied leading-1 in the significand. */
495 /* Roll in the sign. */
497 /* if (sign) sSig =- sSig; */
499 /* Convert sig into a double. This should be an exact conversion.
500 Then divide by 2^52, which should give a value in the range 1.0
501 to 2.0-epsilon, at least for normalised args. */
502 /* dSig = (Double)sSig; */
503 /* dSig /= 67108864.0; */ /* 2^26 */
504 /* dSig /= 67108864.0; */ /* 2^26 */
506 uSig |= 0x3FF0000000000000ULL;
510 /* Convert exp into a double. Also an exact conversion. */
511 /* dExp = (Double)(sExp - 1023); */
516 uExp = sExp < 0 ? -sExp : sExp;
518 /* 1 <= uExp <= 1074 */
519 /* Skip first 42 iterations of normalisation loop as we know they
520 will always happen */
523 for (i = 0; i < 52-42; i++) {
530 uExp |= ((ULong)expExp) << 52;
531 if (sExp < 0) uExp ^= negZero;
534 return getExp ? uExp : uSig;
538 /*---------------------------------------------------------------*/
539 /*--- end guest_generic_x87.c ---*/
540 /*---------------------------------------------------------------*/