l4/pkg/valgrind/src/valgrind-3.6.0-svn/none/tests/ppc32/round.c

   1
   2 /*  Copyright (C) 2006 Dave Nomura
   3        dcnltc@us.ibm.com
   4
   5     This program is free software; you can redistribute it and/or
   6     modify it under the terms of the GNU General Public License as
   7     published by the Free Software Foundation; either version 2 of the
   8     License, or (at your option) any later version.
   9
  10     This program is distributed in the hope that it will be useful, but
  11     WITHOUT ANY WARRANTY; without even the implied warranty of
  12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13     General Public License for more details.
  14
  15     You should have received a copy of the GNU General Public License
  16     along with this program; if not, write to the Free Software
  17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18     02111-1307, USA.
  19
  20     The GNU General Public License is contained in the file COPYING.
  21 */
  22
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #include <limits.h>
  26
  27 typedef enum { FALSE=0, TRUE } bool_t;
  28
  29 typedef enum {
  30         FADDS, FSUBS, FMULS, FDIVS,
  31         FMADDS, FMSUBS, FNMADDS, FNMSUBS,
  32         FADD, FSUB, FMUL, FDIV, FMADD,
  33         FMSUB, FNMADD, FNMSUB, FSQRT
  34 } flt_op_t;
  35
  36 typedef enum {
  37         TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
  38 char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
  39
  40 const char *flt_op_names[] = {
  41         "fadds", "fsubs", "fmuls", "fdivs",
  42         "fmadds", "fmsubs", "fnmadds", "fnmsubs",
  43         "fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
  44         "fnmsub", "fsqrt"
  45 };
  46
  47 typedef unsigned int fpscr_t;
  48
  49 typedef union {
  50         float flt;
  51         struct {
  52                 unsigned int sign:1;
  53                 unsigned int exp:8;
  54                 unsigned int frac:23;
  55         } layout;
  56 } flt_overlay;
  57
  58 typedef union {
  59         double dbl;
  60         struct {
  61                 unsigned int sign:1;
  62                 unsigned int exp:11;
  63                 unsigned int frac_hi:20;
  64                 unsigned int frac_lo:32;
  65         } layout;
  66         struct {
  67                 unsigned int hi;
  68                 unsigned int lo;
  69         } dbl_pair;
  70 } dbl_overlay;
  71
  72 void assert_fail(const char *msg,
  73         const char* expr, const char* file, int line, const char*fn);
  74
  75 #define STRING(__str)  #__str
  76 #define assert(msg, expr)                                           \
  77   ((void) ((expr) ? 0 :                                         \
  78            (assert_fail (msg, STRING(expr),                  \
  79                              __FILE__, __LINE__,                \
  80                              __PRETTY_FUNCTION__), 0)))
  81 float denorm_small;
  82 double dbl_denorm_small;
  83 float norm_small;
  84 bool_t debug = FALSE;
  85 bool_t long_is_64_bits = sizeof(long) == 8;
  86
  87 void assert_fail (msg, expr, file, line, fn)
  88 const char* msg;
  89 const char* expr;
  90 const char* file;
  91 int line;
  92 const char*fn;
  93 {
  94    printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
  95                msg, file, line, fn, expr );
  96    exit( 1 );
  97 }
  98 void set_rounding_mode(round_mode_t mode)
  99 {
 100         switch(mode) {
 101         case TO_NEAREST:
 102                 asm volatile("mtfsfi 7, 0");
 103                 break;
 104         case TO_ZERO:
 105                 asm volatile("mtfsfi 7, 1");
 106                 break;
 107         case TO_PLUS_INFINITY:
 108                 asm volatile("mtfsfi 7, 2");
 109                 break;
 110         case TO_MINUS_INFINITY:
 111                 asm volatile("mtfsfi 7, 3");
 112                 break;
 113         }
 114 }
 115
 116 void print_double(char *msg, double dbl)
 117 {
 118         dbl_overlay D;
 119         D.dbl = dbl;
 120
 121         printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
 122                         msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
 123                         D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
 124 }
 125
 126 void print_single(char *msg, float *flt)
 127 {
 128         flt_overlay F;
 129         F.flt = *flt;
 130
 131         /* NOTE: for the purposes of comparing the fraction of a single with
 132         **       a double left shift the .frac so that hex digits are grouped
 133         **           from left to right.  this is necessary because the size of a
 134         **               single mantissa (23) bits is not a multiple of 4
 135         */
 136         printf("%15s : flt %-20a = %c(%4d, %06x)\n",
 137                 msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
 138 }
 139
 140 int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
 141 {
 142         int status = 0;
 143         flt_overlay R, E;
 144         char *result;
 145         char *eq_ne;
 146
 147         set_rounding_mode(mode);
 148
 149         E.flt = *expected;
 150         R.flt = (float)dbl;
 151
 152         if ((R.layout.sign != E.layout.sign) ||
 153                 (R.layout.exp != E.layout.exp) ||
 154                 (R.layout.frac != E.layout.frac)) {
 155                 result = "FAILED";
 156                 eq_ne = "!=";
 157                 status = 1;
 158         } else {
 159                 result = "PASSED";
 160                 eq_ne = "==";
 161                 status = 0;
 162         }
 163         printf("%s:%s:(double)(%-20a) = %20a",
 164                 round_mode_name[mode], result, R.flt, dbl);
 165         if (status) {
 166                 print_single("\n\texpected", &E.flt);
 167                 print_single("\n\trounded ", &R.flt);
 168         }
 169         putchar('\n');
 170         return status;
 171 }
 172
 173 int test_dbl_to_float_convert(char *msg, float *base)
 174 {
 175         int status = 0;
 176         double half = (double)denorm_small/2;
 177         double qtr = half/2;
 178         double D_hi = (double)*base + half + qtr;
 179         double D_lo = (double)*base + half - qtr;
 180         float F_lo = *base;
 181         float F_hi = F_lo + denorm_small;
 182
 183
 184         /*
 185         ** .....+-----+-----+-----+-----+---....
 186         **      ^F_lo ^           ^     ^
 187         **            D_lo
 188         **                        D_hi
 189         **                              F_hi
 190         ** F_lo and F_hi are two consecutive single float model numbers
 191         ** denorm_small distance apart. D_lo and D_hi are two numbers
 192         ** within that range that are not representable as single floats
 193         ** and will be rounded to either F_lo or F_hi.
 194         */
 195         printf("-------------------------- %s --------------------------\n", msg);
 196         if (debug) {
 197                 print_double("D_lo", D_lo);
 198                 print_double("D_hi", D_hi);
 199                 print_single("F_lo", &F_lo);
 200                 print_single("F_hi", &F_hi);
 201         }
 202
 203         /* round to nearest */
 204         status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
 205         status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
 206
 207         /* round to zero */
 208         status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
 209         status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
 210
 211         /* round to +inf */
 212         status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
 213         status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
 214
 215         /* round to -inf */
 216         status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
 217         status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
 218         return status;
 219 }
 220
 221 void
 222 init()
 223 {
 224         flt_overlay F;
 225         dbl_overlay D;
 226
 227         /* small is the smallest denormalized single float number */
 228         F.layout.sign = 0;
 229         F.layout.exp = 0;
 230         F.layout.frac = 1;
 231         denorm_small = F.flt;   /* == 2^(-149) */
 232         if (debug) {
 233                 print_double("float small", F.flt);
 234         }
 235
 236         D.layout.sign = 0;
 237         D.layout.exp = 0;
 238         D.layout.frac_hi = 0;
 239         D.layout.frac_lo = 1;
 240         dbl_denorm_small = D.dbl;       /* == 2^(-1022) */
 241         if (debug) {
 242                 print_double("double small", D.dbl);
 243         }
 244
 245         /* n_small is the smallest normalized single precision float */
 246         F.layout.exp = 1;
 247         norm_small = F.flt;
 248 }
 249
 250 int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
 251 {
 252         int status = 0;
 253         int I = L;
 254         char *int_name = "int";
 255         flt_overlay R, E;
 256         char *result;
 257         int iter;
 258
 259         set_rounding_mode(mode);
 260         E.flt = *expected;
 261
 262         for (iter = 0; iter < 2; iter++) {
 263                 int stat = 0;
 264                 R.flt = (iter == 0 ? (float)I : (float)L);
 265
 266                 if ((R.layout.sign != E.layout.sign) ||
 267                         (R.layout.exp != E.layout.exp) ||
 268                         (R.layout.frac != E.layout.frac)) {
 269                         result = "FAILED";
 270                         stat = 1;
 271                 } else {
 272                         result = "PASSED";
 273                         stat = 0;
 274                 }
 275                 printf("%s:%s:(float)(%4s)%9d = %11.1f",
 276                         round_mode_name[mode], result, int_name, I, R.flt);
 277                 if (stat) {
 278                         print_single("\n\texpected: %.1f ", &E.flt);
 279                         print_single("\n\trounded ", &R.flt);
 280                 }
 281                 putchar('\n');
 282                 status |= stat;
 283
 284                 if (!long_is_64_bits) break;
 285                 int_name = "long";
 286         }
 287         return status;
 288 }
 289
 290 int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
 291 {
 292         int status = 0;
 293         dbl_overlay R, E;
 294         char *result;
 295
 296         set_rounding_mode(mode);
 297         E.dbl = *expected;
 298
 299         R.dbl = (double)L;
 300
 301         if ((R.layout.sign != E.layout.sign) ||
 302                 (R.layout.exp != E.layout.exp) ||
 303                 (R.layout.frac_lo != E.layout.frac_lo) ||
 304                 (R.layout.frac_hi != E.layout.frac_hi)) {
 305                 result = "FAILED";
 306                 status = 1;
 307         } else {
 308                 result = "PASSED";
 309                 status = 0;
 310         }
 311         printf("%s:%s:(double)(%18ld) = %20.1f",
 312                 round_mode_name[mode], result, L, R.dbl);
 313         if (status) {
 314                 printf("\n\texpected %.1f : ", E.dbl);
 315         }
 316         putchar('\n');
 317         return status;
 318 }
 319
 320 int test_int_to_float_convert(char *msg)
 321 {
 322         int status = 0;
 323         int int24_hi = 0x03ff0fff;
 324         int int24_lo = 0x03ff0ffd;
 325         float pos_flt_lo = 67047420.0;
 326         float pos_flt_hi = 67047424.0;
 327         float neg_flt_lo = -67047420.0;
 328         float neg_flt_hi = -67047424.0;
 329
 330         printf("-------------------------- %s --------------------------\n", msg);
 331         status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
 332         status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
 333         status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
 334         status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
 335         status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
 336         status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
 337         status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
 338         status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
 339
 340         status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
 341         status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
 342         status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
 343         status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
 344         status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
 345         status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
 346         status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
 347         status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
 348         return status;
 349 }
 350
 351 #ifdef __powerpc64__
 352 int test_long_to_double_convert(char *msg)
 353 {
 354         int status = 0;
 355         long long55_hi = 0x07ff0ffffffffff;
 356         long long55_lo = 0x07ff0fffffffffd;
 357         double pos_dbl_lo = 36012304344547324.0;
 358         double pos_dbl_hi = 36012304344547328.0;
 359         double neg_dbl_lo = -36012304344547324.0;
 360         double neg_dbl_hi = -36012304344547328.0;
 361
 362         printf("-------------------------- %s --------------------------\n", msg);
 363         status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
 364         status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
 365         status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
 366         status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
 367         status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
 368         status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
 369         status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
 370         status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
 371
 372         status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
 373         status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
 374         status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
 375         status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
 376         status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
 377         status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
 378         status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
 379         status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
 380         return status;
 381 }
 382 #endif
 383
 384 int check_single_arithmetic_op(flt_op_t op)
 385 {
 386                 char *result;
 387         int status = 0;
 388         dbl_overlay R, E;
 389         double qtr, half, fA, fB, fD;
 390                 round_mode_t mode;
 391                 int q, s;
 392                 bool_t two_args = TRUE;
 393                 float whole = denorm_small;
 394
 395 #define BINOP(op) \
 396         __asm__ volatile( \
 397                                         op" %0, %1, %2\n\t" \
 398                                         : "=f"(fD) : "f"(fA) , "f"(fB));
 399 #define UNOP(op) \
 400         __asm__ volatile( \
 401                                         op" %0, %1\n\t" \
 402                                         : "=f"(fD) : "f"(fA));
 403
 404                 half = (double)whole/2;
 405                 qtr = half/2;
 406
 407                 if (debug) {
 408                         print_double("qtr", qtr);
 409                         print_double("whole", whole);
 410                         print_double("2*whole", 2*whole);
 411                 }
 412
 413                 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
 414                 for (s = -1; s < 2; s += 2)
 415                 for (q = 1; q < 4; q += 2) {
 416                         double expected;
 417                         double lo = s*whole;
 418                         double hi = s*2*whole;
 419
 420                         switch(op) {
 421                         case FADDS:
 422                                 fA = s*whole;
 423                                 fB = s*q*qtr;
 424                                 break;
 425                         case FSUBS:
 426                                 fA = s*2*whole;
 427                                 fB = s*(q == 1 ? 3 : 1)*qtr;
 428                                 break;
 429                         case FMULS:
 430                                 fA = 0.5;
 431                                 fB = s*(4+q)*half;
 432                                 break;
 433                         case FDIVS:
 434                                 fA = s*(4+q)*half;
 435                                 fB = 2.0;
 436                                 break;
 437                         default:
 438                                 assert("check_single_arithmetic_op: unexpected op",
 439                                         FALSE);
 440                                 break;
 441                         }
 442
 443                         switch(mode) {
 444                         case TO_NEAREST:
 445                                 expected = (q == 1 ? lo : hi);
 446                                 break;
 447                         case TO_ZERO:
 448                                 expected = lo;
 449                                 break;
 450                         case TO_PLUS_INFINITY:
 451                                 expected = (s == 1 ? hi : lo);
 452                                 break;
 453                         case TO_MINUS_INFINITY:
 454                                 expected = (s == 1 ? lo : hi);
 455                                 break;
 456                         }
 457
 458                         set_rounding_mode(mode);
 459
 460                         /*
 461                         ** do the double precision dual operation just for comparison
 462                         ** when debugging
 463                         */
 464                         switch(op) {
 465                         case FADDS:
 466                                 BINOP("fadds");
 467                                 R.dbl = fD;
 468                                 BINOP("fadd");
 469                                 break;
 470                         case FSUBS:
 471                                 BINOP("fsubs");
 472                                 R.dbl = fD;
 473                                 BINOP("fsub");
 474                                 break;
 475                         case FMULS:
 476                                 BINOP("fmuls");
 477                                 R.dbl = fD;
 478                                 BINOP("fmul");
 479                                 break;
 480                         case FDIVS:
 481                                 BINOP("fdivs");
 482                                 R.dbl = fD;
 483                                 BINOP("fdiv");
 484                                 break;
 485                         default:
 486                                 assert("check_single_arithmetic_op: unexpected op",
 487                                         FALSE);
 488                                 break;
 489                         }
 490 #undef UNOP
 491 #undef BINOP
 492
 493                         E.dbl = expected;
 494
 495                         if ((R.layout.sign != E.layout.sign) ||
 496                                 (R.layout.exp != E.layout.exp) ||
 497                                 (R.layout.frac_lo != E.layout.frac_lo) ||
 498                                 (R.layout.frac_hi != E.layout.frac_hi)) {
 499                                 result = "FAILED";
 500                                 status = 1;
 501                         } else {
 502                                 result = "PASSED";
 503                                 status = 0;
 504                         }
 505
 506                         printf("%s:%s:%s(%-13a",
 507                                 round_mode_name[mode], result, flt_op_names[op], fA);
 508                         if (two_args) printf(", %-13a", fB);
 509                         printf(") = %-13a", R.dbl);
 510                         if (status) printf("\n\texpected %a", E.dbl);
 511                         putchar('\n');
 512
 513                         if (debug) {
 514                                 print_double("hi", hi);
 515                                 print_double("lo", lo);
 516                                 print_double("expected", expected);
 517                                 print_double("got", R.dbl);
 518                                 print_double("double result", fD);
 519                         }
 520                 }
 521
 522                 return status;
 523 }
 524
 525 int check_single_guarded_arithmetic_op(flt_op_t op)
 526 {
 527                 typedef struct {
 528                         int num, den, frac;
 529                 } fdivs_t;
 530
 531                 char *result;
 532         int status = 0;
 533         flt_overlay A, B, Z;
 534         dbl_overlay Res, Exp;
 535         double fA, fB, fC, fD;
 536                 round_mode_t mode;
 537                 int g, s;
 538                 int arg_count;
 539
 540                 fdivs_t divs_guard_cases[16] = {
 541                         { 105, 56, 0x700000 },  /* : 0 */
 542                         { 100, 57, 0x608FB8 },  /* : 1 */
 543                         { 000, 00, 0x000000 },  /* : X */
 544                         { 100, 52, 0x762762 },  /* : 3 */
 545                         { 000, 00, 0x000000 },  /* : X */
 546                         { 100, 55, 0x68BA2E },  /* : 5 */
 547                         { 000, 00, 0x000000 },  /* : X */
 548                         { 100, 51, 0x7AFAFA },  /* : 7 */
 549                         { 000, 00, 0x000000 },  /* : X */
 550                         { 100, 56, 0x649249 },  /* : 9 */
 551                         { 000, 00, 0x000000 },  /* : X */
 552                         { 100, 54, 0x6D097B },  /* : B */
 553                         { 000, 00, 0x000000 },  /* : X */
 554                         { 100, 59, 0x58F2FB },  /* : D */
 555                         { 000, 00, 0x000000 },  /* : X */
 556                         { 101, 52, 0x789D89 }  /* : F */
 557                 };
 558
 559                 /*      0x1.00000 00000000p-3 */
 560                 /* set up the invariant fields of B, the arg to cause rounding */
 561                 B.flt = 0.0;
 562                 B.layout.exp = 124;  /* -3 */
 563
 564                 /* set up args so result is always Z = 1.200000000000<g>p+0 */
 565                 Z.flt = 1.0;
 566                 Z.layout.sign = 0;
 567
 568 #define TERNOP(op) \
 569                 arg_count = 3; \
 570         __asm__ volatile( \
 571                                         op" %0, %1, %2, %3\n\t" \
 572                                         : "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
 573 #define BINOP(op) \
 574                 arg_count = 2; \
 575         __asm__ volatile( \
 576                                         op" %0, %1, %2\n\t" \
 577                                         : "=f"(fD) : "f"(fA) , "f"(fB));
 578 #define UNOP(op) \
 579                 arg_count = 1; \
 580         __asm__ volatile( \
 581                                         op" %0, %1\n\t" \
 582                                         : "=f"(fD) : "f"(fA));
 583
 584         for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
 585         for (s = -1; s < 2; s += 2)
 586         for (g = 0; g < 16; g += 1) {
 587                 double lo, hi, expected;
 588                 int LSB;
 589                 int guard = 0;
 590                 int z_sign = s;
 591
 592                 /*
 593                 ** one argument will have exponent = 0 as will the result (by
 594                 ** design) so choose the other argument with exponent -3 to
 595                 ** force a 3 bit shift for scaling leaving us with 3 guard bits
 596                 ** and the LSB bit at the bottom of the manitssa.
 597                 */
 598                 switch(op) {
 599                 case FADDS:
 600                         /* 1p+0 + 1.00000<g>p-3 */
 601                         B.layout.frac = g;
 602
 603                         fB = s*B.flt;
 604                         fA = s*1.0;
 605
 606                         /* set up Z to be truncated result */
 607
 608                         /* mask off LSB from resulting guard bits */
 609                         guard = g & 7;
 610
 611                         Z.layout.frac = 0x100000 | (g >> 3);
 612                         break;
 613                 case FSUBS:
 614                         /* 1.200002p+0 - 1.000000000000<g>p-3 */
 615                         A.flt = 1.125;
 616                         /* add enough to avoid scaling of the result */
 617                         A.layout.frac |= 0x2;
 618                         fA = s*A.flt;
 619
 620                         B.layout.frac = g;
 621                         fB = s*B.flt;
 622
 623                         /* set up Z to be truncated result */
 624                         guard = (0x10-g);
 625                         Z.layout.frac = guard>>3;
 626
 627                         /* mask off LSB from resulting guard bits */
 628                         guard &= 7;
 629                         break;
 630                 case FMULS:
 631                         /* 1 + g*2^-23 */
 632                         A.flt = 1.0;
 633                         A.layout.frac = g;
 634                         fA = s*A.flt;
 635                         fB = 1.125;
 636
 637                         /* set up Z to be truncated result */
 638                         Z.flt = 1.0;
 639                         Z.layout.frac = 0x100000;
 640                         Z.layout.frac |= g + (g>>3);
 641                         guard = g & 7;
 642                         break;
 643                 case FDIVS:
 644                         /* g >> 3 == LSB, g & 7 == guard bits */
 645                         guard = g & 7;
 646                         if ((guard & 1) == 0) {
 647                                 /* special case: guard bit X = 0 */
 648                                 A.flt = denorm_small;
 649                                 A.layout.frac = g;
 650                                 fA = A.flt;
 651                                 fB = s*8.0;
 652                                 Z.flt = 0.0;
 653                                 Z.layout.frac |= (g >> 3);
 654                         } else {
 655                                 fA = s*divs_guard_cases[g].num;
 656                                 fB = divs_guard_cases[g].den;
 657
 658                                 Z.flt = 1.0;
 659                                 Z.layout.frac = divs_guard_cases[g].frac;
 660                         }
 661                         break;
 662                 case FMADDS:
 663                 case FMSUBS:
 664                 case FNMADDS:
 665                 case FNMSUBS:
 666                         /* 1 + g*2^-23 */
 667                         A.flt = 1.0;
 668                         A.layout.frac = g;
 669                         fA = s*A.flt;
 670                         fB = 1.125;
 671
 672                         /* 1.000001p-1 */
 673                         A.flt = 0.5;
 674                         A.layout.frac = 1;
 675                         fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
 676
 677                         /* set up Z to be truncated result */
 678                         z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
 679                         guard = ((g & 7) + 0x4) & 7;
 680                         Z.flt = 1.0;
 681                         Z.layout.frac = 0x500000;
 682                         Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
 683                         break;
 684                 default:
 685                         assert("check_single_arithmetic_op: unexpected op",
 686                                 FALSE);
 687                         break;
 688                 }
 689
 690                 /* get LSB for tie breaking */
 691                 LSB = Z.layout.frac & 1;
 692
 693                 /* set up hi and lo */
 694                 lo = z_sign*Z.flt;
 695                 Z.layout.frac += 1;
 696                 hi = z_sign*Z.flt;
 697
 698                 switch(mode) {
 699                 case TO_NEAREST:
 700                         /* look at 3 guard bits to determine expected rounding */
 701                         switch(guard) {
 702                         case 0:
 703                         case 1: case 2: case 3:
 704                                 expected = lo;
 705                                 break;
 706                         case 4: /* tie: round to even */
 707                                 if (debug) printf("tie: LSB = %d\n", LSB);
 708                                 expected = (LSB == 0 ? lo : hi);
 709                                 break;
 710                         case 5: case 6: case 7:
 711                                 expected = hi;
 712                                 break;
 713                         default:
 714                                 assert("check_single_guarded_arithmetic_op: unexpected guard",
 715                                         FALSE);
 716                         }
 717                         break;
 718                 case TO_ZERO:
 719                         expected = lo;
 720                         break;
 721                 case TO_PLUS_INFINITY:
 722                         if (guard == 0) {
 723                                 /* no rounding */
 724                                 expected = lo;
 725                         } else {
 726                                 expected = (s == 1 ? hi : lo);
 727                         }
 728                         break;
 729                 case TO_MINUS_INFINITY:
 730                         if (guard == 0) {
 731                                 /* no rounding */
 732                                 expected = lo;
 733                         } else {
 734                                 expected = (s == 1 ? lo : hi);
 735                         }
 736                         break;
 737                 }
 738
 739                 set_rounding_mode(mode);
 740
 741                 /*
 742                 ** do the double precision dual operation just for comparison
 743                 ** when debugging
 744                 */
 745                 switch(op) {
 746                 case FADDS:
 747                         BINOP("fadds");
 748                         Res.dbl = fD;
 749                         break;
 750                 case FSUBS:
 751                         BINOP("fsubs");
 752                         Res.dbl = fD;
 753                         break;
 754                 case FMULS:
 755                         BINOP("fmuls");
 756                         Res.dbl = fD;
 757                         break;
 758                 case FDIVS:
 759                         BINOP("fdivs");
 760                         Res.dbl = fD;
 761                         break;
 762                 case FMADDS:
 763                         TERNOP("fmadds");
 764                         Res.dbl = fD;
 765                         break;
 766                 case FMSUBS:
 767                         TERNOP("fmsubs");
 768                         Res.dbl = fD;
 769                         break;
 770                 case FNMADDS:
 771                         TERNOP("fnmadds");
 772                         Res.dbl = fD;
 773                         break;
 774                 case FNMSUBS:
 775                         TERNOP("fnmsubs");
 776                         Res.dbl = fD;
 777                         break;
 778                 default:
 779                         assert("check_single_guarded_arithmetic_op: unexpected op",
 780                                 FALSE);
 781                         break;
 782                 }
 783 #undef UNOP
 784 #undef BINOP
 785 #undef TERNOP
 786
 787                 Exp.dbl = expected;
 788
 789                 if ((Res.layout.sign != Exp.layout.sign) ||
 790                         (Res.layout.exp != Exp.layout.exp) ||
 791                         (Res.layout.frac_lo != Exp.layout.frac_lo) ||
 792                         (Res.layout.frac_hi != Exp.layout.frac_hi)) {
 793                         result = "FAILED";
 794                         status = 1;
 795                 } else {
 796                         result = "PASSED";
 797                         status = 0;
 798                 }
 799
 800                 printf("%s:%s:%s(%-13f",
 801                         round_mode_name[mode], result, flt_op_names[op], fA);
 802                 if (arg_count > 1) printf(", %-13a", fB);
 803                 if (arg_count > 2) printf(", %-13a", fC);
 804                 printf(") = %-13a", Res.dbl);
 805                 if (status) printf("\n\texpected %a", Exp.dbl);
 806                 putchar('\n');
 807
 808                 if (debug) {
 809                         print_double("hi", hi);
 810                         print_double("lo", lo);
 811                         print_double("expected", expected);
 812                         print_double("got", Res.dbl);
 813                 }
 814         }
 815
 816         return status;
 817 }
 818
 819 int check_double_guarded_arithmetic_op(flt_op_t op)
 820 {
 821         typedef struct {
 822                 int num, den, hi, lo;
 823         } fdiv_t;
 824         typedef struct {
 825                 double arg;
 826                 int exp, hi, lo;
 827         } fsqrt_t;
 828
 829         char *result;
 830         int status = 0;
 831         dbl_overlay A, B, Z;
 832         dbl_overlay Res, Exp;
 833         double fA, fB, fC, fD;
 834         round_mode_t mode;
 835         int g, s;
 836         int arg_count;
 837         fdiv_t div_guard_cases[16] = {
 838                 { 62, 62, 0x00000, 0x00000000 },        /* 0 */
 839                 { 64, 62, 0x08421, 0x08421084 },        /* 1 */
 840                 { 66, 62, 0x10842, 0x10842108 },        /* 2 */
 841                 { 100, 62, 0x9ce73, 0x9ce739ce },       /* 3 */
 842                 { 100, 62, 0x9ce73, 0x9ce739ce },       /* X */
 843                 { 102, 62, 0xa5294, 0xa5294a52 },       /* 5 */
 844                 { 106, 62, 0xb5ad6, 0xb5ad6b5a },       /* 6 */
 845                 { 108, 62, 0xbdef7, 0xbdef7bde },       /* 7 */
 846                 { 108, 108, 0x00000, 0x00000000 },      /* 8 */
 847                 { 112, 62, 0xce739, 0xce739ce7 },       /* 9 */
 848                 { 114, 62, 0xd6b5a, 0xd6b5ad6b },       /* A */
 849                 { 116, 62, 0xdef7b, 0xdef7bdef },       /* B */
 850                 { 84, 62, 0x5ad6b, 0x5ad6b5ad },        /* X */
 851                 { 118, 62, 0xe739c, 0xe739ce73 },       /* D */
 852                 { 90, 62, 0x739ce, 0x739ce739 },        /* E */
 853                 { 92, 62, 0x7bdef, 0x7bdef7bd }         /* F */
 854         };
 855
 856
 857         fsqrt_t sqrt_guard_cases[16] = {
 858                 { 0x1.08800p0,  0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440  */
 859                 { 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910  */
 860                 { 0x1.A8220p0,  0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411  */
 861                 { 0x1.05A20p0,  0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1  */
 862                 { 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541  */
 863                 { 0x1.DCA20p0,  0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51  */
 864                 { 0x1.02C80p0,  0, 0x01630, 0x9cde7483}, /* :6 B6.8164  */
 865                 { 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40  */
 866                 { 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9  */
 867                 { 0x1.1D020p0,  0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81  */
 868                 { 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4  */
 869                 { 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400  */
 870                 { 0x1.48520p0,  0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429  */
 871                 { 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61  */
 872                 { 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684  */
 873                 { 0x1.76B20p0,  0, 0x35b67, 0x81aed827}  /* :F DB.BB59  */
 874         };
 875
 876         /*      0x1.00000 00000000p-3 */
 877         /* set up the invariant fields of B, the arg to cause rounding */
 878         B.dbl = 0.0;
 879         B.layout.exp = 1020;
 880
 881         /* set up args so result is always Z = 1.200000000000<g>p+0 */
 882         Z.dbl = 1.0;
 883         Z.layout.sign = 0;
 884
 885 #define TERNOP(op) \
 886                 arg_count = 3; \
 887         __asm__ volatile( \
 888                                         op" %0, %1, %2, %3\n\t" \
 889                                         : "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
 890 #define BINOP(op) \
 891                 arg_count = 2; \
 892         __asm__ volatile( \
 893                                         op" %0, %1, %2\n\t" \
 894                                         : "=f"(fD) : "f"(fA) , "f"(fB));
 895 #define UNOP(op) \
 896                 arg_count = 1; \
 897         __asm__ volatile( \
 898                                         op" %0, %1\n\t" \
 899                                         : "=f"(fD) : "f"(fA));
 900
 901         for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
 902         for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
 903         for (g = 0; g < 16; g += 1) {
 904                 double lo, hi, expected;
 905                 int LSB;
 906                 int guard;
 907                 int z_sign = s;
 908
 909                 /*
 910                 ** one argument will have exponent = 0 as will the result (by
 911                 ** design) so choose the other argument with exponent -3 to
 912                 ** force a 3 bit shift for scaling leaving us with 3 guard bits
 913                 ** and the LSB bit at the bottom of the manitssa.
 914                 */
 915                 switch(op) {
 916                 case FADD:
 917                         /* 1p+0 + 1.000000000000<g>p-3 */
 918                         B.layout.frac_lo = g;
 919
 920                         fB = s*B.dbl;
 921                         fA = s*1.0;
 922
 923                         /* set up Z to be truncated result */
 924
 925                         /* mask off LSB from resulting guard bits */
 926                         guard = g & 7;
 927
 928                         Z.layout.frac_hi = 0x20000;
 929                         Z.layout.frac_lo = g >> 3;
 930
 931                         break;
 932                 case FSUB:
 933                         /* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
 934                         A.dbl = 1.125;
 935                         /* add enough to avoid scaling of the result */
 936                         A.layout.frac_lo = 0x2;
 937                         fA = s*A.dbl;
 938
 939                         B.layout.frac_lo = g;
 940                         fB = s*B.dbl;
 941
 942                         /* set up Z to be truncated result */
 943                         guard = (0x10-g);
 944                         Z.layout.frac_hi = 0x0;
 945                         Z.layout.frac_lo = guard>>3;
 946
 947                         /* mask off LSB from resulting guard bits */
 948                         guard &= 7;
 949                         break;
 950                 case FMUL:
 951                         /* 1 + g*2^-52 */
 952                         A.dbl = 1.0;
 953                         A.layout.frac_lo = g;
 954                         fA = s*A.dbl;
 955                         fB = 1.125;
 956
 957                         /* set up Z to be truncated result */
 958                         Z.dbl = 1.0;
 959                         Z.layout.frac_hi = 0x20000;
 960                         Z.layout.frac_lo = g + (g>>3);
 961                         guard = g & 7;
 962                         break;
 963                 case FMADD:
 964                 case FMSUB:
 965                 case FNMADD:
 966                 case FNMSUB:
 967                         /* 1 + g*2^-52 */
 968                         A.dbl = 1.0;
 969                         A.layout.frac_lo = g;
 970                         fA = s*A.dbl;
 971                         fB = 1.125;
 972
 973                         /* 1.0000000000001p-1 */
 974                         A.dbl = 0.5;
 975                         A.layout.frac_lo = 1;
 976                         fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
 977
 978                         /* set up Z to be truncated result */
 979                         z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
 980                         guard = ((g & 7) + 0x4) & 7;
 981                         Z.dbl = 1.0;
 982                         Z.layout.frac_hi = 0xa0000;
 983                         Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
 984                         break;
 985                 case FDIV:
 986                         /* g >> 3 == LSB, g & 7 == guard bits */
 987                         guard = g & 7;
 988                         if (guard == 0x4) {
 989                                 /* special case guard bits == 4, inexact tie */
 990                                 fB = s*2.0;
 991                                 Z.dbl = 0.0;
 992                                 if (g >> 3) {
 993                                         fA = dbl_denorm_small + 2*dbl_denorm_small;
 994                                         Z.layout.frac_lo = 0x1;
 995                                 } else {
 996                                         fA = dbl_denorm_small;
 997                                 }
 998                         } else {
 999                                 fA = s*div_guard_cases[g].num;
1000                                 fB = div_guard_cases[g].den;
1001
1002                                 printf("%d/%d\n",
1003                                         s*div_guard_cases[g].num,
1004                                         div_guard_cases[g].den);
1005                                 Z.dbl = 1.0;
1006                                 Z.layout.frac_hi = div_guard_cases[g].hi;
1007                                 Z.layout.frac_lo = div_guard_cases[g].lo;
1008                         }
1009                         break;
1010                 case FSQRT:
1011                         fA = s*sqrt_guard_cases[g].arg;
1012                         Z.dbl = 1.0;
1013                         Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
1014                         Z.layout.frac_hi = sqrt_guard_cases[g].hi;
1015                         Z.layout.frac_lo = sqrt_guard_cases[g].lo;
1016                         guard = g >> 1;
1017                         if (g & 1) guard |= 1;
1018                         /* don't have test cases for when X bit = 0 */
1019                         if (guard == 0 || guard == 4) continue;
1020                         break;
1021                 default:
1022                         assert("check_double_guarded_arithmetic_op: unexpected op",
1023                                 FALSE);
1024                         break;
1025                 }
1026
1027                 /* get LSB for tie breaking */
1028                 LSB = Z.layout.frac_lo & 1;
1029
1030                 /* set up hi and lo */
1031                 lo = z_sign*Z.dbl;
1032                 Z.layout.frac_lo += 1;
1033                 hi = z_sign*Z.dbl;
1034
1035                 switch(mode) {
1036                 case TO_NEAREST:
1037                         /* look at 3 guard bits to determine expected rounding */
1038                         switch(guard) {
1039                         case 0:
1040                         case 1: case 2: case 3:
1041                                 expected = lo;
1042                                 break;
1043                         case 4: /* tie: round to even */
1044                                 if (debug) printf("tie: LSB = %d\n", LSB);
1045                                 expected = (LSB == 0 ? lo : hi);
1046                                 break;
1047                         case 5: case 6: case 7:
1048                                 expected = hi;
1049                                 break;
1050                         default:
1051                                 assert("check_double_guarded_arithmetic_op: unexpected guard",
1052                                         FALSE);
1053                         }
1054                         break;
1055                 case TO_ZERO:
1056                         expected = lo;
1057                         break;
1058                 case TO_PLUS_INFINITY:
1059                         if (guard == 0) {
1060                                 /* no rounding */
1061                                 expected = lo;
1062                         } else {
1063                                 expected = (s == 1 ? hi : lo);
1064                         }
1065                         break;
1066                 case TO_MINUS_INFINITY:
1067                         if (guard == 0) {
1068                                 /* no rounding */
1069                                 expected = lo;
1070                         } else {
1071                                 expected = (s == 1 ? lo : hi);
1072                         }
1073                         break;
1074                 }
1075
1076                 set_rounding_mode(mode);
1077
1078                 /*
1079                 ** do the double precision dual operation just for comparison
1080                 ** when debugging
1081                 */
1082                 switch(op) {
1083                 case FADD:
1084                         BINOP("fadd");
1085                         Res.dbl = fD;
1086                         break;
1087                 case FSUB:
1088                         BINOP("fsub");
1089                         Res.dbl = fD;
1090                         break;
1091                 case FMUL:
1092                         BINOP("fmul");
1093                         Res.dbl = fD;
1094                         break;
1095                 case FMADD:
1096                         TERNOP("fmadd");
1097                         Res.dbl = fD;
1098                         break;
1099                 case FMSUB:
1100                         TERNOP("fmsub");
1101                         Res.dbl = fD;
1102                         break;
1103                 case FNMADD:
1104                         TERNOP("fnmadd");
1105                         Res.dbl = fD;
1106                         break;
1107                 case FNMSUB:
1108                         TERNOP("fnmsub");
1109                         Res.dbl = fD;
1110                         break;
1111                 case FDIV:
1112                         BINOP("fdiv");
1113                         Res.dbl = fD;
1114                         break;
1115                 case FSQRT:
1116                         UNOP("fsqrt");
1117                         Res.dbl = fD;
1118                         break;
1119                 default:
1120                         assert("check_double_guarded_arithmetic_op: unexpected op",
1121                                 FALSE);
1122                         break;
1123                 }
1124 #undef UNOP
1125 #undef BINOP
1126 #undef TERNOP
1127
1128                 Exp.dbl = expected;
1129
1130                 if ((Res.layout.sign != Exp.layout.sign) ||
1131                         (Res.layout.exp != Exp.layout.exp) ||
1132                         (Res.layout.frac_lo != Exp.layout.frac_lo) ||
1133                         (Res.layout.frac_hi != Exp.layout.frac_hi)) {
1134                         result = "FAILED";
1135                         status = 1;
1136                 } else {
1137                         result = "PASSED";
1138                         status = 0;
1139                 }
1140
1141                 printf("%s:%s:%s(%-13a",
1142                         round_mode_name[mode], result, flt_op_names[op], fA);
1143                 if (arg_count > 1) printf(", %-13a", fB);
1144                 if (arg_count > 2) printf(", %-13a", fC);
1145                 printf(") = %-13a", Res.dbl);
1146                 if (status) printf("\n\texpected %a", Exp.dbl);
1147                 putchar('\n');
1148
1149                 if (debug) {
1150                         print_double("hi", hi);
1151                         print_double("lo", lo);
1152                         print_double("expected", expected);
1153                         print_double("got", Res.dbl);
1154                 }
1155         }
1156
1157         return status;
1158 }
1159
1160 int test_float_arithmetic_ops()
1161 {
1162         int status = 0;
1163         flt_op_t op;
1164
1165         /*
1166         ** choose FP operands whose result should be rounded to either
1167         ** lo or hi.
1168         */
1169
1170         printf("-------------------------- %s --------------------------\n",
1171                 "test rounding of float operators without guard bits");
1172         for (op = FADDS; op <= FDIVS; op++) {
1173                 status |= check_single_arithmetic_op(op);
1174         }
1175
1176         printf("-------------------------- %s --------------------------\n",
1177                 "test rounding of float operators with guard bits");
1178         for (op = FADDS; op <= FNMSUBS; op++) {
1179                 status |= check_single_guarded_arithmetic_op(op);
1180         }
1181
1182         printf("-------------------------- %s --------------------------\n",
1183                 "test rounding of double operators with guard bits");
1184         for (op = FADD; op <= FSQRT; op++) {
1185                 status |= check_double_guarded_arithmetic_op(op);
1186         }
1187         return status;
1188 }
1189
1190
1191 int
1192 main()
1193 {
1194         int status = 0;
1195
1196         init();
1197
1198         status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
1199         status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
1200         status |= test_int_to_float_convert("test (float)int convert");
1201         status |= test_int_to_float_convert("test (float)int convert");
1202
1203 #ifdef __powerpc64__
1204         status |= test_long_to_double_convert("test (double)long convert");
1205 #endif
1206         status |= test_float_arithmetic_ops();
1207         return status;
1208 }