l4/pkg/valgrind/src/valgrind-3.6.0-svn/memcheck/mc_replace_strmem.c

   1
   2 /*--------------------------------------------------------------------*/
   3 /*--- Replacements for strcpy(), memcpy() et al, which run on the  ---*/
   4 /*--- simulated CPU.                                               ---*/
   5 /*---                                          mc_replace_strmem.c ---*/
   6 /*--------------------------------------------------------------------*/
   7
   8 /*
   9    This file is part of MemCheck, a heavyweight Valgrind tool for
  10    detecting memory errors.
  11
  12    Copyright (C) 2000-2010 Julian Seward
  13       jseward@acm.org
  14
  15    This program is free software; you can redistribute it and/or
  16    modify it under the terms of the GNU General Public License as
  17    published by the Free Software Foundation; either version 2 of the
  18    License, or (at your option) any later version.
  19
  20    This program is distributed in the hope that it will be useful, but
  21    WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23    General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program; if not, write to the Free Software
  27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  28    02111-1307, USA.
  29
  30    The GNU General Public License is contained in the file COPYING.
  31 */
  32
  33 #include "pub_tool_basics.h"
  34 #include "pub_tool_hashtable.h"
  35 #include "pub_tool_redir.h"
  36 #include "pub_tool_tooliface.h"
  37 #include "valgrind.h"
  38
  39 #include "mc_include.h"
  40 #include "memcheck.h"
  41
  42 /* ---------------------------------------------------------------------
  43    We have our own versions of these functions for two reasons:
  44    (a) it allows us to do overlap checking
  45    (b) some of the normal versions are hyper-optimised, which fools
   46        Memcheck and causes spurious value warnings.  Our versions are
  47        simpler.
  48
  49    Note that overenthusiastic use of PLT bypassing by the glibc people also
  50    means that we need to patch multiple versions of some of the functions to
  51    our own implementations.
  52
  53    THEY RUN ON THE SIMD CPU!
  54    ------------------------------------------------------------------ */
  55
  56 /* Figure out if [dst .. dst+dstlen-1] overlaps with
  57                  [src .. src+srclen-1].
  58    We assume that the address ranges do not wrap around
  59    (which is safe since on Linux addresses >= 0xC0000000
  60    are not accessible and the program will segfault in this
  61    circumstance, presumably).
  62 */
  63 static __inline__
  64 Bool is_overlap ( void* dst, const void* src, SizeT dstlen, SizeT srclen )
  65 {
  66    Addr loS, hiS, loD, hiD;
  67
  68    if (dstlen == 0 || srclen == 0)
  69       return False;
  70
  71    loS = (Addr)src;
  72    loD = (Addr)dst;
  73    hiS = loS + srclen - 1;
  74    hiD = loD + dstlen - 1;
  75
  76    /* So figure out if [loS .. hiS] overlaps with [loD .. hiD]. */
  77    if (loS < loD) {
  78       return !(hiS < loD);
  79    }
  80    else if (loD < loS) {
  81       return !(hiD < loS);
  82    }
  83    else {
  84       /* They start at same place.  Since we know neither of them has
  85          zero length, they must overlap. */
  86       return True;
  87    }
  88 }
  89
  90 // This is a macro rather than a function because we don't want to have an
  91 // extra function in the stack trace.
  92 #define RECORD_OVERLAP_ERROR(s, src, dst, len) \
  93 { \
  94    Word unused_res; \
  95    VALGRIND_DO_CLIENT_REQUEST(unused_res, 0, \
  96                               _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR, \
  97                               s, src, dst, len, 0); \
  98 }
  99
 100
 101 #define STRRCHR(soname, fnname) \
 102    char* VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* s, int c ); \
 103    char* VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* s, int c ) \
 104    { \
 105       UChar  ch   = (UChar)((UInt)c); \
 106       UChar* p    = (UChar*)s; \
 107       UChar* last = NULL; \
 108       while (True) { \
 109          if (*p == ch) last = p; \
 110          if (*p == 0) return last; \
 111          p++; \
 112       } \
 113    }
 114
 115 // Apparently rindex() is the same thing as strrchr()
 116 STRRCHR(VG_Z_LIBC_SONAME,   strrchr)
 117 STRRCHR(VG_Z_LIBC_SONAME,   rindex)
 118 #if defined(VGO_linux)
 119 STRRCHR(VG_Z_LIBC_SONAME,   __GI_strrchr)
 120 STRRCHR(VG_Z_LD_LINUX_SO_2, rindex)
 121 #elif defined(VGO_darwin)
 122 STRRCHR(VG_Z_DYLD,          strrchr)
 123 STRRCHR(VG_Z_DYLD,          rindex)
 124 #endif
 125
 126
 127 #define STRCHR(soname, fnname) \
 128    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ); \
 129    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ) \
 130    { \
 131       UChar  ch = (UChar)((UInt)c); \
 132       UChar* p  = (UChar*)s; \
 133       while (True) { \
 134          if (*p == ch) return p; \
 135          if (*p == 0) return NULL; \
 136          p++; \
 137       } \
 138    }
 139
 140 // Apparently index() is the same thing as strchr()
 141 STRCHR(VG_Z_LIBC_SONAME,          strchr)
 142 STRCHR(VG_Z_LIBC_SONAME,          index)
 143 #if defined(VGO_linux)
 144 STRCHR(VG_Z_LIBC_SONAME,          __GI_strchr)
 145 STRCHR(VG_Z_LD_LINUX_SO_2,        strchr)
 146 STRCHR(VG_Z_LD_LINUX_SO_2,        index)
 147 STRCHR(VG_Z_LD_LINUX_X86_64_SO_2, strchr)
 148 STRCHR(VG_Z_LD_LINUX_X86_64_SO_2, index)
 149 #elif defined(VGO_darwin)
 150 STRCHR(VG_Z_DYLD,                 strchr)
 151 STRCHR(VG_Z_DYLD,                 index)
 152 #endif
 153
 154
 155 #define STRCAT(soname, fnname) \
 156    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( char* dst, const char* src ); \
 157    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( char* dst, const char* src ) \
 158    { \
 159       const Char* src_orig = src; \
 160             Char* dst_orig = dst; \
 161       while (*dst) dst++; \
 162       while (*src) *dst++ = *src++; \
 163       *dst = 0; \
 164       \
 165       /* This is a bit redundant, I think;  any overlap and the strcat will */ \
 166       /* go forever... or until a seg fault occurs. */ \
 167       if (is_overlap(dst_orig,  \
 168                      src_orig,  \
 169                      (Addr)dst-(Addr)dst_orig+1,  \
 170                      (Addr)src-(Addr)src_orig+1)) \
 171          RECORD_OVERLAP_ERROR("strcat", dst_orig, src_orig, 0); \
 172       \
 173       return dst_orig; \
 174    }
 175
 176 STRCAT(VG_Z_LIBC_SONAME, strcat)
 177 #if defined(VGO_linux)
 178 STRCAT(VG_Z_LIBC_SONAME, __GI_strcat)
 179 #endif
 180
 181 #define STRNCAT(soname, fnname) \
 182    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 183             ( char* dst, const char* src, SizeT n ); \
 184    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 185             ( char* dst, const char* src, SizeT n ) \
 186    { \
 187       const Char* src_orig = src; \
 188             Char* dst_orig = dst; \
 189       SizeT m = 0; \
 190       \
 191       while (*dst) dst++; \
 192       while (m < n && *src) { m++; *dst++ = *src++; } /* concat <= n chars */ \
 193       *dst = 0;                                       /* always add null   */ \
 194       \
 195       /* This checks for overlap after copying, unavoidable without */ \
 196       /* pre-counting lengths... should be ok */ \
 197       if (is_overlap(dst_orig,  \
 198                      src_orig,  \
 199                      (Addr)dst-(Addr)dst_orig+1,  \
 200                      (Addr)src-(Addr)src_orig+1)) \
 201          RECORD_OVERLAP_ERROR("strncat", dst_orig, src_orig, n); \
 202       \
 203       return dst_orig; \
 204    }
 205
 206 STRNCAT(VG_Z_LIBC_SONAME, strncat)
 207 #if defined(VGO_darwin)
 208 STRNCAT(VG_Z_DYLD,        strncat)
 209 #endif
 210
 211
 212 /* Append src to dst. n is the size of dst's buffer. dst is guaranteed
 213    to be nul-terminated after the copy, unless n <= strlen(dst_orig).
 214    Returns min(n, strlen(dst_orig)) + strlen(src_orig).
 215    Truncation occurred if retval >= n.
 216 */
 217 #define STRLCAT(soname, fnname) \
 218     SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 219         ( char* dst, const char* src, SizeT n ); \
 220     SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 221         ( char* dst, const char* src, SizeT n ) \
 222    { \
 223       const Char* src_orig = src; \
 224       Char* dst_orig = dst; \
 225       SizeT m = 0; \
 226 \
 227       while (m < n && *dst) { m++; dst++; } \
 228       if (m < n) { \
 229          /* Fill as far as dst_orig[n-2], then nul-terminate. */ \
 230          while (m < n-1 && *src) { m++; *dst++ = *src++; } \
 231          *dst = 0; \
 232       } else { \
 233          /* No space to copy anything to dst. m == n */ \
 234       } \
 235       /* Finish counting min(n, strlen(dst_orig)) + strlen(src_orig) */ \
 236       while (*src) { m++; src++; } \
 237       /* This checks for overlap after copying, unavoidable without */ \
 238       /* pre-counting lengths... should be ok */ \
 239       if (is_overlap(dst_orig,  \
 240                      src_orig,  \
 241                      (Addr)dst-(Addr)dst_orig+1,  \
 242                      (Addr)src-(Addr)src_orig+1)) \
 243          RECORD_OVERLAP_ERROR("strlcat", dst_orig, src_orig, n); \
 244 \
 245       return m; \
 246    }
 247
 248 #if defined(VGO_darwin)
 249 STRLCAT(VG_Z_LIBC_SONAME, strlcat)
 250 STRLCAT(VG_Z_DYLD,        strlcat)
 251 #endif
 252
 253
 254 #define STRNLEN(soname, fnname) \
 255    SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* str, SizeT n ); \
 256    SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* str, SizeT n ) \
 257    { \
 258       SizeT i = 0; \
 259       while (i < n && str[i] != 0) i++; \
 260       return i; \
 261    }
 262
 263 STRNLEN(VG_Z_LIBC_SONAME, strnlen)
 264 #if defined(VGO_linux)
 265 STRNLEN(VG_Z_LIBC_SONAME, __GI_strnlen)
 266 #endif
 267
 268
 269 // Note that this replacement often doesn't get used because gcc inlines
 270 // calls to strlen() with its own built-in version.  This can be very
 271 // confusing if you aren't expecting it.  Other small functions in this file
 272 // may also be inline by gcc.
 273 #define STRLEN(soname, fnname) \
 274    SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ); \
 275    SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ) \
 276    { \
 277       SizeT i = 0; \
 278       while (str[i] != 0) i++; \
 279       return i; \
 280    }
 281
 282 STRLEN(VG_Z_LIBC_SONAME,          strlen)
 283 #if defined(VGO_linux)
 284 STRLEN(VG_Z_LIBC_SONAME,          __GI_strlen)
 285 STRLEN(VG_Z_LD_LINUX_SO_2,        strlen)
 286 STRLEN(VG_Z_LD_LINUX_X86_64_SO_2, strlen)
 287 #endif
 288
 289
 290 #define STRCPY(soname, fnname) \
 291    char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ); \
 292    char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ) \
 293    { \
 294       const Char* src_orig = src; \
 295             Char* dst_orig = dst; \
 296       \
 297       while (*src) *dst++ = *src++; \
 298       *dst = 0; \
 299       \
 300       /* This checks for overlap after copying, unavoidable without */ \
 301       /* pre-counting length... should be ok */ \
 302       if (is_overlap(dst_orig,  \
 303                      src_orig,  \
 304                      (Addr)dst-(Addr)dst_orig+1,  \
 305                      (Addr)src-(Addr)src_orig+1)) \
 306          RECORD_OVERLAP_ERROR("strcpy", dst_orig, src_orig, 0); \
 307       \
 308       return dst_orig; \
 309    }
 310
 311 STRCPY(VG_Z_LIBC_SONAME, strcpy)
 312 #if defined(VGO_linux)
 313 STRCPY(VG_Z_LIBC_SONAME, __GI_strcpy)
 314 #elif defined(VGO_darwin)
 315 STRCPY(VG_Z_DYLD,        strcpy)
 316 #endif
 317
 318
 319 #define STRNCPY(soname, fnname) \
 320    char* VG_REPLACE_FUNCTION_ZU(soname, fnname) \
 321             ( char* dst, const char* src, SizeT n ); \
 322    char* VG_REPLACE_FUNCTION_ZU(soname, fnname) \
 323             ( char* dst, const char* src, SizeT n ) \
 324    { \
 325       const Char* src_orig = src; \
 326             Char* dst_orig = dst; \
 327       SizeT m = 0; \
 328       \
 329       while (m   < n && *src) { m++; *dst++ = *src++; } \
 330       /* Check for overlap after copying; all n bytes of dst are relevant, */ \
 331       /* but only m+1 bytes of src if terminator was found */ \
 332       if (is_overlap(dst_orig, src_orig, n, (m < n) ? m+1 : n)) \
 333          RECORD_OVERLAP_ERROR("strncpy", dst, src, n); \
 334       while (m++ < n) *dst++ = 0;         /* must pad remainder with nulls */ \
 335  \
 336       return dst_orig; \
 337    }
 338
 339 STRNCPY(VG_Z_LIBC_SONAME, strncpy)
 340 #if defined(VGO_linux)
 341 STRNCPY(VG_Z_LIBC_SONAME, __GI_strncpy)
 342 #elif defined(VGO_darwin)
 343 STRNCPY(VG_Z_DYLD,        strncpy)
 344 #endif
 345
 346
 347 /* Copy up to n-1 bytes from src to dst. Then nul-terminate dst if n > 0.
 348    Returns strlen(src). Does not zero-fill the remainder of dst. */
 349 #define STRLCPY(soname, fnname) \
 350    SizeT VG_REPLACE_FUNCTION_ZU(soname, fnname) \
 351        ( char* dst, const char* src, SizeT n ); \
 352    SizeT VG_REPLACE_FUNCTION_ZU(soname, fnname) \
 353        ( char* dst, const char* src, SizeT n ) \
 354    { \
 355       const char* src_orig = src; \
 356       char* dst_orig = dst; \
 357       SizeT m = 0; \
 358 \
 359       while (m < n-1 && *src) { m++; *dst++ = *src++; } \
 360       /* m non-nul bytes have now been copied, and m <= n-1. */ \
 361       /* Check for overlap after copying; all n bytes of dst are relevant, */ \
 362       /* but only m+1 bytes of src if terminator was found */ \
 363       if (is_overlap(dst_orig, src_orig, n, (m < n) ? m+1 : n)) \
 364           RECORD_OVERLAP_ERROR("strlcpy", dst, src, n); \
 365       /* Nul-terminate dst. */ \
 366       if (n > 0) *dst = 0; \
 367       /* Finish counting strlen(src). */ \
 368       while (*src) src++; \
 369       return src - src_orig; \
 370    }
 371
 372 #if defined(VGO_darwin)
 373 STRLCPY(VG_Z_LIBC_SONAME, strlcpy)
 374 STRLCPY(VG_Z_DYLD,        strlcpy)
 375 #endif
 376
 377
 378 #define STRNCMP(soname, fnname) \
 379    int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 380           ( const char* s1, const char* s2, SizeT nmax ); \
 381    int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 382           ( const char* s1, const char* s2, SizeT nmax ) \
 383    { \
 384       SizeT n = 0; \
 385       while (True) { \
 386          if (n >= nmax) return 0; \
 387          if (*s1 == 0 && *s2 == 0) return 0; \
 388          if (*s1 == 0) return -1; \
 389          if (*s2 == 0) return 1; \
 390          \
 391          if (*(unsigned char*)s1 < *(unsigned char*)s2) return -1; \
 392          if (*(unsigned char*)s1 > *(unsigned char*)s2) return 1; \
 393          \
 394          s1++; s2++; n++; \
 395       } \
 396    }
 397
 398 STRNCMP(VG_Z_LIBC_SONAME, strncmp)
 399 #if defined(VGO_linux)
 400 STRNCMP(VG_Z_LIBC_SONAME, __GI_strncmp)
 401 #elif defined(VGO_darwin)
 402 STRNCMP(VG_Z_DYLD,        strncmp)
 403 #endif
 404
 405
 406 #define STRCMP(soname, fnname) \
 407    int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 408           ( const char* s1, const char* s2 ); \
 409    int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 410           ( const char* s1, const char* s2 ) \
 411    { \
 412       register unsigned char c1; \
 413       register unsigned char c2; \
 414       while (True) { \
 415          c1 = *(unsigned char *)s1; \
 416          c2 = *(unsigned char *)s2; \
 417          if (c1 != c2) break; \
 418          if (c1 == 0) break; \
 419          s1++; s2++; \
 420       } \
 421       if ((unsigned char)c1 < (unsigned char)c2) return -1; \
 422       if ((unsigned char)c1 > (unsigned char)c2) return 1; \
 423       return 0; \
 424    }
 425
 426 STRCMP(VG_Z_LIBC_SONAME,          strcmp)
 427 #if defined(VGO_linux)
 428 STRCMP(VG_Z_LIBC_SONAME,          __GI_strcmp)
 429 STRCMP(VG_Z_LD_LINUX_X86_64_SO_2, strcmp)
 430 STRCMP(VG_Z_LD64_SO_1,            strcmp)
 431 #endif
 432
 433
 434 #define MEMCHR(soname, fnname) \
 435    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const void *s, int c, SizeT n); \
 436    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const void *s, int c, SizeT n) \
 437    { \
 438       SizeT i; \
 439       UChar c0 = (UChar)c; \
 440       UChar* p = (UChar*)s; \
 441       for (i = 0; i < n; i++) \
 442          if (p[i] == c0) return (void*)(&p[i]); \
 443       return NULL; \
 444    }
 445
 446 MEMCHR(VG_Z_LIBC_SONAME, memchr)
 447 #if defined(VGO_darwin)
 448 MEMCHR(VG_Z_DYLD,        memchr)
 449 #endif
 450
 451
 452 #define MEMCPY(soname, fnname) \
 453    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 454             ( void *dst, const void *src, SizeT len ); \
 455    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 456             ( void *dst, const void *src, SizeT len ) \
 457    { \
 458       register char *d; \
 459       register char *s; \
 460       \
 461       if (len == 0) \
 462          return dst; \
 463       \
 464       if (is_overlap(dst, src, len, len)) \
 465          RECORD_OVERLAP_ERROR("memcpy", dst, src, len); \
 466       \
 467       if ( dst > src ) { \
 468          d = (char *)dst + len - 1; \
 469          s = (char *)src + len - 1; \
 470          while ( len >= 4 ) { \
 471             *d-- = *s--; \
 472             *d-- = *s--; \
 473             *d-- = *s--; \
 474             *d-- = *s--; \
 475             len -= 4; \
 476          } \
 477          while ( len-- ) { \
 478             *d-- = *s--; \
 479          } \
 480       } else if ( dst < src ) { \
 481          d = (char *)dst; \
 482          s = (char *)src; \
 483          while ( len >= 4 ) { \
 484             *d++ = *s++; \
 485             *d++ = *s++; \
 486             *d++ = *s++; \
 487             *d++ = *s++; \
 488             len -= 4; \
 489          } \
 490          while ( len-- ) { \
 491             *d++ = *s++; \
 492          } \
 493       } \
 494       return dst; \
 495    }
 496
 497 MEMCPY(VG_Z_LIBC_SONAME, memcpy)
 498 #if defined(VGO_linux)
 499 MEMCPY(VG_Z_LD_SO_1,     memcpy) /* ld.so.1 */
 500 MEMCPY(VG_Z_LD64_SO_1,   memcpy) /* ld64.so.1 */
 501 #elif defined(VGO_darwin)
 502 MEMCPY(VG_Z_DYLD,        memcpy)
 503 #endif
 504 /* icc9 blats these around all over the place.  Not only in the main
 505    executable but various .so's.  They are highly tuned and read
 506    memory beyond the source boundary (although work correctly and
 507    never go across page boundaries), so give errors when run natively,
 508    at least for misaligned source arg.  Just intercepting in the exe
 509    only until we understand more about the problem.  See
 510    http://bugs.kde.org/show_bug.cgi?id=139776
 511  */
 512 MEMCPY(NONE, _intel_fast_memcpy)
 513
 514
 515 #define MEMCMP(soname, fnname) \
 516    int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 517           ( const void *s1V, const void *s2V, SizeT n ); \
 518    int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 519           ( const void *s1V, const void *s2V, SizeT n ) \
 520    { \
 521       int res; \
 522       unsigned char a0; \
 523       unsigned char b0; \
 524       unsigned char* s1 = (unsigned char*)s1V; \
 525       unsigned char* s2 = (unsigned char*)s2V; \
 526       \
 527       while (n != 0) { \
 528          a0 = s1[0]; \
 529          b0 = s2[0]; \
 530          s1 += 1; \
 531          s2 += 1; \
 532          res = ((int)a0) - ((int)b0); \
 533          if (res != 0) \
 534             return res; \
 535          n -= 1; \
 536       } \
 537       return 0; \
 538    }
 539
 540 MEMCMP(VG_Z_LIBC_SONAME, memcmp)
 541 MEMCMP(VG_Z_LIBC_SONAME, bcmp)
 542 #if defined(VGO_linux)
 543 MEMCMP(VG_Z_LD_SO_1,     bcmp)
 544 #elif defined(VGO_darwin)
 545 MEMCMP(VG_Z_DYLD,        memcmp)
 546 MEMCMP(VG_Z_DYLD,        bcmp)
 547 #endif
 548
 549
 550 /* Copy SRC to DEST, returning the address of the terminating '\0' in
 551    DEST. (minor variant of strcpy) */
 552 #define STPCPY(soname, fnname) \
 553    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( char* dst, const char* src ); \
 554    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( char* dst, const char* src ) \
 555    { \
 556       const Char* src_orig = src; \
 557             Char* dst_orig = dst; \
 558       \
 559       while (*src) *dst++ = *src++; \
 560       *dst = 0; \
 561       \
 562       /* This checks for overlap after copying, unavoidable without */ \
 563       /* pre-counting length... should be ok */ \
 564       if (is_overlap(dst_orig,  \
 565                      src_orig,  \
 566                      (Addr)dst-(Addr)dst_orig+1,  \
 567                      (Addr)src-(Addr)src_orig+1)) \
 568          RECORD_OVERLAP_ERROR("stpcpy", dst_orig, src_orig, 0); \
 569       \
 570       return dst; \
 571    }
 572
 573 STPCPY(VG_Z_LIBC_SONAME,          stpcpy)
 574 #if defined(VGO_linux)
 575 STPCPY(VG_Z_LIBC_SONAME,          __GI_stpcpy)
 576 STPCPY(VG_Z_LD_LINUX_SO_2,        stpcpy)
 577 STPCPY(VG_Z_LD_LINUX_X86_64_SO_2, stpcpy)
 578 #elif defined(VGO_darwin)
 579 STPCPY(VG_Z_DYLD,                 stpcpy)
 580 #endif
 581
 582
 583 #define MEMSET(soname, fnname) \
 584    void* VG_REPLACE_FUNCTION_ZU(soname,fnname)(void *s, Int c, SizeT n); \
 585    void* VG_REPLACE_FUNCTION_ZU(soname,fnname)(void *s, Int c, SizeT n) \
 586    { \
 587       unsigned char *cp = s; \
 588       while (n >= 4) { \
 589          cp[0] = c; \
 590          cp[1] = c; \
 591          cp[2] = c; \
 592          cp[3] = c; \
 593          cp += 4; \
 594          n -= 4; \
 595       } \
 596       while (n--) { \
 597          *cp++ = c; \
 598       } \
 599       return s; \
 600    }
 601
 602 MEMSET(VG_Z_LIBC_SONAME, memset)
 603 #if defined(VGO_darwin)
 604 MEMSET(VG_Z_DYLD,        memset)
 605 #endif
 606
 607
 608 #define MEMMOVE(soname, fnname) \
 609    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 610             (void *dstV, const void *srcV, SizeT n); \
 611    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 612             (void *dstV, const void *srcV, SizeT n) \
 613    { \
 614       SizeT i; \
 615       Char* dst = (Char*)dstV; \
 616       Char* src = (Char*)srcV; \
 617       if (dst < src) { \
 618          for (i = 0; i < n; i++) \
 619             dst[i] = src[i]; \
 620       } \
 621       else  \
 622       if (dst > src) { \
 623          for (i = 0; i < n; i++) \
 624             dst[n-i-1] = src[n-i-1]; \
 625       } \
 626       return dst; \
 627    }
 628
 629 MEMMOVE(VG_Z_LIBC_SONAME, memmove)
 630 #if defined(VGO_darwin)
 631 MEMMOVE(VG_Z_DYLD,        memmove)
 632 #endif
 633
 634
 635 #define BCOPY(soname, fnname) \
 636    void VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 637             (const void *srcV, void *dstV, SizeT n); \
 638    void VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 639             (const void *srcV, void *dstV, SizeT n) \
 640    { \
 641       SizeT i; \
 642       Char* dst = (Char*)dstV; \
 643       Char* src = (Char*)srcV; \
 644       if (dst < src) { \
 645          for (i = 0; i < n; i++) \
 646             dst[i] = src[i]; \
 647       } \
 648       else  \
 649       if (dst > src) { \
 650          for (i = 0; i < n; i++) \
 651             dst[n-i-1] = src[n-i-1]; \
 652       } \
 653    }
 654
 655 #if defined(VGO_darwin)
 656 BCOPY(VG_Z_LIBC_SONAME, bcopy)
 657 BCOPY(VG_Z_DYLD,        bcopy)
 658 #endif
 659
 660
 661 /* glibc 2.5 variant of memmove which checks the dest is big enough.
 662    There is no specific part of glibc that this is copied from. */
 663 #define GLIBC25___MEMMOVE_CHK(soname, fnname) \
 664    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 665             (void *dstV, const void *srcV, SizeT n, SizeT destlen); \
 666    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 667             (void *dstV, const void *srcV, SizeT n, SizeT destlen) \
 668    { \
 669       extern void _exit(int status); \
 670       SizeT i; \
 671       Char* dst = (Char*)dstV; \
 672       Char* src = (Char*)srcV; \
 673       if (destlen < n) \
 674          goto badness; \
 675       if (dst < src) { \
 676          for (i = 0; i < n; i++) \
 677             dst[i] = src[i]; \
 678       } \
 679       else  \
 680       if (dst > src) { \
 681          for (i = 0; i < n; i++) \
 682             dst[n-i-1] = src[n-i-1]; \
 683       } \
 684       return dst; \
 685      badness: \
 686       VALGRIND_PRINTF_BACKTRACE( \
 687          "*** memmove_chk: buffer overflow detected ***: " \
 688          "program terminated\n"); \
 689      _exit(127); \
 690      /*NOTREACHED*/ \
 691      return NULL; \
 692    }
 693
 694 GLIBC25___MEMMOVE_CHK(VG_Z_LIBC_SONAME, __memmove_chk)
 695
 696
 697 /* Find the first occurrence of C in S or the final NUL byte.  */
 698 #define GLIBC232_STRCHRNUL(soname, fnname) \
 699    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const char* s, int c_in); \
 700    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const char* s, int c_in) \
 701    { \
 702       unsigned char  c        = (unsigned char) c_in; \
 703       unsigned char* char_ptr = (unsigned char *)s; \
 704       while (1) { \
 705          if (*char_ptr == 0) return char_ptr; \
 706          if (*char_ptr == c) return char_ptr; \
 707          char_ptr++; \
 708       } \
 709    }
 710
 711 GLIBC232_STRCHRNUL(VG_Z_LIBC_SONAME, strchrnul)
 712
 713
 714 /* Find the first occurrence of C in S.  */
 715 #define GLIBC232_RAWMEMCHR(soname, fnname) \
 716    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const char* s, int c_in); \
 717    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const char* s, int c_in) \
 718    { \
 719       unsigned char  c        = (unsigned char) c_in; \
 720       unsigned char* char_ptr = (unsigned char *)s; \
 721       while (1) { \
 722          if (*char_ptr == c) return char_ptr; \
 723          char_ptr++; \
 724       } \
 725    }
 726
 727 GLIBC232_RAWMEMCHR(VG_Z_LIBC_SONAME, rawmemchr)
 728 #if defined (VGO_linux)
 729 GLIBC232_RAWMEMCHR(VG_Z_LIBC_SONAME, __GI___rawmemchr)
 730 #endif
 731
 732 /* glibc variant of strcpy that checks the dest is big enough.
 733    Copied from glibc-2.5/debug/test-strcpy_chk.c. */
 734 #define GLIBC25___STRCPY_CHK(soname,fnname) \
 735    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 736                                (char* dst, const char* src, SizeT len); \
 737    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 738                                (char* dst, const char* src, SizeT len) \
 739    { \
 740       extern void _exit(int status); \
 741       char* ret = dst; \
 742       if (! len) \
 743          goto badness; \
 744       while ((*dst++ = *src++) != '\0') \
 745          if (--len == 0) \
 746             goto badness; \
 747       return ret; \
 748      badness: \
 749       VALGRIND_PRINTF_BACKTRACE( \
 750          "*** strcpy_chk: buffer overflow detected ***: " \
 751          "program terminated\n"); \
 752      _exit(127); \
 753      /*NOTREACHED*/ \
 754      return NULL; \
 755    }
 756
 757 GLIBC25___STRCPY_CHK(VG_Z_LIBC_SONAME, __strcpy_chk)
 758
 759
 760 /* glibc variant of stpcpy that checks the dest is big enough.
 761    Copied from glibc-2.5/debug/test-stpcpy_chk.c. */
 762 #define GLIBC25___STPCPY_CHK(soname,fnname) \
 763    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 764                                (char* dst, const char* src, SizeT len); \
 765    char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 766                                (char* dst, const char* src, SizeT len) \
 767    { \
 768       extern void _exit(int status); \
 769       if (! len) \
 770          goto badness; \
 771       while ((*dst++ = *src++) != '\0') \
 772          if (--len == 0) \
 773             goto badness; \
 774       return dst - 1; \
 775      badness: \
 776       VALGRIND_PRINTF_BACKTRACE( \
 777          "*** stpcpy_chk: buffer overflow detected ***: " \
 778          "program terminated\n"); \
 779      _exit(127); \
 780      /*NOTREACHED*/ \
 781      return NULL; \
 782    }
 783
 784 GLIBC25___STPCPY_CHK(VG_Z_LIBC_SONAME, __stpcpy_chk)
 785
 786
 787 /* mempcpy */
 788 #define GLIBC25_MEMPCPY(soname, fnname) \
 789    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 790             ( void *dst, const void *src, SizeT len ); \
 791    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 792             ( void *dst, const void *src, SizeT len ) \
 793    { \
 794       register char *d; \
 795       register char *s; \
 796       SizeT len_saved = len; \
 797       \
 798       if (len == 0) \
 799          return dst; \
 800       \
 801       if (is_overlap(dst, src, len, len)) \
 802          RECORD_OVERLAP_ERROR("mempcpy", dst, src, len); \
 803       \
 804       if ( dst > src ) { \
 805          d = (char *)dst + len - 1; \
 806          s = (char *)src + len - 1; \
 807          while ( len-- ) { \
 808             *d-- = *s--; \
 809          } \
 810       } else if ( dst < src ) { \
 811          d = (char *)dst; \
 812          s = (char *)src; \
 813          while ( len-- ) { \
 814             *d++ = *s++; \
 815          } \
 816       } \
 817       return (void*)( ((char*)dst) + len_saved ); \
 818    }
 819
 820 GLIBC25_MEMPCPY(VG_Z_LIBC_SONAME, mempcpy)
 821 #if defined(VGO_linux)
 822 GLIBC25_MEMPCPY(VG_Z_LD_SO_1,     mempcpy) /* ld.so.1 */
 823 #endif
 824
 825
 826 #define GLIBC26___MEMCPY_CHK(soname, fnname) \
 827    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 828             (void* dst, const void* src, SizeT len, SizeT dstlen ); \
 829    void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
 830             (void* dst, const void* src, SizeT len, SizeT dstlen ) \
 831    { \
 832       extern void _exit(int status); \
 833       register char *d; \
 834       register char *s; \
 835       \
 836       if (dstlen < len) goto badness; \
 837       \
 838       if (len == 0) \
 839          return dst; \
 840       \
 841       if (is_overlap(dst, src, len, len)) \
 842          RECORD_OVERLAP_ERROR("memcpy_chk", dst, src, len); \
 843       \
 844       if ( dst > src ) { \
 845          d = (char *)dst + len - 1; \
 846          s = (char *)src + len - 1; \
 847          while ( len-- ) { \
 848             *d-- = *s--; \
 849          } \
 850       } else if ( dst < src ) { \
 851          d = (char *)dst; \
 852          s = (char *)src; \
 853          while ( len-- ) { \
 854             *d++ = *s++; \
 855          } \
 856       } \
 857       return dst; \
 858      badness: \
 859       VALGRIND_PRINTF_BACKTRACE( \
 860          "*** memcpy_chk: buffer overflow detected ***: " \
 861          "program terminated\n"); \
 862      _exit(127); \
 863      /*NOTREACHED*/ \
 864      return NULL; \
 865    }
 866
 867 GLIBC26___MEMCPY_CHK(VG_Z_LIBC_SONAME, __memcpy_chk)
 868
 869
 870 /*------------------------------------------------------------*/
 871 /*--- Improve definedness checking of process environment  ---*/
 872 /*------------------------------------------------------------*/
 873
 874 #if defined(VGO_linux)
 875
 876 /* putenv */
 877 int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, putenv) (char* string);
 878 int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, putenv) (char* string)
 879 {
 880     OrigFn fn;
 881     Word result;
 882     const char* p = string;
 883     VALGRIND_GET_ORIG_FN(fn);
 884     /* Now by walking over the string we magically produce
 885        traces when hitting undefined memory. */
 886     if (p)
 887         while (*p++)
 888             ;
 889     CALL_FN_W_W(result, fn, string);
 890     return result;
 891 }
 892
 893 /* unsetenv */
 894 int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, unsetenv) (const char* name);
 895 int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, unsetenv) (const char* name)
 896 {
 897     OrigFn fn;
 898     Word result;
 899     const char* p = name;
 900     VALGRIND_GET_ORIG_FN(fn);
 901     /* Now by walking over the string we magically produce
 902        traces when hitting undefined memory. */
 903     if (p)
 904         while (*p++)
 905             ;
 906     CALL_FN_W_W(result, fn, name);
 907     return result;
 908 }
 909
 910 /* setenv */
 911 int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, setenv)
 912     (const char* name, const char* value, int overwrite);
 913 int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, setenv)
 914     (const char* name, const char* value, int overwrite)
 915 {
 916     OrigFn fn;
 917     Word result;
 918     const char* p;
 919     VALGRIND_GET_ORIG_FN(fn);
 920     /* Now by walking over the string we magically produce
 921        traces when hitting undefined memory. */
 922     if (name)
 923         for (p = name; *p; p++)
 924             ;
 925     if (value)
 926         for (p = value; *p; p++)
 927             ;
 928     VALGRIND_CHECK_VALUE_IS_DEFINED (overwrite);
 929     CALL_FN_W_WWW(result, fn, name, value, overwrite);
 930     return result;
 931 }
 932
 933 #endif /* defined(VGO_linux) */
 934
 935
 936 /*------------------------------------------------------------*/
 937 /*--- AIX stuff only after this point                      ---*/
 938 /*------------------------------------------------------------*/
 939
 940 /* Generate replacements for strcat, strncat, strcpy, strncpy, strcmp
 941    in the given soname. */
 942 #define Str5FNs(_soname)       \
 943     STRCAT(_soname, strcat)    \
 944    STRNCAT(_soname, strncat)   \
 945     STRCPY(_soname, strcpy)    \
 946    STRNCPY(_soname, strncpy)   \
 947     STRCMP(_soname, strcmp)
 948
 949 #if defined(VGP_ppc32_aix5)
     /* 32-bit AIX5 only: invoke Str5FNs once for each soname pattern
        listed below.  The macro argument is the encoded soname; the
        comment at the end of each line gives its decoded form.  Going by
        those comments the escapes read: Zd '.', ZL '(', ZR ')', Zu '_',
        Za '*', Zh '-'. */
 950 Str5FNs(NONE)                             /* in main exe */
 951 Str5FNs(libCZdaZLshrcoreZdoZR)            /* libC.a(shrcore.o) */
 952 Str5FNs(libX11ZdaZLshr4ZdoZR)             /* libX11.a(shr4.o) */
 953 Str5FNs(libXmZdaZLshrZaZdoZR)             /* libXm.a(shr*.o) */
 954 Str5FNs(libXtZdaZLshr4ZdoZR)              /* libXt.a(shr4.o) */
 955 Str5FNs(libppeZurZdaZLdynamicZdoZR)       /* libppe_r.a(dynamic.o) */
 956 Str5FNs(libodmZdaZLshrZdoZR)              /* libodm.a(shr.o) */
 957 Str5FNs(libmpiZurZdaZLmpicoreZurZdoZR)    /* libmpi_r.a(mpicore_r.o) */
 958 Str5FNs(libmpiZurZdaZLmpipoeZurZdoZR)     /* libmpi_r.a(mpipoe_r.o) */
 959 Str5FNs(libmpiZurZdaZLmpciZurZdoZR)       /* libmpi_r.a(mpci_r.o) */
 960 Str5FNs(libslurmZdso)                     /* libslurm.so */
 961 Str5FNs(libglibZdso)                      /* libglib.so */
 962 Str5FNs(libIMZdaZLshrZdoZR)               /* libIM.a(shr.o) */
 963 Str5FNs(libiconvZdaZLshr4ZdoZR)           /* libiconv.a(shr4.o) */
 964 Str5FNs(libGLZdaZLshrZdoZR)               /* libGL.a(shr.o) */
 965 Str5FNs(libgdkZdso)                       /* libgdk.so */
 966 Str5FNs(libcursesZdaZLshr42ZdoZR)         /* libcurses.a(shr42.o) */
 967 Str5FNs(libqtZda)                         /* libqt.a */
 968 Str5FNs(ZaZLlibglibZhZaZdsoZaZR)          /* *(libglib-*.so*) */
 969 Str5FNs(ZaZLlibfontconfigZdsoZaZR)        /* *(libfontconfig.so*) */
 970 Str5FNs(libQtZaa)                         /* libQt*.a -- NOTE(review): by the escapes above this spells libQt*a; confirm intent */
 971 #endif /* defined(VGP_ppc32_aix5) */
 972 #if defined(VGP_ppc64_aix5)
     /* 64-bit AIX5 only: invoke Str5FNs once for each soname pattern
        listed below.  The macro argument is the encoded soname; the
        comment at the end of each line gives its decoded form.  Going by
        those comments the escapes read: Zd '.', ZL '(', ZR ')', Zu '_',
        Za '*', Zh '-'. */
 973 Str5FNs(NONE)                             /* in main exe */
 974 Str5FNs(libX11ZdaZLshrZu64ZdoZR)          /* libX11.a(shr_64.o) */
 975 Str5FNs(libiconvZdaZLshr4Zu64ZdoZR)       /* libiconv.a(shr4_64.o) */
 976 Str5FNs(libGLZdaZLshrZu64ZdoZR)           /* libGL.a(shr_64.o) */
 977 Str5FNs(libppeZurZdaZLdynamic64ZdoZR)     /* libppe_r.a(dynamic64.o) */
 978 Str5FNs(libodmZdaZLshrZu64ZdoZR)          /* libodm.a(shr_64.o) */
 979 Str5FNs(libmpiZurZdaZLmpicore64ZurZdoZR)  /* libmpi_r.a(mpicore64_r.o) */
 980 Str5FNs(libmpiZurZdaZLmpipoe64ZurZdoZR)   /* libmpi_r.a(mpipoe64_r.o) */
 981 Str5FNs(libCZdaZLshrcoreZu64ZdoZR)        /* libC.a(shrcore_64.o) */
 982 Str5FNs(libmpiZurZdaZLmpci64ZurZdoZR)     /* libmpi_r.a(mpci64_r.o) */
 983 Str5FNs(libqtZda)                         /* libqt.a */
 984 Str5FNs(ZaZLlibglibZhZaZdsoZaZR)          /* *(libglib-*.so*) */
 985 Str5FNs(ZaZLlibfontconfigZdsoZaZR)        /* *(libfontconfig.so*) */
 986 Str5FNs(libQtZaa)                         /* libQt*.a -- NOTE(review): by the escapes above this spells libQt*a; confirm intent */
 987 #endif /* defined(VGP_ppc64_aix5) */
 988
 989
 990 /* AIX's libm contains a sqrt implementation which does a nasty thing:
 991    it loads the initial estimate of the root into a FP register, but
 992    only the upper half of the number is initialised data.  Hence the
 993    least significant 32 mantissa bits are undefined, and it then uses
 994    Newton-Raphson iteration to compute the final, defined result.
 995    This fools memcheck completely; the only solution I can think of is
 996    to provide our own substitute.  The _FAST variant is almost right
 997    except the result is not correctly rounded.  The _EXACT variant,
 998    which is selected by default, is always right; but it's also pretty
 999    darn slow. */
1000
1001 #if defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
1002 #define SQRT_FAST(soname, fnname) \
        /* Expands to the prototype and definition of a replacement \
           `double fnname(double x)` for `soname`.  x is split into two \
           32-bit words through a union (w[0] is used as the high word \
           because bIGENDIAN is 1) and classified by sign, exponent and \
           mantissa: \
             - positive normal/denormal: table-seeded estimate refined \
               by three iteration steps; \
             - negative normal/denormal, and -Inf: a hand-built NaN; \
             - +/-0, +Inf and NaN: returned unchanged. */ \
1003    double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ); \
1004    double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ) \
1005    { \
1006       static UInt T1[32] =  \
1007          { 0,       1024,   3062,   5746,   9193,  13348, \
1008            18162,  23592,  29598,  36145,  43202,  50740, \
1009            58733,  67158,  75992,  85215,  83599,  71378, \
1010            60428,  50647,  41945,  34246,  27478,  21581, \
1011            16499,  12183,   8588,   5674,   3403,   1742, \
1012            661,    130 }; \
1013       UInt x0, x1, sign, expo, mant0, bIGENDIAN = 1; \
1014       union { UInt w[2]; double d; } u; \
1015       u.d   = x; \
1016       x0    = u.w[1 - bIGENDIAN]; /* high half */ \
1017       x1    = u.w[bIGENDIAN];  /* low half */ \
1018       sign  = x0 >> 31; /* bit 31 of the high word */ \
1019       expo  = (x0 >> 20) & 0x7FF; /* 11-bit biased exponent field */ \
1020       mant0 = x0 & 0xFFFFF; /* top 20 mantissa bits; x1 holds the rest */ \
1021       if ( (sign == 0 && expo >= 1 && expo <= 0x7FE) /* +normal */ \
1022            || (sign == 0 && expo == 0  \
1023                          && (mant0 | x1) > 0) /* +denorm */) { \
1024          /* common case; do Newton-Raphson */ \
1025          /* technically k should be signed int32, but since we're \
1026             always entering here with x > 0, doesn't matter that it's \
1027             unsigned. */ \
1028          double y; \
1029          UInt k = (x0>>1) + 0x1ff80000; /* 0x1ff80000 == 0x3ff00000 >> 1 */ \
1030          u.w[1 - bIGENDIAN] = k - T1[31&(k>>15)]; /* table index is bits 15..19 of k */ \
1031          u.w[bIGENDIAN] = 0; /* initial estimate has a zero low word */ \
1032          y = u.d; \
1033          y = (y+x/y)/2.0 ; \
1034          y = (y+x/y)/2.0 ; \
1035          y = y-(y-x/y)/2.0 ; \
1036          return y; \
1037       } \
1038       if ( (sign == 1 && expo >= 1 && expo <= 0x7FE) /* -normal */ \
1039            || (sign == 1 && expo == 0  \
1040                          && (mant0 | x1) > 0) /* -denorm */) { \
1041          u.w[1 - bIGENDIAN] = 0xFFF00000; /* sign set, exponent all ones ... */ \
1042          u.w[bIGENDIAN] = 0x1; /* ... and a non-zero mantissa: a NaN */ \
1043          return u.d; /* negative finite non-zero -> NaN */ \
1044       } \
1045       if ((expo | mant0 | x1) == 0) \
1046          return x; /* +/-zero -> self */ \
1047       if (expo == 0x7FF && (mant0 | x1) == 0) { \
1048          if (sign == 0) \
1049             return x; /* +Inf -> self */ \
1050          u.w[1 - bIGENDIAN] = 0xFFF00000; \
1051          u.w[bIGENDIAN] = 0x1; \
1052          return u.d; /* -Inf -> NaN */ \
1053       } \
1054       /* must be +/- NaN */ \
1055       return x; /* +/-NaN -> self */ \
1056    }
1057
1058 #define SQRT_EXACT(soname, fnname) \
1059    /* \
1060     * ==================================================== \
1061     * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. \
1062     * \
1063     * Developed at SunPro, a Sun Microsystems, Inc. business. \
1064     * Permission to use, copy, modify, and distribute this \
1065     * software is freely granted, provided that this notice \
1066     * is preserved. \
1067     * ==================================================== \
1068     */ \
1069    /* \
1070     * Return correctly rounded sqrt. \
1071     *           ------------------------------------------ \
1072     *           |  Use the hardware sqrt if you have one | \
1073     *           ------------------------------------------ \
1074     * Method: \
1075     *   Bit by bit method using integer arithmetic. (Slow, but portable) \
1076     *   1. Normalization \
1077     *      Scale x to y in [1,4) with even powers of 2: \
1078     *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then \
1079     *              sqrt(x) = 2^k * sqrt(y) \
1080     *   2. Bit by bit computation \
1081     *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1), \
1082     *           i                                                   0 \
1083     *                                     i+1         2 \
1084     *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1) \
1085     *           i      i            i                 i \
1086     * \
1087     *      To compute q    from q , one checks whether \
1088     *                  i+1       i \
1089     * \
1090     *                            -(i+1) 2 \
1091     *                      (q + 2      ) <= y.                     (2) \
1092     *                        i \
1093     *                                                            -(i+1) \
1094     *      If (2) is false, then q   = q ; otherwise q   = q  + 2      . \
1095     *                             i+1   i             i+1   i \
1096     * \
1097     *      With some algebraic manipulation, it is not difficult to see \
1098     *      that (2) is equivalent to \
1099     *                             -(i+1) \
1100     *                      s  +  2       <= y                      (3) \
1101     *                       i                i \
1102     * \
1103     *      The advantage of (3) is that s  and y  can be computed by \
1104     *                                    i      i \
1105     *      the following recurrence formula: \
1106     *          if (3) is false \
1107     * \
1108     *          s     =  s  ,       y    = y   ;                    (4) \
1109     *           i+1      i          i+1    i \
1110     * \
1111     *          otherwise, \
1112     *                         -i                     -(i+1) \
1113     *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5) \
1114     *           i+1      i          i+1    i     i \
1115     * \
1116     * \
1117     *      One may easily use induction to prove (4) and (5). \
1118     *      Note. Since the left hand side of (3) contain only i+2 bits, \
1119     *            it is not necessary to do a full (53-bit) comparison \
1120     *            in (3). \
1121     *   3. Final rounding \
1122     *      After generating the 53 bits result, we compute one more bit. \
1123     *      Together with the remainder, we can decide whether the \
1124     *      result is exact, bigger than 1/2ulp, or less than 1/2ulp \
1125     *      (it will never be equal to 1/2ulp). \
1126     *      The rounding mode can be detected by checking whether \
1127     *      huge + tiny is equal to huge, and whether huge - tiny is \
1128     *      equal to huge for some floating point number "huge" and "tiny". \
1129     * \
1130     * Special cases: \
1131     *      sqrt(+-0) = +-0         ... exact \
1132     *      sqrt(inf) = inf \
1133     *      sqrt(-ve) = NaN         ... with invalid signal \
1134     *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN \
1135     * \
1136     */ \
        /* Expands to the prototype and definition of a replacement \
           `double fnname(double x)` for `soname`.  x is taken apart into \
           a signed high word (ix0) and an unsigned low word (ix1) \
           through a union, the root is built in the integer pair \
           [q,q1], and the result is reassembled through the same union. \
           w[0] is used as the high word because bIGENDIAN is 1. */ \
1137    double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ); \
1138    double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ) \
1139    {  \
1140       const Int    bIGENDIAN = 1; /* selects w[0] as high word, w[1] as low */ \
1141       const double one = 1.0, tiny=1.0e-300; \
1142       double z; \
1143       Int sign = (Int)0x80000000; /* mask with only bit 31 set */ \
1144       Int ix0,s0,q,m,t,i; \
1145       UInt r,t1,s1,ix1,q1; \
1146       union { UInt w[2]; double d; } u; \
1147       u.d = x; \
1148       ix0 = u.w[1-bIGENDIAN]; /* high word: sign, exponent, top of mantissa */ \
1149       ix1 = u.w[bIGENDIAN];    /* low word: remaining mantissa bits */ \
1150       \
1151       /* take care of Inf and NaN */ \
1152       if((ix0&0x7ff00000)==0x7ff00000) { \
1153          return x*x+x;               /* sqrt(NaN)=NaN, sqrt(+inf)=+inf \
1154                                         sqrt(-inf)=sNaN */ \
1155       } \
1156       /* take care of zero */ \
1157       if(ix0<=0) { /* signed test: high word is zero or has the sign bit set */ \
1158          if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ \
1159          else if(ix0<0) \
1160               return (x-x)/(x-x);             /* sqrt(-ve) = sNaN */ \
1161       } \
1162       /* normalize x */ \
1163       m = (ix0>>20); \
1164       if(m==0) {                              /* subnormal x */ \
1165          while(ix0==0) { /* pull bits up from the low word, 21 at a time */ \
1166             m -= 21; \
1167             ix0 |= (ix1>>11); ix1 <<= 21; \
1168          } \
1169          for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; /* shift until bit 20 is set */ \
1170          m -= i-1; \
1171          ix0 |= (ix1>>(32-i)); \
1172          ix1 <<= i; \
1173       } \
1174       m -= 1023;      /* unbias exponent */ \
1175       ix0 = (ix0&0x000fffff)|0x00100000; /* keep 20 mantissa bits, set bit 20 */ \
1176       if(m&1){        /* odd m, double x to make it even */ \
1177          ix0 += ix0 + ((ix1&sign)>>31); \
1178          ix1 += ix1; \
1179       } \
1180       m >>= 1;        /* m = [m/2] */ \
1181       /* generate sqrt(x) bit by bit */ \
1182       ix0 += ix0 + ((ix1&sign)>>31); /* double [ix0,ix1], carrying ix1's top bit */ \
1183       ix1 += ix1; \
1184       q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */ \
1185       r = 0x00200000;         /* r = moving bit from right to left */ \
1186       while(r!=0) { /* first pass: bits accumulated into q */ \
1187          t = s0+r; \
1188          if(t<=ix0) { \
1189             s0   = t+r; \
1190             ix0 -= t; \
1191             q   += r; \
1192          } \
1193          ix0 += ix0 + ((ix1&sign)>>31); \
1194          ix1 += ix1; \
1195          r>>=1; \
1196       } \
1197       r = sign; \
1198       while(r!=0) { /* second pass: bits accumulated into q1 */ \
1199          t1 = s1+r; \
1200          t  = s0; \
1201          if((t<ix0)||((t==ix0)&&(t1<=ix1))) { /* two-word compare [t,t1] <= [ix0,ix1] */ \
1202             s1  = t1+r; \
1203             if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; /* s1 wrapped: carry into s0 */ \
1204             ix0 -= t; \
1205             if (ix1 < t1) ix0 -= 1; /* borrow for the low-word subtraction below */ \
1206             ix1 -= t1; \
1207             q1  += r; \
1208          } \
1209          ix0 += ix0 + ((ix1&sign)>>31); \
1210          ix1 += ix1; \
1211          r>>=1; \
1212       } \
1213       /* use floating add to find out rounding direction */ \
1214       if((ix0|ix1)!=0) { /* non-zero remainder */ \
1215          z = one-tiny; /* trigger inexact flag */ \
1216          if (z>=one) { \
1217             z = one+tiny; \
1218             if (q1==(UInt)0xffffffff) { q1=0; q += 1;} \
1219             else if (z>one) { \
1220                     if (q1==(UInt)0xfffffffe) q+=1; \
1221                     q1+=2; \
1222                  } else \
1223                     q1 += (q1&1); \
1224          } \
1225       } \
1226       ix0 = (q>>1)+0x3fe00000; \
1227       ix1 = q1>>1; \
1228       if ((q&1)==1) ix1 |= sign; /* q's low bit becomes the top bit of the low word */ \
1229       ix0 += (m <<20); /* add the halved exponent into the exponent field */ \
1230       ix0 = u.w[1-bIGENDIAN] = ix0; /* store the result words back into the union */ \
1231       ix1 = u.w[bIGENDIAN] = ix1;    \
1232       z = u.d; \
1233       return z; \
1234    }
1235
1236 #if 0 /* SQRT_FAST instantiations: compiled out */
1237 SQRT_FAST(NONE, sqrt)  /* xlC generates these */
1238 SQRT_FAST(NONE, _sqrt) /* xlf generates these */
1239 #else /* SQRT_EXACT instantiations: the ones actually built, for sqrt and _sqrt under soname NONE */
1240 SQRT_EXACT(NONE, sqrt)  /* xlC generates these */
1241 SQRT_EXACT(NONE, _sqrt) /* xlf generates these */
1242 #endif /* 0 */
1243
1244 #endif /* defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5) */
1245
1246 /*--------------------------------------------------------------------*/
1247 /*--- end                                                          ---*/
1248 /*--------------------------------------------------------------------*/