kernel/math.c

   1 /*--------------------------------------------------------------------
   2  * TITLE: Plasma Floating Point Library
   3  * AUTHOR: Steve Rhoads (rhoadss@yahoo.com)
   4  * DATE CREATED: 3/2/06
   5  * FILENAME: math.c
   6  * PROJECT: Plasma CPU core
   7  * COPYRIGHT: Software placed into the public domain by the author.
   8  *    Software 'as is' without warranty.  Author liable for nothing.
   9  * DESCRIPTION:
  10  *    Plasma Floating Point Library
  11  *--------------------------------------------------------------------
  12  * IEEE_fp = sign(1) | exponent(8) | fraction(23)
  13  * cos(x)=1-x^2/2!+x^4/4!-x^6/6!+...
  14  * exp(x)=1+x+x^2/2!+x^3/3!+...
  15  * ln(1+x)=x-x^2/2+x^3/3-x^4/4+...
  16  * atan(x)=x-x^3/3+x^5/5-x^7/7+...
  17  * pow(x,y)=exp(y*ln(x))
  18  * x=tan(a+b)=(tan(a)+tan(b))/(1-tan(a)*tan(b))
  19  * atan(x)=b+atan((x-atan(b))/(1+x*atan(b)))
  20  * ln(a*x)=ln(a)+ln(x); ln(x^n)=n*ln(x)
  21  *--------------------------------------------------------------------*/
  22 #include "rtos.h"
  23
  24 //#define USE_SW_MULT
  25 #if !defined(WIN32) && !defined(USE_SW_MULT)
  26 #define USE_MULT64
  27 #endif
  28
  29 #define PI ((float)3.1415926)
  30 #define PI_2 ((float)(PI/2.0))
  31 #define PI2 ((float)(PI*2.0))
  32
  33 #define FtoL(X) (*(unsigned long*)&(X))
  34 #define LtoF(X) (*(float*)&(X))
  35
  36
  37 float FP_Neg(float a_fp)
  38 {
  39    unsigned long a;
  40    a = FtoL(a_fp);
  41    a ^= 0x80000000;
  42    return LtoF(a);
  43 }
  44
  45
  46 float FP_Add(float a_fp, float b_fp)
  47 {
  48    unsigned long a, b, c;
  49    unsigned long as, bs, cs;     //sign
  50    long ae, af, be, bf, ce, cf;  //exponent and fraction
  51    a = FtoL(a_fp);
  52    b = FtoL(b_fp);
  53    as = a >> 31;                        //sign
  54    ae = (a >> 23) & 0xff;               //exponent
  55    af = 0x00800000 | (a & 0x007fffff);  //fraction
  56    bs = b >> 31;
  57    be = (b >> 23) & 0xff;
  58    bf = 0x00800000 | (b & 0x007fffff);
  59    if(ae > be)
  60    {
  61       if(ae - be < 30)
  62          bf >>= ae - be;
  63       else
  64          bf = 0;
  65       ce = ae;
  66    }
  67    else
  68    {
  69       if(be - ae < 30)
  70          af >>= be - ae;
  71       else
  72          af = 0;
  73       ce = be;
  74    }
  75    cf = (as ? -af : af) + (bs ? -bf : bf);
  76    cs = cf < 0;
  77    cf = cf>=0 ? cf : -cf;
  78    if(cf == 0)
  79       return LtoF(cf);
  80    while(cf & 0xff000000)
  81    {
  82       ++ce;
  83       cf >>= 1;
  84    }
  85    while((cf & 0xff800000) == 0)
  86    {
  87       --ce;
  88       cf <<= 1;
  89    }
  90    c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
  91    if(ce < 1)
  92       c = 0;
  93    return LtoF(c);
  94 }
  95
  96
  97 float FP_Sub(float a_fp, float b_fp)
  98 {
  99    return FP_Add(a_fp, FP_Neg(b_fp));
 100 }
 101
 102
 103 float FP_Mult(float a_fp, float b_fp)
 104 {
 105    unsigned long a, b, c;
 106    unsigned long as, af, bs, bf, cs, cf;
 107    long ae, be, ce;
 108 #ifndef USE_MULT64
 109    unsigned long a2, a1, b2, b1, med1, med2;
 110 #endif
 111    unsigned long hi, lo;
 112    a = FtoL(a_fp);
 113    b = FtoL(b_fp);
 114    as = a >> 31;
 115    ae = (a >> 23) & 0xff;
 116    af = 0x00800000 | (a & 0x007fffff);
 117    bs = b >> 31;
 118    be = (b >> 23) & 0xff;
 119    bf = 0x00800000 | (b & 0x007fffff);
 120    cs = as ^ bs;
 121 #ifndef USE_MULT64
 122    a1 = af & 0xffff;
 123    a2 = af >> 16;
 124    b1 = bf & 0xffff;
 125    b2 = bf >> 16;
 126    lo = a1 * b1;
 127    med1 = a2 * b1 + (lo >> 16);
 128    med2 = a1 * b2;
 129    hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
 130    med1 = (med1 & 0xffff) + (med2 & 0xffff);
 131    hi += (med1 >> 16);
 132    lo = (med1 << 16) | (lo & 0xffff);
 133 #else
 134    lo = OS_AsmMult(af, bf, &hi);
 135 #endif
 136    cf = (hi << 9) | (lo >> 23);
 137    ce = ae + be - 0x80 + 1;
 138    if(cf == 0)
 139       return LtoF(cf);
 140    while(cf & 0xff000000)
 141    {
 142       ++ce;
 143       cf >>= 1;
 144    }
 145    c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
 146    if(ce < 1)
 147       c = 0;
 148    return LtoF(c);
 149 }
 150
 151
 152 float FP_Div(float a_fp, float b_fp)
 153 {
 154    unsigned long a, b, c;
 155    unsigned long as, af, bs, bf, cs, cf;
 156    unsigned long a1, b1;
 157 #ifndef USE_MULT64
 158    unsigned long a2, b2, med1, med2;
 159 #endif
 160    unsigned long hi, lo;
 161    long ae, be, ce, d;
 162    a = FtoL(a_fp);
 163    b = FtoL(b_fp);
 164    as = a >> 31;
 165    ae = (a >> 23) & 0xff;
 166    af = 0x00800000 | (a & 0x007fffff);
 167    bs = b >> 31;
 168    be = (b >> 23) & 0xff;
 169    bf = 0x00800000 | (b & 0x007fffff);
 170    cs = as ^ bs;
 171    ce = ae - (be - 0x80) + 6 - 8;
 172    a1 = af << 4; //8
 173    b1 = bf >> 8;
 174    cf = a1 / b1;
 175    cf <<= 12; //8
 176 #if 1                  /*non-quick*/
 177 #ifndef USE_MULT64
 178    a1 = cf & 0xffff;
 179    a2 = cf >> 16;
 180    b1 = bf & 0xffff;
 181    b2 = bf >> 16;
 182    lo = a1 * b1;
 183    med1 =a2 * b1 + (lo >> 16);
 184    med2 = a1 * b2;
 185    hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
 186    med1 = (med1 & 0xffff) + (med2 & 0xffff);
 187    hi += (med1 >> 16);
 188    lo = (med1 << 16) | (lo & 0xffff);
 189 #else
 190    lo = OS_AsmMult(cf, bf, &hi);
 191 #endif
 192    lo = (hi << 8) | (lo >> 24);
 193    d = af - lo;    //remainder
 194    assert(-0xffff < d && d < 0xffff);
 195    d <<= 16;
 196    b1 = bf >> 8;
 197    d = d / (long)b1;
 198    cf += d;
 199 #endif
 200    if(cf == 0)
 201       return LtoF(cf);
 202    while(cf & 0xff000000)
 203    {
 204       ++ce;
 205       cf >>= 1;
 206    }
 207    if(ce < 0)
 208       ce = 0;
 209    c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
 210    if(ce < 1)
 211       c = 0;
 212    return LtoF(c);
 213 }
 214
 215
 216 long FP_ToLong(float a_fp)
 217 {
 218    unsigned long a;
 219    unsigned long as;
 220    long ae;
 221    long af, shift;
 222    a = FtoL(a_fp);
 223    as = a >> 31;
 224    ae = (a >> 23) & 0xff;
 225    af = 0x00800000 | (a & 0x007fffff);
 226    af <<= 7;
 227    shift = -(ae - 0x80 - 29);
 228    if(shift > 0)
 229    {
 230       if(shift < 31)
 231          af >>= shift;
 232       else
 233          af = 0;
 234    }
 235    af = as ? -af: af;
 236    return af;
 237 }
 238
 239
 240 float FP_ToFloat(long af)
 241 {
 242    unsigned long a;
 243    unsigned long as, ae;
 244    as = af>=0 ? 0: 1;
 245    af = af>=0 ? af: -af;
 246    ae = 0x80 + 22;
 247    if(af == 0)
 248       return LtoF(af);
 249    while(af & 0xff000000)
 250    {
 251       ++ae;
 252       af >>= 1;
 253    }
 254    while((af & 0xff800000) == 0)
 255    {
 256       --ae;
 257       af <<= 1;
 258    }
 259    a = (as << 31) | (ae << 23) | (af & 0x007fffff);
 260    return LtoF(a);
 261 }
 262
 263
 264 //0 iff a==b; 1 iff a>b; -1 iff a<b
 265 int FP_Cmp(float a_fp, float b_fp)
 266 {
 267    unsigned long a, b;
 268    unsigned long as, ae, af, bs, be, bf;
 269    int gt;
 270    a = FtoL(a_fp);
 271    b = FtoL(b_fp);
 272    if(a == b)
 273       return 0;
 274    as = a >> 31;
 275    bs = b >> 31;
 276    if(as > bs)
 277       return -1;
 278    if(as < bs)
 279       return 1;
 280    gt = as ? -1 : 1;
 281    ae = (a >> 23) & 0xff;
 282    be = (b >> 23) & 0xff;
 283    if(ae > be)
 284       return gt;
 285    if(ae < be)
 286       return -gt;
 287    af = 0x00800000 | (a & 0x007fffff);
 288    bf = 0x00800000 | (b & 0x007fffff);
 289    if(af > bf)
 290       return gt;
 291    return -gt;
 292 }
 293
 294
 295 int __ltsf2(float a, float b)
 296 {
 297    return FP_Cmp(a, b);
 298 }
 299
 300 int __lesf2(float a, float b)
 301 {
 302    return FP_Cmp(a, b);
 303 }
 304
 305 int __gtsf2(float a, float b)
 306 {
 307    return FP_Cmp(a, b);
 308 }
 309
 310 int __gesf2(float a, float b)
 311 {
 312    return FP_Cmp(a, b);
 313 }
 314
 315 int __eqsf2(float a, float b)
 316 {
 317    return FtoL(a) != FtoL(b);
 318 }
 319
 320 int __nesf2(float a, float b)
 321 {
 322    return FtoL(a) != FtoL(b);
 323 }
 324
 325
 326 float FP_Sqrt(float a)
 327 {
 328    float x1, y1, x2, y2, x3;
 329    long i;
 330    x1 = FP_ToFloat(1);
 331    y1 = FP_Sub(FP_Mult(x1, x1), a);  //y1=x1*x1-a;
 332    x2 = FP_ToFloat(100);
 333    y2 = FP_Sub(FP_Mult(x2, x2), a);
 334    for(i = 0; i < 10; ++i)
 335    {
 336       if(FtoL(y1) == FtoL(y2))
 337          return x2;
 338       //x3=x2-(x1-x2)*y2/(y1-y2);
 339       x3 = FP_Sub(x2, FP_Div(FP_Mult(FP_Sub(x1, x2), y2), FP_Sub(y1, y2)));
 340       x1 = x2;
 341       y1 = y2;
 342       x2 = x3;
 343       y2 = FP_Sub(FP_Mult(x2, x2), a);
 344    }
 345    return x2;
 346 }
 347
 348
 349 float FP_Cos(float rad)
 350 {
 351    int n;
 352    float answer, x2, top, bottom, sign;
 353    while(FP_Cmp(rad, PI2) > 0)
 354       rad = FP_Sub(rad, PI2);
 355    while(FP_Cmp(rad, (float)0.0) < 0)
 356       rad = FP_Add(rad, PI2);
 357    answer = 1.0;
 358    sign = 1.0;
 359    if(FP_Cmp(rad, PI) >= 0)
 360    {
 361       rad = FP_Sub(rad, PI);
 362       sign = FP_ToFloat(-1);
 363    }
 364    if(FP_Cmp(rad, PI_2) >= 0)
 365    {
 366       rad = FP_Sub(PI, rad);
 367       sign = FP_Neg(sign);
 368    }
 369    x2 = FP_Mult(rad, rad);
 370    top = 1.0;
 371    bottom = 1.0;
 372    for(n = 2; n < 12; n += 2)
 373    {
 374       top = FP_Mult(top, FP_Neg(x2));
 375       bottom = FP_Mult(bottom, FP_ToFloat((n - 1) * n));
 376       answer = FP_Add(answer, FP_Div(top, bottom));
 377    }
 378    return FP_Mult(answer, sign);
 379 }
 380
 381
 382 float FP_Sin(float rad)
 383 {
 384    const float pi_2=(float)(PI/2.0);
 385    return FP_Cos(FP_Sub(rad, pi_2));
 386 }
 387
 388
 389 float FP_Atan(float x)
 390 {
 391    const float b=(float)(PI/8.0);
 392    const float atan_b=(float)0.37419668; //atan(b);
 393    int n;
 394    float answer, x2, top;
 395    if(FP_Cmp(x, (float)0.0) >= 0)
 396    {
 397       if(FP_Cmp(x, (float)1.0) > 0)
 398          return FP_Sub(PI_2, FP_Atan(FP_Div((float)1.0, x)));
 399    }
 400    else
 401    {
 402       if(FP_Cmp(x, (float)-1.0) > 0)
 403          return FP_Sub(-PI_2, FP_Atan(FP_Div((float)1.0, x)));
 404    }
 405    if(FP_Cmp(x, (float)0.45) > 0)
 406    {
 407       //answer = (x - atan_b) / (1 + x * atan_b);
 408       answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
 409       //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME fudge?*/
 410       answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
 411       return answer;
 412    }
 413    if(FP_Cmp(x, (float)-0.45) < 0)
 414    {
 415       x = FP_Neg(x);
 416       //answer = (x - atan_b) / (1 + x * atan_b);
 417       answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
 418       //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME*/
 419       answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
 420       return FP_Neg(answer);
 421    }
 422    answer = x;
 423    x2 = FP_Mult(FP_Neg(x), x);
 424    top = x;
 425    for(n = 3; n < 14; n += 2)
 426    {
 427       top = FP_Mult(top, x2);
 428       answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
 429    }
 430    return answer;
 431 }
 432
 433
 434 float FP_Atan2(float y, float x)
 435 {
 436    float answer,r;
 437    r = y / x;
 438    answer = FP_Atan(r);
 439    if(FP_Cmp(x, (float)0.0) < 0)
 440    {
 441       if(FP_Cmp(y, (float)0.0) > 0)
 442          answer = FP_Add(answer, PI);
 443       else
 444          answer = FP_Sub(answer, PI);
 445    }
 446    return answer;
 447 }
 448
 449
 450 float FP_Exp(float x)
 451 {
 452    const float e2=(float)7.389056099;
 453    const float inv_e2=(float)0.135335283;
 454    float answer, top, bottom, mult;
 455    int n;
 456
 457    mult = 1.0;
 458    while(FP_Cmp(x, (float)2.0) > 0)
 459    {
 460       mult = FP_Mult(mult, e2);
 461       x = FP_Add(x, (float)-2.0);
 462    }
 463    while(FP_Cmp(x, (float)-2.0) < 0)
 464    {
 465       mult = FP_Mult(mult, inv_e2);
 466       x = FP_Add(x, (float)2.0);
 467    }
 468    answer = FP_Add((float)1.0, x);
 469    top = x;
 470    bottom = 1.0;
 471    for(n = 2; n < 15; ++n)
 472    {
 473       top = FP_Mult(top, x);
 474       bottom = FP_Mult(bottom, FP_ToFloat(n));
 475       answer = FP_Add(answer, FP_Div(top, bottom));
 476    }
 477    return FP_Mult(answer, mult);
 478 }
 479
 480
 481 float FP_Log(float x)
 482 {
 483    const float log_2=(float)0.69314718; /*log(2.0)*/
 484    int n;
 485    float answer, top, add;
 486    add = 0.0;
 487    while(FP_Cmp(x, 16.0) > 0)
 488    {
 489       x = FP_Mult(x, (float)0.0625);
 490       add = FP_Add(add, (float)(log_2 * 4));
 491    }
 492    while(FP_Cmp(x, 1.5) > 0)
 493    {
 494       x = FP_Mult(x, 0.5);
 495       add = FP_Add(add, log_2);
 496    }
 497    while(FP_Cmp(x, 0.5) < 0)
 498    {
 499       x = FP_Mult(x, 2.0);
 500       add = FP_Sub(add, log_2);
 501    }
 502    x = FP_Sub(x, 1.0);
 503    answer = 0.0;
 504    top = -1.0;
 505    for(n = 1; n < 14; ++n)
 506    {
 507       top = FP_Mult(top, FP_Neg(x));
 508       answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
 509    }
 510    return FP_Add(answer, add);
 511 }
 512
 513
 514 float FP_Pow(float x, float y)
 515 {
 516    return FP_Exp(y * FP_Log(x));
 517 }
 518
 519
 520 /********************************************/
 521 //These five functions will only be used if the flag "-mno-mul" is enabled
 522 #ifdef USE_SW_MULT
 523 unsigned long __mulsi3(unsigned long a, unsigned long b)
 524 {
 525    unsigned long answer = 0;
 526    while(b)
 527    {
 528       if(b & 1)
 529          answer += a;
 530       a <<= 1;
 531       b >>= 1;
 532    }
 533    return answer;
 534 }
 535
 536
 537 static unsigned long DivideMod(unsigned long a, unsigned long b, int doMod)
 538 {
 539    unsigned long upper=a, lower=0;
 540    int i;
 541    a = b << 31;
 542    for(i = 0; i < 32; ++i)
 543    {
 544       lower = lower << 1;
 545       if(upper >= a && a && b < 2)
 546       {
 547          upper = upper - a;
 548          lower |= 1;
 549       }
 550       a = ((b&2) << 30) | (a >> 1);
 551       b = b >> 1;
 552    }
 553    if(!doMod)
 554       return lower;
 555    return upper;
 556 }
 557
 558
 559 unsigned long __udivsi3(unsigned long a, unsigned long b)
 560 {
 561    return DivideMod(a, b, 0);
 562 }
 563
 564
 565 long __divsi3(long a, long b)
 566 {
 567    long answer, negate=0;
 568    if(a < 0)
 569    {
 570       a = -a;
 571       negate = !negate;
 572    }
 573    if(b < 0)
 574    {
 575       b = -b;
 576       negate = !negate;
 577    }
 578    answer = DivideMod(a, b, 0);
 579    if(negate)
 580       answer = -answer;
 581    return answer;
 582 }
 583
 584
 585 unsigned long __umodsi3(unsigned long a, unsigned long b)
 586 {
 587    return DivideMod(a, b, 1);
 588 }
 589 #endif
 590
 591
 592 /*************** Test *****************/
 593 #ifdef WIN32
 594 #undef _LIBC
 595 #include <math.h>
 596 #undef printf
 597 #undef getch
 598 int printf(const char *, ...);
 599 struct {
 600    char *name;
 601    float low, high;
 602    double (*func1)(double);
 603    float (*func2)(float);
 604 } test_info[]={
 605    {"cos", -2*PI, 2*PI, cos, FP_Cos},
 606    {"sin", -2*PI, 2*PI, sin, FP_Sin},
 607    {"atan", -3.0, 2.0, atan, FP_Atan},
 608    {"log", (float)0.01, (float)4.0, log, FP_Log},
 609    {"exp", (float)-5.01, (float)30.0, exp, FP_Exp},
 610    {"sqrt", (float)0.01, (float)1000.0, sqrt, FP_Sqrt}
 611 };
 612
 613
 614 void TestMathFull(void)
 615 {
 616    float a, b, c, d;
 617    float error1, error2, error3, error4, error5;
 618    int test;
 619
 620    a = PI * PI;
 621    b = PI;
 622    c = FP_Div(a, b);
 623    printf("%10f %10f %10f %10f %10f\n",
 624       (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
 625    a = a * 200;
 626    for(b = -(float)2.718281828*100; b < 300; b += (float)23.678)
 627    {
 628       c = FP_Div(a, b);
 629       d = a / b - c;
 630       printf("%10f %10f %10f %10f %10f\n",
 631          (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
 632    }
 633    //getch();
 634
 635    for(test = 0; test < 6; ++test)
 636    {
 637       printf("\nTesting %s\n", test_info[test].name);
 638       for(a = test_info[test].low;
 639           a <= test_info[test].high;
 640           a += (test_info[test].high-test_info[test].low)/(float)20.0)
 641       {
 642          b = (float)test_info[test].func1(a);
 643          c = test_info[test].func2(a);
 644          d = b - c;
 645          printf("%s %10f %10f %10f %10f\n", test_info[test].name, a, b, c, d);
 646       }
 647       //getch();
 648    }
 649
 650    a = FP_ToFloat((long)6.0);
 651    b = FP_ToFloat((long)2.0);
 652    printf("%f %f\n", (double)a, (double)b);
 653    c = FP_Add(a, b);
 654    printf("add %f %f\n", (double)(a + b), (double)c);
 655    c = FP_Sub(a, b);
 656    printf("sub %f %f\n", (double)(a - b), (double)c);
 657    c = FP_Mult(a, b);
 658    printf("mult %f %f\n", (double)(a * b), (double)c);
 659    c = FP_Div(a, b);
 660    printf("div %f %f\n", (double)(a / b), (double)c);
 661    //getch();
 662
 663    for(a = (float)-13756.54; a < (float)17400.0; a += (float)64.45)
 664    {
 665       for(b = (float)-875.36; b < (float)935.8; b += (float)36.7)
 666       {
 667          error1 = (float)1.0 - (a + b) / FP_Add(a, b);
 668          error2 = (float)1.0 - (a * b) / FP_Mult(a, b);
 669          error3 = (float)1.0 - (a / b) / FP_Div(a, b);
 670          error4 = (float)1.0 - a / FP_ToFloat(FP_ToLong(a));
 671          error5 = error1 + error2 + error3 + error4;
 672          if(error5 < 0.00005)
 673             continue;
 674          printf("ERROR!\n");
 675          printf("a=%f b=%f\n", (double)a, (double)b);
 676          printf("  a+b=%f %f\n", (double)(a+b), (double)FP_Add(a, b));
 677          printf("  a*b=%f %f\n", (double)(a*b), (double)FP_Mult(a, b));
 678          printf("  a/b=%f %f\n", (double)(a/b), (double)FP_Div(a, b));
 679          printf("  a=%f %ld %f\n", (double)a, FP_ToLong(a),
 680                                    (double)FP_ToFloat((long)a));
 681          printf("  %f %f %f %f\n", (double)error1, (double)error2,
 682             (double)error3, (double)error4);
 683          //if(error5 > 0.001)
 684          //   getch();
 685       }
 686    }
 687    printf("done.\n");
 688    //getch();
 689 }
 690 #endif
 691
 692