1 /*--------------------------------------------------------------------
2 * TITLE: Plasma Floating Point Library
3 * AUTHOR: Steve Rhoads (rhoadss@yahoo.com)
6 * PROJECT: Plasma CPU core
7 * COPYRIGHT: Software placed into the public domain by the author.
8 * Software 'as is' without warranty. Author liable for nothing.
10 * Plasma Floating Point Library
11 *--------------------------------------------------------------------
12 * IEEE_fp = sign(1) | exponent(8) | fraction(23)
13 * cos(x)=1-x^2/2!+x^4/4!-x^6/6!+...
14 * exp(x)=1+x+x^2/2!+x^3/3!+...
15 * ln(1+x)=x-x^2/2+x^3/3-x^4/4+...
16 * atan(x)=x-x^3/3+x^5/5-x^7/7+...
17 * pow(x,y)=exp(y*ln(x))
18 * x=tan(a+b)=(tan(a)+tan(b))/(1-tan(a)*tan(b))
19 * atan(x)=b+atan((x-atan(b))/(1+x*atan(b)))
20 * ln(a*x)=ln(a)+ln(x); ln(x^n)=n*ln(x)
21 *--------------------------------------------------------------------*/
25 #if !defined(WIN32) && !defined(USE_SW_MULT)
29 #define PI ((float)3.1415926)
30 #define PI_2 ((float)(PI/2.0))
31 #define PI2 ((float)(PI*2.0))
33 #define FtoL(X) (*(unsigned long*)&(X))
34 #define LtoF(X) (*(float*)&(X))
37 float FP_Neg(float a_fp)
46 float FP_Add(float a_fp, float b_fp)
48 unsigned long a, b, c;
49 unsigned long as, bs, cs; //sign
50 long ae, af, be, bf, ce, cf; //exponent and fraction
54 ae = (a >> 23) & 0xff; //exponent
55 af = 0x00800000 | (a & 0x007fffff); //fraction
57 be = (b >> 23) & 0xff;
58 bf = 0x00800000 | (b & 0x007fffff);
75 cf = (as ? -af : af) + (bs ? -bf : bf);
77 cf = cf>=0 ? cf : -cf;
80 while(cf & 0xff000000)
85 while((cf & 0xff800000) == 0)
90 c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
97 float FP_Sub(float a_fp, float b_fp)
99 return FP_Add(a_fp, FP_Neg(b_fp));
103 float FP_Mult(float a_fp, float b_fp)
105 unsigned long a, b, c;
106 unsigned long as, af, bs, bf, cs, cf;
109 unsigned long a2, a1, b2, b1, med1, med2;
111 unsigned long hi, lo;
115 ae = (a >> 23) & 0xff;
116 af = 0x00800000 | (a & 0x007fffff);
118 be = (b >> 23) & 0xff;
119 bf = 0x00800000 | (b & 0x007fffff);
127 med1 = a2 * b1 + (lo >> 16);
129 hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
130 med1 = (med1 & 0xffff) + (med2 & 0xffff);
132 lo = (med1 << 16) | (lo & 0xffff);
134 lo = OS_AsmMult(af, bf, &hi);
136 cf = (hi << 9) | (lo >> 23);
137 ce = ae + be - 0x80 + 1;
140 while(cf & 0xff000000)
145 c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
152 float FP_Div(float a_fp, float b_fp)
154 unsigned long a, b, c;
155 unsigned long as, af, bs, bf, cs, cf;
156 unsigned long a1, b1;
158 unsigned long a2, b2, med1, med2;
160 unsigned long hi, lo;
165 ae = (a >> 23) & 0xff;
166 af = 0x00800000 | (a & 0x007fffff);
168 be = (b >> 23) & 0xff;
169 bf = 0x00800000 | (b & 0x007fffff);
171 ce = ae - (be - 0x80) + 6 - 8;
183 med1 =a2 * b1 + (lo >> 16);
185 hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
186 med1 = (med1 & 0xffff) + (med2 & 0xffff);
188 lo = (med1 << 16) | (lo & 0xffff);
190 lo = OS_AsmMult(cf, bf, &hi);
192 lo = (hi << 8) | (lo >> 24);
193 d = af - lo; //remainder
194 assert(-0xffff < d && d < 0xffff);
202 while(cf & 0xff000000)
209 c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
216 long FP_ToLong(float a_fp)
224 ae = (a >> 23) & 0xff;
225 af = 0x00800000 | (a & 0x007fffff);
227 shift = -(ae - 0x80 - 29);
240 float FP_ToFloat(long af)
243 unsigned long as, ae;
245 af = af>=0 ? af: -af;
249 while(af & 0xff000000)
254 while((af & 0xff800000) == 0)
259 a = (as << 31) | (ae << 23) | (af & 0x007fffff);
264 //0 iff a==b; 1 iff a>b; -1 iff a<b
265 int FP_Cmp(float a_fp, float b_fp)
268 unsigned long as, ae, af, bs, be, bf;
281 ae = (a >> 23) & 0xff;
282 be = (b >> 23) & 0xff;
287 af = 0x00800000 | (a & 0x007fffff);
288 bf = 0x00800000 | (b & 0x007fffff);
295 int __ltsf2(float a, float b)
300 int __lesf2(float a, float b)
305 int __gtsf2(float a, float b)
310 int __gesf2(float a, float b)
315 int __eqsf2(float a, float b)
317 return FtoL(a) != FtoL(b);
320 int __nesf2(float a, float b)
322 return FtoL(a) != FtoL(b);
326 float FP_Sqrt(float a)
328 float x1, y1, x2, y2, x3;
331 y1 = FP_Sub(FP_Mult(x1, x1), a); //y1=x1*x1-a;
332 x2 = FP_ToFloat(100);
333 y2 = FP_Sub(FP_Mult(x2, x2), a);
334 for(i = 0; i < 10; ++i)
336 if(FtoL(y1) == FtoL(y2))
338 //x3=x2-(x1-x2)*y2/(y1-y2);
339 x3 = FP_Sub(x2, FP_Div(FP_Mult(FP_Sub(x1, x2), y2), FP_Sub(y1, y2)));
343 y2 = FP_Sub(FP_Mult(x2, x2), a);
349 float FP_Cos(float rad)
352 float answer, x2, top, bottom, sign;
353 while(FP_Cmp(rad, PI2) > 0)
354 rad = FP_Sub(rad, PI2);
355 while(FP_Cmp(rad, (float)0.0) < 0)
356 rad = FP_Add(rad, PI2);
359 if(FP_Cmp(rad, PI) >= 0)
361 rad = FP_Sub(rad, PI);
362 sign = FP_ToFloat(-1);
364 if(FP_Cmp(rad, PI_2) >= 0)
366 rad = FP_Sub(PI, rad);
369 x2 = FP_Mult(rad, rad);
372 for(n = 2; n < 12; n += 2)
374 top = FP_Mult(top, FP_Neg(x2));
375 bottom = FP_Mult(bottom, FP_ToFloat((n - 1) * n));
376 answer = FP_Add(answer, FP_Div(top, bottom));
378 return FP_Mult(answer, sign);
382 float FP_Sin(float rad)
384 const float pi_2=(float)(PI/2.0);
385 return FP_Cos(FP_Sub(rad, pi_2));
389 float FP_Atan(float x)
391 const float b=(float)(PI/8.0);
392 const float atan_b=(float)0.37419668; //atan(b);
394 float answer, x2, top;
395 if(FP_Cmp(x, (float)0.0) >= 0)
397 if(FP_Cmp(x, (float)1.0) > 0)
398 return FP_Sub(PI_2, FP_Atan(FP_Div((float)1.0, x)));
402 if(FP_Cmp(x, (float)-1.0) > 0)
403 return FP_Sub(-PI_2, FP_Atan(FP_Div((float)1.0, x)));
405 if(FP_Cmp(x, (float)0.45) > 0)
407 //answer = (x - atan_b) / (1 + x * atan_b);
408 answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
409 //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME fudge?*/
410 answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
413 if(FP_Cmp(x, (float)-0.45) < 0)
416 //answer = (x - atan_b) / (1 + x * atan_b);
417 answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
418 //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME*/
419 answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
420 return FP_Neg(answer);
423 x2 = FP_Mult(FP_Neg(x), x);
425 for(n = 3; n < 14; n += 2)
427 top = FP_Mult(top, x2);
428 answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
434 float FP_Atan2(float y, float x)
439 if(FP_Cmp(x, (float)0.0) < 0)
441 if(FP_Cmp(y, (float)0.0) > 0)
442 answer = FP_Add(answer, PI);
444 answer = FP_Sub(answer, PI);
450 float FP_Exp(float x)
452 const float e2=(float)7.389056099;
453 const float inv_e2=(float)0.135335283;
454 float answer, top, bottom, mult;
458 while(FP_Cmp(x, (float)2.0) > 0)
460 mult = FP_Mult(mult, e2);
461 x = FP_Add(x, (float)-2.0);
463 while(FP_Cmp(x, (float)-2.0) < 0)
465 mult = FP_Mult(mult, inv_e2);
466 x = FP_Add(x, (float)2.0);
468 answer = FP_Add((float)1.0, x);
471 for(n = 2; n < 15; ++n)
473 top = FP_Mult(top, x);
474 bottom = FP_Mult(bottom, FP_ToFloat(n));
475 answer = FP_Add(answer, FP_Div(top, bottom));
477 return FP_Mult(answer, mult);
481 float FP_Log(float x)
483 const float log_2=(float)0.69314718; /*log(2.0)*/
485 float answer, top, add;
487 while(FP_Cmp(x, 16.0) > 0)
489 x = FP_Mult(x, (float)0.0625);
490 add = FP_Add(add, (float)(log_2 * 4));
492 while(FP_Cmp(x, 1.5) > 0)
495 add = FP_Add(add, log_2);
497 while(FP_Cmp(x, 0.5) < 0)
500 add = FP_Sub(add, log_2);
505 for(n = 1; n < 14; ++n)
507 top = FP_Mult(top, FP_Neg(x));
508 answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
510 return FP_Add(answer, add);
514 float FP_Pow(float x, float y)
516 return FP_Exp(y * FP_Log(x));
520 /********************************************/
521 //These five functions will only be used if the flag "-mno-mul" is enabled
523 unsigned long __mulsi3(unsigned long a, unsigned long b)
525 unsigned long answer = 0;
537 static unsigned long DivideMod(unsigned long a, unsigned long b, int doMod)
539 unsigned long upper=a, lower=0;
542 for(i = 0; i < 32; ++i)
545 if(upper >= a && a && b < 2)
550 a = ((b&2) << 30) | (a >> 1);
559 unsigned long __udivsi3(unsigned long a, unsigned long b)
561 return DivideMod(a, b, 0);
565 long __divsi3(long a, long b)
567 long answer, negate=0;
578 answer = DivideMod(a, b, 0);
585 unsigned long __umodsi3(unsigned long a, unsigned long b)
587 return DivideMod(a, b, 1);
592 /*************** Test *****************/
598 int printf(const char *, ...);
602 double (*func1)(double);
603 float (*func2)(float);
605 {"cos", -2*PI, 2*PI, cos, FP_Cos},
606 {"sin", -2*PI, 2*PI, sin, FP_Sin},
607 {"atan", -3.0, 2.0, atan, FP_Atan},
608 {"log", (float)0.01, (float)4.0, log, FP_Log},
609 {"exp", (float)-5.01, (float)30.0, exp, FP_Exp},
610 {"sqrt", (float)0.01, (float)1000.0, sqrt, FP_Sqrt}
614 void TestMathFull(void)
617 float error1, error2, error3, error4, error5;
623 printf("%10f %10f %10f %10f %10f\n",
624 (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
626 for(b = -(float)2.718281828*100; b < 300; b += (float)23.678)
630 printf("%10f %10f %10f %10f %10f\n",
631 (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
635 for(test = 0; test < 6; ++test)
637 printf("\nTesting %s\n", test_info[test].name);
638 for(a = test_info[test].low;
639 a <= test_info[test].high;
640 a += (test_info[test].high-test_info[test].low)/(float)20.0)
642 b = (float)test_info[test].func1(a);
643 c = test_info[test].func2(a);
645 printf("%s %10f %10f %10f %10f\n", test_info[test].name, a, b, c, d);
650 a = FP_ToFloat((long)6.0);
651 b = FP_ToFloat((long)2.0);
652 printf("%f %f\n", (double)a, (double)b);
654 printf("add %f %f\n", (double)(a + b), (double)c);
656 printf("sub %f %f\n", (double)(a - b), (double)c);
658 printf("mult %f %f\n", (double)(a * b), (double)c);
660 printf("div %f %f\n", (double)(a / b), (double)c);
663 for(a = (float)-13756.54; a < (float)17400.0; a += (float)64.45)
665 for(b = (float)-875.36; b < (float)935.8; b += (float)36.7)
667 error1 = (float)1.0 - (a + b) / FP_Add(a, b);
668 error2 = (float)1.0 - (a * b) / FP_Mult(a, b);
669 error3 = (float)1.0 - (a / b) / FP_Div(a, b);
670 error4 = (float)1.0 - a / FP_ToFloat(FP_ToLong(a));
671 error5 = error1 + error2 + error3 + error4;
675 printf("a=%f b=%f\n", (double)a, (double)b);
676 printf(" a+b=%f %f\n", (double)(a+b), (double)FP_Add(a, b));
677 printf(" a*b=%f %f\n", (double)(a*b), (double)FP_Mult(a, b));
678 printf(" a/b=%f %f\n", (double)(a/b), (double)FP_Div(a, b));
679 printf(" a=%f %ld %f\n", (double)a, FP_ToLong(a),
680 (double)FP_ToFloat((long)a));
681 printf(" %f %f %f %f\n", (double)error1, (double)error2,
682 (double)error3, (double)error4);