sw/app/rocon/math_sqrtll.c

   1 /*******************************************************************
   2   Components for embedded applications builded for
   3   laboratory and medical instruments firmware
   4
   5   math_sqrtll.c - fast routine for 64-bit unsigned square root
   6
   7   Copyright (C) 2001-2014 by Pavel Pisa - originator
   8                           pisa@cmp.felk.cvut.cz
   9             (C) 2001-2014 by PiKRON Ltd. - originator
  10                     http://www.pikron.com
  11
  12   This file can be used and copied according to next
  13   license alternatives
  14    - GPL - GNU Public License
  15    - other license provided by project originators
  16
  17  *******************************************************************/
  18
  19 #define _GNU_SOURCE
  20
  21 #include <string.h>
  22 #include <stdint.h>
  23
  24 /*
  25         y0 := k - T1[31&(k>>15)].       ... y ~ sqrt(x) to 8 bits
  26
  27         y := (y+x/y)/2          ... almost 17 sig. bits
  28         y := (y+x/y)/2          ... almost 35 sig. bits
  29 */
  30
  31 #ifdef __GNUC__
  32 uint32_t __attribute__((unused))
  33 #else
  34 uint32_t
  35 #endif
  36   sqrtll_T1[64]={
  37     2147418619,2146907255,2145906284,2144436844,
  38     2142518597,2140169865,2137407758,2134248281,
  39     2130706432,2126796288,2122531084,2117923281,
  40     2112984627,2107726213,2102158525,2096291488,
  41     2090134507,2083696503,2076985953,2070010911,
  42     2062779045,2055297659,2047573718,2039613867,
  43     2031424454,2023011548,2014380954,2005538230,
  44     1996488704,1987237480,1977789459,1968149347,
  45     1970116061,1983291585,1995774699,2007595298,
  46     2018781187,2029358280,2039350771,2048781297,
  47     2057671066,2066039988,2073906780,2081289063,
  48     2088203452,2094665633,2100690434,2106291892,
  49     2111483306,2116277295,2120685844,2124720346,
  50     2128391644,2131710068,2134685466,2137327238,
  51     2139644360,2141645415,2143338612,2144731814,
  52     2145832551,2146648046,2147185228,2147450751
  53 };
  54
  55 #if 1
  56
  57 unsigned long sqrtll(unsigned long long x)
  58 {
  59   uint32_t y,t1,t2;
  60   uint32_t x0 = x, x1 = x >> 32;
  61
  62  #ifndef __GNUC__
  63
  64   if (x1) {                     /* x[32:63] != 0 */
  65     if (x1 & 0xffff0000){       /* x[48:63] != 0 */
  66       t1 = x1;
  67       if (x1 >= 0xfffffffe)     /* exception bypass */
  68         return 0xffffffff;
  69       t2 = 63;
  70     } else {                    /* x[48:63] == 0 */
  71       t1 = x0 >> 16 | x1 << 16;
  72       t2 = 47;
  73     }
  74   } else {                      /* x[32:64] == 0 */
  75     if (!x0) return 0;
  76     if (x0 & 0xffff0000){       /* x[16:31] != 0 */
  77       t1 = x0;
  78       t2 = 31;
  79     } else {                    /* x[16:31] == 0 */
  80       t1 = x0 << 16;
  81       t2 = 15;
  82     }
  83   }
  84
  85   while (!(t1 & 0x80000000)) {
  86     t1 <<= 1;
  87     t2--;
  88   }
  89  #else /*__GNUC__*/
  90   if (x1) {                     /* x[32:63] != 0 */
  91     if (x1 >= 0xfffffffe)       /* exception bypass */
  92       return 0xffffffff;
  93     t2 = __builtin_clz(x1);
  94     t1 = (x1 << t2) | ((x0 >> (32 - t2)));
  95     t2 = 63 - t2;
  96   } else {
  97     if (!x0)
  98       return 0;
  99     t2 = __builtin_clz(x0);
 100     t1 = x0 << t2;
 101     t2 = 31 - t2;
 102   }
 103  #endif /*__GNUC__*/
 104
 105   if (t2 & 1){
 106     t1 |= 0x80000000;
 107   } else {
 108     t1 &= ~0x80000000;
 109   }
 110   t2 >>= 1;
 111
 112   y = t1;
 113   t1 >>= 32 - 6;
 114   y >>= 1;
 115   y += sqrtll_T1[t1];
 116   if (t2 != 31) {
 117     y >>= 31 - t2 - 1;
 118     y = (y >> 1) + (y & 1);
 119   }
 120  #if 1
 121   if (!x1) {
 122     y= (y + x0 / y + 1) / 2;
 123     y= (y + x0 / y + 1) / 2;
 124   } else if (x1 < 0x00010000) {
 125     uint32_t y0, y1, y2, xt, r;
 126
 127     xt = (x1 << 16) | (x0 >> 16);
 128     y2 = xt / y;
 129     r = xt % y;
 130     xt = (r << 8);
 131     y1 = xt / y;
 132     r = xt % y;
 133     xt = (r << 8) + (x0 & 0xffff);
 134     y0 = (xt / y) + (y1 << 8) + (y2 << 16);
 135     y = (y + y0 + 1) / 2;
 136
 137     xt = (x1 << 16) | (x0 >> 16);
 138     y2 = xt / y;
 139     r = xt % y;
 140     xt = (r << 8);
 141     y1 = xt / y;
 142     r = xt % y;
 143     xt = (r << 8) + (x0 & 0xffff);
 144     y0 = (xt / y) + (y1 << 8) + (y2 << 16);
 145     y = (y + y0 + 1) / 2;
 146
 147   } else
 148  #endif
 149   {
 150     y = (y + x / y + 1) / 2;
 151     y = (y + x / y + 1) / 2;
 152   }
 153   return y;
 154 }
 155
 156 #ifdef __GNUC__
 157
 158 unsigned long sqrtll_approx(unsigned long long x)
 159 {
 160   unsigned int r;
 161   uint32_t x1 = x >> 32;
 162   if (x1 < 0x00010000)
 163     return sqrtll(x);
 164
 165   r = __builtin_clz(x1);
 166   r = (17 - r) / 2;
 167   x1 = sqrtll(x >> (r * 2));
 168
 169   return x1 << r;
 170 }
 171
 172 #endif /*__GNUC__*/
 173
 174 #else
 175
 176 unsigned long sqrtll(unsigned long long x)
 177 {
 178   unsigned long y,t1,t2,t3;
 179   unsigned long x0=x, x1=x>>32;
 180   __asm__ (
 181     "   movel   %2,%3\n"
 182     "   beqs    2f\n"
 183     "   swap    %3\n"
 184     "   tstw    %3\n"
 185     "   beqs    1f\n"
 186     "   movel   %2,%3\n"        /* x[48:64] != 0 */
 187     "   movql   #63,%4\n"
 188     "   cmpl    #0xfffffffe,%3\n"
 189     "   bcss    4f\n"
 190     "   movl    #0xffffffff,%0\n"
 191     "   bras    9f\n"
 192
 193     "1: movel   %1,%4\n"        /* x[48:64] == 0 */
 194     "   swap    %4\n"
 195     "   orw     %4,%3\n"
 196     "   movql   #47,%4\n"
 197     "   bras    4f\n"
 198
 199     "2: movel   %1,%3\n"        /* x[32:64] == 0 */
 200     "   bnes    3f\n"
 201     "   clrl    %0\n"           /* x==0 */
 202     "   bras    9f\n"
 203     "3: swap    %3\n"
 204     "   movql   #15,%4\n"
 205     "   tstw    %3\n"
 206     "   beqs    4f\n"
 207     "   movel   %1,%3\n"        /* x[16:31] != 0 */
 208     "   movql   #31,%4\n"
 209     "4: lsll    #1,%3\n"        /* %3 .. normalized */
 210     "   dbcs    %4,4b\n"        /* %4 .. exponent bit order */
 211     "   lsrl    #1,%4\n"
 212     "   roxrl   #1,%3\n"        /* exp%2 -> %4.31, exp/=2 */
 213     "   movel   %3,%0\n"
 214     "   roll    #6,%3\n"        /* 6 MSB bits of %4 index to table */
 215     "   andw    #63,%3\n"
 216     "   lsrl    #1,%0\n"
 217     "   addl    @(sqrtll_T1,%3:w:4),%0\n"  /* lin. approximated table */
 218     "   movql   #31,%3\n"
 219     "   subl    %4,%3\n"
 220     "   lsrl    %3,%0\n"        /* denormalize result */
 221
 222     "   clrl    %5\n"
 223     "   addxl   %5,%0\n"
 224     "   movel   %1,%3\n"
 225     "   movel   %2,%4\n"
 226     "   divul   %0,%4,%3\n"     /* y := (y+x/y)/2 */
 227     "   bvcs    5f\n"
 228     "   subl    %0,%4\n"        /* divission overflow */
 229     "   divul   %0,%4,%3\n"
 230     "   addl    %3,%0\n"
 231     "   movql   #-1,%3\n"
 232     "5: addl    %3,%0\n"        /* regular path without ov */
 233     "   roxrl   #1,%0\n"
 234     "   addxl   %5,%0\n"
 235     "6: movel   %1,%3\n"
 236     "   movel   %2,%4\n"
 237     "   divul   %0,%4,%3\n"     /* y := (y+x/y)/2 */
 238     "   addl    %3,%0\n"
 239     "   roxrl   #1,%0\n"
 240     "   addxl   %5,%0\n"
 241     "9:\n"
 242
 243     :"=d"(y),"=d"(x0),"=d"(x1),"=d"(t1),"=d"(t2),"=d"(t3)
 244     :"1"(x0),"2"(x1)
 245     :"cc"
 246   );
 247
 248   return y;
 249 }
 250
 251 #endif
 252