l4/pkg/uclibc/lib/contrib/uclibc/libc/sysdeps/linux/sparc/sparcv7/sdiv.S

   1    /* This file is generated from divrem.m4; DO NOT EDIT! */
   2 /*
   3  * Division and remainder, from Appendix E of the Sparc Version 8
   4  * Architecture Manual, with fixes from Gordon Irlam.
   5  */
   6
   7 /*
   8  * Input: dividend and divisor in %o0 and %o1 respectively.
   9  *
   10  * m4 parameters (as expanded for this generated file):
   11  *  function name:  .div
   12  *  operation:      div   (div => %o0 / %o1; rem => %o0 % %o1)
   13  *  signed:         true  (true => signed; false => unsigned)
  14  *
  15  * Algorithm parameters:
  16  *  N           how many bits per iteration we try to get (4)
  17  *  WORDSIZE    total number of bits (32)
  18  *
  19  * Derived constants:
  20  *  TOPBITS     number of bits in the top decade of a number
  21  *
  22  * Important variables:
  23  *  Q           the partial quotient under development (initially 0)
  24  *  R           the remainder so far, initially the dividend
  25  *  ITER        number of main division loop iterations required;
  26  *              equal to ceil(log2(quotient) / N).  Note that this
  27  *              is the log base (2^N) of the quotient.
  28  *  V           the current comparand, initially divisor*2^(ITER*N-1)
  29  *
  30  * Cost:
  31  *  Current estimate for non-large dividend is
  32  *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
  33  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
  34  *  different path, as the upper bits of the quotient must be developed
  35  *  one bit at a time.
  36  */
  37
  38
  39
   40 ENTRY(.div)        /* signed 32-bit divide: quotient of %o0 / %o1 is returned in %o0 */
   41         ! compute sign of result; if neither is negative, no problem
   42         orcc    %o1, %o0, %g0   ! either negative?
   43         bge     2f                      ! no, go do the divide
   44         xor     %o1, %o0, %g3   ! compute sign in any case
   45         tst     %o1
   46         bge     1f
   47         tst     %o0             /* delay slot: cc now reflect %o0 for the bge below */
   48         ! %o1 is definitely negative; %o0 might also be negative
   49         bge     2f                      ! if %o0 not negative...
   50         sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
   51 1:      ! %o0 is negative, %o1 is nonnegative
   52         sub     %g0, %o0, %o0   ! make %o0 nonnegative
   53 2:
   54
   55         ! Ready to divide.  Compute size of quotient; scale comparand.
   56         orcc    %o1, %g0, %o5   /* %o5 = divisor (comparand); Z set if it is zero */
   57         bne     1f              /* nonzero divisor: skip the trap */
   58         mov     %o0, %o3        /* delay slot: %o3 = dividend (running remainder) */
   59
   60                 ! Divide by zero trap.  If it returns, return 0 (about as
   61                 ! wrong as possible, but that is what SunOS does...).
   62                 ta      ST_DIV0
   63                 retl
   64                 clr     %o0     /* delay slot: return value is 0 */
   65
   66 1:
   67         cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
   68         blu     LOC(got_result)         ! (and algorithm fails otherwise)
   69         clr     %o2             /* delay slot: quotient %o2 = 0 */
   70         sethi   %hi(1 << (32 - 4 - 1)), %g1     /* %g1 = 1 << 27 */
   71         cmp     %o3, %g1
   72         blu     LOC(not_really_big)     /* dividend below 1 << 27: use the 4-bits-per-pass path */
   73         clr     %o4             /* delay slot: iteration count %o4 = 0 */
   74
   75         ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
   76         ! as our usual N-at-a-shot divide step will cause overflow and havoc.
   77         ! The number of bits in the result here is N*ITER+SC, where SC <= N.
   78         ! Compute ITER in an unorthodox manner: know we need to shift V into
   79         ! the top decade: so do not even bother to compare to R.
   80         1:
   81                 cmp     %o5, %g1
   82                 bgeu    3f              /* leave the loop once %o5 >= %g1 (unsigned) */
   83                 mov     1, %g2          /* delay slot: single-bit step count %g2 = 1 */
   84                 sll     %o5, 4, %o5
   85                 b       1b
   86                 add     %o4, 1, %o4     /* delay slot: one more 4-bit iteration needed */
   87
   88         ! Now compute %g2.
   89         2:      addcc   %o5, %o5, %o5   /* double %o5; carry set if a bit was shifted out */
   90                 bcc     LOC(not_too_big)
   91                 add     %g2, 1, %g2     /* delay slot: count this doubling in %g2 */
   92
   93                 ! We get here if the %o1 overflowed while shifting.
   94                 ! This means that %o3 has the high-order bit set.
   95                 ! Restore %o5 and subtract from %o3.
   96                 sll     %g1, 4, %g1     ! high order bit
   97                 srl     %o5, 1, %o5             ! rest of %o5
   98                 add     %o5, %g1, %o5
   99                 b       LOC(do_single_div)
  100                 sub     %g2, 1, %g2     /* delay slot: undo the increment done after the addcc */
  101
  102         LOC(not_too_big):
  103         3:      cmp     %o5, %o3
  104                 blu     2b              /* %o5 < %o3 (unsigned): keep doubling */
  105                 nop
  106                 be      LOC(do_single_div)      /* %o5 == %o3; the fall-through path reaches the same label */
  107                 nop
  108         /* NB: these are commented out in the V8-Sparc manual as well */
  109         /* (I do not understand this) */
  110         ! %o5 > %o3: went too far: back up 1 step
  111         !       srl     %o5, 1, %o5
  112         !       dec     %g2
  113         ! do single-bit divide steps
  114         !
  115         ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
  116         ! first divide step without thinking.  BUT, the others are conditional,
  117         ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
  118         ! order bit set in the first step, just falling into the regular
  119         ! division loop will mess up the first time around.
  120         ! So we unroll slightly...
  121         LOC(do_single_div):
  122                 subcc   %g2, 1, %g2
  123                 bl      LOC(end_regular_divide) /* %g2 went negative: no single-bit steps to do */
  124                 nop
  125                 sub     %o3, %o5, %o3   /* first step is unconditional */
  126                 mov     1, %o2          /* quotient so far = 1 */
  127                 b       LOC(end_single_divloop)
  128                 nop
  129         LOC(single_divloop):
  130                 sll     %o2, 1, %o2     /* sll/srl leave cc alone, so ... */
  131                 bl      1f              /* ... this tests the "tst %o3" from the loop branch delay slot */
  132                 srl     %o5, 1, %o5     /* delay slot: halve the comparand */
  133                 ! %o3 >= 0
  134                 sub     %o3, %o5, %o3
  135                 b       2f
  136                 add     %o2, 1, %o2
  137         1:      ! %o3 < 0
  138                 add     %o3, %o5, %o3
  139                 sub     %o2, 1, %o2
  140         2:
  141         LOC(end_single_divloop):
  142                 subcc   %g2, 1, %g2
  143                 bge     LOC(single_divloop)
  144                 tst     %o3             /* delay slot: set cc on %o3 for the next bl */
  145                 b,a     LOC(end_regular_divide) /* annulled: the following instruction is not executed */
  146
  147 LOC(not_really_big):
  148 1:
  149         sll     %o5, 4, %o5
  150         cmp     %o5, %o3
  151         bleu    1b
  152         addcc   %o4, 1, %o4     /* delay slot: runs on every pass; its cc feed the "be" below */
  153         be      LOC(got_result) /* taken only if the addcc result in %o4 was 0 */
  154         sub     %o4, 1, %o4     /* delay slot: take back the last increment */
  155
  156         tst     %o3     ! set up for initial iteration
  157 LOC(divloop):
  158         sll     %o2, 4, %o2     /* make room for the 4 quotient bits of this pass */
  159                 ! depth 1, accumulated bits 0
  160         bl      LOC(1.16)
  161         srl     %o5,1,%o5       /* delay slot (same at every level): halve the comparand */
  162         ! remainder is positive
  163         subcc   %o3,%o5,%o3
  164                         ! depth 2, accumulated bits 1
  165         bl      LOC(2.17)
  166         srl     %o5,1,%o5
  167         ! remainder is positive
  168         subcc   %o3,%o5,%o3
  169                         ! depth 3, accumulated bits 3
  170         bl      LOC(3.19)
  171         srl     %o5,1,%o5
  172         ! remainder is positive
  173         subcc   %o3,%o5,%o3
  174                         ! depth 4, accumulated bits 7
  175         bl      LOC(4.23)
  176         srl     %o5,1,%o5
  177         ! remainder is positive
  178         subcc   %o3,%o5,%o3
  179                 b       9f
  180                 add     %o2, (7*2+1), %o2       /* delay slot: each leaf adds its path value to %o2 */
  181
  182 LOC(4.23):
  183         ! remainder is negative
  184         addcc   %o3,%o5,%o3
  185                 b       9f
  186                 add     %o2, (7*2-1), %o2
  187
  188
  189 LOC(3.19):
  190         ! remainder is negative
  191         addcc   %o3,%o5,%o3
  192                         ! depth 4, accumulated bits 5
  193         bl      LOC(4.21)
  194         srl     %o5,1,%o5
  195         ! remainder is positive
  196         subcc   %o3,%o5,%o3
  197                 b       9f
  198                 add     %o2, (5*2+1), %o2
  199
  200 LOC(4.21):
  201         ! remainder is negative
  202         addcc   %o3,%o5,%o3
  203                 b       9f
  204                 add     %o2, (5*2-1), %o2
  205
  206
  207
  208 LOC(2.17):
  209         ! remainder is negative
  210         addcc   %o3,%o5,%o3
  211                         ! depth 3, accumulated bits 1
  212         bl      LOC(3.17)
  213         srl     %o5,1,%o5
  214         ! remainder is positive
  215         subcc   %o3,%o5,%o3
  216                         ! depth 4, accumulated bits 3
  217         bl      LOC(4.19)
  218         srl     %o5,1,%o5
  219         ! remainder is positive
  220         subcc   %o3,%o5,%o3
  221                 b       9f
  222                 add     %o2, (3*2+1), %o2
  223
  224 LOC(4.19):
  225         ! remainder is negative
  226         addcc   %o3,%o5,%o3
  227                 b       9f
  228                 add     %o2, (3*2-1), %o2
  229
  230
  231 LOC(3.17):
  232         ! remainder is negative
  233         addcc   %o3,%o5,%o3
  234                         ! depth 4, accumulated bits 1
  235         bl      LOC(4.17)
  236         srl     %o5,1,%o5
  237         ! remainder is positive
  238         subcc   %o3,%o5,%o3
  239                 b       9f
  240                 add     %o2, (1*2+1), %o2
  241
  242 LOC(4.17):
  243         ! remainder is negative
  244         addcc   %o3,%o5,%o3
  245                 b       9f
  246                 add     %o2, (1*2-1), %o2
  247
  248
  249
  250
  251 LOC(1.16):
  252         ! remainder is negative
  253         addcc   %o3,%o5,%o3
  254                         ! depth 2, accumulated bits -1
  255         bl      LOC(2.15)
  256         srl     %o5,1,%o5
  257         ! remainder is positive
  258         subcc   %o3,%o5,%o3
  259                         ! depth 3, accumulated bits -1
  260         bl      LOC(3.15)
  261         srl     %o5,1,%o5
  262         ! remainder is positive
  263         subcc   %o3,%o5,%o3
  264                         ! depth 4, accumulated bits -1
  265         bl      LOC(4.15)
  266         srl     %o5,1,%o5
  267         ! remainder is positive
  268         subcc   %o3,%o5,%o3
  269                 b       9f
  270                 add     %o2, (-1*2+1), %o2
  271
  272 LOC(4.15):
  273         ! remainder is negative
  274         addcc   %o3,%o5,%o3
  275                 b       9f
  276                 add     %o2, (-1*2-1), %o2
  277
  278
  279 LOC(3.15):
  280         ! remainder is negative
  281         addcc   %o3,%o5,%o3
  282                         ! depth 4, accumulated bits -3
  283         bl      LOC(4.13)
  284         srl     %o5,1,%o5
  285         ! remainder is positive
  286         subcc   %o3,%o5,%o3
  287                 b       9f
  288                 add     %o2, (-3*2+1), %o2
  289
  290 LOC(4.13):
  291         ! remainder is negative
  292         addcc   %o3,%o5,%o3
  293                 b       9f
  294                 add     %o2, (-3*2-1), %o2
  295
  296
  297
  298 LOC(2.15):
  299         ! remainder is negative
  300         addcc   %o3,%o5,%o3
  301                         ! depth 3, accumulated bits -3
  302         bl      LOC(3.13)
  303         srl     %o5,1,%o5
  304         ! remainder is positive
  305         subcc   %o3,%o5,%o3
  306                         ! depth 4, accumulated bits -5
  307         bl      LOC(4.11)
  308         srl     %o5,1,%o5
  309         ! remainder is positive
  310         subcc   %o3,%o5,%o3
  311                 b       9f
  312                 add     %o2, (-5*2+1), %o2
  313
  314 LOC(4.11):
  315         ! remainder is negative
  316         addcc   %o3,%o5,%o3
  317                 b       9f
  318                 add     %o2, (-5*2-1), %o2
  319
  320
  321 LOC(3.13):
  322         ! remainder is negative
  323         addcc   %o3,%o5,%o3
  324                         ! depth 4, accumulated bits -7
  325         bl      LOC(4.9)
  326         srl     %o5,1,%o5
  327         ! remainder is positive
  328         subcc   %o3,%o5,%o3
  329                 b       9f
  330                 add     %o2, (-7*2+1), %o2
  331
  332 LOC(4.9):
  333         ! remainder is negative
  334         addcc   %o3,%o5,%o3
  335                 b       9f
  336                 add     %o2, (-7*2-1), %o2
  337
  338
  339
  340
  341         9:
  342 LOC(end_regular_divide):
  343         subcc   %o4, 1, %o4     /* one 4-bit pass consumed; loop while %o4 >= 0 */
  344         bge     LOC(divloop)
  345         tst     %o3             /* delay slot: set cc on the remainder %o3 */
  346         bl,a    LOC(got_result) /* annul bit: the sub below runs only when %o3 < 0 */
  347         ! non-restoring fixup here (one instruction only!)
  348         sub     %o2, 1, %o2
  349
  350
  351 LOC(got_result):
  352         ! check to see if answer should be < 0
  353         tst     %g3
  354         bl,a    1f
  355         sub %g0, %o2, %o2       /* annulled delay slot: negate %o2 only when %g3 < 0 */
  356 1:
  357         retl
  358         mov %o2, %o0            /* delay slot: move the quotient into the return register */
  359
  360 END(.div)
 360 END(.div)