arch/parisc/lib/milli/div_const.S

   1 /* 32 and 64-bit millicode, original author Hewlett-Packard
   2    adapted for gcc by Paul Bame <bame@debian.org>
   3    and Alan Modra <alan@linuxcare.com.au>.
   4
   5    Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
   6
   7    This file is part of GCC and is released under the terms
   8    of the GNU General Public License as published by the Free Software
   9    Foundation; either version 2, or (at your option) any later version.
  10    See the file COPYING in the top-level GCC source directory for a copy
  11    of the license.  */
  12
  13 #include "milli.h"
  14
  15 #ifdef L_div_const
  16 /* ROUTINE:     $$divI_2
  17    .            $$divI_3        $$divU_3
  18    .            $$divI_4
  19    .            $$divI_5        $$divU_5
  20    .            $$divI_6        $$divU_6
  21    .            $$divI_7        $$divU_7
  22    .            $$divI_8
  23    .            $$divI_9        $$divU_9
  24    .            $$divI_10       $$divU_10
  25    .
  26    .            $$divI_12       $$divU_12
  27    .
  28    .            $$divI_14       $$divU_14
  29    .            $$divI_15       $$divU_15
  30    .            $$divI_16
  31    .            $$divI_17       $$divU_17
  32    .
  33    .    Divide by selected constants for single precision binary integers.
  34
  35    INPUT REGISTERS:
  36    .    arg0 == dividend
  37    .    mrp  == return pc
  38    .    sr0  == return space when called externally
  39
  40    OUTPUT REGISTERS:
  41    .    arg0 =  undefined
  42    .    arg1 =  undefined
  43    .    ret1 =  quotient
  44
  45    OTHER REGISTERS AFFECTED:
  46    .    r1   =  undefined
  47
  48    SIDE EFFECTS:
  49    .    Causes a trap under the following conditions: NONE
  50    .    Changes memory at the following places:  NONE
  51
  52    PERMISSIBLE CONTEXT:
  53    .    Unwindable.
  54    .    Does not create a stack frame.
  55    .    Suitable for internal or external millicode.
  56    .    Assumes the special millicode register conventions.
  57
  58    DISCUSSION:
  59    .    Calls other millicode routines using mrp:  NONE
  60    .    Calls other millicode routines:  NONE  */
  61
  62
  63 /* TRUNCATED DIVISION BY SMALL INTEGERS
  64
  65    We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
  66    (with y fixed).
  67
  68    Let a = floor(z/y), for some choice of z.  Note that z will be
  69    chosen so that division by z is cheap.
  70
  71    Let r be the remainder(z/y).  In other words, r = z - ay.
  72
  73    Now, our method is to choose a value for b such that
  74
  75    q'(x) = floor((ax+b)/z)
  76
  77    is equal to q(x) over as large a range of x as possible.  If the
  78    two are equal over a sufficiently large range, and if it is easy to
  79    form the product (ax), and it is easy to divide by z, then we can
  80    perform the division much faster than the general division algorithm.
  81
  82    So, we want the following to be true:
  83
  84    .    For x in the following range:
  85    .
  86    .        ky <= x < (k+1)y
  87    .
  88    .    implies that
  89    .
  90    .        k <= (ax+b)/z < (k+1)
  91
  92    We want to determine b such that this is true for all k in the
  93    range {0..K} for some maximum K.
  94
  95    Since (ax+b) is an increasing function of x, we can take each
  96    bound separately to determine the "best" value for b.
  97
  98    (ax+b)/z < (k+1)            implies
  99
 100    a((k+1)y-1)+b < (k+1)z      implies
 101
 102    b < a + (k+1)(z-ay)         implies
 103
 104    b < a + (k+1)r
 105
 106    This needs to be true for all k in the range {0..K}.  In
 107    particular, it is true for k = 0 and this leads to a maximum
 108    acceptable value for b.
 109
 110    b < a+r   or   b <= a+r-1
 111
 112    Taking the other bound, we have
 113
 114    k <= (ax+b)/z               implies
 115
 116    k <= (aky+b)/z              implies
 117
 118    k(z-ay) <= b                implies
 119
 120    kr <= b
 121
 122    Clearly, the largest range for k will be achieved by maximizing b,
 123    when r is not zero.  When r is zero, then the simplest choice for b
 124    is 0.  When r is not 0, set
 125
 126    .    b = a+r-1
 127
 128    Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
 129    for all x in the range:
 130
 131    .    0 <= x < (K+1)y
 132
 133    We need to determine what K is.  Of our two bounds,
 134
 135    .    b < a+(k+1)r    is satisfied for all k >= 0, by construction.
 136
 137    The other bound is
 138
 139    .    kr <= b
 140
 141    This is always true if r = 0.  If r is not 0 (the usual case), then
 142    K = floor((a+r-1)/r), is the maximum value for k.
 143
 144    Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
 145    answer for q(x) = floor(x/y) when x is in the range
 146
 147    [0,(K+1)y-1]        K = floor((a+r-1)/r)
 148
 149    To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
 150    the formula for q'(x) yields the correct value of q(x) for all x
 151    representable by a single word in HPPA.
 152
 153    We are also constrained in that computing the product (ax), adding
 154    b, and dividing by z must all be done quickly, otherwise we will be
 155    better off going through the general algorithm using the DS
 156    instruction, which uses approximately 70 cycles.
 157
 158    For each y, there is a choice of z which satisfies the constraints
 159    for (K+1)y >= 2**32.  We may not, however, be able to satisfy the
 160    timing constraints for arbitrary y.  It seems that z being equal to
 161    a power of 2 or a power of 2 minus 1 is as good as we can do, since
 162    it minimizes the time to do division by z.  We want the choice of z
 163    to also result in a value for (a) that minimizes the computation of
 164    the product (ax).  This is best achieved if (a) has a regular bit
 165    pattern (so the multiplication can be done with shifts and adds).
 166    The value of (a) also needs to be less than 2**32 so the product is
 167    always guaranteed to fit in 2 words.
 168
 169    In actual practice, the following should be done:
 170
 171    1) For negative x, you should take the absolute value and remember
 172    .  the fact so that the result can be negated.  This obviously does
 173    .  not apply in the unsigned case.
 174    2) For even y, you should factor out the power of 2 that divides y
 175    .  and divide x by it.  You can then proceed by dividing by the
 176    .  odd factor of y.
 177
 178    Here is a table of some odd values of y, and corresponding choices
 179    for z which are "good".
 180
 181     y     z       r      a (hex)     max x (hex)
 182
 183     3   2**32     1     55555555      100000001
 184     5   2**32     1     33333333      100000003
 185     7  2**24-1    0       249249     (infinite)
 186     9  2**24-1    0       1c71c7     (infinite)
 187    11  2**20-1    0        1745d     (infinite)
 188    13  2**24-1    0       13b13b     (infinite)
 189    15   2**32     1     11111111      10000000d
 190    17   2**32     1      f0f0f0f      10000000f
 191
 192    If r is 1, then b = a+r-1 = a.  This simplifies the computation
 193    of (ax+b), since you can compute (x+1)(a) instead.  If r is 0,
 194    then b = 0 is ok to use which simplifies (ax+b).
 195
 196    The bit patterns for 55555555, 33333333, and 11111111 are obviously
 197    very regular.  The bit patterns for the other values of a above are:
 198
 199     y      (hex)          (binary)
 200
 201     7     249249  001001001001001001001001  << regular >>
 202     9     1c71c7  000111000111000111000111  << regular >>
 203    11      1745d  000000010111010001011101  << irregular >>
 204    13     13b13b  000100111011000100111011  << irregular >>
 205
 206    The bit patterns for (a) corresponding to (y) of 11 and 13 may be
 207    too irregular to warrant using this method.
 208
 209    When z is a power of 2 minus 1, then the division by z is slightly
 210    more complicated, involving an iterative solution.
 211
 212    The code presented here solves division by 1 through 17, except for
 213    11 and 13. There are algorithms for both signed and unsigned
 214    quantities given.
 215
 216    TIMINGS (cycles)
 217
 218    divisor  positive  negative  unsigned
 219
 220    .   1        2          2         2
 221    .   2        4          4         2
 222    .   3       19         21        19
 223    .   4        4          4         2
 224    .   5       18         22        19
 225    .   6       19         22        19
 226    .   8        4          4         2
 227    .  10       18         19        17
 228    .  12       18         20        18
 229    .  15       16         18        16
 230    .  16        4          4         2
 231    .  17       16         18        16
 232
 233    Now, the algorithm for 7, 9, and 14 is an iterative one.  That is,
 234    a loop body is executed until the tentative quotient is 0.  The
 235    number of times the loop body is executed varies depending on the
 236    dividend, but is never more than two times.  If the dividend is
 237    less than the divisor, then the loop body is not executed at all.
 238    Each iteration adds 4 cycles to the timings.
 239
 240    divisor  positive  negative  unsigned
 241
 242    .   7       19+4n     20+4n     20+4n    n = number of iterations
 243    .   9       21+4n     22+4n     21+4n
 244    .  14       21+4n     22+4n     20+4n
 245
 246    To give an idea of how the number of iterations varies, here is a
 247    table of dividend versus number of iterations when dividing by 7.
 248
 249    smallest      largest       required
 250    dividend     dividend      iterations
 251
 252    .    0            6              0
 253    .    7        0x6ffffff          1
 254    0x1000006    0xffffffff          2
 255
 256    There is some overlap in the range of numbers requiring 1 and 2
 257    iterations.  */
 258
     /* Register aliases used by the routines below.  RDEFINE is not defined
        in this file (presumably it comes from milli.h).  Judging by how the
        names are used here:
        .    x2 (arg0) = dividend on entry, then the low word of the
        .                running 64-bit product <x1,x2>
        .    x1 (ret1) = high word of that product; holds the quotient
        .                on return
        .    t1 (arg1) and t2 (r1) = scratch  */
 259 RDEFINE(t2,r1)
 260 RDEFINE(x2,arg0)        /*  r26 */
 261 RDEFINE(t1,arg1)        /*  r25 */
 262 RDEFINE(x1,ret1)        /*  r29 */
 263
 264         SUBSPA_MILLI_DIV
 265         ATTR_MILLI
 266
     /* One .proc/.entry region, closed by the .exit/.procend at the end of
        this file, spans every divide-by-constant entry point below.
        SUBSPA_MILLI_DIV and ATTR_MILLI are not defined in this file;
        presumably they are milli.h macros that select the millicode
        subspace/section and its attributes -- confirm against milli.h.  */
 267         .proc
 268         .callinfo       millicode
 269         .entry
 270 /* NONE of these routines require a stack frame
 271    ALL of these routines are unwindable from millicode  */
 272
 273 GSYM($$divide_by_constant)
 274         .export $$divide_by_constant,millicode
 275 /*  Provides a "nice" label for the code covered by the unwind descriptor
 276     for things like gprof.  */
     /*  No instruction separates this label from $$divI_2, so the two
         symbols presumably resolve to the same address.  */
 277
 278 /* DIVISION BY 2 (shift by 1) */
     /* $$divI_2: signed divide of arg0 by 2, quotient in ret1.
        A negative dividend first gets (divisor - 1) = 1 added, so that the
        arithmetic right shift truncates toward zero instead of rounding
        toward minus infinity.  arg0 is modified.  */
 279 GSYM($$divI_2)
 280         .export         $$divI_2,millicode
 281         comclr,>=       arg0,0,0        /* arg0 >= 0: nullify the addi */
 282         addi            1,arg0,arg0     /* negative dividend only: add the bias */
 283         MILLIRET
 284         extrs           arg0,30,31,ret1 /* placed after the return (delay slot): ret1 = arg0 >> 1, sign-extended */
 285
 286
 287 /* DIVISION BY 4 (shift by 2) */
     /* $$divI_4: signed divide of arg0 by 4, quotient in ret1.
        A negative dividend first gets (divisor - 1) = 3 added, so that the
        arithmetic right shift truncates toward zero instead of rounding
        toward minus infinity.  arg0 is modified.  */
 288 GSYM($$divI_4)
 289         .export         $$divI_4,millicode
 290         comclr,>=       arg0,0,0        /* arg0 >= 0: nullify the addi */
 291         addi            3,arg0,arg0     /* negative dividend only: add the bias */
 292         MILLIRET
 293         extrs           arg0,29,30,ret1 /* placed after the return (delay slot): ret1 = arg0 >> 2, sign-extended */
 294
 295
 296 /* DIVISION BY 8 (shift by 3) */
     /* $$divI_8: signed divide of arg0 by 8, quotient in ret1.
        A negative dividend first gets (divisor - 1) = 7 added, so that the
        arithmetic right shift truncates toward zero instead of rounding
        toward minus infinity.  arg0 is modified.  */
 297 GSYM($$divI_8)
 298         .export         $$divI_8,millicode
 299         comclr,>=       arg0,0,0        /* arg0 >= 0: nullify the addi */
 300         addi            7,arg0,arg0     /* negative dividend only: add the bias */
 301         MILLIRET
 302         extrs           arg0,28,29,ret1 /* placed after the return (delay slot): ret1 = arg0 >> 3, sign-extended */
 303
 304 /* DIVISION BY 16 (shift by 4) */
     /* $$divI_16: signed divide of arg0 by 16, quotient in ret1.
        A negative dividend first gets (divisor - 1) = 15 added, so that the
        arithmetic right shift truncates toward zero instead of rounding
        toward minus infinity.  arg0 is modified.  */
 305 GSYM($$divI_16)
 306         .export         $$divI_16,millicode
 307         comclr,>=       arg0,0,0        /* arg0 >= 0: nullify the addi */
 308         addi            15,arg0,arg0    /* negative dividend only: add the bias */
 309         MILLIRET
 310         extrs           arg0,27,28,ret1 /* placed after the return (delay slot): ret1 = arg0 >> 4, sign-extended */
 311
 312 /****************************************************************************
 313 *
 314 *       DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
 315 *
 316 *       includes 3,5,15,17 and also 6,10,12
 317 *
 318 ****************************************************************************/
 319
 320 /* DIVISION BY 3 (use z = 2**32; a = 55555555) */
     /* $$divI_3: signed divide of x2 by 3, quotient in x1 (ret1).
        Each path builds the 64-bit value <x1,x2> = 5*(|x|+1) and then
        joins the shared multiply chain at pos (x >= 0) or neg (x < 0).
        That chain multiplies by 0x11, 0x101 and 0x10001, so the total
        factor is 0x55555555, and leaves the quotient in x1; the neg
        chain also negates it.  */
 321
 322 GSYM($$divI_3)
 323         .export         $$divI_3,millicode
 324         comb,<,N        x2,0,LREF(neg3) /* x < 0: branch; ,N nullifies the addi when taken */
 325
 326         addi            1,x2,x2         /* this cannot overflow */
 327         extru           x2,1,2,x1       /* multiply by 5 to get started: x1 = top 2 bits of x2 */
 328         sh2add          x2,x2,x2        /* x2 = 4*x2 + x2 (low word) */
 329         b               LREF(pos)
 330         addc            x1,0,x1         /* delay slot: add the carry from the sh2add */
 331
 332 LSYM(neg3)
 333         subi            1,x2,x2         /* this cannot overflow */
 334         extru           x2,1,2,x1       /* multiply by 5 to get started */
 335         sh2add          x2,x2,x2        /* x2 = 4*x2 + x2 (low word) */
 336         b               LREF(neg)
 337         addc            x1,0,x1         /* delay slot: add the carry from the sh2add */
 338
     /* $$divU_3: unsigned divide of x2 by 3, quotient in x1 (ret1).
        x+1 may need 33 bits, so the carry from the increment is kept in
        x1 and included when forming <x1,x2> = 5*(x+1); the shared chain
        at pos then supplies the remaining factors of 0x55555555.  */
 339 GSYM($$divU_3)
 340         .export         $$divU_3,millicode
 341         addi            1,x2,x2         /* this CAN overflow */
 342         addc            0,0,x1          /* x1 = carry out of the increment */
 343         shd             x1,x2,30,t1     /* multiply by 5 to get started */
 344         sh2add          x2,x2,x2        /* x2 = 4*x2 + x2 (low word) */
 345         b               LREF(pos)
 346         addc            x1,t1,x1        /* delay slot: x1 = x1 + t1 + carry */
 347
 348 /* DIVISION BY 5 (use z = 2**32; a = 33333333) */
     /* $$divI_5: signed divide of x2 by 5, quotient in x1 (ret1).
        Each path builds <x1,x2> = 3*(|x|+1) and joins the shared multiply
        chain at pos (x >= 0) or neg (x < 0), which supplies the remaining
        factors 0x11 * 0x101 * 0x10001 of 0x33333333.  */
 349
 350 GSYM($$divI_5)
 351         .export         $$divI_5,millicode
 352         comb,<,N        x2,0,LREF(neg5) /* x < 0: branch; ,N nullifies the addi when taken */
 353
 354         addi            3,x2,t1         /* this cannot overflow */
 355         sh1add          x2,t1,x2        /* multiply by 3 to get started */
 356         b               LREF(pos)
 357         addc            0,0,x1          /* delay slot: x1 = carry from the sh1add */
 358
 359 LSYM(neg5)
 360         sub             0,x2,x2         /* negate x2                    */
 361         addi            1,x2,x2         /* this cannot overflow */
 362         shd             0,x2,31,x1      /* get top bit (can be 1)       */
 363         sh1add          x2,x2,x2        /* multiply by 3 to get started */
 364         b               LREF(neg)
 365         addc            x1,0,x1         /* delay slot: add the carry from the sh1add */
 366
     /* $$divU_5: unsigned divide of x2 by 5, quotient in x1 (ret1).
        x+1 may need 33 bits, so the carry from the increment is kept in
        x1 and included when forming <x1,x2> = 3*(x+1); the shared chain
        at pos then supplies the remaining factors of 0x33333333.  */
 367 GSYM($$divU_5)
 368         .export         $$divU_5,millicode
 369         addi            1,x2,x2         /* this CAN overflow */
 370         addc            0,0,x1          /* x1 = carry out of the increment */
 371         shd             x1,x2,31,t1     /* multiply by 3 to get started */
 372         sh1add          x2,x2,x2        /* x2 = 2*x2 + x2 (low word) */
 373         b               LREF(pos)
 374         addc            t1,x1,x1        /* delay slot: x1 = t1 + x1 + carry */
 375
 376 /* DIVISION BY  6 (shift to divide by 2 then divide by 3) */
     /* $$divI_6: signed divide of x2 by 6, quotient in x1 (ret1).
        Halves |x|, builds <x1,x2> = 5*(|x|/2 + 1), and joins the
        divide-by-3 multiply chain at pos (x >= 0) or neg (x < 0).  */
 377 GSYM($$divI_6)
 378         .export         $$divI_6,millicode
 379         comb,<,N        x2,0,LREF(neg6) /* x < 0: branch; ,N nullifies the extru when taken */
 380         extru           x2,30,31,x2     /* divide by 2                  */
 381         addi            5,x2,t1         /* compute 5*(x2+1) = 5*x2+5    */
 382         sh2add          x2,t1,x2        /* multiply by 5 to get started */
 383         b               LREF(pos)
 384         addc            0,0,x1          /* delay slot: x1 = carry from the sh2add */
 385
 386 LSYM(neg6)
 387         subi            2,x2,x2         /* negate, divide by 2, and add 1 */
 388                                         /* negation and adding 1 are done */
 389                                         /* at the same time by the SUBI   */
 390         extru           x2,30,31,x2     /* (|x|+2) >> 1 = |x|/2 + 1 */
 391         shd             0,x2,30,x1      /* x1 = top 2 bits of x2 */
 392         sh2add          x2,x2,x2        /* multiply by 5 to get started */
 393         b               LREF(neg)
 394         addc            x1,0,x1         /* delay slot: add the carry from the sh2add */
 395
     /* $$divU_6: unsigned divide of x2 by 6, quotient in x1 (ret1).
        Halves x, builds <x1,x2> = 5*(x/2 + 1), and joins the divide-by-3
        multiply chain at pos.  */
 396 GSYM($$divU_6)
 397         .export         $$divU_6,millicode
 398         extru           x2,30,31,x2     /* divide by 2 */
 399         addi            1,x2,x2         /* cannot carry */
 400         shd             0,x2,30,x1      /* multiply by 5 to get started */
 401         sh2add          x2,x2,x2        /* x2 = 4*x2 + x2 (low word) */
 402         b               LREF(pos)
 403         addc            x1,0,x1         /* delay slot: add the carry from the sh2add */
 404
 405 /* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
     /* $$divU_10: unsigned divide of x2 by 10, quotient in x1 (ret1).
        Halves x, builds <x1,x2> = 3*(x/2 + 1) and falls through into pos.
        pos is the shared tail for the non-negated results of the
        3/5/6/10/12/15 routines: it multiplies the 64-bit value <x1,x2> by
        0x11, 0x101 and 0x10001 in turn (each step adds a left-shifted copy
        of the value to itself) and returns with the high word in x1.
        pos_for_17 enters after the 0x11 step; the divide-by-17 routines
        use it because their multiplier is 0xf * 0x101 * 0x10001.  */
 406 GSYM($$divU_10)
 407         .export         $$divU_10,millicode
 408         extru           x2,30,31,x2     /* divide by 2 */
 409         addi            3,x2,t1         /* compute 3*(x2+1) = (3*x2)+3  */
 410         sh1add          x2,t1,x2        /* multiply by 3 to get started */
 411         addc            0,0,x1          /* x1 = carry from the sh1add */
 412 LSYM(pos)
     /* NOTE: $$divI_15 executes its own copy of the next instruction in a
        delay slot and then branches to pos+4, so this instruction must
        stay first in pos and stay identical to that copy.  */
 413         shd             x1,x2,28,t1     /* multiply by 0x11 */
 414         shd             x2,0,28,t2      /* t2 = x2 << 4 */
 415         add             x2,t2,x2
 416         addc            x1,t1,x1        /* <x1,x2> += <x1,x2> << 4 */
 417 LSYM(pos_for_17)
 418         shd             x1,x2,24,t1     /* multiply by 0x101 */
 419         shd             x2,0,24,t2      /* t2 = x2 << 8 */
 420         add             x2,t2,x2
 421         addc            x1,t1,x1        /* <x1,x2> += <x1,x2> << 8 */
 422
 423         shd             x1,x2,16,t1     /* multiply by 0x10001 */
 424         shd             x2,0,16,t2      /* t2 = x2 << 16 */
 425         add             x2,t2,x2
 426         MILLIRET
 427         addc            x1,t1,x1        /* placed after the return (delay slot): x1 = final high word = quotient */
 428
     /* $$divI_10: signed divide of x2 by 10, quotient in x1 (ret1).
        x >= 0: halve, add 1, multiply by 3, then join pos.
        x <  0: falls into neg10 and then neg.
        neg is the shared tail for the negative-dividend paths of the
        3/5/6/10/12/15 routines: the same 0x11, 0x101, 0x10001 multiply
        chain as pos, but the quotient in x1 is negated on return.
        neg_for_17 enters after the 0x11 step (used by neg17).  */
 429 GSYM($$divI_10)
 430         .export         $$divI_10,millicode
 431         comb,<          x2,0,LREF(neg10)
 432         copy            0,x1            /* delay slot, no nullify: x1 = 0 on both paths */
 433         extru           x2,30,31,x2     /* divide by 2 */
 434         addib,TR        1,x2,LREF(pos)  /* add 1 (cannot overflow)     */
 435         sh1add          x2,x2,x2        /* multiply by 3 to get started */
 436
 437 LSYM(neg10)
 438         subi            2,x2,x2         /* negate, divide by 2, and add 1 */
 439                                         /* negation and adding 1 are done */
 440                                         /* at the same time by the SUBI   */
 441         extru           x2,30,31,x2     /* (|x|+2) >> 1 = |x|/2 + 1 */
 442         sh1add          x2,x2,x2        /* multiply by 3 to get started */
 443 LSYM(neg)
 444         shd             x1,x2,28,t1     /* multiply by 0x11 */
 445         shd             x2,0,28,t2      /* t2 = x2 << 4 */
 446         add             x2,t2,x2
 447         addc            x1,t1,x1        /* <x1,x2> += <x1,x2> << 4 */
 448 LSYM(neg_for_17)
 449         shd             x1,x2,24,t1     /* multiply by 0x101 */
 450         shd             x2,0,24,t2      /* t2 = x2 << 8 */
 451         add             x2,t2,x2
 452         addc            x1,t1,x1        /* <x1,x2> += <x1,x2> << 8 */
 453
 454         shd             x1,x2,16,t1     /* multiply by 0x10001 */
 455         shd             x2,0,16,t2      /* t2 = x2 << 16 */
 456         add             x2,t2,x2
 457         addc            x1,t1,x1        /* x1 = quotient of |x| */
 458         MILLIRET
 459         sub             0,x1,x1         /* placed after the return (delay slot): negate the quotient */
 460
 461 /* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
     /* $$divI_12: signed divide of x2 by 12, quotient in x1 (ret1).
        Divides |x| by 4, builds <x1,x2> = 5*(|x|/4 + 1) with x1 = 0, and
        joins the divide-by-3 multiply chain at pos (x >= 0) or
        neg (x < 0).  */
 462 GSYM($$divI_12)
 463         .export         $$divI_12,millicode
 464         comb,<          x2,0,LREF(neg12)
 465         copy            0,x1            /* delay slot, no nullify: x1 = 0 on both paths */
 466         extru           x2,29,30,x2     /* divide by 4                  */
 467         addib,tr        1,x2,LREF(pos)  /* add 1, always branch; with the delay slot: 5*(x2+1) */
 468         sh2add          x2,x2,x2        /* delay slot: multiply by 5 to get started */
 469
 470 LSYM(neg12)
 471         subi            4,x2,x2         /* negate, divide by 4, and add 1 */
 472                                         /* negation and adding 1 are done */
 473                                         /* at the same time by the SUBI   */
 474         extru           x2,29,30,x2     /* (|x|+4) >> 2 = |x|/4 + 1 */
 475         b               LREF(neg)
 476         sh2add          x2,x2,x2        /* multiply by 5 to get started */
 477
     /* $$divU_12: unsigned divide of x2 by 12, quotient in x1 (ret1).
        Divides x by 4, builds <x1,x2> = 5*(x/4 + 1), and joins the
        divide-by-3 multiply chain at pos.  */
 478 GSYM($$divU_12)
 479         .export         $$divU_12,millicode
 480         extru           x2,29,30,x2     /* divide by 4   */
 481         addi            5,x2,t1         /* cannot carry */
 482         sh2add          x2,t1,x2        /* multiply by 5 to get started */
 483         b               LREF(pos)
 484         addc            0,0,x1          /* delay slot: x1 = carry from the sh2add */
 485
 486 /* DIVISION BY 15 (use z = 2**32; a = 11111111) */
     /* $$divI_15: signed divide of x2 by 15, quotient in x1 (ret1).
        The multiplier 0x11111111 is exactly the 0x11 * 0x101 * 0x10001
        chain, so no initial small multiply is needed: the routine only
        forms <x1,x2> = <0,|x|+1> and enters pos or neg.  */
 487 GSYM($$divI_15)
 488         .export         $$divI_15,millicode
 489         comb,<          x2,0,LREF(neg15)
 490         copy            0,x1            /* delay slot, no nullify: x1 = 0 on both paths */
 491         addib,tr        1,x2,LREF(pos)+4 /* x2 += 1; always branch past the first insn of pos */
 492         shd             x1,x2,28,t1     /* delay slot: copy of the first instruction of pos */
 493
 494 LSYM(neg15)
 495         b               LREF(neg)
 496         subi            1,x2,x2         /* delay slot: x2 = 1 - x = |x| + 1 */
 497
     /* $$divU_15: unsigned divide of x2 by 15, quotient in x1 (ret1).
        Forms the 33-bit value <x1,x2> = x+1 and enters pos, whose
        0x11 * 0x101 * 0x10001 chain is the whole multiplier 0x11111111.  */
 498 GSYM($$divU_15)
 499         .export         $$divU_15,millicode
 500         addi            1,x2,x2         /* this CAN overflow */
 501         b               LREF(pos)
 502         addc            0,0,x1          /* delay slot: x1 = carry out of the increment */
 503
 504 /* DIVISION BY 17 (use z = 2**32; a =  f0f0f0f) */
     /* $$divI_17: signed divide of x2 by 17, quotient in x1 (ret1).
        Each path builds <x1,x2> = 15*(|x|+1), computed as 16*v - v with a
        64-bit subtract, and then enters the shared chain after its 0x11
        step (pos_for_17 or neg_for_17), since
        0xf * 0x101 * 0x10001 = 0xf0f0f0f.  */
 505 GSYM($$divI_17)
 506         .export         $$divI_17,millicode
 507         comb,<,n        x2,0,LREF(neg17) /* x < 0: branch; ,n nullifies the addi when taken */
 508         addi            1,x2,x2         /* this cannot overflow */
 509         shd             0,x2,28,t1      /* multiply by 0xf to get started */
 510         shd             x2,0,28,t2      /* <t1,t2> = 16*x2 */
 511         sub             t2,x2,x2        /* low word of 16*x2 - x2 */
 512         b               LREF(pos_for_17)
 513         subb            t1,0,x1         /* delay slot: high word, minus the borrow */
 514
 515 LSYM(neg17)
 516         subi            1,x2,x2         /* this cannot overflow */
 517         shd             0,x2,28,t1      /* multiply by 0xf to get started */
 518         shd             x2,0,28,t2      /* <t1,t2> = 16*x2 */
 519         sub             t2,x2,x2        /* low word of 16*x2 - x2 */
 520         b               LREF(neg_for_17)
 521         subb            t1,0,x1         /* delay slot: high word, minus the borrow */
 522
     /* $$divU_17: unsigned divide of x2 by 17, quotient in x1 (ret1).
        x+1 may need 33 bits, so the carry from the increment is kept in
        x1.  Builds <x1,x2> = 15*(x+1) as 16*v - v and enters the shared
        chain at pos_for_17.  The u17 label is not referenced anywhere in
        this file.  */
 523 GSYM($$divU_17)
 524         .export         $$divU_17,millicode
 525         addi            1,x2,x2         /* this CAN overflow */
 526         addc            0,0,x1          /* x1 = carry out of the increment */
 527         shd             x1,x2,28,t1     /* multiply by 0xf to get started */
 528 LSYM(u17)
 529         shd             x2,0,28,t2      /* <t1,t2> = 16*<x1,x2> */
 530         sub             t2,x2,x2        /* low word of 16*v - v */
 531         b               LREF(pos_for_17)
 532         subb            t1,x1,x1        /* delay slot: high word = t1 - x1 - borrow */
 533
 534
 535 /* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
 536    includes 7,9 and also 14
 537
 538
 539    z = 2**24-1
 540    r = z mod y = 0
 541
 542    so choose b = 0
 543
 544    Also, in order to divide by z = 2**24-1, we approximate by dividing
 545    by (z+1) = 2**24 (which is easy), and then correcting.
 546
 547    (ax) = (z+1)q' + r
 548    .    = zq' + (q'+r)
 549
 550    So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
 551    Then the true remainder of (ax)/z is (q'+r).  Repeat the process
 552    with this new remainder, adding the tentative quotients together,
 553    until a tentative quotient is 0 (and then we are done).  There is
 554    one last correction to be done.  It is possible that (q'+r) = z.
 555    If so, then (q'+r)/(z+1) = 0 and it looks like we are done.  But,
 556    in fact, we need to add 1 more to the quotient.  Now, it turns
 557    out that this happens if and only if the original value x is
 558    an exact multiple of y.  So, to avoid a three instruction test at
 559    the end, instead use 1 instruction to add 1 to x at the beginning.  */
 560
 561 /* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
     /* $$divI_7: signed divide of x2 by 7, quotient in x1 (ret1).
        Builds 9*(|x|+1), multiplies it by 0x41 and 0x1001 (total factor
        0x249249), and then divides the 64-bit product by 2**24-1 with the
        iterative scheme described in the comment above.
        Shared entry points in this block:
        .    7          non-negative path; also entered by $$divI_14 and
        .               $$divU_14 with x2 already halved
        .    pos7       after the initial multiply; entered by $$divU_7,
        .               $$divI_9 and $$divU_9
        .    8          negative path after the negate; entered by neg14
        .    neg7_shift negative path after the initial multiply; entered
        .               by neg9
        The negative path duplicates the positive one and negates the
        quotient on return.  */
 562 GSYM($$divI_7)
 563         .export         $$divI_7,millicode
 564         comb,<,n        x2,0,LREF(neg7) /* x < 0: branch; ,n nullifies the addi when taken */
 565 LSYM(7)
 566         addi            1,x2,x2         /* cannot overflow */
 567         shd             0,x2,29,x1      /* x1 = top 3 bits of x2 */
 568         sh3add          x2,x2,x2        /* x2 = 8*x2 + x2 (low word) */
 569         addc            x1,0,x1         /* <x1,x2> = 9*(x+1) */
 570 LSYM(pos7)
 571         shd             x1,x2,26,t1     /* multiply by 0x41: add <x1,x2> << 6 */
 572         shd             x2,0,26,t2
 573         add             x2,t2,x2
 574         addc            x1,t1,x1
 575
 576         shd             x1,x2,20,t1     /* multiply by 0x1001: add <x1,x2> << 12 */
 577         shd             x2,0,20,t2
 578         add             x2,t2,x2
 579         addc            x1,t1,t1        /* high word is written to t1, not x1 */
 580
 581         /* computed <t1,x2>.  Now divide it by (2**24 - 1)      */
 582
 583         copy            0,x1            /* quotient accumulator = 0 */
 584         shd,=           t1,x2,24,t1     /* tentative quotient; if 0, the addb at 1 is nullified */
 585 LSYM(1)
 586         addb,tr         t1,x1,LREF(2)   /* add to previous quotient   */
 587         extru           x2,31,24,x2     /* new remainder (unadjusted) */
 588
 589         MILLIRETN                       /* reached only when the addb at 1 was nullified */
 590
 591 LSYM(2)
 592         addb,tr         t1,x2,LREF(1)   /* adjust remainder */
 593         extru,=         x2,7,8,t1       /* new quotient; if 0, nullifies the addb at 1 (the next insn executed) */
 594
 595 LSYM(neg7)
 596         subi            1,x2,x2         /* negate x2 and add 1 */
 597 LSYM(8)
 598         shd             0,x2,29,x1      /* x1 = top 3 bits of x2 */
 599         sh3add          x2,x2,x2        /* x2 = 8*x2 + x2 (low word) */
 600         addc            x1,0,x1         /* <x1,x2> = 9*(|x|+1) */
 601
 602 LSYM(neg7_shift)
 603         shd             x1,x2,26,t1     /* multiply by 0x41: add <x1,x2> << 6 */
 604         shd             x2,0,26,t2
 605         add             x2,t2,x2
 606         addc            x1,t1,x1
 607
 608         shd             x1,x2,20,t1     /* multiply by 0x1001: add <x1,x2> << 12 */
 609         shd             x2,0,20,t2
 610         add             x2,t2,x2
 611         addc            x1,t1,t1        /* high word is written to t1, not x1 */
 612
 613         /* computed <t1,x2>.  Now divide it by (2**24 - 1)      */
 614
 615         copy            0,x1            /* quotient accumulator = 0 */
 616         shd,=           t1,x2,24,t1     /* tentative quotient; if 0, the addb at 3 is nullified */
 617 LSYM(3)
 618         addb,tr         t1,x1,LREF(4)   /* add to previous quotient   */
 619         extru           x2,31,24,x2     /* new remainder (unadjusted) */
 620
 621         MILLIRET
 622         sub             0,x1,x1         /* negate result    */
 623
 624 LSYM(4)
 625         addb,tr         t1,x2,LREF(3)   /* adjust remainder */
 626         extru,=         x2,7,8,t1       /* new quotient; if 0, nullifies the addb at 3 (the next insn executed) */
 627
     /* $$divU_7: unsigned divide of x2 by 7, quotient in x1 (ret1).
        x+1 may need 33 bits, so the carry from the increment is kept in
        x1.  Builds <x1,x2> = 9*(x+1) and joins the divide-by-7 code at
        pos7.  */
 628 GSYM($$divU_7)
 629         .export         $$divU_7,millicode
 630         addi            1,x2,x2         /* can carry */
 631         addc            0,0,x1          /* x1 = carry out of the increment */
 632         shd             x1,x2,29,t1     /* t1 = high word of 8*<x1,x2> */
 633         sh3add          x2,x2,x2        /* x2 = 8*x2 + x2 (low word) */
 634         b               LREF(pos7)
 635         addc            t1,x1,x1        /* delay slot: x1 = t1 + x1 + carry */
 636
 637 /* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
     /* $$divI_9: signed divide of x2 by 9, quotient in x1 (ret1).
        Each path builds <x1,x2> = 7*(|x|+1), computed as 8*v - v with a
        64-bit subtract, and then reuses the divide-by-7 code (pos7 or
        neg7_shift) for the 0x41 and 0x1001 multiplies and the division
        by 2**24-1; 7 * 0x41 * 0x1001 = 0x1c71c7.  */
 638 GSYM($$divI_9)
 639         .export         $$divI_9,millicode
 640         comb,<,n        x2,0,LREF(neg9) /* x < 0: branch; ,n nullifies the addi when taken */
 641         addi            1,x2,x2         /* cannot overflow */
 642         shd             0,x2,29,t1
 643         shd             x2,0,29,t2      /* <t1,t2> = 8*x2 */
 644         sub             t2,x2,x2        /* low word of 8*x2 - x2 */
 645         b               LREF(pos7)
 646         subb            t1,0,x1         /* delay slot: high word, minus the borrow */
 647
 648 LSYM(neg9)
 649         subi            1,x2,x2         /* negate and add 1 */
 650         shd             0,x2,29,t1
 651         shd             x2,0,29,t2      /* <t1,t2> = 8*x2 */
 652         sub             t2,x2,x2        /* low word of 8*x2 - x2 */
 653         b               LREF(neg7_shift)
 654         subb            t1,0,x1         /* delay slot: high word, minus the borrow */
 655
     /* $$divU_9: unsigned divide of x2 by 9, quotient in x1 (ret1).
        x+1 may need 33 bits, so the carry from the increment is kept in
        x1.  Builds <x1,x2> = 7*(x+1) as 8*v - v and joins the
        divide-by-7 code at pos7.  */
 656 GSYM($$divU_9)
 657         .export         $$divU_9,millicode
 658         addi            1,x2,x2         /* can carry */
 659         addc            0,0,x1          /* x1 = carry out of the increment */
 660         shd             x1,x2,29,t1
 661         shd             x2,0,29,t2      /* <t1,t2> = 8*<x1,x2> */
 662         sub             t2,x2,x2        /* low word of 8*v - v */
 663         b               LREF(pos7)
 664         subb            t1,x1,x1        /* delay slot: high word = t1 - x1 - borrow */
 665
 666 /* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
     /* $$divI_14 / $$divU_14: divide x2 by 14, quotient in x1 (ret1).
        $$divU_14 is also the non-negative path of $$divI_14: it halves x
        in the delay slot and enters the divide-by-7 code at label 7.
        After the halving only 31 bits remain, so the "add 1" at label 7
        cannot overflow even for unsigned input.
        neg14 computes |x|/2 + 1 itself and enters at label 8, after the
        negate-and-add-1 of neg7.  */
 667 GSYM($$divI_14)
 668         .export         $$divI_14,millicode
 669         comb,<,n        x2,0,LREF(neg14) /* x < 0: branch; ,n nullifies the b below when taken */
 670 GSYM($$divU_14)
 671         .export         $$divU_14,millicode
 672         b               LREF(7)         /* go to 7 case */
 673         extru           x2,30,31,x2     /* divide by 2  */
 674
 675 LSYM(neg14)
 676         subi            2,x2,x2         /* negate (and add 2) */
 677         b               LREF(8)
 678         extru           x2,30,31,x2     /* divide by 2        */
 679         .exit                           /* closes the .entry placed before $$divide_by_constant */
 680         .procend                        /* closes the matching .proc */
 681         .end
 682 #endif