^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* mpihelp-div.c - MPI helper functions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * This file is part of GnuPG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Note: This code is heavily based on the GNU MP Library.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Actually it's the same code with only minor changes in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * way the data is stored; this is to support the abstraction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * of an optional secure memory allocation which may be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * to avoid revealing of sensitive data due to paging etc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * The GNU MP Library itself is published under the LGPL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * however I decided to publish this code under the plain GPL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include "mpi-internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "longlong.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #ifndef UMUL_TIME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #define UMUL_TIME 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #ifndef UDIV_TIME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #define UDIV_TIME UMUL_TIME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) mpi_limb_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) mpi_limb_t divisor_limb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) mpi_limb_t n1, n0, r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) mpi_limb_t dummy __maybe_unused;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) /* Botch: Should this be handled at all? Rely on callers? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) if (!dividend_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) /* If multiplication is much faster than division, and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * dividend is large, pre-invert the divisor, and use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * only multiplications in the inner loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * This test should be read:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * Does it ever help to use udiv_qrnnd_preinv?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * && Does what we save compensate for the inversion overhead?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) if (UDIV_TIME > (2 * UMUL_TIME + 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) int normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) normalization_steps = count_leading_zeros(divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) if (normalization_steps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) mpi_limb_t divisor_limb_inverted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) divisor_limb <<= normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * most significant bit (with weight 2**N) implicit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * Special case for DIVISOR_LIMB == 100...000.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) if (!(divisor_limb << 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) divisor_limb_inverted = ~(mpi_limb_t)0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) udiv_qrnnd(divisor_limb_inverted, dummy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) -divisor_limb, 0, divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) n1 = dividend_ptr[dividend_size - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) /* Possible optimization:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * if (r == 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) * && divisor_limb > ((n1 << normalization_steps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) * | (dividend_ptr[dividend_size - 2] >> ...)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * ...one division less...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) for (i = dividend_size - 2; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) n0 = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) UDIV_QRNND_PREINV(dummy, r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) ((n1 << normalization_steps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) divisor_limb, divisor_limb_inverted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) n1 = n0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) UDIV_QRNND_PREINV(dummy, r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) n1 << normalization_steps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) divisor_limb, divisor_limb_inverted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) return r >> normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) mpi_limb_t divisor_limb_inverted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) * most significant bit (with weight 2**N) implicit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) * Special case for DIVISOR_LIMB == 100...000.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) if (!(divisor_limb << 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) divisor_limb_inverted = ~(mpi_limb_t)0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) udiv_qrnnd(divisor_limb_inverted, dummy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) -divisor_limb, 0, divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) i = dividend_size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) r = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) if (r >= divisor_limb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) i--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) for ( ; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) n0 = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) UDIV_QRNND_PREINV(dummy, r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) n0, divisor_limb, divisor_limb_inverted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) if (UDIV_NEEDS_NORMALIZATION) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) int normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) normalization_steps = count_leading_zeros(divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) if (normalization_steps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) divisor_limb <<= normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) n1 = dividend_ptr[dividend_size - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) /* Possible optimization:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) * if (r == 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) * && divisor_limb > ((n1 << normalization_steps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) * | (dividend_ptr[dividend_size - 2] >> ...)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) * ...one division less...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) for (i = dividend_size - 2; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) n0 = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) udiv_qrnnd(dummy, r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) ((n1 << normalization_steps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) n1 = n0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) udiv_qrnnd(dummy, r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) n1 << normalization_steps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) return r >> normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) /* No normalization needed, either because udiv_qrnnd doesn't require
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) * it, or because DIVISOR_LIMB is already normalized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) i = dividend_size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) r = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) if (r >= divisor_limb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) i--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) for (; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) n0 = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) udiv_qrnnd(dummy, r, r, n0, divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) * the NSIZE-DSIZE least significant quotient limbs at QP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) * non-zero, generate that many fraction bits and append them after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) * other quotient limbs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) * Return the most significant limb of the quotient, this is always 0 or 1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) * Preconditions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) * 0. NSIZE >= DSIZE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) * 1. The most significant bit of the divisor must be set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) * 2. QP must either not overlap with the input operands at all, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) * QP + DSIZE >= NP must hold true. (This means that it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) * possible to put the quotient in the high part of NUM, right after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) * remainder in NUM.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) mpi_limb_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) mpi_limb_t most_significant_q_limb = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) switch (dsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) case 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) /* We are asked to divide by zero, so go ahead and do it! (To make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) the compiler not remove this statement, return the value.) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) * existing clients of this function have been modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) * not to call it with dsize == 0, so this should not happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) return 1 / dsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) mpi_limb_t n1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) mpi_limb_t d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) d = dp[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) n1 = np[nsize - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) if (n1 >= d) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) n1 -= d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) most_significant_q_limb = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) qp += qextra_limbs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) for (i = nsize - 2; i >= 0; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) udiv_qrnnd(qp[i], n1, n1, np[i], d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) qp -= qextra_limbs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) for (i = qextra_limbs - 1; i >= 0; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) udiv_qrnnd(qp[i], n1, n1, 0, d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) np[0] = n1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) case 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) mpi_limb_t n1, n0, n2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) mpi_limb_t d1, d0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) np += nsize - 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) d1 = dp[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) d0 = dp[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) n1 = np[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) n0 = np[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) sub_ddmmss(n1, n0, n1, n0, d1, d0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) most_significant_q_limb = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) mpi_limb_t q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) mpi_limb_t r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) if (i >= qextra_limbs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) np--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) np[0] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) if (n1 == d1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) /* Q should be either 111..111 or 111..110. Need special
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) * treatment of this rare case as normal division would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) * give overflow. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) q = ~(mpi_limb_t) 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) r = n0 + d1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) if (r < d1) { /* Carry in the addition? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) add_ssaaaa(n1, n0, r - d0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) np[0], 0, d0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) qp[i] = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) n1 = d0 - (d0 != 0 ? 1 : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) n0 = -d0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) udiv_qrnnd(q, r, n1, n0, d1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) umul_ppmm(n1, n0, d0, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) n2 = np[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) q_test:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) if (n1 > r || (n1 == r && n0 > n2)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) /* The estimated Q was too large. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) q--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) sub_ddmmss(n1, n0, n1, n0, 0, d0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) r += d1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) if (r >= d1) /* If not carry, test Q again. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) goto q_test;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) qp[i] = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) sub_ddmmss(n1, n0, r, n2, n1, n0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) np[1] = n1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) np[0] = n0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) mpi_limb_t dX, d1, n0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) np += nsize - dsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) dX = dp[dsize - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) d1 = dp[dsize - 2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) n0 = np[dsize - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) if (n0 >= dX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) if (n0 > dX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) mpihelp_sub_n(np, np, dp, dsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) n0 = np[dsize - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) most_significant_q_limb = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) mpi_limb_t q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) mpi_limb_t n1, n2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) mpi_limb_t cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) if (i >= qextra_limbs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) np--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) n2 = np[dsize];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) n2 = np[dsize - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) MPN_COPY_DECR(np + 1, np, dsize - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) np[0] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) if (n0 == dX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) /* This might over-estimate q, but it's probably not worth
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) * the extra code here to find out. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) q = ~(mpi_limb_t) 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) mpi_limb_t r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) umul_ppmm(n1, n0, d1, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) while (n1 > r
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) || (n1 == r
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) && n0 > np[dsize - 2])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) q--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) r += dX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) if (r < dX) /* I.e. "carry in previous addition?" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) n1 -= n0 < d1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) n0 -= d1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) /* Possible optimization: We already have (q * n0) and (1 * n1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) * after the calculation of q. Taking advantage of that, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) * could make this loop make two iterations less. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) cy_limb = mpihelp_submul_1(np, dp, dsize, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) if (n2 != cy_limb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) mpihelp_add_n(np, np, dp, dsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) q--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) qp[i] = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) n0 = np[dsize - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) return most_significant_q_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) /****************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) * Return the single-limb remainder.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) * There are no constraints on the value of the divisor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) mpi_limb_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) mpihelp_divmod_1(mpi_ptr_t quot_ptr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) mpi_limb_t divisor_limb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) mpi_limb_t n1, n0, r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) mpi_limb_t dummy __maybe_unused;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) if (!dividend_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) /* If multiplication is much faster than division, and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) * dividend is large, pre-invert the divisor, and use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) * only multiplications in the inner loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) * This test should be read:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) * Does it ever help to use udiv_qrnnd_preinv?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) * && Does what we save compensate for the inversion overhead?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) if (UDIV_TIME > (2 * UMUL_TIME + 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) int normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) normalization_steps = count_leading_zeros(divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) if (normalization_steps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) mpi_limb_t divisor_limb_inverted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) divisor_limb <<= normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) * most significant bit (with weight 2**N) implicit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) /* Special case for DIVISOR_LIMB == 100...000. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) if (!(divisor_limb << 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) divisor_limb_inverted = ~(mpi_limb_t)0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) udiv_qrnnd(divisor_limb_inverted, dummy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) -divisor_limb, 0, divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) n1 = dividend_ptr[dividend_size - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) /* Possible optimization:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) * if (r == 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) * && divisor_limb > ((n1 << normalization_steps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) * | (dividend_ptr[dividend_size - 2] >> ...)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) * ...one division less...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) for (i = dividend_size - 2; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) n0 = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) ((n1 << normalization_steps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) divisor_limb, divisor_limb_inverted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) n1 = n0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) UDIV_QRNND_PREINV(quot_ptr[0], r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) n1 << normalization_steps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) divisor_limb, divisor_limb_inverted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) return r >> normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) mpi_limb_t divisor_limb_inverted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) * most significant bit (with weight 2**N) implicit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) /* Special case for DIVISOR_LIMB == 100...000. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) if (!(divisor_limb << 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) divisor_limb_inverted = ~(mpi_limb_t) 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) udiv_qrnnd(divisor_limb_inverted, dummy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) -divisor_limb, 0, divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) i = dividend_size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) r = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) if (r >= divisor_limb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) quot_ptr[i--] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) for ( ; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) n0 = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) UDIV_QRNND_PREINV(quot_ptr[i], r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) n0, divisor_limb, divisor_limb_inverted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) if (UDIV_NEEDS_NORMALIZATION) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) int normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) normalization_steps = count_leading_zeros(divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) if (normalization_steps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) divisor_limb <<= normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) n1 = dividend_ptr[dividend_size - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) /* Possible optimization:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) * if (r == 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) * && divisor_limb > ((n1 << normalization_steps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) * | (dividend_ptr[dividend_size - 2] >> ...)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) * ...one division less...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) for (i = dividend_size - 2; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) n0 = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) udiv_qrnnd(quot_ptr[i + 1], r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) ((n1 << normalization_steps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) n1 = n0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) udiv_qrnnd(quot_ptr[0], r, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) n1 << normalization_steps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) return r >> normalization_steps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) /* No normalization needed, either because udiv_qrnnd doesn't require
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) * it, or because DIVISOR_LIMB is already normalized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) i = dividend_size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) r = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) if (r >= divisor_limb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) quot_ptr[i--] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) for (; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) n0 = dividend_ptr[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) }