^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* mpihelp-mul.c - MPI helper functions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 1994, 1996, 1998, 1999,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * 2000 Free Software Foundation, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * This file is part of GnuPG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Note: This code is heavily based on the GNU MP Library.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Actually it's the same code with only minor changes in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * way the data is stored; this is to support the abstraction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * of an optional secure memory allocation which may be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * to avoid revealing of sensitive data due to paging etc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * The GNU MP Library itself is published under the LGPL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * however I decided to publish this code under the plain GPL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "mpi-internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include "longlong.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) if ((size) < KARATSUBA_THRESHOLD) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) mul_n_basecase(prodp, up, vp, size); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) else \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) mul_n(prodp, up, vp, size, tspace); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) } while (0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) if ((size) < KARATSUBA_THRESHOLD) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) mpih_sqr_n_basecase(prodp, up, size); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) else \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) mpih_sqr_n(prodp, up, size, tspace); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) } while (0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * always stored. Return the most significant limb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * Argument constraints:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * 1. PRODP != UP and PRODP != VP, i.e. the destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * must be distinct from the multiplier and the multiplicand.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * Handle simple cases with traditional multiplication.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * This is the most critical code of multiplication. All multiplies rely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * on this, both small and huge. Small ones arrive here immediately. Huge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * ones arrive here as this is the base case for Karatsuba's recursive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) * algorithm below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) static mpi_limb_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) mpi_limb_t v_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) /* Multiply by the first limb in V separately, as the result can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * stored (not added) to PROD. We also avoid a loop for zeroing. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) v_limb = vp[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) MPN_COPY(prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) MPN_ZERO(prodp, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) cy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) cy = mpihelp_mul_1(prodp, up, size, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) prodp[size] = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) /* For each iteration in the outer loop, multiply one limb from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * U with one limb from V, and add it to PROD. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) for (i = 1; i < size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) v_limb = vp[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) cy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) cy = mpihelp_add_n(prodp, prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) cy = mpihelp_addmul_1(prodp, up, size, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) prodp[size] = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) return cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) mpi_size_t size, mpi_ptr_t tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) if (size & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) /* The size is odd, and the code below doesn't handle that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) * Multiply the least significant (size - 1) limbs with a recursive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) * call, and handle the most significant limb of S1 and S2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) * separately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) * A slightly faster way to do this would be to make the Karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) * code below behave as if the size were even, and let it check for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) * odd size in the end. I.e., in essence move this code to the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) * Doing so would save us a recursive call, and potentially make the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) * stack grow a lot less.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) mpi_size_t esize = size - 1; /* even size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) mpi_limb_t cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) prodp[esize + esize] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) prodp[esize + size] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * Split U in two pieces, U1 and U0, such that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) * U = U0 + U1*(B**n),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) * and V in V1 and V0, such that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * V = V0 + V1*(B**n).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) * UV is then computed recursively using the identity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * 2n n n n
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) * 1 1 1 0 0 1 0 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) * Where B = 2**BITS_PER_MP_LIMB.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) mpi_size_t hsize = size >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) int negflg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) /* Product H. ________________ ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) * |_____U1 x V1____||____U0 x V0_____|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) * Put result in upper part of PROD and pass low part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) * as new TSPACE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) /* Product M. ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) * |_(U1-U0)(V0-V1)_|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) mpihelp_sub_n(prodp, up + hsize, up, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) negflg = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) mpihelp_sub_n(prodp, up, up + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) negflg = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) negflg ^= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) /* No change of NEGFLG. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) /* Read temporary operands from low part of PROD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * Put result in low part of TSPACE using upper part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * as new TSPACE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) tspace + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) /* Add/copy product H. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) MPN_COPY(prodp + hsize, prodp + size, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) cy = mpihelp_add_n(prodp + size, prodp + size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) prodp + size + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) /* Add product M (if NEGFLG M is a negative number) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) if (negflg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) cy -=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) cy +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) /* Product L. ________________ ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) * |________________||____U0 x V0_____|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) * Read temporary operands from low part of PROD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) * Put result in low part of TSPACE using upper part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) * as new TSPACE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) /* Add/copy Product L (twice) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) if (cy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) mpihelp_add_1(prodp + hsize + size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) prodp + hsize + size, hsize, cy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) MPN_COPY(prodp, tspace, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) if (cy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) mpihelp_add_1(prodp + size, prodp + size, size, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) mpi_limb_t cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) mpi_limb_t v_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) /* Multiply by the first limb in V separately, as the result can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) * stored (not added) to PROD. We also avoid a loop for zeroing. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) v_limb = up[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) MPN_COPY(prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) MPN_ZERO(prodp, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) cy_limb = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) prodp[size] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) /* For each iteration in the outer loop, multiply one limb from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) * U with one limb from V, and add it to PROD. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) for (i = 1; i < size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) v_limb = up[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) cy_limb = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) cy_limb = mpihelp_add_n(prodp, prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) prodp[size] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) if (size & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) /* The size is odd, and the code below doesn't handle that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) * Multiply the least significant (size - 1) limbs with a recursive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) * call, and handle the most significant limb of S1 and S2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) * separately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * A slightly faster way to do this would be to make the Karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) * code below behave as if the size were even, and let it check for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * odd size in the end. I.e., in essence move this code to the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) * Doing so would save us a recursive call, and potentially make the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * stack grow a lot less.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) mpi_size_t esize = size - 1; /* even size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) mpi_limb_t cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) prodp[esize + esize] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) prodp[esize + size] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) mpi_size_t hsize = size >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) /* Product H. ________________ ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) * |_____U1 x U1____||____U0 x U0_____|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) * Put result in upper part of PROD and pass low part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) * as new TSPACE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) /* Product M. ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) * |_(U1-U0)(U0-U1)_|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) mpihelp_sub_n(prodp, up + hsize, up, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) mpihelp_sub_n(prodp, up, up + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) /* Read temporary operands from low part of PROD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) * Put result in low part of TSPACE using upper part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) * as new TSPACE. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) /* Add/copy product H */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) MPN_COPY(prodp + hsize, prodp + size, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) cy = mpihelp_add_n(prodp + size, prodp + size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) prodp + size + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) /* Add product M (if NEGFLG M is a negative number). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) /* Product L. ________________ ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) * |________________||____U0 x U0_____|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) * Read temporary operands from low part of PROD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) * Put result in low part of TSPACE using upper part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) * as new TSPACE. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) /* Add/copy Product L (twice). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) if (cy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) mpihelp_add_1(prodp + hsize + size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) prodp + hsize + size, hsize, cy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) MPN_COPY(prodp, tspace, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) if (cy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) mpihelp_add_1(prodp + size, prodp + size, size, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) void mpihelp_mul_n(mpi_ptr_t prodp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) if (up == vp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) if (size < KARATSUBA_THRESHOLD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) mpih_sqr_n_basecase(prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) mpi_ptr_t tspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) tspace = mpi_alloc_limb_space(2 * size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) mpih_sqr_n(prodp, up, size, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) mpi_free_limb_space(tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) if (size < KARATSUBA_THRESHOLD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) mul_n_basecase(prodp, up, vp, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) mpi_ptr_t tspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) tspace = mpi_alloc_limb_space(2 * size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) mul_n(prodp, up, vp, size, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) mpi_free_limb_space(tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) mpi_ptr_t up, mpi_size_t usize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) mpi_ptr_t vp, mpi_size_t vsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) struct karatsuba_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) if (!ctx->tspace || ctx->tspace_size < vsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) if (ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) mpi_free_limb_space(ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) ctx->tspace = mpi_alloc_limb_space(2 * vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) if (!ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) ctx->tspace_size = vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) prodp += vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) up += vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) usize -= vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) if (usize >= vsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) if (!ctx->tp || ctx->tp_size < vsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) if (ctx->tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) mpi_free_limb_space(ctx->tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) ctx->tp = mpi_alloc_limb_space(2 * vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) if (!ctx->tp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) if (ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) mpi_free_limb_space(ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) ctx->tspace = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) ctx->tp_size = vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) cy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) prodp += vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) up += vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) usize -= vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) } while (usize >= vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) if (usize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) if (usize < KARATSUBA_THRESHOLD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) mpi_limb_t tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) if (!ctx->next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) if (!ctx->next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) if (mpihelp_mul_karatsuba_case(ctx->tspace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) vp, vsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) up, usize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) ctx->next) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) struct karatsuba_ctx *ctx2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) if (ctx->tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) mpi_free_limb_space(ctx->tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) if (ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) mpi_free_limb_space(ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) for (ctx = ctx->next; ctx; ctx = ctx2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) ctx2 = ctx->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) if (ctx->tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) mpi_free_limb_space(ctx->tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) if (ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) mpi_free_limb_space(ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) * and v (pointed to by VP, with VSIZE limbs), and store the result at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) * PRODP. USIZE + VSIZE limbs are always stored, but if the input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) * operands are normalized. Return the most significant limb of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) * result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) * NOTE: The space pointed to by PRODP is overwritten before finished
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) * with U and V, so overlap is an error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) * Argument constraints:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) * 1. USIZE >= VSIZE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) * 2. PRODP != UP and PRODP != VP, i.e. the destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) * must be distinct from the multiplier and the multiplicand.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) struct karatsuba_ctx ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) if (vsize < KARATSUBA_THRESHOLD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) mpi_limb_t v_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) if (!vsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) *_result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) /* Multiply by the first limb in V separately, as the result can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) * stored (not added) to PROD. We also avoid a loop for zeroing. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) v_limb = vp[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) MPN_COPY(prodp, up, usize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) MPN_ZERO(prodp, usize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) cy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) cy = mpihelp_mul_1(prodp, up, usize, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) prodp[usize] = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) /* For each iteration in the outer loop, multiply one limb from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) * U with one limb from V, and add it to PROD. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) for (i = 1; i < vsize; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) v_limb = vp[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) cy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) cy = mpihelp_add_n(prodp, prodp, up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) usize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) prodp[usize] = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) *_result = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) memset(&ctx, 0, sizeof ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) mpihelp_release_karatsuba_ctx(&ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) *_result = *prod_endp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) }