Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /* mpihelp-mul.c  -  MPI helper functions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * Copyright (C) 1994, 1996, 1998, 1999,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  *               2000 Free Software Foundation, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * This file is part of GnuPG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  * Note: This code is heavily based on the GNU MP Library.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  *	 Actually it's the same code with only minor changes in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  *	 way the data is stored; this is to support the abstraction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  *	 of an optional secure memory allocation which may be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12)  *	 to avoid revealing of sensitive data due to paging etc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  *	 The GNU MP Library itself is published under the LGPL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14)  *	 however I decided to publish this code under the plain GPL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) #include "mpi-internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) #include "longlong.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) 	do {							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) 		if ((size) < KARATSUBA_THRESHOLD)		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) 			mul_n_basecase(prodp, up, vp, size);	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) 		else						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 			mul_n(prodp, up, vp, size, tspace);	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 	} while (0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) #define MPN_SQR_N_RECURSE(prodp, up, size, tspace)		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) 	do {							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) 		if ((size) < KARATSUBA_THRESHOLD)		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 			mpih_sqr_n_basecase(prodp, up, size);	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) 		else						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) 			mpih_sqr_n(prodp, up, size, tspace);	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) 	} while (0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38)  * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39)  * always stored.  Return the most significant limb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41)  * Argument constraints:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42)  * 1. PRODP != UP and PRODP != VP, i.e. the destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43)  *    must be distinct from the multiplier and the multiplicand.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46)  * Handle simple cases with traditional multiplication.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48)  * This is the most critical code of multiplication.  All multiplies rely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49)  * on this, both small and huge.  Small ones arrive here immediately.  Huge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50)  * ones arrive here as this is the base case for Karatsuba's recursive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51)  * algorithm below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) static mpi_limb_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 	mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 	mpi_limb_t v_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 	/* Multiply by the first limb in V separately, as the result can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	v_limb = vp[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 		if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 			MPN_COPY(prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 			MPN_ZERO(prodp, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 		cy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 		cy = mpihelp_mul_1(prodp, up, size, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 	prodp[size] = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 	prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	/* For each iteration in the outer loop, multiply one limb from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 	 * U with one limb from V, and add it to PROD.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 	for (i = 1; i < size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 		v_limb = vp[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 		if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 			cy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 			if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 				cy = mpihelp_add_n(prodp, prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 			cy = mpihelp_addmul_1(prodp, up, size, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 		prodp[size] = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 		prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	return cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 		mpi_size_t size, mpi_ptr_t tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 	if (size & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 		/* The size is odd, and the code below doesn't handle that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 		 * Multiply the least significant (size - 1) limbs with a recursive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 		 * call, and handle the most significant limb of S1 and S2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 		 * separately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 		 * A slightly faster way to do this would be to make the Karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 		 * code below behave as if the size were even, and let it check for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 		 * odd size in the end.  I.e., in essence move this code to the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 		 * Doing so would save us a recursive call, and potentially make the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 		 * stack grow a lot less.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 		mpi_size_t esize = size - 1;	/* even size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 		mpi_limb_t cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 		MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 		prodp[esize + esize] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 		cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 		prodp[esize + size] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 		/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 		 * Split U in two pieces, U1 and U0, such that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 		 * U = U0 + U1*(B**n),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 		 * and V in V1 and V0, such that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 		 * V = V0 + V1*(B**n).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 		 * UV is then computed recursively using the identity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 		 *        2n   n          n                     n
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 		 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 		 *                1 1        1  0   0  1              0 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 		 * Where B = 2**BITS_PER_MP_LIMB.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 		mpi_size_t hsize = size >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 		mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 		int negflg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 		/* Product H.      ________________  ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 		 *                |_____U1 x V1____||____U0 x V0_____|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 		 * Put result in upper part of PROD and pass low part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 		 * as new TSPACE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 		MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 				  tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 		/* Product M.      ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 		 *                |_(U1-U0)(V0-V1)_|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 		if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 			mpihelp_sub_n(prodp, up + hsize, up, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 			negflg = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 			mpihelp_sub_n(prodp, up, up + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 			negflg = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 		if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 			mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 			negflg ^= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 			mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 			/* No change of NEGFLG.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 		/* Read temporary operands from low part of PROD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 		 * Put result in low part of TSPACE using upper part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 		 * as new TSPACE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 		MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 				  tspace + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 		/* Add/copy product H. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 		MPN_COPY(prodp + hsize, prodp + size, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 		cy = mpihelp_add_n(prodp + size, prodp + size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 				   prodp + size + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) 		/* Add product M (if NEGFLG M is a negative number) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 		if (negflg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 			cy -=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 			    mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 					  size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) 			cy +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 			    mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) 					  size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 		/* Product L.      ________________  ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 		 *                |________________||____U0 x V0_____|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 		 * Read temporary operands from low part of PROD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) 		 * Put result in low part of TSPACE using upper part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 		 * as new TSPACE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 		MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 		/* Add/copy Product L (twice) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 		if (cy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 			mpihelp_add_1(prodp + hsize + size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 				      prodp + hsize + size, hsize, cy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 		MPN_COPY(prodp, tspace, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 				   hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 		if (cy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 			mpihelp_add_1(prodp + size, prodp + size, size, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) 	mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 	mpi_limb_t cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 	mpi_limb_t v_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 	/* Multiply by the first limb in V separately, as the result can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 	v_limb = up[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 	if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 		if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 			MPN_COPY(prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 			MPN_ZERO(prodp, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 		cy_limb = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 		cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 	prodp[size] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 	prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 	/* For each iteration in the outer loop, multiply one limb from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 	 * U with one limb from V, and add it to PROD.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 	for (i = 1; i < size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 		v_limb = up[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 		if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 			cy_limb = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 			if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 				cy_limb = mpihelp_add_n(prodp, prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 			cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 		prodp[size] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 		prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	if (size & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 		/* The size is odd, and the code below doesn't handle that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 		 * Multiply the least significant (size - 1) limbs with a recursive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 		 * call, and handle the most significant limb of S1 and S2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 		 * separately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 		 * A slightly faster way to do this would be to make the Karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 		 * code below behave as if the size were even, and let it check for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 		 * odd size in the end.  I.e., in essence move this code to the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 		 * Doing so would save us a recursive call, and potentially make the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 		 * stack grow a lot less.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 		mpi_size_t esize = size - 1;	/* even size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 		mpi_limb_t cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 		MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) 		prodp[esize + esize] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 		cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 		prodp[esize + size] = cy_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 		mpi_size_t hsize = size >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 		mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) 		/* Product H.      ________________  ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 		 *                |_____U1 x U1____||____U0 x U0_____|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 		 * Put result in upper part of PROD and pass low part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 		 * as new TSPACE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 		MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 		/* Product M.      ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) 		 *                |_(U1-U0)(U0-U1)_|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 		if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 			mpihelp_sub_n(prodp, up + hsize, up, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 			mpihelp_sub_n(prodp, up, up + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 		/* Read temporary operands from low part of PROD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 		 * Put result in low part of TSPACE using upper part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) 		 * as new TSPACE.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) 		MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 		/* Add/copy product H  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) 		MPN_COPY(prodp + hsize, prodp + size, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) 		cy = mpihelp_add_n(prodp + size, prodp + size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 				   prodp + size + hsize, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) 		/* Add product M (if NEGFLG M is a negative number).  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) 		cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) 		/* Product L.      ________________  ________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) 		 *                |________________||____U0 x U0_____|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) 		 * Read temporary operands from low part of PROD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) 		 * Put result in low part of TSPACE using upper part of TSPACE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) 		 * as new TSPACE.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) 		MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) 		/* Add/copy Product L (twice).  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) 		if (cy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) 			mpihelp_add_1(prodp + hsize + size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) 				      prodp + hsize + size, hsize, cy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 		MPN_COPY(prodp, tspace, hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 				   hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) 		if (cy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 			mpihelp_add_1(prodp + size, prodp + size, size, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) void mpihelp_mul_n(mpi_ptr_t prodp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 		mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 	if (up == vp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 		if (size < KARATSUBA_THRESHOLD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 			mpih_sqr_n_basecase(prodp, up, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 		else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 			mpi_ptr_t tspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) 			tspace = mpi_alloc_limb_space(2 * size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 			mpih_sqr_n(prodp, up, size, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 			mpi_free_limb_space(tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 		if (size < KARATSUBA_THRESHOLD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 			mul_n_basecase(prodp, up, vp, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 		else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 			mpi_ptr_t tspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 			tspace = mpi_alloc_limb_space(2 * size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 			mul_n(prodp, up, vp, size, tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 			mpi_free_limb_space(tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) 			   mpi_ptr_t up, mpi_size_t usize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) 			   mpi_ptr_t vp, mpi_size_t vsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) 			   struct karatsuba_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 	mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) 	if (!ctx->tspace || ctx->tspace_size < vsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) 		if (ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) 			mpi_free_limb_space(ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) 		ctx->tspace = mpi_alloc_limb_space(2 * vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) 		if (!ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 		ctx->tspace_size = vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) 	MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) 	prodp += vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) 	up += vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) 	usize -= vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) 	if (usize >= vsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) 		if (!ctx->tp || ctx->tp_size < vsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 			if (ctx->tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) 				mpi_free_limb_space(ctx->tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) 			ctx->tp = mpi_alloc_limb_space(2 * vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) 			if (!ctx->tp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) 				if (ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) 					mpi_free_limb_space(ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) 				ctx->tspace = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) 				return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 			ctx->tp_size = vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) 			MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) 			cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) 			mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) 				      cy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 			prodp += vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) 			up += vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) 			usize -= vsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) 		} while (usize >= vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) 	if (usize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) 		if (usize < KARATSUBA_THRESHOLD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) 			mpi_limb_t tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) 			if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) 			    < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) 				return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) 			if (!ctx->next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) 				ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) 				if (!ctx->next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) 					return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) 			if (mpihelp_mul_karatsuba_case(ctx->tspace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) 						       vp, vsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) 						       up, usize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) 						       ctx->next) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) 				return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) 		cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) 		mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) 	struct karatsuba_ctx *ctx2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) 	if (ctx->tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) 		mpi_free_limb_space(ctx->tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) 	if (ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) 		mpi_free_limb_space(ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) 	for (ctx = ctx->next; ctx; ctx = ctx2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) 		ctx2 = ctx->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) 		if (ctx->tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) 			mpi_free_limb_space(ctx->tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) 		if (ctx->tspace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) 			mpi_free_limb_space(ctx->tspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) 		kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)  * and v (pointed to by VP, with VSIZE limbs), and store the result at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438)  * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439)  * operands are normalized.  Return the most significant limb of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440)  * result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442)  * NOTE: The space pointed to by PRODP is overwritten before finished
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443)  * with U and V, so overlap is an error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445)  * Argument constraints:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446)  * 1. USIZE >= VSIZE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447)  * 2. PRODP != UP and PRODP != VP, i.e. the destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448)  *    must be distinct from the multiplier and the multiplicand.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) 	    mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) 	mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) 	mpi_limb_t cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) 	struct karatsuba_ctx ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) 	if (vsize < KARATSUBA_THRESHOLD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) 		mpi_size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) 		mpi_limb_t v_limb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) 		if (!vsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) 			*_result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) 		/* Multiply by the first limb in V separately, as the result can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) 		 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) 		v_limb = vp[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) 		if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) 			if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) 				MPN_COPY(prodp, up, usize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) 				MPN_ZERO(prodp, usize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) 			cy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) 			cy = mpihelp_mul_1(prodp, up, usize, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) 		prodp[usize] = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) 		prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) 		/* For each iteration in the outer loop, multiply one limb from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) 		 * U with one limb from V, and add it to PROD.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) 		for (i = 1; i < vsize; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) 			v_limb = vp[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) 			if (v_limb <= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) 				cy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) 				if (v_limb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) 					cy = mpihelp_add_n(prodp, prodp, up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) 							   usize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) 			} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) 				cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) 			prodp[usize] = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) 			prodp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) 		*_result = cy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) 	memset(&ctx, 0, sizeof ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) 	if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) 	mpihelp_release_karatsuba_ctx(&ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) 	*_result = *prod_endp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) }