Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /* IEEE754 floating point arithmetic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * double precision square root
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * MIPS floating point support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  * Copyright (C) 1994-2000 Algorithmics Ltd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10) #include "ieee754dp.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) static const unsigned int table[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13) 	0, 1204, 3062, 5746, 9193, 13348, 18162, 23592,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) 	29598, 36145, 43202, 50740, 58733, 67158, 75992,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) 	85215, 83599, 71378, 60428, 50647, 41945, 34246,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) 	27478, 21581, 16499, 12183, 8588, 5674, 3403,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) 	1742, 661, 130
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) union ieee754dp ieee754dp_sqrt(union ieee754dp x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) 	struct _ieee754_csr oldcsr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) 	union ieee754dp y, z, t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) 	unsigned int scalx, yh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) 	COMPXDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 	EXPLODEXDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) 	ieee754_clearcx();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) 	FLUSHXDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) 	/* x == INF or NAN? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 	switch (xc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) 	case IEEE754_CLASS_SNAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) 		return ieee754dp_nanxcpt(x);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) 	case IEEE754_CLASS_QNAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) 		/* sqrt(Nan) = Nan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) 		return x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) 	case IEEE754_CLASS_ZERO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 		/* sqrt(0) = 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 		return x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) 	case IEEE754_CLASS_INF:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 		if (xs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 			/* sqrt(-Inf) = Nan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 			ieee754_setcx(IEEE754_INVALID_OPERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 			return ieee754dp_indef();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 		/* sqrt(+Inf) = Inf */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) 		return x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 	case IEEE754_CLASS_DNORM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) 		DPDNORMX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 		fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 	case IEEE754_CLASS_NORM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 		if (xs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 			/* sqrt(-x) = Nan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 			ieee754_setcx(IEEE754_INVALID_OPERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 			return ieee754dp_indef();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 	/* save old csr; switch off INX enable & flag; set RN rounding */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	oldcsr = ieee754_csr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 	ieee754_csr.mx &= ~IEEE754_INEXACT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 	ieee754_csr.sx &= ~IEEE754_INEXACT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 	ieee754_csr.rm = FPU_CSR_RN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 	/* adjust exponent to prevent overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 	scalx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 	if (xe > 512) {		/* x > 2**-512? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 		xe -= 512;	/* x = x / 2**512 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 		scalx += 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	} else if (xe < -512) { /* x < 2**-512? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 		xe += 512;	/* x = x * 2**512 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 		scalx -= 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 	x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	y = x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 	/* magic initial approximation to almost 8 sig. bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 	yh = y.bits >> 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 	yh = (yh >> 1) + 0x1ff80000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 	yh = yh - table[(yh >> 15) & 31];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 	y.bits = ((u64) yh << 32) | (y.bits & 0xffffffff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 	/* Heron's rule once with correction to improve to ~18 sig. bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	/* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 	t = ieee754dp_div(x, y);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 	y = ieee754dp_add(y, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 	y.bits -= 0x0010000600000000LL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 	y.bits &= 0xffffffff00000000LL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 	/* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 	/* t=y*y; z=t;	pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 	t = ieee754dp_mul(y, y);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 	z = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 	t.bexp += 0x001;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 	t = ieee754dp_add(t, z);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 	z = ieee754dp_mul(ieee754dp_sub(x, z), y);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 	/* t=z/(t+x) ;	pt[n0]+=0x00100000; y+=t; */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 	t = ieee754dp_div(z, ieee754dp_add(t, x));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 	t.bexp += 0x001;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 	y = ieee754dp_add(y, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 	/* twiddle last bit to force y correctly rounded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 	/* set RZ, clear INEX flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 	ieee754_csr.rm = FPU_CSR_RZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 	ieee754_csr.sx &= ~IEEE754_INEXACT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 	/* t=x/y; ...chopped quotient, possibly inexact */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 	t = ieee754dp_div(x, y);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 	if (ieee754_csr.sx & IEEE754_INEXACT || t.bits != y.bits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 		if (!(ieee754_csr.sx & IEEE754_INEXACT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 			/* t = t-ulp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 			t.bits -= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 		/* add inexact to result status */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 		oldcsr.cx |= IEEE754_INEXACT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 		oldcsr.sx |= IEEE754_INEXACT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 		switch (oldcsr.rm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 		case FPU_CSR_RU:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 			y.bits += 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 			fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 		case FPU_CSR_RN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 			t.bits += 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 		/* y=y+t; ...chopped sum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 		y = ieee754dp_add(y, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 		/* adjust scalx for correctly rounded sqrt(x) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 		scalx -= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 	/* py[n0]=py[n0]+scalx; ...scale back y */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 	y.bexp += scalx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 	/* restore rounding mode, possibly set inexact */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 	ieee754_csr = oldcsr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 	return y;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) }