^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2003-2013 Altera Corporation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/entry.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) .set noat
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) .set nobreak
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * Explicitly allow the use of r1 (the assembler temporary register)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * within this code. This register is normally reserved for the use of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * the compiler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) ENTRY(instruction_trap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) ldw r1, PT_R1(sp) // Restore registers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) ldw r2, PT_R2(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) ldw r3, PT_R3(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) ldw r4, PT_R4(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) ldw r5, PT_R5(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) ldw r6, PT_R6(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) ldw r7, PT_R7(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) ldw r8, PT_R8(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) ldw r9, PT_R9(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) ldw r10, PT_R10(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) ldw r11, PT_R11(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) ldw r12, PT_R12(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) ldw r13, PT_R13(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) ldw r14, PT_R14(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) ldw r15, PT_R15(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) ldw ra, PT_RA(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) ldw fp, PT_FP(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) ldw gp, PT_GP(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) ldw et, PT_ESTATUS(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) wrctl estatus, et
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) ldw ea, PT_EA(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) ldw et, PT_SP(sp) /* backup sp in et */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) addi sp, sp, PT_REGS_SIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) /* INSTRUCTION EMULATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) * ---------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * Nios II processors generate exceptions for unimplemented instructions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * The routines below emulate these instructions. Depending on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) * processor core, the only instructions that might need to be emulated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) * are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * The emulations match the instructions, except for the following
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * limitations:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * 1) The emulation routines do not emulate the use of the exception
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * temporary register (et) as a source operand because the exception
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * handler already has modified it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * 2) The routines do not emulate the use of the stack pointer (sp) or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * the exception return address register (ea) as a destination because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) * modifying these registers crashes the exception handler or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * interrupted routine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * Detailed Design
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * ---------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * The emulation routines expect the contents of integer registers r0-r31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp). The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) * routines retrieve source operands from the stack and modify the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * destination register's value on the stack prior to the end of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * exception handler. Then all registers except the destination register
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * are restored to their previous values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) * The instruction that causes the exception is found at address -4(ea).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * The instruction's OP and OPX fields identify the operation to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * performed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * One instruction, muli, is an I-type instruction that is identified by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) * an OP field of 0x24.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) * muli AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) * 27 22 6 0 <-- LSB of field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) * The remaining emulated instructions are R-type and have an OP field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) * of 0x3a. Their OPX fields identify them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) * R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * 27 22 17 11 6 0 <-- LSB of field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) * Opcode Encoding. muli is identified by its OP value. Then OPX & 0x02
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) * is used to differentiate between the division opcodes and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) * remaining multiplication opcodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) * Instruction OP OPX OPX & 0x02
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) * ----------- ---- ---- ----------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) * muli 0x24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) * divu 0x3a 0x24 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) * div 0x3a 0x25 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) * mul 0x3a 0x27 != 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) * mulxuu 0x3a 0x07 != 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) * mulxsu 0x3a 0x17 != 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) * mulxss 0x3a 0x1f != 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * Save everything on the stack to make it easy for the emulation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * routines to retrieve the source register operands.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) addi sp, sp, -128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) stw zero, 0(sp) /* Save zero on stack to avoid special case for r0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) stw r1, 4(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) stw r2, 8(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) stw r3, 12(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) stw r4, 16(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) stw r5, 20(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) stw r6, 24(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) stw r7, 28(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) stw r8, 32(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) stw r9, 36(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) stw r10, 40(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) stw r11, 44(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) stw r12, 48(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) stw r13, 52(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) stw r14, 56(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) stw r15, 60(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) stw r16, 64(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) stw r17, 68(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) stw r18, 72(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) stw r19, 76(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) stw r20, 80(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) stw r21, 84(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) stw r22, 88(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) stw r23, 92(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) /* Don't bother to save et. It's already been changed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) rdctl r5, estatus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) stw r5, 100(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) stw gp, 104(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) stw et, 108(sp) /* et contains previous sp value. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) stw fp, 112(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) stw ea, 116(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) stw ra, 120(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) * Split the instruction into its fields. We need 4*A, 4*B, and 4*C as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) * offsets to the stack pointer for access to the stored register values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) ldw r2,-4(ea) /* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) roli r3, r2, 7 /* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) roli r4, r3, 3 /* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) roli r5, r4, 2 /* r5 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) srai r4, r4, 16 /* r4 = (sign-extended) IMM16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) roli r6, r5, 5 /* r6 = XXXX,NNNNN,PPPPPP,AAAAA,BBBBB,CCCCC,XX */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) andi r2, r2, 0x3f /* r2 = 00000000000000000000000000,PPPPPP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) andi r3, r3, 0x7c /* r3 = 0000000000000000000000000,AAAAA,00 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) andi r5, r5, 0x7c /* r5 = 0000000000000000000000000,BBBBB,00 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) andi r6, r6, 0x7c /* r6 = 0000000000000000000000000,CCCCC,00 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) /* Now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) * r2 = OP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) * r3 = 4*A
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) * r4 = IMM16 (sign extended)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) * r5 = 4*B
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) * r6 = 4*C
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) * Get the operands.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) * It is necessary to check for muli because it uses an I-type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) * instruction format, while the other instructions have an R-type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) * format.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) * Prepare for either multiplication or division loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) * They both loop 32 times.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) movi r14, 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) add r3, r3, sp /* r3 = address of A-operand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) ldw r3, 0(r3) /* r3 = A-operand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) movi r7, 0x24 /* muli opcode (I-type instruction format) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) beq r2, r7, mul_immed /* muli doesn't use the B register as a source */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) add r5, r5, sp /* r5 = address of B-operand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) ldw r5, 0(r5) /* r5 = B-operand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) /* r4 = SSSSSSSSSSSSSSSS,-----IMM16------ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) /* IMM16 not needed, align OPX portion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) /* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,00000 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) srli r4, r4, 5 /* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) andi r4, r4, 0x3f /* r4 = 00000000000000000000000000,-OPX-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) /* Now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) * r2 = OP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) * r3 = src1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) * r5 = src2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) * r4 = OPX (no longer can be muli)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) * r6 = 4*C
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) * Multiply or Divide?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) andi r7, r4, 0x02 /* For R-type multiply instructions,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) OPX & 0x02 != 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) bne r7, zero, multiply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) /* DIVISION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) * Divide an unsigned dividend by an unsigned divisor using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) * a shift-and-subtract algorithm. The example below shows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) * 43 div 7 = 6 for 8-bit integers. This classic algorithm uses a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) * single register to store both the dividend and the quotient,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) * allowing both values to be shifted with a single instruction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) * remainder dividend:quotient
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * --------- -----------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) * initialize 00000000 00101011:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) * shift 00000000 0101011:_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * remainder >= divisor? no 00000000 0101011:0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) * shift 00000000 101011:0_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) * remainder >= divisor? no 00000000 101011:00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) * shift 00000001 01011:00_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) * remainder >= divisor? no 00000001 01011:000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) * shift 00000010 1011:000_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) * remainder >= divisor? no 00000010 1011:0000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) * shift 00000101 011:0000_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) * remainder >= divisor? no 00000101 011:00000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) * shift 00001010 11:00000_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) * remainder >= divisor? yes 00001010 11:000001
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) * remainder -= divisor - 00000111
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) * ----------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) * 00000011 11:000001
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) * shift 00000111 1:000001_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) * remainder >= divisor? yes 00000111 1:0000011
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) * remainder -= divisor - 00000111
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) * ----------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * 00000000 1:0000011
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * shift 00000001 :0000011_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * remainder >= divisor? no 00000001 :00000110
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) * The quotient is 00000110.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) divide:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * Prepare for division by assuming the result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) * is unsigned, and storing its "sign" as 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) movi r17, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) /* Which division opcode? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) xori r7, r4, 0x25 /* OPX of div */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) bne r7, zero, unsigned_division
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) * OPX is div. Determine and store the sign of the quotient.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) * Then take the absolute value of both operands.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) xor r17, r3, r5 /* MSB contains sign of quotient */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) bge r3,zero,dividend_is_nonnegative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) sub r3, zero, r3 /* -r3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) dividend_is_nonnegative:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) bge r5, zero, divisor_is_nonnegative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) sub r5, zero, r5 /* -r5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) divisor_is_nonnegative:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) unsigned_division:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) /* Initialize the unsigned-division loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) movi r13, 0 /* remainder = 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) /* Now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) * r3 = dividend : quotient
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) * r4 = 0x25 for div, 0x24 for divu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) * r5 = divisor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) * r13 = remainder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) * r14 = loop counter (already initialized to 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) * r17 = MSB contains sign of quotient
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) * for (count = 32; count > 0; --count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) * {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) divide_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) * Division:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) * (remainder:dividend:quotient) <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) slli r13, r13, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) cmplt r7, r3, zero /* r7 = MSB of r3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) or r13, r13, r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) slli r3, r3, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) * if (remainder >= divisor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) * {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) * set LSB of quotient
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) * remainder -= divisor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) * }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) bltu r13, r5, div_skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) ori r3, r3, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) sub r13, r13, r5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) div_skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) * }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) subi r14, r14, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) bne r14, zero, divide_loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) /* Now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) * r3 = quotient
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) * r4 = 0x25 for div, 0x24 for divu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) * r6 = 4*C
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) * r17 = MSB contains sign of quotient
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) * Conditionally negate signed quotient. If quotient is unsigned,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) * the sign already is initialized to 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) bge r17, zero, quotient_is_nonnegative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) sub r3, zero, r3 /* -r3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) quotient_is_nonnegative:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) * Final quotient is in r3.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) add r6, r6, sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) stw r3, 0(r6) /* write quotient to stack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) br restore_registers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) /* MULTIPLICATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) * A "product" is the number that one gets by summing a "multiplicand"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) * several times. The "multiplier" specifies the number of copies of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) * multiplicand that are summed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) * Actual multiplication algorithms don't use repeated addition, however.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) * Shift-and-add algorithms get the same answer as repeated addition, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) * they are faster. To compute the lower half of a product (pppp below)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) * one shifts the product left before adding in each of the partial
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) * products (a * mmmm) through (d * mmmm).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) * To compute the upper half of a product (PPPP below), one adds in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) * partial products (d * mmmm) through (a * mmmm), each time following
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) * the add by a right shift of the product.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) * mmmm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) * * abcd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) * ------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) * #### = d * mmmm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) * #### = c * mmmm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) * #### = b * mmmm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) * #### = a * mmmm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) * --------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) * PPPPpppp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) * The example above shows 4 partial products. Computing actual Nios II
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) * products requires 32 partials.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) * It is possible to compute the result of mulxsu from the result of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) * mulxuu because the only difference between the results of these two
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) * opcodes is the value of the partial product associated with the sign
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) * bit of rA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) * mulxsu = mulxuu - ((rA < 0) ? rB : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) * It is possible to compute the result of mulxss from the result of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) * mulxsu because the only difference between the results of these two
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) * opcodes is the value of the partial product associated with the sign
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) * bit of rB.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) * mulxss = mulxsu - ((rB < 0) ? rA : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) mul_immed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) /* Opcode is muli. Change it into mul for remainder of algorithm. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) mov r6, r5 /* Field B is dest register, not field C. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) mov r5, r4 /* Field IMM16 is src2, not field B. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) movi r4, 0x27 /* OPX of mul is 0x27 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) multiply:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) /* Initialize the multiplication loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) movi r9, 0 /* mul_product = 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) movi r10, 0 /* mulxuu_product = 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) mov r11, r5 /* save original multiplier for mulxsu and mulxss */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) mov r12, r5 /* mulxuu_multiplier (will be shifted) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) movi r16, 1 /* used to create "rori B,A,1" from "ror B,A,r16" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) /* Now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) * r3 = multiplicand
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) * r5 = mul_multiplier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) * r6 = 4 * dest_register (used later as offset to sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) * r7 = temp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) * r9 = mul_product
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) * r10 = mulxuu_product
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) * r11 = original multiplier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) * r12 = mulxuu_multiplier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) * r14 = loop counter (already initialized)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) * r16 = 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) * for (count = 32; count > 0; --count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) * {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) multiply_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) * mul_product <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) * lsb = mulxuu_multiplier & 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) slli r9, r9, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) andi r7, r12, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) * if (lsb == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) * {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) * mulxuu_product += multiplicand;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) * }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) beq r7, zero, mulx_skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) add r10, r10, r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) cmpltu r7, r10, r3 /* Save the carry from the MSB of mulxuu_product. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) ror r7, r7, r16 /* r7 = 0x80000000 on carry, or else 0x00000000 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) mulx_skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) * if (MSB of mul_multiplier == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) * {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) * mul_product += multiplicand;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) * }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) bge r5, zero, mul_skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) add r9, r9, r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) mul_skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) * mulxuu_product >>= 1; logical shift
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) * mul_multiplier <<= 1; done with MSB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) * mulx_multiplier >>= 1; done with LSB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) srli r10, r10, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) or r10, r10, r7 /* OR in the saved carry bit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) slli r5, r5, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) srli r12, r12, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) * }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) subi r14, r14, 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) bne r14, zero, multiply_loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) * Multiply emulation loop done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) /* Now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) * r3 = multiplicand
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) * r4 = OPX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) * r6 = 4 * dest_register (used later as offset to sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) * r7 = temp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) * r9 = mul_product
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) * r10 = mulxuu_product
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) * r11 = original multiplier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) /* Calculate address for result: r6 = sp + 4 * dest_register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) add r6, r6, sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) * Select/compute the result based on OPX (r4); the result is built up in r9.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) /* OPX == mul? Then store. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) xori r7, r4, 0x27 /* r7 == 0 only when OPX == 0x27 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) beq r7, zero, store_product /* r9 already holds mul_product */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) /* It's one of the mulx.. opcodes. Move over the result. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) mov r9, r10 /* r9 = mulxuu_product */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) /* OPX == mulxuu? Then store. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) xori r7, r4, 0x07
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) beq r7, zero, store_product
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) /* Compute mulxsu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) * mulxsu = mulxuu - ((rA < 0) ? rB : 0); here rA is r3 and rB is r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) bge r3, zero, mulxsu_skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) sub r9, r9, r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) mulxsu_skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) /* OPX == mulxsu? Then store. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) xori r7, r4, 0x17
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) beq r7, zero, store_product
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) /* Compute mulxss
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) * mulxss = mulxsu - ((rB < 0) ? rA : 0); here rB is r11 and rA is r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) bge r11,zero,mulxss_skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) sub r9, r9, r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) mulxss_skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) /* At this point, assume that OPX is mulxss, so store. OPX is not tested again here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) store_product:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) stw r9, 0(r6) /* write the result into the stack slot at r6; restore_registers reloads registers from these slots */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) restore_registers:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) /* Reload the register file from the frame at sp, then eret. No need to restore r0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) ldw r5, 100(sp) /* r5 is only a carrier here; its own value is reloaded from 20(sp) below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) wrctl estatus, r5 /* estatus = word held at 100(sp) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) ldw r1, 4(sp) /* r1..r23 are each read from offset 4 * register number */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) ldw r2, 8(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) ldw r3, 12(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) ldw r4, 16(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) ldw r5, 20(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) ldw r6, 24(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) ldw r7, 28(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) ldw r8, 32(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) ldw r9, 36(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) ldw r10, 40(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) ldw r11, 44(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) ldw r12, 48(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) ldw r13, 52(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) ldw r14, 56(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) ldw r15, 60(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) ldw r16, 64(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) ldw r17, 68(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) ldw r18, 72(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) ldw r19, 76(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) ldw r20, 80(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) ldw r21, 84(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) ldw r22, 88(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) ldw r23, 92(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) /* Does not need to restore et; the word at 96(sp) is not read here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) ldw gp, 104(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) ldw fp, 112(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) ldw ea, 116(sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) ldw ra, 120(sp) /* NOTE(review): ra is r31, yet it is read from 120, not 4 * 31 = 124 as the 4 * N pattern above would give -- confirm against the save code */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) ldw sp, 108(sp) /* restore sp last: every load above is sp-relative */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) eret /* return from the exception using the ea and estatus values set above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) .set at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) .set break