.file "reg_round.S"
/*---------------------------------------------------------------------------+
 | reg_round.S                                                               |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993,1995,1997                                              |
 |     W. Metzenthen, 22 Parker St, Ormond, Vic 3163,                        |
 |     Australia.  E-mail billm@suburbia.net                                 |
 |                                                                           |
 | Entry points defined in this file:                                        |
 |   FPU_round       - called from C (see below).                            |
 |   fpu_reg_round   - must be entered by a jmp instruction.                 |
 |   fpu_Arith_exit  - must be entered by a jmp instruction.                 |
 | NOTE(review): this header used to announce four entry points and a        |
 | fpu_reg_round_sqrt entry; no such label is defined in this file.          |
 |                                                                           |
 | The FPU_round entry point is intended to be used by C code.               |
 | The code below reads its arguments as:                                    |
 |   PARAM1  pointer to the FPU_REG; the result is stored back into it       |
 |   PARAM2  32 bit extension of the significand                             |
 |   PARAM3  not referenced in this file                                     |
 |   PARAM4  control word (its CW_PC and CW_RC fields select the rounding)   |
 |   PARAM5  sign of the result, compared as a byte against SIGN_POS         |
 | NOTE(review): this header used to give a three argument prototype,        |
 |   int FPU_round(FPU_REG *arg, unsigned int extent,                        |
 |                 unsigned int control_w)                                   |
 | which does not match the PARAM4/PARAM5 usage; confirm the real            |
 | prototype against the C declaration.                                      |
 |                                                                           |
 | Return value is the tag of the answer, or-ed with FPU_Exception if        |
 | one was raised, or -1 on internal error.                                  |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Entry conditions.                                                         |
 |                                                                           |
 | Needed by the fpu_reg_round entry point:                                  |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |  PARAM4     control word in the same format as the FPU control word       |
 |  PARAM5     sign of the result (compared against SIGN_POS)                |
 |                                                                           |
 | NOTE(review): this header also described a %cx control word input for    |
 | a fpu_reg_round_sqrt entry point, which is not defined in this file.      |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=>  the significand is less than 1/2 an ls   |
 |                                  bit smaller than the magnitude of the    |
 |                                  true exact result.                       |
 |         exactly 0x80000000  <=>  the significand is exactly 1/2 an ls bit |
 |                                  smaller than the magnitude of the true   |
 |                                  exact result.                            |
 |    greater than 0x80000000  <=>  the significand is more than 1/2 an ls   |
 |                                  bit smaller than the magnitude of the    |
 |                                  true exact result.                       |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | The code in this module has become quite complex, but it should handle    |
 | all of the FPU flags which are set at this stage of the basic arithmetic  |
 | computations.                                                             |
 | There are a few rare cases where the results are not set identically to   |
 | a real FPU. These require a bit more thought because at this stage the    |
 | results of the code here appear to be more consistent...                  |
 | This may be changed in a future version.                                  |
 +---------------------------------------------------------------------------*/


#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/*	Make the code re-entrant by putting
	local storage on the stack: */
/*
 * Both locals are addressed relative to %esp, so they may only be
 * referenced while nothing extra is pushed on top of the slot that
 * fpu_reg_round reserves below.
 */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*	Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/* Entry point when called from C */
ENTRY(FPU_round)
	/* Build the stack frame which fpu_reg_round expects */
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	/* Load the registers which fpu_reg_round expects */
	movl	PARAM1,%edi
	movl	SIGH(%edi),%eax
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx	/* control word */

#ifndef NON_REENTRANT_FPU
	/* Reserve one stack slot for FPU_bits_lost and FPU_denormal;
	   the value pushed is irrelevant. */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	/* Keep the whole control word in %esi, then dispatch on the
	   precision control field. */
	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */
	/* Without PARANOID, an unmatched precision field falls through
	   to the 24 bit code. */


/* Round etc to 24 bit precision */
LRound_To_24:
	/* Dispatch on the rounding control field */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

LCheck_24_round_up:
	/* Round up if any bit below the 24th is set */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	/* This addl must stay the last flag-setting instruction:
	   LCheck_Round_Overflow tests its carry, and the jmp leaves
	   the flags untouched. */
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/* Round etc to 53 bit precision */
LRound_To_53:
	/* Dispatch on the rounding control field */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

LCheck_53_round_up:
	/* Round up if any bit below the 53rd is set */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	/* The carry out of this addl/adcl pair is tested at
	   LCheck_Round_Overflow; the jmp leaves the flags untouched. */
	addl	$0x00000800,%ebx
	adcl	$0,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/* Round etc to 64 bit precision */
LRound_To_64:
	/* Dispatch on the rounding control field */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	/* Round up if the extension is non-zero */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	/* Round up if the extension is non-zero */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	/* Compare the extension with one half of an ls bit */
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64

	jne	LDo_64_round_up

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax
	/* Falls through with the carry from the adcl still live */

LCheck_Round_Overflow:
	/* Entered with CF holding the carry out of the significand
	   increment. */
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

L_Re_normalise:
	/* Non-zero FPU_denormal means L_Make_denorm took some action
	   on entry which must now be followed up. */
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

L_deNormalised:
	/* %edx holds the tag which will be returned */
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax		/* The tag is the return value */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Drop the slot reserved at fpu_reg_round; the value popped is
	   replaced by the popl %ebx which follows. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

fpu_Arith_exit:
	/* Undo the frame built by FPU_round (or by the jmp caller) */
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 */
L_precision_lost_up:
	/* %edx (tag) and %eax (ms significand) are saved across the call */
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 */
L_precision_lost_down:
	/* %edx (tag) and %eax (ms significand) are saved across the call */
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	/* FPU_denormal is written before the pushl below because it is
	   %esp relative in the re-entrant build. */
	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	/* %cx = number of bit positions to shift right */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx

	cmpw	$64,%cx	/* shifts of 64 or more are handled separately */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* Keep the extension non-zero if it was */
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)
	subb	$32,%cl
	orl	%edx,%edx
	setne	%ch		/* Save whether %edx is non-zero */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	/* Move everything down by one more 32 bit word */
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al		/* Record any non-zero bits being discarded */
	movl	%eax,%edx	/* The ms word becomes the extension */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


Unmasked_underflow:
	/* No shift is done; the flag is acted on at Normalise_result */
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/* Undo the de-normalisation. */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	/* Still using the flags from the orl above */
	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	movl	TAG_Special,%edx	/* movl leaves the testb flags intact */
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax			/* Save the ms significand */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* Discard the argument */
	popl	%eax			/* Restore the ms significand */
	movl	TAG_Special,%edx	/* Reload after the call */
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax			/* Save the ms significand */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* Discard the argument */
	popl	%eax			/* Restore the ms significand */

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	jmp	L_Store_significand


/* The operations resulted in a number too large to represent. */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	/* %eax is left as arith_overflow returned it */
	jmp	fpu_reg_round_signed_special_exit


Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax			/* Save the ms significand */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* Discard the argument */
	popl	%eax			/* Restore the ms significand */
	jmp	L_Normalised


#ifdef PARANOID
/*
 * Internal error reports.  Each one passes its own EX_INTERNAL code to
 * EXCEPTION, discards the argument with the popl, and leaves through
 * L_exception_exit which returns -1.
 */
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */