Qrack  9.0
General classical-emulating-quantum development framework
Classes | Typedefs | Variables
half_float::detail Namespace Reference

Classes

struct  conditional
 Conditional type. More...
 
struct  conditional< false, T, F >
 
struct  bool_type
 Helper for tag dispatching. More...
 
struct  is_float
 Type traits for floating-point types. More...
 
struct  is_float< const T >
 
struct  is_float< volatile T >
 
struct  is_float< const volatile T >
 
struct  is_float< float >
 
struct  is_float< double >
 
struct  is_float< long double >
 
struct  bits
 Type traits for floating-point bits. More...
 
struct  bits< const T >
 
struct  bits< volatile T >
 
struct  bits< const volatile T >
 
struct  bits< float >
 Unsigned integer of (at least) 32 bits width. More...
 
struct  bits< double >
 Unsigned integer of (at least) 64 bits width. More...
 
struct  binary_t
 Tag type for binary construction. More...
 
struct  f31
 Class for 1.31 unsigned floating-point computation. More...
 
struct  half_caster
 Helper class for half casts. More...
 
struct  half_caster< half, U, R >
 
struct  half_caster< T, half, R >
 
struct  half_caster< half, half, R >
 

Typedefs

typedef bool_type< true > true_type
 
typedef bool_type< false > false_type
 
typedef unsigned short uint16
 Unsigned integer of (at least) 16 bits width. More...
 
typedef unsigned long uint32
 Fastest unsigned integer of (at least) 32 bits width. More...
 
typedef long int32
 Fastest signed integer of (at least) 32 bits width. More...
 

Functions

Implementation defined classification and arithmetic
template<typename T >
bool builtin_isinf (T arg)
 Check for infinity. More...
 
template<typename T >
bool builtin_isnan (T arg)
 Check for NaN. More...
 
template<typename T >
bool builtin_signbit (T arg)
 Check sign. More...
 
uint32 sign_mask (uint32 arg)
 Platform-independent sign mask. More...
 
uint32 arithmetic_shift (uint32 arg, int i)
 Platform-independent arithmetic right shift. More...
 
Error handling
int & errflags ()
 Internal exception flags. More...
 
void raise (int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond)=true)
 Raise floating-point exception. More...
 
HALF_CONSTEXPR_NOERR bool compsignal (unsigned int x, unsigned int y)
 Check and signal for any NaN. More...
 
HALF_CONSTEXPR_NOERR unsigned int signal (unsigned int nan)
 Signal and silence signaling NaN. More...
 
HALF_CONSTEXPR_NOERR unsigned int signal (unsigned int x, unsigned int y)
 Signal and silence signaling NaNs. More...
 
HALF_CONSTEXPR_NOERR unsigned int signal (unsigned int x, unsigned int y, unsigned int z)
 Signal and silence signaling NaNs. More...
 
HALF_CONSTEXPR_NOERR unsigned int select (unsigned int x, unsigned int HALF_UNUSED_NOERR(y))
 Select value or signaling NaN. More...
 
HALF_CONSTEXPR_NOERR unsigned int invalid ()
 Raise domain error and return NaN. More...
 
HALF_CONSTEXPR_NOERR unsigned int pole (unsigned int sign=0)
 Raise pole error and return infinity. More...
 
HALF_CONSTEXPR_NOERR unsigned int check_underflow (unsigned int arg)
 Check value for underflow. More...
 
Conversion and rounding
template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int overflow (unsigned int sign=0)
 Half-precision overflow. More...
 
template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int underflow (unsigned int sign=0)
 Half-precision underflow. More...
 
template<std::float_round_style R, bool I>
HALF_CONSTEXPR_NOERR unsigned int rounded (unsigned int value, int g, int s)
 Round half-precision number. More...
 
template<std::float_round_style R, bool E, bool I>
unsigned int integral (unsigned int value)
 Round half-precision number to nearest integer value. More...
 
template<std::float_round_style R, unsigned int F, bool S, bool N, bool I>
unsigned int fixed2half (uint32 m, int exp=14, unsigned int sign=0, int s=0)
 Convert fixed point to half-precision floating-point. More...
 
template<std::float_round_style R>
unsigned int float2half_impl (float value, true_type)
 Convert IEEE single-precision to half-precision. More...
 
template<std::float_round_style R>
unsigned int float2half_impl (double value, true_type)
 Convert IEEE double-precision to half-precision. More...
 
template<std::float_round_style R, typename T >
unsigned int float2half_impl (T value,...)
 Convert non-IEEE floating-point to half-precision. More...
 
template<std::float_round_style R, typename T >
unsigned int float2half (T value)
 Convert floating-point to half-precision. More...
 
template<std::float_round_style R, typename T >
unsigned int int2half (T value)
 Convert integer to half-precision floating-point. More...
 
float half2float_impl (unsigned int value, float, true_type)
 Convert half-precision to IEEE single-precision. More...
 
double half2float_impl (unsigned int value, double, true_type)
 Convert half-precision to IEEE double-precision. More...
 
template<typename T >
T half2float_impl (unsigned int value, T,...)
 Convert half-precision to non-IEEE floating-point. More...
 
template<typename T >
T half2float (unsigned int value)
 Convert half-precision to floating-point. More...
 
template<std::float_round_style R, bool E, bool I, typename T >
T half2int (unsigned int value)
 Convert half-precision floating-point to integer. More...
 
Mathematics
template<std::float_round_style R>
uint32 mulhi (uint32 x, uint32 y)
 Upper part of 64-bit multiplication. More...
 
uint32 multiply64 (uint32 x, uint32 y)
 64-bit multiplication. More...
 
uint32 divide64 (uint32 x, uint32 y, int &s)
 64-bit division. More...
 
template<bool Q, bool R>
unsigned int mod (unsigned int x, unsigned int y, int *quo=NULL)
 Half precision positive modulus. More...
 
template<unsigned int F>
uint32 sqrt (uint32 &r, int &exp)
 Fixed point square root. More...
 
uint32 exp2 (uint32 m, unsigned int n=32)
 Fixed point binary exponential. More...
 
uint32 log2 (uint32 m, unsigned int n=32)
 Fixed point binary logarithm. More...
 
std::pair< uint32, uint32 > sincos (uint32 mz, unsigned int n=31)
 Fixed point sine and cosine. More...
 
uint32 atan2 (uint32 my, uint32 mx, unsigned int n=31)
 Fixed point arc tangent. More...
 
uint32 angle_arg (unsigned int abs, int &k)
 Reduce argument for trigonometric functions. More...
 
std::pair< uint32, uint32 > atan2_args (unsigned int abs)
 Get arguments for atan2 function. More...
 
std::pair< uint32, uint32 > hyperbolic_args (unsigned int abs, int &exp, unsigned int n=32)
 Get exponentials for hyperbolic computation. More...
 
template<std::float_round_style R>
unsigned int exp2_post (uint32 m, int exp, bool esign, unsigned int sign=0, unsigned int n=32)
 Postprocessing for binary exponential. More...
 
template<std::float_round_style R, uint32 L>
unsigned int log2_post (uint32 m, int ilog, int exp, unsigned int sign=0)
 Postprocessing for binary logarithm. More...
 
template<std::float_round_style R>
unsigned int hypot_post (uint32 r, int exp)
 Hypotenuse square root and postprocessing. More...
 
template<std::float_round_style R>
unsigned int tangent_post (uint32 my, uint32 mx, int exp, unsigned int sign=0)
 Division and postprocessing for tangents. More...
 
template<std::float_round_style R, bool S>
unsigned int area (unsigned int arg)
 Area function and postprocessing. More...
 
template<std::float_round_style R, bool C>
unsigned int erf (unsigned int arg)
 Error function and postprocessing. More...
 
template<std::float_round_style R, bool L>
unsigned int gamma (unsigned int arg)
 Gamma function and postprocessing. More...
 

Variables

HALF_CONSTEXPR_CONST binary_t binary = binary_t()
 Tag for binary construction. More...
 

Typedef Documentation

◆ false_type

◆ int32

Fastest signed integer of (at least) 32 bits width.

◆ true_type

◆ uint16

typedef unsigned short half_float::detail::uint16

Unsigned integer of (at least) 16 bits width.

◆ uint32

typedef unsigned long half_float::detail::uint32

Fastest unsigned integer of (at least) 32 bits width.

Function Documentation

◆ angle_arg()

uint32 half_float::detail::angle_arg ( unsigned int  abs,
int &  k 
)
inline

Reduce argument for trigonometric functions.

Parameters
abs: half-precision floating-point value
k: value to take quarter period
Returns
abs reduced to [-pi/4,pi/4] as Q0.30

◆ area()

template<std::float_round_style R, bool S>
unsigned int half_float::detail::area ( unsigned int  arg)

Area function and postprocessing.

This computes the value directly in Q2.30 using the representations asinh(x) = log(x + sqrt(x^2 + 1)) and acosh(x) = log(x + sqrt(x^2 - 1)).

Template Parameters
R: rounding mode to use
S: true for asinh, false for acosh
Parameters
arg: half-precision argument
Returns
asinh|acosh(arg) converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if no other exception occurred

◆ arithmetic_shift()

uint32 half_float::detail::arithmetic_shift ( uint32  arg,
int  i 
)
inline

Platform-independent arithmetic right shift.

Parameters
arg: integer value in two's complement
i: shift amount (at most 31)
Returns
arg right shifted for i bits with possible sign extension

◆ atan2()

uint32 half_float::detail::atan2 ( uint32  my,
uint32  mx,
unsigned int  n = 31 
)
inline

Fixed point arc tangent.

This uses the CORDIC algorithm in vectoring mode.

Parameters
my: y coordinate as Q0.30
mx: x coordinate as Q0.30
n: number of iterations (at most 31)
Returns
arc tangent of my / mx as Q1.30

◆ atan2_args()

std::pair<uint32, uint32> half_float::detail::atan2_args ( unsigned int  abs)
inline

Get arguments for atan2 function.

Parameters
abs: half-precision floating-point value
Returns
abs and sqrt(1 - abs^2) as Q0.30

◆ builtin_isinf()

template<typename T >
bool half_float::detail::builtin_isinf ( T  arg)

Check for infinity.

Template Parameters
T: argument type (builtin floating-point type)
Parameters
arg: value to query
Return values
true: if infinity
false: else

◆ builtin_isnan()

template<typename T >
bool half_float::detail::builtin_isnan ( T  arg)

Check for NaN.

Template Parameters
T: argument type (builtin floating-point type)
Parameters
arg: value to query
Return values
true: if not a number
false: else

◆ builtin_signbit()

template<typename T >
bool half_float::detail::builtin_signbit ( T  arg)

Check sign.

Template Parameters
T: argument type (builtin floating-point type)
Parameters
arg: value to query
Return values
true: if signbit set
false: else

◆ check_underflow()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::check_underflow ( unsigned int  arg)
inline

Check value for underflow.

Parameters
arg: non-zero half-precision value to check
Returns
arg
Exceptions
FE_UNDERFLOW: if arg is subnormal

◆ compsignal()

HALF_CONSTEXPR_NOERR bool half_float::detail::compsignal ( unsigned int  x,
unsigned int  y 
)
inline

Check and signal for any NaN.

Parameters
x: first half-precision value to check
y: second half-precision value to check
Return values
true: if either x or y is NaN
false: else
Exceptions
FE_INVALID: if x or y is NaN

◆ divide64()

uint32 half_float::detail::divide64 ( uint32  x,
uint32  y,
int &  s 
)
inline

64-bit division.

Parameters
x: upper 32 bit of dividend
y: divisor
s: variable to store sticky bit for rounding
Returns
(x << 32) / y

◆ erf()

template<std::float_round_style R, bool C>
unsigned int half_float::detail::erf ( unsigned int  arg)

Error function and postprocessing.

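This computes the value directly in Q1.31 using approximations with elementary functions (presumably those listed in the Wikipedia article on the error function; the original hyperlink was lost in extraction).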

Template Parameters
R: rounding mode to use
C: true for complementary error function, false else
Parameters
arg: half-precision function argument
Returns
approximated value of error function in half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if no other exception occurred

◆ errflags()

int& half_float::detail::errflags ( )
inline

Internal exception flags.

Returns
reference to global exception flags

◆ exp2()

uint32 half_float::detail::exp2 ( uint32  m,
unsigned int  n = 32 
)
inline

Fixed point binary exponential.

This uses the BKM algorithm in E-mode.

Parameters
m: exponent in [0,1) as Q0.31
n: number of iterations (at most 32)
Returns
2 ^ m as Q1.31

◆ exp2_post()

template<std::float_round_style R>
unsigned int half_float::detail::exp2_post ( uint32  m,
int  exp,
bool  esign,
unsigned int  sign = 0,
unsigned int  n = 32 
)

Postprocessing for binary exponential.

Template Parameters
R: rounding mode to use
Parameters
m: fractional part of exponent as Q0.31
exp: absolute value of unbiased exponent
esign: sign of actual exponent
sign: sign bit of result
n: number of BKM iterations (at most 32)
Returns
value converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded or I is true

◆ fixed2half()

template<std::float_round_style R, unsigned int F, bool S, bool N, bool I>
unsigned int half_float::detail::fixed2half ( uint32  m,
int  exp = 14,
unsigned int  sign = 0,
int  s = 0 
)

Convert fixed point to half-precision floating-point.

Template Parameters
R: rounding mode to use
F: number of fractional bits in [11,31]
S: true for signed, false for unsigned
N: true for additional normalization step, false if already normalized to 1.F
I: true to always raise INEXACT exception, false to raise only for rounded results
Parameters
m: mantissa in Q1.F fixed point format
exp: biased exponent - 1
sign: half-precision value with sign bit only
s: sticky bit (or of all but the most significant already discarded bits)
Returns
value converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded or I is true

◆ float2half()

template<std::float_round_style R, typename T >
unsigned int half_float::detail::float2half ( T  value)

Convert floating-point to half-precision.

Template Parameters
R: rounding mode to use
T: source type (builtin floating-point type)
Parameters
value: floating-point value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ float2half_impl() [1/3]

template<std::float_round_style R>
unsigned int half_float::detail::float2half_impl ( double  value,
true_type   
)

Convert IEEE double-precision to half-precision.

Template Parameters
R: rounding mode to use
Parameters
value: double-precision value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ float2half_impl() [2/3]

template<std::float_round_style R>
unsigned int half_float::detail::float2half_impl ( float  value,
true_type   
)

Convert IEEE single-precision to half-precision.

Credit for this goes to Jeroen van der Zijp.

Template Parameters
R: rounding mode to use
Parameters
value: single-precision value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ float2half_impl() [3/3]

template<std::float_round_style R, typename T >
unsigned int half_float::detail::float2half_impl ( T  value,
  ... 
)

Convert non-IEEE floating-point to half-precision.

Template Parameters
R: rounding mode to use
T: source type (builtin floating-point type)
Parameters
value: floating-point value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ gamma()

template<std::float_round_style R, bool L>
unsigned int half_float::detail::gamma ( unsigned int  arg)

Gamma function and postprocessing.

This approximates the value of either the gamma function or its logarithm directly in Q1.31.

Template Parameters
R: rounding mode to use
L: true for logarithm of gamma function, false for gamma function
Parameters
arg: half-precision floating-point value
Returns
lgamma/tgamma(arg) in half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if arg is not a positive integer

◆ half2float()

template<typename T >
T half_float::detail::half2float ( unsigned int  value)

Convert half-precision to floating-point.

Template Parameters
T: type to convert to (builtin floating-point type)
Parameters
value: half-precision value to convert
Returns
floating-point value

◆ half2float_impl() [1/3]

double half_float::detail::half2float_impl ( unsigned int  value,
double  ,
true_type   
)
inline

Convert half-precision to IEEE double-precision.

Parameters
value: half-precision value to convert
Returns
double-precision value

◆ half2float_impl() [2/3]

float half_float::detail::half2float_impl ( unsigned int  value,
float  ,
true_type   
)
inline

Convert half-precision to IEEE single-precision.

Credit for this goes to Jeroen van der Zijp.

Parameters
value: half-precision value to convert
Returns
single-precision value

◆ half2float_impl() [3/3]

template<typename T >
T half_float::detail::half2float_impl ( unsigned int  value,
T  ,
  ... 
)

Convert half-precision to non-IEEE floating-point.

Template Parameters
T: type to convert to (builtin floating-point type)
Parameters
value: half-precision value to convert
Returns
floating-point value

◆ half2int()

template<std::float_round_style R, bool E, bool I, typename T >
T half_float::detail::half2int ( unsigned int  value)

Convert half-precision floating-point to integer.

Template Parameters
R: rounding mode to use
E: true for round to even, false for round away from zero
I: true to raise INEXACT exception (if inexact), false to never raise it
T: type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
Parameters
value: half-precision value to convert
Returns
rounded integer value
Exceptions
FE_INVALID: if value is not representable in type T
FE_INEXACT: if value had to be rounded and I is true

◆ hyperbolic_args()

std::pair<uint32, uint32> half_float::detail::hyperbolic_args ( unsigned int  abs,
int &  exp,
unsigned int  n = 32 
)
inline

Get exponentials for hyperbolic computation.

Parameters
abs: half-precision floating-point value
exp: variable to take unbiased exponent of larger result
n: number of BKM iterations (at most 32)
Returns
exp(abs) and exp(-abs) as Q1.31 with same exponent

◆ hypot_post()

template<std::float_round_style R>
unsigned int half_float::detail::hypot_post ( uint32  r,
int  exp 
)

Hypotenuse square root and postprocessing.

Template Parameters
R: rounding mode to use
Parameters
r: mantissa as Q2.30
exp: biased exponent
Returns
square root converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded

◆ int2half()

template<std::float_round_style R, typename T >
unsigned int half_float::detail::int2half ( T  value)

Convert integer to half-precision floating-point.

Template Parameters
R: rounding mode to use
T: type to convert (builtin integer type)
Parameters
value: integral value to convert
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_INEXACT: if value had to be rounded

◆ integral()

template<std::float_round_style R, bool E, bool I>
unsigned int half_float::detail::integral ( unsigned int  value)

Round half-precision number to nearest integer value.

Template Parameters
R: rounding mode to use
E: true for round to even, false for round away from zero
I: true to raise INEXACT exception (if inexact), false to never raise it
Parameters
value: half-precision value to round
Returns
half-precision bits for nearest integral value
Exceptions
FE_INVALID: for signaling NaN
FE_INEXACT: if value had to be rounded and I is true

◆ invalid()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::invalid ( )
inline

Raise domain error and return NaN.

Returns
quiet NaN

Exceptions
FE_INVALID

◆ log2()

uint32 half_float::detail::log2 ( uint32  m,
unsigned int  n = 32 
)
inline

Fixed point binary logarithm.

This uses the BKM algorithm in L-mode.

Parameters
m: mantissa in [1,2) as Q1.30
n: number of iterations (at most 32)
Returns
log2(m) as Q0.31

◆ log2_post()

template<std::float_round_style R, uint32 L>
unsigned int half_float::detail::log2_post ( uint32  m,
int  ilog,
int  exp,
unsigned int  sign = 0 
)

Postprocessing for binary logarithm.

Template Parameters
R: rounding mode to use
L: logarithm for base transformation as Q1.31
Parameters
m: fractional part of logarithm as Q0.31
ilog: signed integer part of logarithm
exp: biased exponent of result
sign: sign bit of result
Returns
value base-transformed and converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if no other exception occurred

◆ mod()

template<bool Q, bool R>
unsigned int half_float::detail::mod ( unsigned int  x,
unsigned int  y,
int *  quo = NULL 
)

Half precision positive modulus.

Template Parameters
Q: true to compute full quotient, false else
R: true to compute signed remainder, false for positive remainder
Parameters
x: first operand as positive finite half-precision value
y: second operand as positive finite half-precision value
quo: address to store quotient at, nullptr if Q false
Returns
modulus of x / y

◆ mulhi()

template<std::float_round_style R>
uint32 half_float::detail::mulhi ( uint32  x,
uint32  y 
)

Upper part of 64-bit multiplication.

Template Parameters
R: rounding mode to use
Parameters
x: first factor
y: second factor
Returns
upper 32 bit of x * y

◆ multiply64()

uint32 half_float::detail::multiply64 ( uint32  x,
uint32  y 
)
inline

64-bit multiplication.

Parameters
x: first factor
y: second factor
Returns
upper 32 bit of x * y rounded to nearest

◆ overflow()

template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::overflow ( unsigned int  sign = 0)

Half-precision overflow.

Template Parameters
R: rounding mode to use
Parameters
sign: half-precision value with sign bit only
Returns
rounded overflowing half-precision value
Exceptions
FE_OVERFLOW

◆ pole()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::pole ( unsigned int  sign = 0)
inline

Raise pole error and return infinity.

Parameters
sign: half-precision value with sign bit only
Returns
half-precision infinity with sign of sign
Exceptions
FE_DIVBYZERO

◆ raise()

void half_float::detail::raise ( int  HALF_UNUSED_NOERR(flags),
bool  HALF_UNUSED_NOERR(cond) = true 
)
inline

Raise floating-point exception.

Parameters
flags: exceptions to raise
cond: condition to raise exceptions for

◆ rounded()

template<std::float_round_style R, bool I>
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::rounded ( unsigned int  value,
int  g,
int  s 
)

Round half-precision number.

Template Parameters
R: rounding mode to use
I: true to always raise INEXACT exception, false to raise only for rounded results
Parameters
value: finite half-precision number to round
g: guard bit (most significant discarded bit)
s: sticky bit (or of all but the most significant discarded bits)
Returns
rounded half-precision value
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if value had to be rounded or I is true

◆ select()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::select ( unsigned int  x,
unsigned int  HALF_UNUSED_NOERR(y) 
)
inline

Select value or signaling NaN.

Parameters
x: preferred half-precision value
y: ignored half-precision value except for signaling NaN
Returns
y if signaling NaN, x otherwise
Exceptions
FE_INVALID: if y is signaling NaN

◆ sign_mask()

uint32 half_float::detail::sign_mask ( uint32  arg)
inline

Platform-independent sign mask.

Parameters
arg: integer value in two's complement
Return values
-1: if arg negative
0: if arg non-negative

◆ signal() [1/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int  nan)
inline

Signal and silence signaling NaN.

Parameters
nan: half-precision NaN value
Returns
quiet NaN
Exceptions
FE_INVALID: if nan is signaling NaN

◆ signal() [2/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int  x,
unsigned int  y 
)
inline

Signal and silence signaling NaNs.

Parameters
x: first half-precision value to check
y: second half-precision value to check
Returns
quiet NaN
Exceptions
FE_INVALID: if x or y is signaling NaN

◆ signal() [3/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int  x,
unsigned int  y,
unsigned int  z 
)
inline

Signal and silence signaling NaNs.

Parameters
x: first half-precision value to check
y: second half-precision value to check
z: third half-precision value to check
Returns
quiet NaN
Exceptions
FE_INVALID: if x, y or z is signaling NaN

◆ sincos()

std::pair<uint32, uint32> half_float::detail::sincos ( uint32  mz,
unsigned int  n = 31 
)
inline

Fixed point sine and cosine.

This uses the CORDIC algorithm in rotation mode.

Parameters
mz: angle in [-pi/2,pi/2] as Q1.30
n: number of iterations (at most 31)
Returns
sine and cosine of mz as Q1.30

◆ sqrt()

template<unsigned int F>
uint32 half_float::detail::sqrt ( uint32 &  r,
int &  exp 
)

Fixed point square root.

Template Parameters
F: number of fractional bits
Parameters
r: radicand in Q1.F fixed point format
exp: exponent
Returns
square root as Q1.F/2

◆ tangent_post()

template<std::float_round_style R>
unsigned int half_float::detail::tangent_post ( uint32  my,
uint32  mx,
int  exp,
unsigned int  sign = 0 
)

Division and postprocessing for tangents.

Template Parameters
R: rounding mode to use
Parameters
my: dividend as Q1.31
mx: divisor as Q1.31
exp: biased exponent of result
sign: sign bit of result
Returns
quotient converted to half-precision
Exceptions
FE_OVERFLOW: on overflows
FE_UNDERFLOW: on underflows
FE_INEXACT: if no other exception occurred

◆ underflow()

template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::underflow ( unsigned int  sign = 0)

Half-precision underflow.

Template Parameters
R: rounding mode to use
Parameters
sign: half-precision value with sign bit only
Returns
rounded underflowing half-precision value
Exceptions
FE_UNDERFLOW

Variable Documentation

◆ binary

HALF_CONSTEXPR_CONST binary_t half_float::detail::binary = binary_t()

Tag for binary construction.