You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
462 lines
19 KiB
462 lines
19 KiB
2 years ago
|
/*=========================================================================
|
||
|
|
||
|
Program: Visualization Toolkit
|
||
|
Module: $RCSfile: vtkFastNumericConversion.h,v $
|
||
|
|
||
|
Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
|
||
|
All rights reserved.
|
||
|
See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
|
||
|
|
||
|
This software is distributed WITHOUT ANY WARRANTY; without even
|
||
|
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||
|
PURPOSE. See the above copyright notice for more information.
|
||
|
|
||
|
=========================================================================*/
|
||
|
// .NAME vtkFastNumericConversion - Enables fast conversion of floating point to fixed point
|
||
|
// .SECTION Description
|
||
|
// vtkFastNumericConversion uses a portable (assuming IEEE format) method for
|
||
|
// converting single and double precision floating point values to a fixed
|
||
|
// point representation. This allows fast integer floor operations on
|
||
|
// platforms, such as Intel X86, in which CPU floating point conversion
|
||
|
// algorithms are very slow. It is based on the techniques described in Chris
|
||
|
// Hecker's article, "Let's Get to the (Floating) Point", in Game Developer
|
||
|
// Magazine, Feb/Mar 1996, and the techniques described in Michael Herf's
|
||
|
// website, http://www.stereopsis.com/FPU.html. The Hecker article can be
|
||
|
// found at http://www.d6.com/users/checker/pdfs/gdmfp.pdf. Unfortunately,
|
||
|
// each of these techniques is incomplete, and doesn't convert properly, in a
|
||
|
// way that depends on how many bits are reserved for fixed point fractional
|
||
|
// use, due to failing to properly account for the default round-towards-even
|
||
|
// rounding mode of the X86. Thus, my implementation incorporates some
|
||
|
// rounding correction that undoes the rounding that the FPU performs during
|
||
|
// denormalization of the floating point value. Note that the rounding affect
|
||
|
// I'm talking about here is not the effect on the fistp instruction, but
|
||
|
// rather the effect that occurs during the denormalization of a value that
|
||
|
// occurs when adding it to a much larger value. The bits must be shifted to
|
||
|
// the right, and when a "1" bit falls off the edge, the rounding mode
|
||
|
// determines what happens next, in order to avoid completely "losing" the
|
||
|
// 1-bit. Furthermore, my implementation works on Linux, where the default
|
||
|
// precision mode is 64-bit extended precision.
|
||
|
|
||
|
// This class is contributed to VTK by Chris Volpe of Applied Research
|
||
|
// Associates, Inc. (My employer requires me to say that -- CRV)
|
||
|
|
||
|
// This code assumes that the FPU is in round-to-nearest mode. It assumes, on
|
||
|
// Linux, that the default extended precision mode is in effect, and it
|
||
|
// assumes, on Windows, that the default double precision mode is in effect.
|
||
|
|
||
|
#ifndef __vtkFastNumericConversion_h
|
||
|
#define __vtkFastNumericConversion_h
|
||
|
|
||
|
#include "vtkObject.h"
|
||
|
|
||
|
// Use the bit-representation trick only on X86, and only when producing
|
||
|
// optimized code
|
||
|
#if defined(NDEBUG) && (defined i386 || defined _M_IX86)
|
||
|
#define VTK_USE_TRICK
|
||
|
#endif
|
||
|
|
||
|
// Linux puts the FPU in extended precision. Windows and FreeBSD keep it in
|
||
|
// double precision. If other operating systems for i386 (Solaris?) behave
|
||
|
// like Linux, add them below. Special care needs to be taken when dealing
|
||
|
// with extended precision mode because even though we are eventually writing
|
||
|
// out to a double-precision variable to capture the fixed-point or integer
|
||
|
// results, the extra bits maintained in the internal computations disrupt
|
||
|
// the bit-playing that we're doing here.
|
||
|
#if defined(__linux__)
|
||
|
#define VTK_EXT_PREC
|
||
|
#endif
|
||
|
|
||
|
//#define VTK_TEST_HACK_TO_EMULATE_LINUX_UNDER_WINDOWS
|
||
|
#ifdef VTK_TEST_HACK_TO_EMULATE_LINUX_UNDER_WINDOWS
|
||
|
#define VTK_EXT_PREC
|
||
|
#endif
|
||
|
|
||
|
|
||
|
class VTK_COMMON_EXPORT vtkFastNumericConversion : public vtkObject
|
||
|
{
|
||
|
public:
|
||
|
static vtkFastNumericConversion *New();
|
||
|
vtkTypeRevisionMacro(vtkFastNumericConversion, vtkObject);
|
||
|
void PrintSelf(ostream& os, vtkIndent indent);
|
||
|
|
||
|
// Description:
|
||
|
// Wrappable method for script-testing of correct cross-platform
|
||
|
// functionality
|
||
|
int TestQuickFloor(double val);
|
||
|
|
||
|
// Description:
|
||
|
// Wrappable method for script-testing of correct cross-platform
|
||
|
// functionality
|
||
|
int TestSafeFloor(double val);
|
||
|
|
||
|
// Description:
|
||
|
// Wrappable method for script-testing of correct cross-platform
|
||
|
// functionality
|
||
|
int TestRound(double val);
|
||
|
|
||
|
// Description:
|
||
|
// Wrappable method for script-testing of correct cross-platform
|
||
|
// functionality
|
||
|
int TestConvertFixedPointIntPart(double val);
|
||
|
|
||
|
// Description:
|
||
|
// Wrappable method for script-testing of correct cross-platform
|
||
|
// functionality
|
||
|
int TestConvertFixedPointFracPart(double val);
|
||
|
|
||
|
protected:
|
||
|
//BTX
|
||
|
// Description:
|
||
|
// Internal use: multiply the denormalizer value by 1.5 to ensure that it
|
||
|
// has a "1" bit, other than the implicit initial "1" bit, from which to
|
||
|
// borrow when adding (flooring) a negative number, so that we don't borrow
|
||
|
// from the implicit "1" bit, which would cause partial re-normalization,
|
||
|
// resulting in a shift of our integer bits.
|
||
|
static inline double BorrowBit() { return 1.5;};
|
||
|
|
||
|
// Description:
|
||
|
// Represent 2^30 as a double precision float. Use as a stepping
|
||
|
// stone for computing 2^52 as a double, since we can't represent 2^52 as an
|
||
|
// int before converting to double.
|
||
|
static inline double two30() { return (double)(((unsigned long)1) << 30); };
|
||
|
|
||
|
// Description:
|
||
|
// Represent 2^52 as a double precision float. This value is
|
||
|
// significant because doubles have 52 bits of precision in the mantissa
|
||
|
static inline double two52()
|
||
|
{ return (((unsigned long)1) << (52-30)) * two30(); };
|
||
|
|
||
|
// Description:
|
||
|
// Represent 2^51 as a double precision float. This value is significant
|
||
|
// because doubles have 52 (explicit) bits of precision in the mantissa,
|
||
|
// but we're going to pretend we only have 51 to play with when using safe
|
||
|
// floor, since the default round-to-even on an X86 mucks with the LSB
|
||
|
// during the denormalizing shift.
|
||
|
static inline double two51()
|
||
|
{ return (((unsigned long)1) << (51-30)) * two30(); };
|
||
|
|
||
|
// Description:
|
||
|
// Represent 2^63 as a double precision float. We need this value to shift
|
||
|
// unwanted fractional bits off the end of an extended precision value
|
||
|
static inline double two63()
|
||
|
{ return (((unsigned long)1) << (63-60)) * two30() * two30();};
|
||
|
|
||
|
// Description:
|
||
|
// Represent 2^62 as a double precision float. We need this value to shift
|
||
|
// unwanted fractional bits off the end of an extended precision value. Use
|
||
|
// when we're doing a SafeFloor.
|
||
|
static inline double two62()
|
||
|
{ return (((unsigned long)1) << (62-60)) * two30() * two30();};
|
||
|
|
||
|
// Define number of bits of precision for various data types.
|
||
|
// Note: INT_BITS is really 31, (rather than 32, since one of the bits is
|
||
|
// just used for the two's-complement sign), but we say 30 because we don't
|
||
|
// need to be able to handle 31-bit magnitudes correctly. I say that
|
||
|
// because this is used for the QuickFloor code, and the SafeFloor code
|
||
|
// retains an extra bit of fixed point precision which it shifts-out at the
|
||
|
// end, thus reducing the magnitude of integers that it can handle. That's
|
||
|
// an inherent limitation of using SafeFloor to prevent round-ups under any
|
||
|
// circumstances, and there's no need to make QuickFloor handle a wider
|
||
|
// range of numbers than SafeFloor.
|
||
|
#define INT_BITS 30
|
||
|
#define EXT_BITS 64
|
||
|
#define DBL_BITS 53
|
||
|
|
||
|
// Description:
|
||
|
// Small amount to use as a rounding tie-breaker to prevent
|
||
|
// round-to-nearest-and-even mode from flooring-down odd numbered
|
||
|
// integers. But number to nudge by depends on number of bits mantissa in
|
||
|
// our floating point representation minus number of mantissa bits in the
|
||
|
// range of signed ints we need to handle. In order to ensure that
|
||
|
// flooring-down doesn't happen even for very large odd-integer values, the
|
||
|
// number of bits used to represent the tie-breaker (i.e. to the right of
|
||
|
// the binary-point), plus the number of bits needed to represent the
|
||
|
// integer (to the left of the binary point), can not exceeds the number of
|
||
|
// bits in the current precision mode. Thus, in selecting the tie-breaker
|
||
|
// value, we select the largest number of bits to the right of the binary
|
||
|
// point as possible while still maintining that inequality. Thus, extended
|
||
|
// precision mode allows a larger number of bits to the right of the binary
|
||
|
// point. This, in turn, implies a smaller value of the tie-breaker. And a
|
||
|
// smaller tie-breaker will impose a tighter window on the range of values
|
||
|
// that are erroneously rounded-up by a floor operation. Under double
|
||
|
// precision, a QuickFloor of 0.9999998 (six 9's and an 8) correctly yields
|
||
|
// 0. A value must be very close to 1.0, in fact, at least as close as
|
||
|
// 0.9999999 (seven 9's)in order for the tie-breaker to bump it up to 1.
|
||
|
// Under extended precision, an even smaller tie-breaker can be used. In
|
||
|
// this mode, a QuickFloor of 0.9999999999 (ten 9's) correctly yields 0. A
|
||
|
// QuickFloor of 0.99999999999 (eleven 9's) gets rounded up to 1. Since
|
||
|
// these spurious round-ups occur only when the given value is virtually
|
||
|
// indistinguishable from the next higher integer, the results should be
|
||
|
// acceptable in most situations where performance is of the essence.
|
||
|
// Make this public so that clients can account for the RoundingTieBreaker
|
||
|
// if necessary
|
||
|
public:
|
||
|
#ifdef VTK_EXT_PREC
|
||
|
// Compute (0.5 ^ (EXT_BITS-INT_BITS)) as a compile-time constant
|
||
|
static inline double RoundingTieBreaker() {return 1.0 / (two30() * (((unsigned long)1) << (EXT_BITS - INT_BITS - 30))); };
|
||
|
#else
|
||
|
// Compute (0.5 ^ (DBL_BITS-INT_BITS)) as a compile-time constant
|
||
|
static inline double RoundingTieBreaker() { return 1.0 / (((unsigned long)1) << (DBL_BITS - INT_BITS)); };
|
||
|
#endif
|
||
|
|
||
|
protected:
|
||
|
// Description:
|
||
|
// This is the magic floating point value which when added to any other
|
||
|
// floating point value, causes the rounded integer portion of that
|
||
|
// floating point value to appear in the least significant bits of the
|
||
|
// mantissa, which is what we want.
|
||
|
static inline double QuickFloorDenormalizer()
|
||
|
{return two52() * BorrowBit(); };
|
||
|
|
||
|
// Description:
|
||
|
// This is the magic floating point value which when added to any other
|
||
|
// floating point value, causes the rounded integer portion of that
|
||
|
// floating point value to appear in the NEXT TO least significant bits of
|
||
|
// the mantissa, which is what we want. This allows the CPU rounding mode
|
||
|
// to muck with the LSB which we can then discard in SafeFloor
|
||
|
static inline double SafeFloorDenormalizer()
|
||
|
{ return two51() * BorrowBit(); };
|
||
|
|
||
|
// Description:
|
||
|
// This value is added to and then subtracted from an extended precision
|
||
|
// value in order to clear the fractional bits so that they do not
|
||
|
// adversely affect the final double-precision result.
|
||
|
static inline double QuickExtPrecTempDenormalizer()
|
||
|
{return two63() * BorrowBit(); };
|
||
|
|
||
|
// Description:
|
||
|
// Just like QuickExtPrecTempDenormalizer(), but preserves one extra bit of
|
||
|
// fixed point precision to guard against the CPU mucking with the LSB
|
||
|
static inline double SafeExtPrecTempDenormalizer()
|
||
|
{return two62() * BorrowBit(); };
|
||
|
|
||
|
static inline double QuickRoundAdjust() {return 0.5;};
|
||
|
static inline double SafeRoundAdjust() {return 0.25;};
|
||
|
static inline int SafeFinalShift() {return 1;};
|
||
|
|
||
|
|
||
|
#ifdef VTK_WORDS_BIGENDIAN
|
||
|
enum {exponent_pos = 0, mantissa_pos = 1};
|
||
|
#else
|
||
|
enum {exponent_pos = 1, mantissa_pos = 0};
|
||
|
#endif
|
||
|
//ETX
|
||
|
|
||
|
public:
|
||
|
|
||
|
// Description:
|
||
|
// Set the number of bits reserved for fractional precision that are
|
||
|
// maintained as part of the flooring process. This number affects the
|
||
|
// flooring arithmetic. It may be useful if the factional part is to be
|
||
|
// used to index into a lookup table of some sort. However, if you are only
|
||
|
// interested in knowing the fractional remainder after flooring, there
|
||
|
// doesn't appear to be any advantage to using these bits, either in terms
|
||
|
// of a lookup table, or by directly multiplying by some unit fraction,
|
||
|
// over simply subtracting the floored value from the original value. Note
|
||
|
// that since only 32 bits are used for the entire fixed point
|
||
|
// representation, increasing the number of reserved fractional bits
|
||
|
// reduces the range of integer values that can be floored to.
|
||
|
void SetReservedFracBits(int bits)
|
||
|
{
|
||
|
// Add one to the requested number of fractional bits, to make
|
||
|
// the conversion safe with respect to rounding mode. This is the
|
||
|
// same as the difference between QuickFloor and SafeFloor.
|
||
|
bits++;
|
||
|
unsigned long mtime = this->GetMTime();
|
||
|
this->SetinternalReservedFracBits(bits);
|
||
|
if (mtime != this->GetMTime())
|
||
|
{
|
||
|
this->InternalRebuild();
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Description:
|
||
|
// Conduct timing tests so that the usefulness of this class can be
|
||
|
// ascertained on whatever platform it is being used. Output can be
|
||
|
// retrieved via Print method.
|
||
|
void PerformanceTests(void);
|
||
|
|
||
|
//BTX
|
||
|
// Description:
|
||
|
// Perform a quick flooring of the double-precision floating point
|
||
|
// value. The result is sometimes incorrect, but in a way that makes it
|
||
|
// acceptable for most uses. The naive way to implement floor(), given that
|
||
|
// the x86 FPU does round() by default, is to define floor(x) as
|
||
|
// round(x-.5). This would work fine except for the fact that the x86 FPU
|
||
|
// breaks rounding ties by selecting the even number. Thus, floor(4.0) =
|
||
|
// round(3.5) = 4, but floor(3.0) = round(2.5) = 2. As a result,
|
||
|
// subtracting .5 gives the wrong answer for odd integers. So, let's
|
||
|
// subtract just a TEENSY bit less than .5, to swing the odd-integer
|
||
|
// results up to their corect value. How teensy? Well, if it's too teensy,
|
||
|
// it will be insignificant compared to 0.5, and will become equivalent to
|
||
|
// 0.5. And if it's not teensy enough, we'll overshoot, causing results
|
||
|
// like floor(N-epsilon)==N, for some epsilon. Furthermore, the "too
|
||
|
// teensy" problem is exacerbated when trying to floor larger numbers, due
|
||
|
// to limitations of the representation's dynamic range. See the definition
|
||
|
// of RoundingTieBreaker() for details.
|
||
|
inline static int QuickFloor(const double &val)
|
||
|
{
|
||
|
#ifdef VTK_USE_TRICK
|
||
|
union { int i[2]; double d; } u;
|
||
|
#ifdef VTK_EXT_PREC
|
||
|
u.d = (((val - (QuickRoundAdjust() - RoundingTieBreaker()))
|
||
|
// Push off those extended precision bits
|
||
|
+ QuickExtPrecTempDenormalizer())
|
||
|
// Pull back the wanted bits into double range
|
||
|
- QuickExtPrecTempDenormalizer())
|
||
|
+ QuickFloorDenormalizer();
|
||
|
#else // ! VTK_EXT_PREC
|
||
|
u.d = (val - (QuickRoundAdjust() - RoundingTieBreaker()))
|
||
|
+ QuickFloorDenormalizer();
|
||
|
#endif // VTK_EXT_PREC
|
||
|
return u.i[mantissa_pos];
|
||
|
#else // ! VTK_USE_TRICK
|
||
|
return (int) val;
|
||
|
#endif // VTK_USE_TRICK
|
||
|
}
|
||
|
|
||
|
// Description:
|
||
|
// Perform a SAFE flooring. Similar to QuickFloor, but modified to return
|
||
|
// the correct result always. Use this when it absolutely positively needs
|
||
|
// to be the correct answer all the time, and considering 0.9999999 as
|
||
|
// being equal to 1.0 is simply not acceptable. It works similarly to
|
||
|
// QuickFloor, but it retains one extra bit of fixed point precision in the
|
||
|
// conversion process, so that the problem with QuickFloor affects only an
|
||
|
// unneeded bit, and then it ditches that bit from the resulting integer
|
||
|
// with a right-shift. In other words, it rounds to the nearest one-half,
|
||
|
// choosing the EVEN one-half (i.e. the integer) as a tie-breaker, and then
|
||
|
// shifting off that half-integer bit. As a result of maintaining one extra
|
||
|
// bit of fixed point precision in the intermediate calculation, the range
|
||
|
// of integers supported is reduced by one bit. Plus, it takes a little
|
||
|
// longer to execute, due to the final bit shift.
|
||
|
inline static int SafeFloor(const double &val)
|
||
|
{
|
||
|
#ifdef VTK_USE_TRICK
|
||
|
union { int i[2]; double d; } u;
|
||
|
#ifdef VTK_EXT_PREC
|
||
|
u.d = (((val - SafeRoundAdjust())
|
||
|
+ SafeExtPrecTempDenormalizer())
|
||
|
- SafeExtPrecTempDenormalizer())
|
||
|
+ SafeFloorDenormalizer();
|
||
|
#else // ! VTK_EXT_PREC
|
||
|
u.d = (val - SafeRoundAdjust())
|
||
|
+ SafeFloorDenormalizer();
|
||
|
#endif // VTK_EXT_PREC
|
||
|
return u.i[mantissa_pos] >> SafeFinalShift();
|
||
|
#else // ! VTK_USE_TRICK
|
||
|
return (int) val;
|
||
|
#endif // VTK_USE_TRICK
|
||
|
}
|
||
|
|
||
|
// Description:
|
||
|
// Round to nearest int. This is pretty sweet in the default
|
||
|
// round-to-nearest FPU mode, since it is generally immaterial how ties are
|
||
|
// broken when rounding. I.e., either "2" or "3" are acceptable results for
|
||
|
// "Round(2.5)", but only one of them (the one naively not chosen without
|
||
|
// jumping through the hoops in QuickFloor and SafeFloor) is the acceptable
|
||
|
// result for the analogous "Floor(3)". Therefore, we don't need to worry
|
||
|
// at all about adding a teensy but not too teensy tie breaker, or shifting
|
||
|
// off a half-integer bit. This makes it exceptionally fast.
|
||
|
inline static int Round(const double &val)
|
||
|
{
|
||
|
#ifdef VTK_USE_TRICK
|
||
|
union { int i[2]; double d; } u;
|
||
|
#ifdef VTK_EXT_PREC
|
||
|
u.d = ((val
|
||
|
+ QuickExtPrecTempDenormalizer())
|
||
|
- QuickExtPrecTempDenormalizer())
|
||
|
+ QuickFloorDenormalizer();
|
||
|
#else // ! VTK_EXT_PREC
|
||
|
u.d = val
|
||
|
+ QuickFloorDenormalizer();
|
||
|
#endif // VTK_EXT_PREC
|
||
|
return u.i[mantissa_pos];
|
||
|
#else // ! VTK_USE_TRICK
|
||
|
if (val>=0)
|
||
|
{
|
||
|
return (int) (val + 0.5);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
return (int) (val - 0.5);
|
||
|
}
|
||
|
#endif // VTK_USE_TRICK
|
||
|
}
|
||
|
|
||
|
// Description:
|
||
|
// Convert the value to a fixed point representation, returning the
|
||
|
// integer portion as function value, and returning the fractional
|
||
|
// part in the second parameter.
|
||
|
inline int ConvertFixedPoint(const double &val, int &fracPart)
|
||
|
{
|
||
|
union { int i[2]; double d; } u;
|
||
|
#ifdef VTK_EXT_PREC
|
||
|
u.d = (((val - fixRound)
|
||
|
+ this->epTempDenormalizer)
|
||
|
- this->epTempDenormalizer)
|
||
|
+ this->fpDenormalizer;
|
||
|
#else // ! VTK_EXT_PREC
|
||
|
u.d = (val - fixRound)
|
||
|
+ this->fpDenormalizer;
|
||
|
#endif // VTK_EXT_PREC
|
||
|
fracPart = (u.i[mantissa_pos] & fracMask) >> 1;
|
||
|
return u.i[mantissa_pos] >> this->internalReservedFracBits;
|
||
|
}
|
||
|
//ETX
|
||
|
|
||
|
|
||
|
protected:
|
||
|
//BTX
|
||
|
vtkFastNumericConversion()
|
||
|
{
|
||
|
#ifdef VTK_TEST_HACK_TO_EMULATE_LINUX_UNDER_WINDOWS
|
||
|
_controlfp( _PC_64, MCW_PC );
|
||
|
#endif
|
||
|
|
||
|
this->fixRound = 0;
|
||
|
this->internalReservedFracBits = 0;
|
||
|
this->fracMask = 0;
|
||
|
this->fpDenormalizer = 0;
|
||
|
this->bare_time = 0;
|
||
|
this->cast_time = 0;
|
||
|
this->convert_time = 0;
|
||
|
this->quickfloor_time = 0;
|
||
|
this->safefloor_time = 0;
|
||
|
this->round_time = 0;
|
||
|
this->InternalRebuild();
|
||
|
};
|
||
|
~vtkFastNumericConversion() {};
|
||
|
void InternalRebuild(void);
|
||
|
|
||
|
private:
|
||
|
vtkSetMacro(internalReservedFracBits, int);
|
||
|
vtkGetMacro(internalReservedFracBits, int);
|
||
|
int internalReservedFracBits;
|
||
|
int fracMask;
|
||
|
|
||
|
// Used when doing fixed point conversions with a certain number of bits
|
||
|
// remaining for the fractional part, as opposed to the pure integer
|
||
|
// flooring
|
||
|
double fpDenormalizer;
|
||
|
|
||
|
// Used when doing fixed point conversions in extended precision mode
|
||
|
double epTempDenormalizer;
|
||
|
|
||
|
// Adjustment for rounding based on the number of bits reserved for
|
||
|
// fractional representation
|
||
|
double fixRound;
|
||
|
|
||
|
double bare_time;
|
||
|
double cast_time;
|
||
|
double convert_time;
|
||
|
double quickfloor_time;
|
||
|
double safefloor_time;
|
||
|
double round_time;
|
||
|
//ETX
|
||
|
|
||
|
vtkFastNumericConversion(const vtkFastNumericConversion&); // Not implemented
|
||
|
void operator=(const vtkFastNumericConversion&); // Not implemented
|
||
|
};
|
||
|
|
||
|
#endif
|