M
mike3
Hi.
(posted to comp.lang.c++ as well in case some C++-specific questions
get raised here)
I've got this routine from a bignum package I'm writing in C++, and
I'm having trouble with the performance. It's supposed to add two big
floating point numbers together nice and fast. But, by commenting out
everything sans the actual math loops to see the speed of the
components, I noticed that the non-math-loop code accounts for over
70% of the time! Many of the functions called are trivial in what
they do; they're not super-complicated. Even just the calls to
"GetExp()" which just _return a value_ of a data item (yes, something
THAT silly and trivial) take *25%* of the time, which I determined by
simply commenting them out and substituting the values they should
return in their place. That's just *Crazy*. I'm utterly, completely
mystified as to why this is. Is it because they're member functions of
objects? What's wrong with this picture?
---
/* Add two big floating point numbers, unsigned. */
/* These unsigned MP routines treat both operands as positive
 * and the sign of the result is given accordingly.
 */
/* Special high-performance addition routine. This is much
 * faster than the one in fladdu.cpp and is designed for use
 * in the time-critical fractal-generating code. All operands --
 * inputs and output, must be of the same length.
 * No arithmetic from rawint.cpp is used here for the actual
 * addition loop. Very fast -- 8 seconds for 100 million operations
 * using Microsoft C++ compiler with all optimizations turned on.
 *
 * The result is stored in *this. Digits are processed from index 0
 * upward with the addition carry propagating toward higher indices,
 * i.e. digits[0] is treated as the least significant digit.
 *
 * Parameters:
 *   a, b - the operands. Their digit arrays must have the same
 *          length as this object's.
 *
 * Returns:
 *   FG3D_MP_LENGTHS_UNEQUAL - the three lengths differ.
 *   FG3D_BAD_FLOAT          - an operand is a signalling NaN; the
 *                             result has been set to a signalling NaN.
 *   FG3D_SUCCESS            - otherwise, unless SetExp()/Inc() on the
 *                             exponent reports an error through
 *                             ECHK_RET (presumably that macro returns
 *                             the error to the caller -- confirm
 *                             against its definition).
 *
 * Bits of the smaller operand that are shifted out below digit 0
 * are discarded; no rounding is performed.
 */
FG3DError BigFloat::FastAddUnsigned(const BigFloat *a, const BigFloat *b)
{
    /* Choke if the lengths are not equal */
    if((length != a->length) || (a->length != b->length))
        return(FG3DError(FG3D_MP_LENGTHS_UNEQUAL));

    /* Handle exceptional/"error" values (infinity, NaNs).
     * Note that if none of the cases below matches (for example only
     * the destination is flagged as an error value), control falls
     * through to the ordinary addition.
     */
    if(IsErr() || a->IsErr() || b->IsErr())
    {
        bool aIsInf(a->IsInfinity()), bIsInf(b->IsInfinity());
        bool aIsQNaN(a->IsQNaN()), bIsQNaN(b->IsQNaN());
        bool aIsSNaN(a->IsSNaN()), bIsSNaN(b->IsSNaN());

        /* Anything + NaN = NaN. A signalling NaN takes priority
         * over a quiet one.
         */
        if((aIsQNaN || bIsQNaN) && !(aIsSNaN || bIsSNaN))
        {
            MakeQNaN(SIGN_POSITIVE);
            return(FG3DError(FG3D_SUCCESS));
        } else if(aIsSNaN || bIsSNaN) {
            MakeSNaN(SIGN_POSITIVE);
            /* The address of the offending operand is passed along
             * with the error code.
             * NOTE(review): this cast requires DWORD to be at least
             * as wide as a pointer -- confirm for 64-bit targets.
             */
            return(FG3DError(FG3D_BAD_FLOAT, aIsSNaN ?
                                             reinterpret_cast<DWORD>(a)
                                             :
                                             reinterpret_cast<DWORD>(b)));
        }

        /* Infinity + Anything = Infinity */
        if(aIsInf || bIsInf)
        {
            MakeInfinity(SIGN_POSITIVE);
            return(FG3DError(FG3D_SUCCESS));
        }
    }

    /* Next, fetch direct pointers to the digit data. We are
     * going to get down and dirty with real live arrays and
     * pointers here. Kids, don't try this at home! We're
     * trying to squeeze every last ounce of speed there
     * is!
     */
    Digit *rPtr(&digits[0]);
    const Digit *aPtr(&a->digits[0]), *bPtr(&b->digits[0]);

    /* Now get the exponents and make sure b's is the smaller, so
     * that "b" is always the operand that gets shifted right.
     */
    s32 aExp(a->signexp.GetExp()), bExp(b->signexp.GetExp());
    if(aExp < bExp)
    {
        std::swap(aPtr, bPtr);
        std::swap(aExp, bExp);
    }

    /* Compute the difference of the exponents and the shifts. */
    s32 expDiff(aExp - bExp);
    s32 wsh(expDiff/DIGIT_SIZE); // word shift
    s32 bsh(expDiff%DIGIT_SIZE); // bit shift
    if(wsh > length) wsh = length; // safety: b contributes nothing

    /* Apply the word shift: b's digit [i+wsh] lines up with a's
     * digit [i].
     */
    bPtr += wsh;

    /* Now work through the digits, shifting b and adding it
     * to our result as we go. This loop covers every overlapping
     * digit except the last one, because it needs to peek at the
     * next-higher digit of b.
     */
    Digit sumCarry(0);
    for(int i(0);i<length-wsh-1;i++)
    {
        /* Do combined add-and-shift */
        Digit aVal(*aPtr), bVal(*bPtr);
        /* Bits shifted in from the next-higher digit of b. The
         * bsh == 0 case is special-cased because shifting by the
         * full digit width is not a valid shift.
         */
        Digit rshCarry(bsh ? (*(bPtr+1) << (DIGIT_SIZE-bsh)) : 0);
        Digit bRsh((bVal >> bsh) | rshCarry);
        Digit sum(aVal + bRsh + sumCarry);
        /* Unsigned wrap-around test: with an incoming carry the sum
         * overflowed if it is <= aVal, without one if it is < aVal.
         */
        sumCarry = sumCarry ? (sum <= aVal) : (sum < aVal);
        *rPtr = sum;

        /* Increment pointers */
        rPtr++;
        aPtr++;
        bPtr++;
    }

    /* Next, take care of the last digit of b, which has no shift
     * carry.
     */
    if(wsh < length)
    {
        Digit aVal(*aPtr), bRsh(*bPtr >> bsh);
        Digit sum(aVal + bRsh + sumCarry);
        sumCarry = sumCarry ? (sum <= aVal) : (sum < aVal);
        *rPtr = sum;
        rPtr++;
        aPtr++;
        bPtr++;
    }

    /* Finally, ripple the addition carry through the remaining
     * digits, where b no longer contributes anything.
     */
    for(int i(length-wsh);i<length;i++)
    {
        Digit sum(*aPtr + sumCarry);
        sumCarry = sumCarry ? (sum == 0) : 0;
        *rPtr = sum;
        rPtr++;
        aPtr++;
    }

    /* Set exponent */
    FG3DError err;
    ECHK_RET(err, signexp.SetExp(aExp));

    /* Shift in any remaining carries: bump the exponent and shift
     * the whole result right by one bit, walking from the top digit
     * down and inserting the carried-out bit as the new top bit.
     */
    if(sumCarry)
    {
        ECHK_RET(err, signexp.Inc());
        Digit rshCarry(DIGIT_MSB_MASK);
        rPtr = (&digits[0]+(length-1));
        for(int i(length-1);i>=0;i--)
        {
            Digit rVal(*rPtr);
            Digit tmp((rVal >> 1) | rshCarry);
            /* Move the bit shifted out of this digit to the top bit
             * of the next-lower digit. Uses DIGIT_SIZE rather than a
             * hard-coded 31 so that this stays consistent with the
             * digit width used everywhere else in the routine.
             */
            rshCarry = rVal << (DIGIT_SIZE-1);
            *rPtr = tmp;
            rPtr--;
        }
    }

    /* Set sign */
    signexp.SetSign(SIGN_POSITIVE);

    /* Return success. */
    return(FG3DError(FG3D_SUCCESS));
}
---
(posted to comp.lang.c++ as well in case some C++-specific questions
get raised here)
I've got this routine from a bignum package I'm writing in C++, and
I'm having trouble with the performance. It's supposed to add two big
floating point numbers together nice and fast. But, by commenting out
everything sans the actual math loops to see the speed of the
components, I noticed that the non-math-loop code accounts for over
70% of the time! Many of the functions called are trivial in what
they do; they're not super-complicated. Even just the calls to
"GetExp()" which just _return a value_ of a data item (yes, something
THAT silly and trivial) take *25%* of the time, which I determined by
simply commenting them out and substituting the values they should
return in their place. That's just *Crazy*. I'm utterly, completely
mystified as to why this is. Is it because they're member functions of
objects? What's wrong with this picture?
---
/* Add two big floating point numbers, unsigned. */
/* These unsigned MP routines treat both operands as positive
 * and the sign of the result is given accordingly.
 */
/* Special high-performance addition routine. This is much
 * faster than the one in fladdu.cpp and is designed for use
 * in the time-critical fractal-generating code. All operands --
 * inputs and output, must be of the same length.
 * No arithmetic from rawint.cpp is used here for the actual
 * addition loop. Very fast -- 8 seconds for 100 million operations
 * using Microsoft C++ compiler with all optimizations turned on.
 *
 * The result is stored in *this. Digits are processed from index 0
 * upward with the addition carry propagating toward higher indices,
 * i.e. digits[0] is treated as the least significant digit.
 *
 * Parameters:
 *   a, b - the operands. Their digit arrays must have the same
 *          length as this object's.
 *
 * Returns:
 *   FG3D_MP_LENGTHS_UNEQUAL - the three lengths differ.
 *   FG3D_BAD_FLOAT          - an operand is a signalling NaN; the
 *                             result has been set to a signalling NaN.
 *   FG3D_SUCCESS            - otherwise, unless SetExp()/Inc() on the
 *                             exponent reports an error through
 *                             ECHK_RET (presumably that macro returns
 *                             the error to the caller -- confirm
 *                             against its definition).
 *
 * Bits of the smaller operand that are shifted out below digit 0
 * are discarded; no rounding is performed.
 */
FG3DError BigFloat::FastAddUnsigned(const BigFloat *a, const BigFloat *b)
{
    /* Choke if the lengths are not equal */
    if((length != a->length) || (a->length != b->length))
        return(FG3DError(FG3D_MP_LENGTHS_UNEQUAL));

    /* Handle exceptional/"error" values (infinity, NaNs).
     * Note that if none of the cases below matches (for example only
     * the destination is flagged as an error value), control falls
     * through to the ordinary addition.
     */
    if(IsErr() || a->IsErr() || b->IsErr())
    {
        bool aIsInf(a->IsInfinity()), bIsInf(b->IsInfinity());
        bool aIsQNaN(a->IsQNaN()), bIsQNaN(b->IsQNaN());
        bool aIsSNaN(a->IsSNaN()), bIsSNaN(b->IsSNaN());

        /* Anything + NaN = NaN. A signalling NaN takes priority
         * over a quiet one.
         */
        if((aIsQNaN || bIsQNaN) && !(aIsSNaN || bIsSNaN))
        {
            MakeQNaN(SIGN_POSITIVE);
            return(FG3DError(FG3D_SUCCESS));
        } else if(aIsSNaN || bIsSNaN) {
            MakeSNaN(SIGN_POSITIVE);
            /* The address of the offending operand is passed along
             * with the error code.
             * NOTE(review): this cast requires DWORD to be at least
             * as wide as a pointer -- confirm for 64-bit targets.
             */
            return(FG3DError(FG3D_BAD_FLOAT, aIsSNaN ?
                                             reinterpret_cast<DWORD>(a)
                                             :
                                             reinterpret_cast<DWORD>(b)));
        }

        /* Infinity + Anything = Infinity */
        if(aIsInf || bIsInf)
        {
            MakeInfinity(SIGN_POSITIVE);
            return(FG3DError(FG3D_SUCCESS));
        }
    }

    /* Next, fetch direct pointers to the digit data. We are
     * going to get down and dirty with real live arrays and
     * pointers here. Kids, don't try this at home! We're
     * trying to squeeze every last ounce of speed there
     * is!
     */
    Digit *rPtr(&digits[0]);
    const Digit *aPtr(&a->digits[0]), *bPtr(&b->digits[0]);

    /* Now get the exponents and make sure b's is the smaller, so
     * that "b" is always the operand that gets shifted right.
     */
    s32 aExp(a->signexp.GetExp()), bExp(b->signexp.GetExp());
    if(aExp < bExp)
    {
        std::swap(aPtr, bPtr);
        std::swap(aExp, bExp);
    }

    /* Compute the difference of the exponents and the shifts. */
    s32 expDiff(aExp - bExp);
    s32 wsh(expDiff/DIGIT_SIZE); // word shift
    s32 bsh(expDiff%DIGIT_SIZE); // bit shift
    if(wsh > length) wsh = length; // safety: b contributes nothing

    /* Apply the word shift: b's digit [i+wsh] lines up with a's
     * digit [i].
     */
    bPtr += wsh;

    /* Now work through the digits, shifting b and adding it
     * to our result as we go. This loop covers every overlapping
     * digit except the last one, because it needs to peek at the
     * next-higher digit of b.
     */
    Digit sumCarry(0);
    for(int i(0);i<length-wsh-1;i++)
    {
        /* Do combined add-and-shift */
        Digit aVal(*aPtr), bVal(*bPtr);
        /* Bits shifted in from the next-higher digit of b. The
         * bsh == 0 case is special-cased because shifting by the
         * full digit width is not a valid shift.
         */
        Digit rshCarry(bsh ? (*(bPtr+1) << (DIGIT_SIZE-bsh)) : 0);
        Digit bRsh((bVal >> bsh) | rshCarry);
        Digit sum(aVal + bRsh + sumCarry);
        /* Unsigned wrap-around test: with an incoming carry the sum
         * overflowed if it is <= aVal, without one if it is < aVal.
         */
        sumCarry = sumCarry ? (sum <= aVal) : (sum < aVal);
        *rPtr = sum;

        /* Increment pointers */
        rPtr++;
        aPtr++;
        bPtr++;
    }

    /* Next, take care of the last digit of b, which has no shift
     * carry.
     */
    if(wsh < length)
    {
        Digit aVal(*aPtr), bRsh(*bPtr >> bsh);
        Digit sum(aVal + bRsh + sumCarry);
        sumCarry = sumCarry ? (sum <= aVal) : (sum < aVal);
        *rPtr = sum;
        rPtr++;
        aPtr++;
        bPtr++;
    }

    /* Finally, ripple the addition carry through the remaining
     * digits, where b no longer contributes anything.
     */
    for(int i(length-wsh);i<length;i++)
    {
        Digit sum(*aPtr + sumCarry);
        sumCarry = sumCarry ? (sum == 0) : 0;
        *rPtr = sum;
        rPtr++;
        aPtr++;
    }

    /* Set exponent */
    FG3DError err;
    ECHK_RET(err, signexp.SetExp(aExp));

    /* Shift in any remaining carries: bump the exponent and shift
     * the whole result right by one bit, walking from the top digit
     * down and inserting the carried-out bit as the new top bit.
     */
    if(sumCarry)
    {
        ECHK_RET(err, signexp.Inc());
        Digit rshCarry(DIGIT_MSB_MASK);
        rPtr = (&digits[0]+(length-1));
        for(int i(length-1);i>=0;i--)
        {
            Digit rVal(*rPtr);
            Digit tmp((rVal >> 1) | rshCarry);
            /* Move the bit shifted out of this digit to the top bit
             * of the next-lower digit. Uses DIGIT_SIZE rather than a
             * hard-coded 31 so that this stays consistent with the
             * digit width used everywhere else in the routine.
             */
            rshCarry = rVal << (DIGIT_SIZE-1);
            *rPtr = tmp;
            rPtr--;
        }
    }

    /* Set sign */
    signexp.SetSign(SIGN_POSITIVE);

    /* Return success. */
    return(FG3DError(FG3D_SUCCESS));
}
---