A
Alf P. Steinbach
It would be nice if one could make do with a non-copyable forward
iterator for the (x,y) data points, without storing the points.
However, my current code requires the ability to iterate a second time,
from a copy of the start iterator.
Any suggestions for how to avoid the second iteration, or should I
perhaps just forget it (I only need this to check algorithm complexity
in unit test, so it's not a current problem, just "not ideal" code...)?
Disclaimer 1: I do not KNOW that the code is correct, in particular the
r_squared() implementation.
Disclaimer 2: In the mathworld reference I do not understand eq. 17.
Cheers,
- Alf
iterator for the (x,y) data points, without storing the points.
However, my current code requires the ability to iterate a second time,
from a copy of the start iterator.
Any suggestions for how to avoid the second iteration, or should I
perhaps just forget it (I only need this to check algorithm complexity
in unit test, so it's not a current problem, just "not ideal" code...)?
Code:
#pragma once
// Copyright (c) 2013 Alf P. Steinbach
// <url:
http://terpconnect.umd.edu/~toh/spectrum/CurveFitting.html#MathDetails>
// n = number of x,y data points
// sumx = Σx
// sumy = Σy
// sumxy = Σx*y
// sumx2 = Σx*x
// meanx = sumx / n
// meany = sumy / n
// slope = (n*sumxy - sumx*sumy) / (n*sumx2 - sumx*sumx)
// intercept = meany-(slope*meanx)
// ssy = Σ(y-meany)^2
// ssr = Σ(y-intercept-slope*x)^2
// R2 = 1-(ssr/ssy)
// Standard deviation of the slope = SQRT(ssr/(n-2))*SQRT(n/(n*sumx2 -
sumx*sumx))
// Standard deviation of the intercept =
SQRT(ssr/(n-2))*SQRT(sumx2/(n*sumx2 - sumx*sumx))
//
// Also, <url: http://mathworld.wolfram.com/LeastSquaresFitting.html>
#include <rfc/cppx/calc/Statistics.h> // cppx::calc::Statistics
namespace cppx{ namespace calc{
class Best_line_fit
: private Statistics
{
private:
double intercept_;
double slope_;
double ssy_; // Sum of squares of y-expressions
double ssr_; // Sum of squares of residue expressions
public:
auto stats() const -> Statistics const& { return *this; }
auto slope() const -> double { return slope_; }
auto intercept() const -> double { return intercept_; }
auto ssy() const -> double { return ssy_; }
auto ssr() const -> double { return ssr_; }
auto r_squared() const -> double { return (ssr_ == 0 || ssy_
== 0? 1.0 : 1.0 - ssr_/ssy_); }
auto r() const -> double { return sqrt( r_squared() ); }
template< class It >
Best_line_fit( It const first, It const end )
: Statistics( first, end )
, ssy_( 0.0 )
, ssr_( 0.0 )
{
slope_ = (n()*sum_xy() - sum_x()*sum_y()) / (n()*sum_xx() -
square( sum_x() ));
intercept_ = mean_y() - slope_*mean_x();
for( It it = first; it != end; ++it )
{
double const x = x_of( *it );
double const y = y_of( *it );
//ssy_ += square( y - mean_y() );
ssr_ += square( y - (slope_*x + intercept_) );
}
ssy_ = sum_yy() - 2*sum_y()*mean_y() + n()*square( mean_y() );
}
};
} } // namespace cppx::calc
Disclaimer 1: I do not KNOW that the code is correct, in particular the
r_squared() implementation.
Disclaimer 2: In the mathworld reference I do not understand eq. 17.
Cheers,
- Alf