问题
(Disclaimer: I am terrible with math and am coming from JavaScript, so I apologize for any inaccuracies and will do my best to correct them.)
The example on Rosetta Code shows how to calculate coefficients using gsl. Here is the code:
polifitgsl.h:
#ifndef _POLIFITGSL_H
#define _POLIFITGSL_H
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>
bool polynomialfit(int obs, int degree,
double *dx, double *dy, double *store); /* n, p */
#endif
polifitgsl.cpp:
#include "polifitgsl.h"
bool polynomialfit(int obs, int degree,
double *dx, double *dy, double *store) /* n, p */
{
gsl_multifit_linear_workspace *ws;
gsl_matrix *cov, *X;
gsl_vector *y, *c;
double chisq;
int i, j;
X = gsl_matrix_alloc(obs, degree);
y = gsl_vector_alloc(obs);
c = gsl_vector_alloc(degree);
cov = gsl_matrix_alloc(degree, degree);
for(i=0; i < obs; i++) {
for(j=0; j < degree; j++) {
gsl_matrix_set(X, i, j, pow(dx[i], j));
}
gsl_vector_set(y, i, dy[i]);
}
ws = gsl_multifit_linear_alloc(obs, degree);
gsl_multifit_linear(X, y, c, cov, &chisq, ws);
/* store result ... */
for(i=0; i < degree; i++)
{
store[i] = gsl_vector_get(c, i);
}
gsl_multifit_linear_free(ws);
gsl_matrix_free(X);
gsl_matrix_free(cov);
gsl_vector_free(y);
gsl_vector_free(c);
return true; /* we do not "analyse" the result (cov matrix mainly)
to know if the fit is "good" */
}
main.cpp (note I've replaced the sample numbers for x any y with my own):
#include <stdio.h>
#include "polifitgsl.h"
#define NP 11
double x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = {98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9};
#define DEGREE 3
double coeff[DEGREE];
int main()
{
int i;
polynomialfit(NP, DEGREE, x, y, coeff);
for(i=0; i < DEGREE; i++) {
printf("%lf\n", coeff[i]);
}
return 0;
}
And here is the output:
98.030909
-0.016182
0.000909
So that gives me the coefficients. But what I really want is the actual fitted points. In JavaScript, I've used the regression package to calculate the points:
var regression = require('regression');
var calculateRegression = function(values, degree) {
var data = [];
var regressionOutput;
var valuesCount = values.length;
var i = 0;
// Format the data in a way the regression library expects.
for (i = 0; i < valuesCount; i++) {
data[i] = [i, values[i]];
}
// Use the library to calculate the regression.
regressionOutput = regression('polynomial', data, degree);
return regressionOutput;
};
var y = [98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9];
console.log(calculateRegression(y, 3));
Which produces:
{ equation:
[ 98.02987916431594,
-0.017378390369880512,
0.0015748071645344357,
-0.00005721503635571101 ],
points:
[ [ 0, 98.02987916431594 ],
[ 1, 98.01401836607424 ],
[ 2, 98.00096389194348 ],
[ 3, 97.9903724517055 ],
[ 4, 97.98190075514219 ],
[ 5, 97.97520551203543 ],
[ 6, 97.96994343216707 ],
[ 7, 97.96577122531896 ],
[ 8, 97.96234560127297 ],
[ 9, 97.959323269811 ],
[ 10, 97.95636094071487 ],
[ 11, 97.95311532376647 ],
[ 12, 97.94924312874768 ],
[ 13, 97.94440106544033 ],
[ 14, 97.93824584362629 ],
[ 15, 97.93043417308745 ],
[ 16, 97.92062276360569 ],
[ 17, 97.90846832496283 ],
[ 18, 97.89362756694074 ],
[ 19, 97.87575719932133 ] ],
string: 'y = 0x^3 + 0x^2 + -0.02x + 98.03' }
(Note there are floating point issues in JavaScript, so the numbers aren't perfectly exact.)
points
here is what I'm wanting to generate using gsl. Is there a way I can do this?
回答1:
Chad, if I understand what you are needing, you simply need to compute the values based on the polynomial equation using the coefficients you found with your polynomialfit
function. After you compute the coefficients, you can find the value y
for any x
(for DEGREE = 3
) with the equation:
y = x^2*(coeff2) + x*(coeff1) + coeff0
or in C-syntax
y = x*x*coeff[2] + x*coeff[1] + coeff[0];
You can modify your main.cpp
as follows (you should rename both main.cpp
to main.c
and polyfitgsl.cpp
to polyfitgsl.c
-- as they are both C-source files, not C++)
#include <stdio.h>
#include "polifitgsl.h"
#define NP 20
double x[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
97.97, 97.96, 97.94, 97.96, 97.96,
97.97, 97.97, 97.94, 97.94, 97.94,
97.92, 97.96, 97.9, 97.85, 97.9 };
#define DEGREE 3
double coeff[DEGREE];
int main (void)
{
int i;
polynomialfit (NP, DEGREE, x, y, coeff);
printf ("\n polynomial coefficients:\n\n");
for (i = 0; i < DEGREE; i++) {
printf (" coeff[%d] : %11.7lf\n", i, coeff[i]);
}
putchar ('\n');
printf (" computed values:\n\n x y\n");
for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
printf (" %2d, %11.7lf\n", i,
i*i*coeff[2] + i*coeff[1] + coeff[0]);
putchar ('\n');
return 0;
}
Which provides the following output:
Example Use/Output
$ ./bin/main
polynomial coefficients:
coeff[0] : 98.0132468
coeff[1] : -0.0053003
coeff[2] : -0.0000558
computed values:
x y
0, 98.0132468
1, 98.0078906
2, 98.0024229
3, 97.9968435
4, 97.9911524
5, 97.9853497
6, 97.9794354
7, 97.9734094
8, 97.9672718
9, 97.9610226
10, 97.9546617
11, 97.9481891
12, 97.9416049
13, 97.9349091
14, 97.9281016
15, 97.9211825
16, 97.9141517
17, 97.9070093
18, 97.8997553
19, 97.8923896
That seems like what you are asking for. Look it over, compare your values and let me know if you need additional help.
Cleaning Up A Bit Further
Just to clean the code up a bit further, since there is no need for global declaration of x
, y
, or coeff
, and using an enum
to define the constants DEGREE
and NP
, a tidier version would be:
#include <stdio.h>
#include "polifitgsl.h"
enum { DEGREE = 3, NP = 20 };
int main (void)
{
int i;
double x[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
97.97, 97.96, 97.94, 97.96, 97.96,
97.97, 97.97, 97.94, 97.94, 97.94,
97.92, 97.96, 97.9, 97.85, 97.9 };
double coeff[DEGREE] = {0};
polynomialfit (NP, DEGREE, x, y, coeff);
printf ("\n polynomial coefficients:\n\n");
for (i = 0; i < DEGREE; i++)
printf (" coeff[%d] : %11.7lf\n", i, coeff[i]);
putchar ('\n');
printf (" computed values:\n\n x y\n");
for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
printf (" %2d, %11.7lf\n", i,
i*i*coeff[2] + i*coeff[1] + coeff[0]);
putchar ('\n');
return 0;
}
回答2:
I had the same problem and have taken the answers above and added a solution for direct calculation (without gsl) taken from http://www.cplusplus.com/forum/general/181580/
Below, you find a standalone test program with a gsl-based and a direct calculation solution.
I have done some profiling runs and the performance of direct calculation is an impressive 65 times higher on my system, 5.22s vs. 0.08s for 1000 calls to the respective functions.
As a side note, the direct calculation uses Cramer's rule, so you should be careful with ill conditioned data. Normally I would avoid Cramer's but using a full linear system solver for a 3x3 system seems overkill to me.
/*
* =====================================================================================
*
* Filename: polyfit.cpp
*
* Description: Least squares fit of second order polynomials
* Test program using gsl and direct calculation
* Version: 1.0
* Created: 2017-07-17 09:32:55
* Compiler: gcc
*
* Author: Bernhard Brunner, brb_blog@epr.ch
* References: This code was merged, adapted and optimized from these two sources:
* http://www.cplusplus.com/forum/general/181580/
* https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points
* http://brb.epr.ch/blog/blog:least_squares_regression_of_parabola
* Build: compile and link using
* g++ -c -o polifitgsl.o polifitgsl.cpp
* gcc polifitgsl.o -lgsl -lm -lblas -o polifitgsl
*
* Profiling:
* valgrind --tool=callgrind ./polifitgsl
* kcachegrind
*
* polynomialfit takes 5.22s for 1000 calls
* findQuadCoefficients takes 0.08s for 1000 calls
* 65x faster
* =====================================================================================
*/
#include <stdio.h>
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>
bool polynomialfit(int obs, int degree,
double *dx, double *dy, double *store); /* n, p */
double x[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
97.97, 97.96, 97.94, 97.96, 97.96,
97.97, 97.97, 97.94, 97.94, 97.94,
97.92, 97.96, 97.9, 97.85, 97.9 };
#define NP (sizeof(x)/sizeof(double)) // 20
#define DEGREE 3
double coeff[DEGREE];
bool findQuadCoefficients(double timeArray[], double valueArray[], double *coef, double &critPoint, int PointsNum){
const double S00=PointsNum;//points number
double S40=0, S10=0, S20=0, S30=0, S01=0, S11=0, S21 = 0;
// const double MINvalue = valueArray[0];
// const double MINtime = timeArray[0];
for (int i=0; i<PointsNum; i++ ){
double value = valueArray[i]; // - MINvalue); //normalizing
// cout << "i=" << i << " index=" << index << " value=" << value << endl;
int index = timeArray[i]; // - MINtime;
int index2 = index * index;
int index3 = index2 * index;
int index4 = index3 * index;
S40+= index4;
S30+= index3;
S20+= index2;
S10+= index;
S01 += value;
S11 += value*index;
S21 += value*index2;
}
double S20squared = S20*S20;
//minors M_ij=M_ji
double M11 = S20*S00 - S10*S10;
double M21 = S30*S00 - S20*S10;
double M22 = S40*S00 - S20squared;
double M31 = S30*S10 - S20squared;
double M32 = S40*S10 - S20*S30;
// double M33 = S40*S20 - pow(S30,2);
double discriminant = S40*M11 - S30*M21 + S20*M31;
// printf("discriminant :%lf\n", discriminant);
if (abs(discriminant) < .00000000001) return false;
double Da = S21*M11
-S11*M21
+S01*M31;
coef[2] = Da/discriminant;
// cout << "discriminant=" << discriminant;
// cout << " Da=" << Da;
double Db = -S21*M21
+S11*M22
-S01*M32;
coef[1] = Db/discriminant;
// cout << " Db=" << Db << endl;
double Dc = S40*(S20*S01 - S10*S11)
- S30*(S30*S01 - S10*S21)
+ S20*(S30*S11 - S20*S21);
coef[0] = Dc/discriminant;
// printf("c=%lf\n", c);
critPoint = -Db/(2*Da); // + MINtime; //-b/(2*a)= -Db/discriminant / (2*(Da/discriminant)) = -Db/(2*Da);
return true;
}
bool polynomialfit(int obs, int degree,
double *dx, double *dy, double *store) /* n, p */
{
gsl_multifit_linear_workspace *ws;
gsl_matrix *cov, *X;
gsl_vector *y, *c;
double chisq;
int i, j;
X = gsl_matrix_alloc(obs, degree);
y = gsl_vector_alloc(obs);
c = gsl_vector_alloc(degree);
cov = gsl_matrix_alloc(degree, degree);
for(i=0; i < obs; i++) {
for(j=0; j < degree; j++) {
gsl_matrix_set(X, i, j, pow(dx[i], j));
}
gsl_vector_set(y, i, dy[i]);
}
ws = gsl_multifit_linear_alloc(obs, degree);
gsl_multifit_linear(X, y, c, cov, &chisq, ws);
/* store result ... */
for(i=0; i < degree; i++) {
store[i] = gsl_vector_get(c, i);
}
gsl_multifit_linear_free(ws);
gsl_matrix_free(X);
gsl_matrix_free(cov);
gsl_vector_free(y);
gsl_vector_free(c);
return true; /* we do not "analyse" the result (cov matrix mainly)
to know if the fit is "good" */
}
void testcoeff(double *coeff)
{
printf ("\n polynomial coefficients\n");
for (int i = 0; i < DEGREE; i++) {
printf (" coeff[%d] : %11.7lf\n", i, coeff[i]);
}
putchar ('\n');
printf (" computed values:\n\n x, yi, yip\n");
for (unsigned i = 0; i < NP; i++) {
printf ("%2u,%.7lf,%.7lf\n", i,
y[i],
i*i*coeff[2] + i*coeff[1] + coeff[0]);
}
putchar ('\n');
}
int main (void)
{
#define ITER 1000
for (int i=0; i< ITER; i++) {
polynomialfit (NP, DEGREE, x, y, coeff);
}
testcoeff(coeff);
double sx;
for (int i=0; i< ITER; i++) {
findQuadCoefficients(x, y, coeff, sx, NP);
}
printf("critical point %lf\n", sx);
testcoeff(coeff);
return 0;
}
References:
- http://www.cplusplus.com/forum/general/181580/
- http://mathworld.wolfram.com/LeastSquaresFittingPolynomial.html
来源:https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points