I have two series, series1 and series2. My aim is to find how much Series2 is different from Series1,on a bin to bin basis, (each bin represents a particula
Here is a C implementation of an algorithm that computes the divergence of actual data from predicted data. The algorithm comes from the book "Practical BASIC Programs" (Osborne/McGraw-Hill, copyright 1980).
Here is the .h file:
/*
* divergence.h
*
* Created on: Jan 13, 2011
* Author: Erik Oosterwal
*/
#ifndef DIVERGENCE_H_
#define DIVERGENCE_H_
/**
 * Error statistics describing how far a series of actual values
 * diverges from the corresponding expected values.
 */
typedef struct
{
    int   DataSize;      ///< Number of value pairs the statistics cover
    float TotalError;    ///< Sum of the signed errors (actual - expected)
    float AbsError;      ///< Sum of the absolute errors
    float SqError;       ///< Sum of the squared errors
    float MeanError;     ///< TotalError divided by the number of values
    float MeanAbsError;  ///< AbsError divided by the number of values
    float MeanSqError;   ///< SqError divided by the number of values
    float RMSError;      ///< Root mean square error: square root of the mean squared error
} DIVERGENCE_ERROR_TYPE;
/**
 * Compare actual[] against expected[] over the first `size` elements and
 * store the resulting error statistics (totals, means and root mean square
 * error) in *error. Both arrays must hold at least `size` elements.
 */
void Divergence__Error(int size, float expected[], float actual[], DIVERGENCE_ERROR_TYPE *error);
// ABS(x): absolute value of x for any arithmetic type.
// For floating-point values prefer fabs()/fabsf() from <math.h>; abs() from
// <stdlib.h> takes an int and would truncate a float/double argument.
// The whole expansion is parenthesized so the macro can be used safely inside
// larger expressions such as 1 + ABS(x).
// Not safe against side effects: the argument is evaluated twice, so never
// increment or otherwise modify it inside ABS().
#ifndef ABS
#define ABS(x) (((x) > 0) ? (x) : (0 - (x)))
#endif
#endif /* DIVERGENCE_H_ */
...the .c file:
/*
* divergence.c
*
* Created on: Jan 13, 2011
* Author: Erik Oosterwal
*/
#include "math.h"
#include "divergence.h"
/**
 * @brief Compute divergence of actual values from expected values.
 *
 * @details Accumulates, in double precision, the signed error
 * (actual - expected), the absolute error and the squared error over the
 * first @p size elements, then stores the totals, their means and the root
 * mean square error in @p error as floats.
 *
 * If @p size is not positive, or either input array is a null pointer, no
 * samples are read: DataSize is reported as 0 and every statistic is 0,
 * rather than the NaN values a division by a zero count would produce.
 *
 * @param size     - integer value defines the number of values to compare.
 * @param expected - predicted/reference values; at least @p size elements.
 * @param actual   - observed values; at least @p size elements.
 * @param error    - receives the computed statistics; if it is a null
 *                   pointer the function does nothing.
 */
void Divergence__Error(int size, float expected[], float actual[], DIVERGENCE_ERROR_TYPE *error)
{
    double total_err = 0.0;
    double abs_err = 0.0;
    double sq_err = 0.0;
    double count = 0.0;
    int index = 0;

    if (!error)
    {
        return;
    }

    /* Start from a well-defined "no data" result so early exits never leave
     * the caller's structure uninitialized. */
    error->DataSize = 0;
    error->TotalError = 0.0f;
    error->AbsError = 0.0f;
    error->SqError = 0.0f;
    error->MeanError = 0.0f;
    error->MeanAbsError = 0.0f;
    error->MeanSqError = 0.0f;
    error->RMSError = 0.0f;

    /* Nothing to compare: avoid dividing by zero (or a negative count). */
    if (size <= 0 || !expected || !actual)
    {
        return;
    }

    for (index = 0; index < size; index++)
    {
        double diff = (double)actual[index] - (double)expected[index];

        total_err += diff;
        abs_err += fabs(diff);
        /* diff * diff is already non-negative; no abs or pow() needed. */
        sq_err += diff * diff;
    }

    count = (double)size;

    error->DataSize = size;
    error->TotalError = (float)total_err;
    error->AbsError = (float)abs_err;
    error->SqError = (float)sq_err;
    error->MeanError = (float)(total_err / count);
    error->MeanAbsError = (float)(abs_err / count);
    error->MeanSqError = (float)(sq_err / count);
    error->RMSError = (float)sqrt(sq_err / count);
}
...and a sample main() for testing the function:
/*
* main.c
*
* Created on: Jan 13, 2011
* Author: Erik Oosterwal
*/
#include <stdio.h>
#include "divergence.h"
// Sample 1 (6 values each): main() passes poll[] as the expected series
// and vote[] as the actual series.
float vote[]={40.3, 22.5, 16.3, 10.5, 7.2, 3.2};
float poll[]={42.7, 21.4, 18.2, 6.0, 7.4, 4.3};
// Sample 2 (7 values each): main() passes predict[] as the expected series
// and actual[] as the actual series.
float actual[] ={74, 70, 58, 60, 65, 73, 70};
float predict[]={49, 62, 75, 82, 37, 58, 92};
/* Print every field of a statistics record, one value per line,
 * followed by two blank lines. */
static void print_stats(const DIVERGENCE_ERROR_TYPE *s)
{
    printf("%i\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n\n\n",
           s->DataSize,
           s->TotalError,
           s->AbsError,
           s->SqError,
           s->MeanError,
           s->MeanAbsError,
           s->MeanSqError,
           s->RMSError);
}

/* Test driver: run both sample data sets through Divergence__Error()
 * and print the resulting statistics. */
int main(int argc, char *argv[])
{
    DIVERGENCE_ERROR_TYPE result;

    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    /* Sample 1: 6 values, poll[] is expected, vote[] is actual. */
    Divergence__Error(6, poll, vote, &result);
    print_stats(&result);

    /* Sample 2: 7 values, predict[] is expected, actual[] is actual. */
    Divergence__Error(7, predict, actual, &result);
    print_stats(&result);

    return 0;
}
I can't guarantee that this is the fastest method, and the function could use some tweaking to make it friendlier to different data types, but it works, and its results were verified against the samples provided in the book.