Answering to another question, I wrote the program below to compare different search methods in a sorted array. Basically I compared two implementations of Interpolation search
I have an excessively complicated solution, which requires a specialized sorting function. The sort is slightly slower than a good quicksort, but all of my tests show that the search function is much faster than a binary or interpolation search. I called it a regression sort before I found out that the name was already taken, but didn't bother to think of a new name (ideas?).
There are three files to demonstrate.
The regression sort/search code:
#include
#include
#include
#include "limits.h"
void insertionSort(int array[], int length) {
int key, j;
for(int i = 1; i < length; i++) {
key = array[i];
j = i - 1;
while (j >= 0 && array[j] > key) {
array[j + 1] = array[j];
--j;
}
array[j + 1] = key;
}
}
class RegressionTable {
public:
RegressionTable(int arr[], int s, int lower, int upper, double mult, int divs);
RegressionTable(int arr[], int s);
void sort(void);
int find(int key);
void printTable(void);
void showSize(void);
private:
void createTable(void);
inline unsigned int resolve(int n);
int * array;
int * table;
int * tableSize;
int size;
int lowerBound;
int upperBound;
int divisions;
int divisionSize;
int newSize;
double multiplier;
};
RegressionTable::RegressionTable(int arr[], int s) {
array = arr;
size = s;
multiplier = 1.35;
divisions = sqrt(size);
upperBound = INT_MIN;
lowerBound = INT_MAX;
for (int i = 0; i < size; ++i) {
if (array[i] > upperBound)
upperBound = array[i];
if (array[i] < lowerBound)
lowerBound = array[i];
}
createTable();
}
RegressionTable::RegressionTable(int arr[], int s, int lower, int upper, double mult, int divs) {
array = arr;
size = s;
lowerBound = lower;
upperBound = upper;
multiplier = mult;
divisions = divs;
createTable();
}
void RegressionTable::showSize(void) {
int bytes = sizeof(*this);
bytes = bytes + sizeof(int) * 2 * (divisions + 1);
}
void RegressionTable::createTable(void) {
divisionSize = size / divisions;
newSize = multiplier * double(size);
table = new int[divisions + 1];
tableSize = new int[divisions + 1];
for (int i = 0; i < divisions; ++i) {
table[i] = 0;
tableSize[i] = 0;
}
for (int i = 0; i < size; ++i) {
++table[((array[i] - lowerBound) / divisionSize) + 1];
}
for (int i = 1; i <= divisions; ++i) {
table[i] += table[i - 1];
}
table[0] = 0;
for (int i = 0; i < divisions; ++i) {
tableSize[i] = table[i + 1] - table[i];
}
}
int RegressionTable::find(int key) {
double temp = multiplier;
multiplier = 1;
int minIndex = table[(key - lowerBound) / divisionSize];
int maxIndex = minIndex + tableSize[key / divisionSize];
int guess = resolve(key);
double t;
while (array[guess] != key) {
// uncomment this line if you want to see where it is searching.
//cout << "Regression Guessing " << guess << ", not there." << endl;
if (array[guess] < key) {
minIndex = guess + 1;
}
if (array[guess] > key) {
maxIndex = guess - 1;
}
if (array[minIndex] > key || array[maxIndex] < key) {
return -1;
}
t = ((double)key - array[minIndex]) / ((double)array[maxIndex] - array[minIndex]);
guess = minIndex + t * (maxIndex - minIndex);
}
multiplier = temp;
return guess;
}
inline unsigned int RegressionTable::resolve(int n) {
float temp;
int subDomain = (n - lowerBound) / divisionSize;
temp = n % divisionSize;
temp /= divisionSize;
temp *= tableSize[subDomain];
temp += table[subDomain];
temp *= multiplier;
return (unsigned int)temp;
}
void RegressionTable::sort(void) {
int * out = new int[int(size * multiplier)];
bool * used = new bool[int(size * multiplier)];
int higher, lower;
bool placed;
for (int i = 0; i < size; ++i) {
/* Figure out where to put the darn thing */
higher = resolve(array[i]);
lower = higher - 1;
if (higher > newSize) {
higher = size;
lower = size - 1;
} else if (lower < 0) {
higher = 0;
lower = 0;
}
placed = false;
while (!placed) {
if (higher < size && !used[higher]) {
out[higher] = array[i];
used[higher] = true;
placed = true;
} else if (lower >= 0 && !used[lower]) {
out[lower] = array[i];
used[lower] = true;
placed = true;
}
--lower;
++higher;
}
}
int index = 0;
for (int i = 0; i < size * multiplier; ++i) {
if (used[i]) {
array[index] = out[i];
++index;
}
}
insertionSort(array, size);
}
And then there is the regular search functions:
#include
using namespace std;
int binarySearch(int array[], int start, int end, int key) {
// Determine the search point.
int searchPos = (start + end) / 2;
// If we crossed over our bounds or met in the middle, then it is not here.
if (start >= end)
return -1;
// Search the bottom half of the array if the query is smaller.
if (array[searchPos] > key)
return binarySearch (array, start, searchPos - 1, key);
// Search the top half of the array if the query is larger.
if (array[searchPos] < key)
return binarySearch (array, searchPos + 1, end, key);
// If we found it then we are done.
if (array[searchPos] == key)
return searchPos;
}
int binarySearch(int array[], int size, int key) {
return binarySearch(array, 0, size - 1, key);
}
int interpolationSearch(int array[], int size, int key) {
int guess = 0;
double t;
int minIndex = 0;
int maxIndex = size - 1;
while (array[guess] != key) {
t = ((double)key - array[minIndex]) / ((double)array[maxIndex] - array[minIndex]);
guess = minIndex + t * (maxIndex - minIndex);
if (array[guess] < key) {
minIndex = guess + 1;
}
if (array[guess] > key) {
maxIndex = guess - 1;
}
if (array[minIndex] > key || array[maxIndex] < key) {
return -1;
}
}
return guess;
}
And then I wrote a simple main to test out the different sorts.
#include
#include
#include
#include
#include "regression.h"
#include "search.h"
using namespace std;
void randomizeArray(int array[], int size) {
for (int i = 0; i < size; ++i) {
array[i] = rand() % size;
}
}
int main(int argc, char * argv[]) {
int size = 100000;
string arg;
if (argc > 1) {
arg = argv[1];
size = atoi(arg.c_str());
}
srand(time(NULL));
int * array;
cout << "Creating Array Of Size " << size << "...\n";
array = new int[size];
randomizeArray(array, size);
cout << "Sorting Array...\n";
RegressionTable t(array, size, 0, size*2.5, 1.5, size);
//RegressionTable t(array, size);
t.sort();
int trials = 10000000;
int start;
cout << "Binary Search...\n";
start = clock();
for (int i = 0; i < trials; ++i) {
binarySearch(array, size, i % size);
}
cout << clock() - start << endl;
cout << "Interpolation Search...\n";
start = clock();
for (int i = 0; i < trials; ++i) {
interpolationSearch(array, size, i % size);
}
cout << clock() - start << endl;
cout << "Regression Search...\n";
start = clock();
for (int i = 0; i < trials; ++i) {
t.find(i % size);
}
cout << clock() - start << endl;
return 0;
}
Give it a try and tell me if it's faster for you. It's super complicated, so it's really easy to break it if you don't know what you are doing. Be careful about modifying it.
I compiled the main with g++ on ubuntu.