Given a sequence of data (it may have duplicates), a fixed-sized moving window, move the window at each iteration from the start of the data sequence, such that (1) the oldes
I attach my segment tree (see my other post) which allows the frequency distribution of counts to be queried very efficiently.
This implements the following data structure:
|-------------------------------|
|---------------|---------------|
|-------|-------|-------|-------|
|---|---|---|---|---|---|---|---|
  0   1   2   3   4   5   6   7
Each segment keeps the count of items in the range it covers. I use 2N segments for a range of values 0..N-1. These are placed in a single rolled-out array rather than the tree format shown figuratively above.
So if you are calculating rolling medians over a set of integers which vary over 0..65535 (N = 2**16), then you only need 2N = 128k counters to store them, and each insert/delete/query takes O(log N) steps, where N is the size of the value range, i.e. about 16 steps per operation, independent of the window size.
This is a big win if the data range is much smaller than your rolling window.
#if !defined(SEGMENT_TREE_H)
#define SEGMENT_TREE_H
// NOTE(review): the header names on these #include lines were missing (bare
// `#include`); they are reconstructed from the standard-library names this
// header uses. One further unconditional include could not be recovered - confirm.
#include <algorithm>
#include <array>
#include <cassert>
#ifndef NDEBUG
#include <set>
#endif
//____________________________________________________________________________________
//  t_segment_tree
//
//      Counting segment tree over the fixed value range [0, 2^BITS).
//
//      The tree is stored as an implicit binary heap: node numbers start at 1
//      (the root), the children of node n are 2n and 2n + 1, and node n lives
//      in counts[n - 1]. The leaf for value x is node (x + cnt_elements).
//      Every node holds how many stored values fall in the range it covers,
//      so insert / erase / k-th smallest each walk a single root-to-leaf path
//      (BITS + 1 nodes).
//
//      Preconditions are checked with assert() only. When NDEBUG is defined,
//      violating them is not detected (an out-of-range value indexes outside
//      `counts`; erasing an absent value wraps the unsigned counters).
//
//      NOTE(review): the template parameter list and the element types of
//      `counts` / `elements` were missing from this file; they are
//      reconstructed here from how the names are used below - confirm
//      against existing callers.
//____________________________________________________________________________________
template <unsigned BITS>
class t_segment_tree
{
    // cnt_storage == 2^(BITS + 1) must be representable in an unsigned.
    static_assert(BITS < sizeof(unsigned) * 8 - 1,
                  "BITS is too large: 2^(BITS + 1) must fit in an unsigned");

    // Number of distinct values that can be stored (the leaves of the tree).
    static constexpr unsigned cnt_elements = 1u << BITS;
    // Number of counter slots: all internal nodes plus the leaves (one slot is spare).
    static constexpr unsigned cnt_storage = cnt_elements << 1;

    // counts[n - 1] == number of stored values under node n.
    std::array<unsigned, cnt_storage> counts;
    // Total number of stored values (duplicates included).
    unsigned count;

#ifndef NDEBUG
    // Debug-only shadow copy of the contents, used to validate erase().
    std::multiset<unsigned> elements;
#endif

public:
    //____________________________________________________________________________________
    //  constructor
    //      Creates an empty tree: every counter is zero.
    //____________________________________________________________________________________
    t_segment_tree() : count(0)
    {
        counts.fill(0);
    }

    //____________________________________________________________________________________
    //  size
    //      Returns the number of stored values, counting duplicates.
    //____________________________________________________________________________________
    unsigned size() const { return count; }

    //____________________________________________________________________________________
    //  insert
    //      Adds one occurrence of x.
    //      Precondition: x < 2^BITS.
    //____________________________________________________________________________________
    void insert(unsigned x)
    {
        // Validate before touching any state, including the debug shadow set.
        assert("...............This element is too large for the number of BITs!!..............." && cnt_elements > x);
#ifndef NDEBUG
        elements.insert(x);
#endif
        // Walk from the leaf of x up to the root, bumping every covering node.
        for (unsigned node = x + cnt_elements; node != 0; node >>= 1)
        {
            ++counts[node - 1];
        }
        ++count;
    }

    //____________________________________________________________________________________
    //  erase
    //      Removes one occurrence of x.
    //      Precondition: x is currently stored (it was insert-ed and not yet erased).
    //____________________________________________________________________________________
    void erase(unsigned x)
    {
#ifndef NDEBUG
        // if the assertion failed here, it means that x was never "insert"-ed in the first place
        const std::multiset<unsigned>::iterator pos = elements.find(x);
        assert("...............This element was not 'insert'-ed before it is being 'erase'-ed!!..............." && pos != elements.end());
        elements.erase(pos);    // erase by iterator: removes a single occurrence only
#endif
        // Walk from the leaf of x up to the root, decrementing every covering node.
        for (unsigned node = x + cnt_elements; node != 0; node >>= 1)
        {
            --counts[node - 1];
        }
        --count;
    }

    //____________________________________________________________________________________
    //  kth element
    //      Returns the k-th smallest stored value (k is zero-based, duplicates
    //      counted individually).
    //      Precondition: k < size().
    //____________________________________________________________________________________
    unsigned operator[](unsigned k) const
    {
        assert("...............The kth element: k needs to be smaller than the number of elements!!..............." && k < size());
        // Below the root, `node` is always a left child when it is tested:
        // if the left subtree holds <= k values, the answer lies in the right
        // sibling, so skip those values and step sideways; then descend to the
        // left child of whichever node was chosen.
        unsigned node = 1;
        while (node < cnt_storage)
        {
            if (counts[node - 1] <= k)
            {
                k -= counts[node - 1];
                ++node;
            }
            node <<= 1;
        }
        // The loop overshoots by one level; undo it and convert leaf node -> value.
        return (node >> 1) - cnt_elements;
    }
};
#endif