find median in a fixed-size moving window along a long sequence of data

前端 未结 5 1159
挽巷
挽巷 2021-02-04 09:41

Given a sequence of data (it may have duplicates), a fixed-sized moving window, move the window at each iteration from the start of the data sequence, such that (1) the oldes

5条回答
  •  不思量自难忘°
    2021-02-04 10:17

    I attach my segment tree (see my other post) which allows the frequency distribution of counts to be queried very efficiently.

    This implements the following data structure:

    |-------------------------------|
    |---------------|---------------|
    |-------|-------|-------|-------|
    |---|---|---|---|---|---|---|---|
      0   1   2   3   4   5   6   7  
    

    Each segment keeps a count of the items in the range it covers. I use 2N segments for a range of values from 1..N. These are placed in a single rolled-out vector rather than the tree format shown figuratively above.

    So if you are calculating rolling medians over a set of integers which vary from 1..65536, then you only need 128kb to store them, and can insert/delete/query in O(log N) operations, where N is the size of the range, i.e. 2**16.

    This is a big win if the data range is much smaller than your rolling window.

    #if !defined(SEGMENT_TREE_H)
    #define SEGMENT_TREE_H
    // NOTE(review): the header names were lost when this page was extracted; the
    // includes below are reconstructed from what the class actually uses. The
    // original listed one more unconditional #include whose name cannot be
    // recovered; nothing in the class below requires it.
    #include <algorithm>
    #include <array>
    #include <cassert>
    
    #ifndef NDEBUG
    #include <set>
    #endif
    
    //____________________________________________________________________________________
    //
    //  t_segment_tree
    //
    //      Counting segment tree over the integer values 0 .. (1 << BITS) - 1.
    //
    //      Every node stores how many inserted values fall inside the range it
    //      covers. Nodes are stored in one flat array using 1-based heap numbering:
    //      node 1 is the root, node n has children 2n and 2n + 1, and the leaf for
    //      value x is node x + cnt_elements. Node n lives at counts[n - 1].
    //
    //      insert / erase / operator[] each walk one root-to-leaf path, i.e. BITS + 1
    //      nodes. (Debug builds additionally mirror every value in a std::multiset
    //      so that misuse can be caught by assertions.)
    //
    //      COUNTS  integer type of the per-node counters. It must be able to hold the
    //              largest number of values present at once; overflow is not checked.
    //      BITS    log2 of the number of distinct values supported.
    //
    //      NOTE(review): the template parameter list and the container template
    //      arguments were lost when this listing was extracted; they are
    //      reconstructed here as <typename COUNTS, unsigned BITS> -- confirm
    //      against existing callers.
    //____________________________________________________________________________________
    template<typename COUNTS, unsigned BITS>
    class t_segment_tree
    {
        // cnt_storage = 2 ** (BITS + 1) has to fit in an unsigned
        // (assumes unsigned is at least 32 bits wide).
        static_assert(BITS < 31, "BITS is too large: 2 ** (BITS + 1) must fit in an unsigned");
    
        static const unsigned                       cnt_elements    = (1u << BITS);
        static const unsigned                       cnt_storage     = cnt_elements << 1;
        // Node n is kept at counts[n - 1]; the last slot is never addressed for valid input.
        std::array<COUNTS, cnt_storage>             counts;
        // Total number of values currently held (with multiplicity).
        unsigned                                    count;
    
    #ifndef NDEBUG
        // Debug-only mirror of the contents, used to validate erase().
        std::multiset<unsigned>                     elements;
    #endif
        public:
    
        //____________________________________________________________________________________
    
        //  constructor
    
        //      creates an empty tree: every counter is zero
        //____________________________________________________________________________________
        t_segment_tree(): count(0)
        {
            counts.fill(COUNTS{});
        }
        //~t_segment_tree();
    
        //____________________________________________________________________________________
    
        //  size
    
        //      number of values currently held, duplicates included
        //____________________________________________________________________________________
        unsigned size() const  { return count; }
    
        //____________________________________________________________________________________
    
        //  insert
    
        //      adds one occurrence of x; x must be < 2 ** BITS (checked in debug builds only)
        //____________________________________________________________________________________
        void insert(unsigned x)
        {
    #ifndef NDEBUG
            // Validate before touching any state.
            assert("...............This element is too large for the number of BITs!!..............." && cnt_elements > x);
            elements.insert(x);
    #endif
            // Start at the leaf for x and bump every node on the way up to the root (node 1).
            for (unsigned node = x + cnt_elements; node != 0; node >>= 1)
            {
                ++counts[node - 1];
            }
            ++count;
        }
    
        //____________________________________________________________________________________
    
        //  erase 
    
        //      removes one occurrence of x
        //      assumes x is in the set (checked in debug builds only)
        //____________________________________________________________________________________
        void erase(unsigned x)
        {
    #ifndef NDEBUG
            // if the assertion failed here, it means that x was never "insert"-ed in the first place
            assert("...............This element was not 'insert'-ed before it is being 'erase'-ed!!..............." && elements.count(x));
            // erase via iterator so that only ONE occurrence is removed from the multiset
            elements.erase(elements.find(x));
    #endif
            // Mirror image of insert(): decrement every node from the leaf up to the root.
            for (unsigned node = x + cnt_elements; node != 0; node >>= 1)
            {
                --counts[node - 1];
            }
            --count;
        }
    
        //____________________________________________________________________________________
    
        //  kth element
    
        //      returns the k-th smallest value held (k is 0-based, duplicates counted),
        //      e.g. tree[tree.size() / 2] is the upper median.
        //      k must be < size() (checked in debug builds only)
        //____________________________________________________________________________________
        unsigned operator[](unsigned k) const
        {
            assert("...............The kth element: k needs to be smaller than the number of elements!!..............." && k < size());
            unsigned node = 1;
            // Invariant: the subtree under `node` holds more than k values, so the
            // answer is the k-th smallest value inside that subtree.
            while (node < cnt_elements)
            {
                node <<= 1;                                 // step to the left child
                const COUNTS in_left = counts[node - 1];
                if (in_left <= k)
                {
                    // The left subtree is too small to contain rank k: skip all of
                    // its values and continue in the right sibling.
                    k -= static_cast<unsigned>(in_left);
                    ++node;
                }
            }
            // `node` is now a leaf; leaves are numbered cnt_elements + value.
            return node - cnt_elements;
        }
    
    };
    #endif
    

提交回复
热议问题