Algorithm for finding the maximum difference in an array of numbers

后端 未结 7 485
夕颜
夕颜 2021-02-04 03:57

I have an array of a few million numbers.

double* const data = new double [3600000];  // array of 3,600,000 doubles (parentheses would allocate ONE double holding 3600000.0)

I need to iterate through the array and find the range (th

7条回答
  •  鱼传尺愫
    2021-02-04 04:08

    I decided to find out, using actual code and actual timings, what the most efficient algorithm I could come up with for this problem was. I first created a simple solution, one that tracks the min/max for the previous n entries using a circular buffer, and a test harness to measure the speed. In the simple solution, each data value is compared against the whole set of min/max values, so that's about window_size * count tests (where window_size in the original question is 1000 and count is 3600000).

    I then thought about how to make it faster. First off, I created a solution that used a fifo queue to store window_size values and a linked list to store the values in ascending order where each node in the linked list was also a node in the queue. To process a data value, the item at the end of the fifo was removed from the linked list and the queue. The new value was added to the start of the queue and a linear search was used to find the position in the linked list. The min and max values could then be read from the start and end of the linked list. This was quick, but wouldn't scale well with increasing window_size (i.e. linearly).

    So I decided to add a binary tree to the system to try to speed up the search part of the algorithm. The final timings for window_size = 1000 and count = 3600000 were:

    Simple: 106875
    Quite Complex: 1218
    Complex: 1219
    

    which was both expected and unexpected. Expected in that using a sorted linked list helped; unexpected in that the overhead of maintaining a self-balancing tree cancelled out the advantage of a quicker search. I tried the latter two with an increased window size and found that they were always nearly identical up to a window_size of 100000.

    Which all goes to show that theorising about algorithms is one thing, implementing them is something else.

    Anyway, for those that are interested, here's the code I wrote (there's quite a bit!):

    Range.h:

    #include <algorithm>
    #include <cstdlib>
    #include <ctime>
    #include <iostream>
    
    using namespace std;
    
    //  Callback signatures shared by the three implementations.
    //  An OutputCallback is handed the minimum and maximum of one window;
    //  a GeneratorCallback returns the next input sample.
    typedef void (*OutputCallback) (int min, int max);
    typedef int (*GeneratorCallback) ();
    
    //  The three test functions. Each takes the number of samples to process,
    //  the window length, a sample source and a result sink, and returns the
    //  number of clock ticks spent processing.
    clock_t Simple (int count, int window, GeneratorCallback input, OutputCallback output);
    clock_t QuiteComplex (int size, int window, GeneratorCallback input, OutputCallback output);
    clock_t Complex (int count, int window, GeneratorCallback input, OutputCallback output);
    

    main.cpp:

    #include "Range.h"
    
    //  Running checksum of every reported range (max - min).
    //  Unsigned on purpose: millions of differences are accumulated, which
    //  overflows a signed int (undefined behaviour); unsigned arithmetic
    //  wraps around in a well-defined way and still lets the checksums of the
    //  different implementations be compared for equality.
    unsigned int
      checksum;
    
    //  This callback is used to get data.
    //  Returns the next value from rand (), so the sequence is determined by
    //  the most recent srand () call.
    int CreateData ()
    {
      return rand ();
    }
    
    //  This callback is used to output the results.
    //  Adds the width of the reported range (max - min) to the checksum.
    void OutputResults (int min, int max)
    {
      //cout << min << " - " << max << endl;
    
      //  subtract in unsigned arithmetic so that extreme inputs cannot overflow
      checksum += static_cast <unsigned int> (max) - static_cast <unsigned int> (min);
    }
    
    //  The program entry point.
    //  Runs the three implementations over the same pseudo-random sequence
    //  (the generator is re-seeded and the checksum reset before each run) and
    //  prints the elapsed ticks and the checksum for each, so that both speed
    //  and results can be compared.
    //  Returns int as the C++ standard requires ("void main" is ill-formed).
    int main ()
    {
      const int
        count = 3600000,
        window = 1000;
    
      srand (0);
      checksum = 0;
      std::cout << "Simple: Ticks = " << Simple (count, window, CreateData, OutputResults) << ", checksum = " << checksum << std::endl;
    
      srand (0);
      checksum = 0;
      std::cout << "Quite Complex: Ticks = " << QuiteComplex (count, window, CreateData, OutputResults) << ", checksum = " << checksum << std::endl;
    
      srand (0);
      checksum = 0;
      std::cout << "Complex: Ticks = " << Complex (count, window, CreateData, OutputResults) << ", checksum = " << checksum << std::endl;
    
      return 0;
    }
    

    Simple.cpp:

    #include "Range.h"
    
    //  Function to actually process the data.
    //  min_buffer/max_buffer form a circular buffer with one slot per window
    //  that is currently "open": slot (i % window) holds the min/max of the
    //  window that starts at sample i. Every new sample is folded into all
    //  open slots; once a slot has seen `window` samples it is sent to the
    //  output callback and reused for the window starting at the new sample.
    //
    //    count       number of samples to read from `input`
    //    window      window length in samples
    //    input       called once per sample to fetch the next value
    //    output      called once per complete window with its min and max
    //    min_buffer  caller-owned scratch array of at least `window` ints
    //    max_buffer  caller-owned scratch array of at least `window` ints
    //
    //  Exactly count - window + 1 ranges are reported. If window is not
    //  positive, nothing is read or reported (this also avoids a division by
    //  zero). If count is smaller than window, `count` samples are read and
    //  nothing is reported, because no window is ever complete.
    void ProcessData
    (
      int count,
      int window,
      GeneratorCallback input,
      OutputCallback output,
      int *min_buffer,
      int *max_buffer
    )
    {
      if (window <= 0)
      {
        return;
      }
    
      //  never read more than `count` samples while filling the first window
      const int
        fill = std::min (count, window);
    
      int
        i;
    
      for (i = 0 ; i < fill ; ++i)
      {
        int
          value = input ();
    
        //  open a new window starting at this sample...
        min_buffer [i] = max_buffer [i] = value;
    
        //  ...and fold the sample into every window opened earlier
        for (int j = 0 ; j < i ; ++j)
        {
          min_buffer [j] = std::min (min_buffer [j], value);
          max_buffer [j] = std::max (max_buffer [j], value);
        }
      }
    
      if (count < window)
      {
        //  the first window never filled up, so there is nothing to report
        return;
      }
    
      for ( ; i < count ; ++i)
      {
        int
          index = i % window;
    
        //  the slot about to be reused holds the oldest, now complete, window
        output (min_buffer [index], max_buffer [index]);
    
        int
          value = input ();
    
        min_buffer [index] = max_buffer [index] = value;
    
        //  update every other slot, walking the circular buffer once
        for (int k = (i + 1) % window ; k != index ; k = (k + 1) % window)
        {
          min_buffer [k] = std::min (min_buffer [k], value);
          max_buffer [k] = std::max (max_buffer [k], value);
        }
      }
    
      //  report the last complete window (the one starting at count - window)
      output (min_buffer [count % window], max_buffer [count % window]);
    }
    
    //  The straightforward way of calculating the results.
    //  The two scratch buffers are allocated before the clock is started and
    //  released after it is stopped, so only ProcessData itself is timed.
    //  Returns the elapsed time in clock ticks.
    clock_t Simple
    (
      int count,
      int window,
      GeneratorCallback input,
      OutputCallback output
    )
    {
      int *const lows = new int [window];
      int *const highs = new int [window];
    
      const clock_t began = clock ();
      ProcessData (count, window, input, output, lows, highs);
      const clock_t finished = clock ();
    
      delete [] highs;
      delete [] lows;
    
      return finished - began;
    }
    

    QuiteComplex.cpp:

    #include "Range.h"
    
    //  Sliding-window min/max tracker.
    //  Holds the most recent `window_size` values in a fifo queue; every node
    //  of the queue is also a node of a doubly linked list kept in ascending
    //  order, so the window minimum and maximum are simply the two ends of
    //  that list. Inserting a value costs a linear search of the list.
    //  T must be default constructible, assignable and comparable with <.
    template <class T>
    class Range
    {
    private:
      //  Class Types
    
      //  Node Data
      //  Stores a value and its position in the queue and the sorted list.
      //  While a node is on the free list only m_list_lower is meaningful
      //  (it links to the next free node).
      struct Node
      {
        Node
          *m_queue_next,
          *m_list_greater,
          *m_list_lower;
    
        T
          m_value;
      };
    
    public:
      //  Constructor
      //  Allocates memory for the node data and adds all the allocated
      //  nodes to the unused/free list of nodes.
      //  A window_size below 1 is treated as 1 so that the node array is
      //  never indexed out of bounds.
      Range
      (
        int window_size
      ) :
        m_nodes (new Node [window_size > 0 ? window_size : 1]),
        m_queue_tail (m_nodes),
        m_queue_head (0),
        m_list_min (0),
        m_list_max (0),
        m_free_list (m_nodes)
      {
        const int
          node_count = window_size > 0 ? window_size : 1;
    
        for (int i = 0 ; i < node_count - 1 ; ++i)
        {
          m_nodes [i].m_list_lower = &m_nodes [i + 1];
        }
    
        m_nodes [node_count - 1].m_list_lower = 0;
      }
    
      //  Destructor
      //  Tidy up allocated data.
      ~Range ()
      {
        delete [] m_nodes;
      }
    
      //  Function to add a new value into the data structure.
      //  Once the window is full, the oldest value is dropped first.
      void AddValue
      (
        T value
      )
      {
        Node
          *node = GetNode ();
    
        //  the node is the newest entry, so nothing follows it in the queue
        node->m_queue_next = 0;
    
        //  set value of node
        node->m_value = value;
    
        //  walk down from the maximum to the first node holding a smaller value
        Node
          *search = m_list_max;
    
        while (search && !(search->m_value < value))
        {
          search = search->m_list_lower;
        }
    
        if (search)
        {
          //  insert directly above the node that was found
          node->m_list_lower = search;
          node->m_list_greater = search->m_list_greater;
    
          if (search->m_list_greater)
          {
            search->m_list_greater->m_list_lower = node;
          }
          else
          {
            m_list_max = node;
          }
    
          search->m_list_greater = node;
        }
        else
        {
          //  nothing smaller in the list: the node becomes the new minimum
          //  (and also the maximum when the list is empty)
          node->m_list_lower = 0;
          node->m_list_greater = m_list_min;
    
          if (m_list_min)
          {
            m_list_min->m_list_lower = node;
          }
          else
          {
            m_list_max = node;
          }
    
          m_list_min = node;
        }
      }
    
      //  Accessor to determine if the first output value is ready for use,
      //  i.e. every node has been taken from the free list.
      bool RangeAvailable () const
      {
        return !m_free_list;
      }
    
      //  Accessor to get the minimum value of all values in the current window.
      //  Must not be called before the first AddValue.
      T Min () const
      {
        return m_list_min->m_value;
      }
    
      //  Accessor to get the maximum value of all values in the current window.
      //  Must not be called before the first AddValue.
      T Max () const
      {
        return m_list_max->m_value;
      }
    
    private:
      //  The class owns m_nodes, so copying is disabled (declared, not defined).
      Range (const Range &);
      Range &operator = (const Range &);
    
      //  Function to get a node to store a value into.
      //  This function gets nodes from one of two places:
      //    1. From the unused/free list
      //    2. From the end of the fifo queue, this requires removing the node from the sorted list
      //  In both cases the node is returned already placed at the head of the queue.
      Node *GetNode ()
      {
        Node
          *node;
    
        if (m_free_list)
        {
          //  get new node from unused/free list and place at head
          node = m_free_list;
    
          m_free_list = node->m_list_lower;
    
          if (m_queue_head)
          {
            m_queue_head->m_queue_next = node;
          }
    
          m_queue_head = node;
        }
        else
        {
          //  get node from tail of queue and place at head
          node = m_queue_tail;
    
          //  with a window of one the node is head and tail at once and the
          //  queue must be left alone, otherwise the tail pointer is lost
          if (node != m_queue_head)
          {
            m_queue_tail = node->m_queue_next;
            m_queue_head->m_queue_next = node;
            m_queue_head = node;
          }
    
          //  remove node from linked list
          if (node->m_list_lower)
          {
            node->m_list_lower->m_list_greater = node->m_list_greater;
          }
          else
          {
            m_list_min = node->m_list_greater;
          }
    
          if (node->m_list_greater)
          {
            node->m_list_greater->m_list_lower = node->m_list_lower;
          }
          else
          {
            m_list_max = node->m_list_lower;
          }
        }
    
        return node;
      }
    
      //  Member Data.
      Node
        *m_nodes,
        *m_queue_tail,
        *m_queue_head,
        *m_list_min,
        *m_list_max,
        *m_free_list;
    };
    
    //  A reasonable complex but more efficent method of calculating the results.
    //  Memory management is done here outside of the timing portion.
    clock_t QuiteComplex
    (
      int size,
      int window,
      GeneratorCallback input,
      OutputCallback output
    )
    {
      Range 
        range (window);
    
      clock_t
        start = clock ();
    
      for (int i = 0 ; i < size ; ++i)
      {   
        range.AddValue (input ());
    
        if (range.RangeAvailable ())
        {
          output (range.Min (), range.Max ());
        }
      }
    
      clock_t
        end = clock ();
    
      return end - start;
    }
    

    Complex.cpp:

    #include "Range.h"
    
    //  Sliding-window min/max tracker backed by a red/black tree.
    //  Holds the most recent `window_size` values in a fifo queue. Every node
    //  of the queue is also a node of a red/black tree (used to find the
    //  insertion point) and of a doubly linked list kept in ascending order
    //  (whose two ends are the window minimum and maximum).
    //  T must be default constructible, assignable and comparable with >.
    template <class T>
    class Range
    {
    private:
      //  Class Types
    
      //  Red/Black tree node colours.
      enum NodeColour
      {
        Red,
        Black
      };
    
      //  Node Data
      //  Stores a value and its position in various lists and trees.
      //  While a node is on the free list only m_list_lower is meaningful
      //  (it links to the next free node).
      struct Node
      {
        //  Node Members
        Node
          *m_queue_next,
          *m_tree_less,
          *m_tree_more,
          *m_tree_parent,
          *m_list_greater,
          *m_list_lower;
    
        NodeColour
          m_colour;
    
        T
          m_value;
      };
    
      //  Function to get the sibling of a node.
      //  Because leaves are stored as null pointers, it must be possible
      //  to get the sibling of a null pointer, so this is a static function
      //  taking the node and its parent rather than a member called through
      //  the (possibly null) node, which would be undefined behaviour.
      //  `parent` must be the parent of `node`; for a null `node` the slot it
      //  occupies in `parent` is null, so the other child is returned.
      static Node *Sibling
      (
        Node *node,
        Node *parent
      )
      {
        return parent->m_tree_less == node ? parent->m_tree_more : parent->m_tree_less;
      }
    
    public:
      //  Constructor
      //  Allocates memory for the node data and adds all the allocated
      //  nodes to the unused/free list of nodes.
      //  A window_size below 1 is treated as 1 so that the node array is
      //  never indexed out of bounds.
      Range
      (
        int window_size
      ) :
        m_nodes (new Node [window_size > 0 ? window_size : 1]),
        m_queue_tail (m_nodes),
        m_queue_head (0),
        m_tree_root (0),
        m_list_min (0),
        m_list_max (0),
        m_free_list (m_nodes)
      {
        const int
          node_count = window_size > 0 ? window_size : 1;
    
        for (int i = 0 ; i < node_count - 1 ; ++i)
        {
          m_nodes [i].m_list_lower = &m_nodes [i + 1];
        }
    
        m_nodes [node_count - 1].m_list_lower = 0;
      }
    
      //  Destructor
      //  Tidy up allocated data.
      ~Range ()
      {
        delete [] m_nodes;
      }
    
      //  Function to add a new value into the data structure.
      //  Once the window is full, the oldest value is dropped first.
      void AddValue
      (
        T value
      )
      {
        Node
          *node = GetNode ();
    
        //  clear links
        node->m_queue_next = node->m_tree_more = node->m_tree_less = node->m_tree_parent = 0;
    
        //  set value of node
        node->m_value = value;
    
        //  insert node into tree
        if (m_tree_root)
        {
          InsertNodeIntoTree (node);
          BalanceTreeAfterInsertion (node);
        }
        else
        {
          //  first node: it is the root and both ends of the sorted list
          m_tree_root = m_list_max = m_list_min = node;
          node->m_tree_parent = node->m_list_greater = node->m_list_lower = 0;
        }
    
        //  the root of a red/black tree is always black
        m_tree_root->m_colour = Black;
      }
    
      //  Accessor to determine if the first output value is ready for use,
      //  i.e. every node has been taken from the free list.
      bool RangeAvailable () const
      {
        return !m_free_list;
      }
    
      //  Accessor to get the minimum value of all values in the current window.
      //  Must not be called before the first AddValue.
      T Min () const
      {
        return m_list_min->m_value;
      }
    
      //  Accessor to get the maximum value of all values in the current window.
      //  Must not be called before the first AddValue.
      T Max () const
      {
        return m_list_max->m_value;
      }
    
    private:
      //  The class owns m_nodes, so copying is disabled (declared, not defined).
      Range (const Range &);
      Range &operator = (const Range &);
    
      //  Function to get a node to store a value into.
      //  This function gets nodes from one of two places:
      //    1. From the unused/free list
      //    2. From the end of the fifo queue, this requires removing the node from the list and tree
      //  In both cases the node is returned already placed at the head of the queue.
      Node *GetNode ()
      {
        Node
          *node;
    
        if (m_free_list)
        {
          //  get new node from unused/free list and place at head
          node = m_free_list;
    
          m_free_list = node->m_list_lower;
    
          if (m_queue_head)
          {
            m_queue_head->m_queue_next = node;
          }
    
          m_queue_head = node;
        }
        else
        {
          //  get node from tail of queue and place at head
          node = m_queue_tail;
    
          //  with a window of one the node is head and tail at once and the
          //  queue must be left alone, otherwise the tail pointer is lost
          if (node != m_queue_head)
          {
            m_queue_tail = node->m_queue_next;
            m_queue_head->m_queue_next = node;
            m_queue_head = node;
          }
    
          //  remove node from tree
          node = RemoveNodeFromTree (node);
          RebalanceTreeAfterDeletion (node);
    
          //  remove node from linked list
          if (node->m_list_lower)
          {
            node->m_list_lower->m_list_greater = node->m_list_greater;
          }
          else
          {
            m_list_min = node->m_list_greater;
          }
    
          if (node->m_list_greater)
          {
            node->m_list_greater->m_list_lower = node->m_list_lower;
          }
          else
          {
            m_list_max = node->m_list_lower;
          }
        }
    
        return node;
      }
    
      //  Rebalances the tree after insertion.
      //  The new node is coloured red; red/red parent/child pairs are then
      //  resolved by recolouring (red uncle) or by rotating (black uncle).
      //  NOTE: LeftRotate/RightRotate are named after the child that is
      //  lifted, which is the opposite of the usual textbook naming.
      void BalanceTreeAfterInsertion
      (
        Node *node
      )
      {
        node->m_colour = Red;
    
        while (node != m_tree_root && node->m_tree_parent->m_colour == Red)
        {
          if (node->m_tree_parent == node->m_tree_parent->m_tree_parent->m_tree_more)
          {
            Node
              *uncle = node->m_tree_parent->m_tree_parent->m_tree_less;
    
            if (uncle && uncle->m_colour == Red)
            {
              //  red uncle: push the blackness down from the grandparent
              node->m_tree_parent->m_colour = Black;
              uncle->m_colour = Black;
              node->m_tree_parent->m_tree_parent->m_colour = Red;
              node = node->m_tree_parent->m_tree_parent;
            }
            else
            {
              if (node == node->m_tree_parent->m_tree_less)
              {
                //  inner grandchild: rotate it to the outside first
                node = node->m_tree_parent;
                LeftRotate (node);
              }
    
              node->m_tree_parent->m_colour = Black;
              node->m_tree_parent->m_tree_parent->m_colour = Red;
              RightRotate (node->m_tree_parent->m_tree_parent);
            }
          }
          else
          {
            Node
              *uncle = node->m_tree_parent->m_tree_parent->m_tree_more;
    
            if (uncle && uncle->m_colour == Red)
            {
              //  red uncle: push the blackness down from the grandparent
              node->m_tree_parent->m_colour = Black;
              uncle->m_colour = Black;
              node->m_tree_parent->m_tree_parent->m_colour = Red;
              node = node->m_tree_parent->m_tree_parent;
            }
            else
            {
              if (node == node->m_tree_parent->m_tree_more)
              {
                //  inner grandchild: rotate it to the outside first
                node = node->m_tree_parent;
                RightRotate (node);
              }
    
              node->m_tree_parent->m_colour = Black;
              node->m_tree_parent->m_tree_parent->m_colour = Red;
              LeftRotate (node->m_tree_parent->m_tree_parent);
            }
          }
        }
      }
    
      //  Adds a node into the tree and sorted linked list.
      //  The tree must not be empty. The node is attached as a leaf; a new
      //  leaf is always adjacent in sort order to its tree parent, so it is
      //  linked into the sorted list directly next to that parent.
      void InsertNodeIntoTree
      (
        Node *node
      )
      {
        Node
          *parent = 0,
          *child = m_tree_root;
    
        bool
          greater = false;
    
        while (child)
        {
          parent = child;
          child = (greater = node->m_value > child->m_value) ? child->m_tree_more : child->m_tree_less;
        }
    
        node->m_tree_parent = parent;
    
        if (greater)
        {
          parent->m_tree_more = node;
    
          //  insert node into linked list, just above the parent
          if (parent->m_list_greater)
          {
            parent->m_list_greater->m_list_lower = node;
          }
          else
          {
            m_list_max = node;
          }
    
          node->m_list_greater = parent->m_list_greater;
          node->m_list_lower = parent;
          parent->m_list_greater = node;
        }
        else
        {
          parent->m_tree_less = node;
    
          //  insert node into linked list, just below the parent
          if (parent->m_list_lower)
          {
            parent->m_list_lower->m_list_greater = node;
          }
          else
          {
            m_list_min = node;
          }
    
          node->m_list_lower = parent->m_list_lower;
          node->m_list_greater = parent;
          parent->m_list_lower = node;
        }
      }
    
      //  Red/Black tree manipulation routine, used for removing a node.
      //  A node with two children first swaps tree position and colour with
      //  the largest node of its lesser subtree, after which it has at most
      //  one child and can be spliced out. The node's own tree links and
      //  colour are left as they were at the moment of removal because
      //  RebalanceTreeAfterDeletion reads them. Returns the removed node.
      Node *RemoveNodeFromTree
      (
        Node *node
      )
      {
        if (node->m_tree_less && node->m_tree_more)
        {
          //  the complex case, swap node with a child node
          Node
            *child;
    
          // find largest value in lesser half (node with no greater pointer)
          for (child = node->m_tree_less ; child->m_tree_more ; child = child->m_tree_more)
          {
          }
    
          std::swap (child->m_colour, node->m_colour);
    
          if (child->m_tree_parent != node)
          {
            //  the two nodes are not adjacent: exchange all three tree links
            std::swap (child->m_tree_less, node->m_tree_less);
            std::swap (child->m_tree_more, node->m_tree_more);
            std::swap (child->m_tree_parent, node->m_tree_parent);
    
            if (!child->m_tree_parent)
            {
              m_tree_root = child;
            }
            else
            {
              if (child->m_tree_parent->m_tree_less == node)
              {
                child->m_tree_parent->m_tree_less = child;
              }
              else
              {
                child->m_tree_parent->m_tree_more = child;
              }
            }
    
            if (node->m_tree_parent->m_tree_less == child)
            {
              node->m_tree_parent->m_tree_less = node;
            }
            else
            {
              node->m_tree_parent->m_tree_more = node;
            }
          }
          else
          {
            //  child is a direct child of node: a plain swap of the links
            //  would make the nodes point at themselves
            child->m_tree_parent = node->m_tree_parent;
            node->m_tree_parent = child;
    
            Node
              *child_less = child->m_tree_less,
              *child_more = child->m_tree_more;
    
            if (node->m_tree_less == child)
            {
              child->m_tree_less = node;
              child->m_tree_more = node->m_tree_more;
              node->m_tree_less = child_less;
              node->m_tree_more = child_more;
            }
            else
            {
              child->m_tree_less = node->m_tree_less;
              child->m_tree_more = node;
              node->m_tree_less = child_less;
              node->m_tree_more = child_more;
            }
    
            if (!child->m_tree_parent)
            {
              m_tree_root = child;
            }
            else
            {
              if (child->m_tree_parent->m_tree_less == node)
              {
                child->m_tree_parent->m_tree_less = child;
              }
              else
              {
                child->m_tree_parent->m_tree_more = child;
              }
            }
          }
    
          //  repair the parent links of the subtrees that changed owner
          if (child->m_tree_less)
          {
            child->m_tree_less->m_tree_parent = child;
          }
    
          if (child->m_tree_more)
          {
            child->m_tree_more->m_tree_parent = child;
          }
    
          if (node->m_tree_less)
          {
            node->m_tree_less->m_tree_parent = node;
          }
    
          if (node->m_tree_more)
          {
            node->m_tree_more->m_tree_parent = node;
          }
        }
    
        //  node now has at most one child: splice it out of the tree
        Node
          *child = node->m_tree_less ? node->m_tree_less : node->m_tree_more,
          *parent = node->m_tree_parent;
    
        if (!parent)
        {
          //  removing the root (only possible here when it has fewer than
          //  two children): its child, if any, becomes the new root
          m_tree_root = child;
        }
        else
        {
          if (parent->m_tree_less == node)
          {
            parent->m_tree_less = child;
          }
          else
          {
            parent->m_tree_more = child;
          }
        }
    
        if (child)
        {
          child->m_tree_parent = parent;
        }
    
        return node;
      }
    
      //  Red/Black tree manipulation routine, used for rebalancing a tree after a deletion.
      //  `node` is the node just returned by RemoveNodeFromTree; its stale
      //  tree links identify the position it was removed from. Nothing needs
      //  doing if it was red. If it was black and left a red child behind,
      //  that child is painted black. Otherwise the missing black is repaired
      //  by working up the tree from the removed position.
      void RebalanceTreeAfterDeletion
      (
        Node *node
      )
      {
        Node
          *child = node->m_tree_less ? node->m_tree_less : node->m_tree_more;
    
        if (node->m_colour == Black)
        {
          if (child && child->m_colour == Red)
          {
            child->m_colour = Black;
          }
          else
          {
            //  n is the subtree that is one black node short (null at first)
            Node
              *parent = node->m_tree_parent,
              *n = child;
    
            while (parent)
            {
              Node
                *sibling = Sibling (n, parent);
    
              if (sibling && sibling->m_colour == Red)
              {
                //  red sibling: rotate it above the parent so that n gets a
                //  black sibling for the cases below
                parent->m_colour = Red;
                sibling->m_colour = Black;
    
                if (n == parent->m_tree_more)
                {
                  LeftRotate (parent);
                }
                else
                {
                  RightRotate (parent);
                }
              }
    
              sibling = Sibling (n, parent);
    
              if (parent->m_colour == Black &&
                sibling->m_colour == Black &&
                (!sibling->m_tree_more || sibling->m_tree_more->m_colour == Black) &&
                (!sibling->m_tree_less || sibling->m_tree_less->m_colour == Black))
              {
                //  everything black: make the sibling side one black short
                //  as well and carry on one level further up
                sibling->m_colour = Red;
                n = parent;
                parent = n->m_tree_parent;
                continue;
              }
              else
              {
                if (parent->m_colour == Red &&
                  sibling->m_colour == Black &&
                  (!sibling->m_tree_more || sibling->m_tree_more->m_colour == Black) &&
                  (!sibling->m_tree_less || sibling->m_tree_less->m_colour == Black))
                {
                  //  red parent: swapping colours with the sibling restores
                  //  the black count on n's side
                  sibling->m_colour = Red;
                  parent->m_colour = Black;
                  break;
                }
                else
                {
                  //  if only the sibling's inner child is red, rotate it to
                  //  the outside first
                  if (n == parent->m_tree_more &&
                    sibling->m_colour == Black &&
                    (sibling->m_tree_more && sibling->m_tree_more->m_colour == Red) &&
                    (!sibling->m_tree_less || sibling->m_tree_less->m_colour == Black))
                  {
                    sibling->m_colour = Red;
                    sibling->m_tree_more->m_colour = Black;
                    RightRotate (sibling);
                  }
                  else
                  {
                    if (n == parent->m_tree_less &&
                      sibling->m_colour == Black &&
                      (!sibling->m_tree_more || sibling->m_tree_more->m_colour == Black) &&
                      (sibling->m_tree_less && sibling->m_tree_less->m_colour == Red))
                    {
                      sibling->m_colour = Red;
                      sibling->m_tree_less->m_colour = Black;
                      LeftRotate (sibling);
                    }
                  }
    
                  //  the sibling's outer child is red: rotate the sibling
                  //  above the parent and recolour
                  sibling = Sibling (n, parent);
                  sibling->m_colour = parent->m_colour;
                  parent->m_colour = Black;
    
                  if (n == parent->m_tree_more)
                  {
                    sibling->m_tree_less->m_colour = Black;
                    LeftRotate (parent);
                  }
                  else
                  {
                    sibling->m_tree_more->m_colour = Black;
                    RightRotate (parent);
                  }
                  break;
                }
              }
            }
          }
        }
      }
    
      //  Red/Black tree manipulation routine, used for balancing the tree.
      //  Lifts the lesser child of `node` into node's place; `node` becomes
      //  the greater child of that node. `node` must have a lesser child.
      void LeftRotate
      (
        Node *node
      )
      {
        Node
          *less = node->m_tree_less;
    
        node->m_tree_less = less->m_tree_more;
    
        if (less->m_tree_more)
        {
          less->m_tree_more->m_tree_parent = node;
        }
    
        less->m_tree_parent = node->m_tree_parent;
    
        if (!node->m_tree_parent)
        {
          m_tree_root = less;
        }
        else
        {
          if (node == node->m_tree_parent->m_tree_more)
          {
            node->m_tree_parent->m_tree_more = less;
          }
          else
          {
            node->m_tree_parent->m_tree_less = less;
          }
        }
    
        less->m_tree_more = node;
        node->m_tree_parent = less;
      }
    
      //  Red/Black tree manipulation routine, used for balancing the tree.
      //  Lifts the greater child of `node` into node's place; `node` becomes
      //  the lesser child of that node. `node` must have a greater child.
      void RightRotate
      (
        Node *node
      )
      {
        Node
          *more = node->m_tree_more;
    
        node->m_tree_more = more->m_tree_less;
    
        if (more->m_tree_less)
        {
          more->m_tree_less->m_tree_parent = node;
        }
    
        more->m_tree_parent = node->m_tree_parent;
    
        if (!node->m_tree_parent)
        {
          m_tree_root = more;
        }
        else
        {
          if (node == node->m_tree_parent->m_tree_less)
          {
            node->m_tree_parent->m_tree_less = more;
          }
          else
          {
            node->m_tree_parent->m_tree_more = more;
          }
        }
    
        more->m_tree_less = node;
        node->m_tree_parent = more;
      }
    
      //  Member Data.
      Node
        *m_nodes,
        *m_queue_tail,
        *m_queue_head,
        *m_tree_root,
        *m_list_min,
        *m_list_max,
        *m_free_list;
    };
    
    //  A complex but more efficent method of calculating the results.
    //  Memory management is done here outside of the timing portion.
    clock_t Complex
    (
      int count,
      int window,
      GeneratorCallback input,
      OutputCallback output
    )
    {
      Range 
        range (window);
    
      clock_t
        start = clock ();
    
      for (int i = 0 ; i < count ; ++i)
      {   
        range.AddValue (input ());
    
        if (range.RangeAvailable ())
        {
          output (range.Min (), range.Max ());
        }
      }
    
      clock_t
        end = clock ();
    
      return end - start;
    }
    

提交回复
热议问题