find largest submatrix algorithm

前端 未结 2 981
暖寄归人
暖寄归人 2021-02-09 13:24

I have an N*N matrix (N=2 to 10000) of numbers that may range from 0 to 1000. How can I find the largest (rectangular) submatrix that consists of the same number?

Exampl

相关标签:
2条回答
  • 2021-02-09 14:09

    This is a work in progress

    I thought about this problem and I think I may have a O(w*h) algorithm.

    The idea goes like this:

    • for any (i,j) compute the highest number of cells with the same value in the column j starting from (i,j). Store this values as heights[i][j].
    • create an empty vector of sub matrix (a lifo)
    • for all row: i
      • for all column: j
        • pop all sub matrix whose height > heights[i][j]. Because the submatrix with height > heights[i][j] cannot continue on this cell
        • push a submatrix defined by (i,j,heights[i][j]) where j is the farest coordinate where we can fit a submatrix of height: heights[i][j]
        • update the current max sub matrix

    The tricky part is in the inner loop. I use something similar to the max subwindow algorithm to ensure it is O(1) on average for each cell.

    I will try to formulate a proof but in the meantime here is the code.

    #include <algorithm>
    #include <iterator>
    #include <iostream>
    #include <ostream>
    #include <vector>
    
    typedef std::vector<int>   row_t;
    typedef std::vector<row_t> matrix_t;
    
    std::size_t height(matrix_t const& M) { return M.size(); }
    std::size_t width (matrix_t const& M) { return M.size() ? M[0].size() : 0u; }
    
    std::ostream& operator<<(std::ostream& out, matrix_t const& M) {
      for(unsigned i=0; i<height(M); ++i) {
        std::copy(begin(M[i]), end(M[i]),
              std::ostream_iterator<int>(out, ", "));
        out << std::endl;
      }
      return out;
    }
    
    struct sub_matrix_t {
      int i, j, h, w;
      sub_matrix_t(): i(0),j(0),h(0),w(1) {}
      sub_matrix_t(int i_,int j_,int h_,int w_):i(i_),j(j_),h(h_),w(w_) {}
      bool operator<(sub_matrix_t const& rhs) const { return (w*h)<(rhs.w*rhs.h); }
    };
    
    
    // Pop all sub_matrix from the vector keeping only those with an height
    // inferior to the passed height.
    // Compute the max sub matrix while removing sub matrix with height > h
    void pop_sub_m(std::vector<sub_matrix_t>& subs,
               int i, int j, int h, sub_matrix_t& max_m) {
    
      sub_matrix_t sub_m(i, j, h, 1);
    
      while(subs.size() && subs.back().h >= h) {
        sub_m = subs.back();
        subs.pop_back();
        sub_m.w = j-sub_m.j;
        max_m = std::max(max_m, sub_m);
      }
    
      // Now sub_m.{i,j} is updated to the farest coordinates where there is a
      // submatrix with heights >= h
    
      // If we don't cut the current height (because we changed value) update
      // the current max submatrix
      if(h > 0) {
        sub_m.h = h;
        sub_m.w = j-sub_m.j+1;
        max_m = std::max(max_m, sub_m);
        subs.push_back(sub_m);
      }
    }
    
    void push_sub_m(std::vector<sub_matrix_t>& subs,
            int i, int j, int h, sub_matrix_t& max_m) {
      if(subs.empty() || subs.back().h < h)
        subs.emplace_back(i, j, h, 1);
    }
    
    void solve(matrix_t const& M, sub_matrix_t& max_m) {
      // Initialize answer suitable for an empty matrix
      max_m = sub_matrix_t();
      if(height(M) == 0 || width(M) == 0) return;
    
      // 1) Compute the heights of columns of the same values
      matrix_t heights(height(M), row_t(width(M), 1));
      for(unsigned i=height(M)-1; i>0; --i)
        for(unsigned j=0; j<width(M); ++j)
          if(M[i-1][j]==M[i][j])
        heights[i-1][j] = heights[i][j]+1;
    
      // 2) Run through all columns heights to compute local sub matrices
      std::vector<sub_matrix_t> subs;
      for(int i=height(M)-1; i>=0; --i) {
        push_sub_m(subs, i, 0, heights[i][0], max_m);
        for(unsigned j=1; j<width(M); ++j) {
          bool same_val  = (M[i][j]==M[i][j-1]);
          int pop_height = (same_val) ? heights[i][j] : 0;
          int pop_j      = (same_val) ? j             : j-1;
          pop_sub_m (subs, i, pop_j, pop_height,    max_m);
          push_sub_m(subs, i, j,     heights[i][j], max_m);
        }
        pop_sub_m(subs, i, width(M)-1, 0, max_m);
      }
    }
    
    matrix_t M1{
      {10,  9,  9,  9, 80},
      { 5,  9,  9,  9, 10},
      {85, 86, 54, 45, 45},
      {15, 21,  5,  1,  0},
      { 5,  6, 88, 11, 10},
    };
    
    matrix_t M2{
      {10, 19,  9, 29, 80},
      { 5,  9,  9,  9, 10},
      { 9,  9, 54, 45, 45},
      { 9,  9,  5,  1,  0},
      { 5,  6, 88, 11, 10},
    };
    
    
    int main() {
      sub_matrix_t answer;
    
      std::cout << M1 << std::endl;
      solve(M1, answer);
      std::cout << '(' << (answer.w*answer.h)
            << ',' << (answer.j+1) << ',' << (answer.i+1) << ')'
            << std::endl;
    
      answer = sub_matrix_t();
      std::cout << M2 << std::endl;
      solve(M2, answer);
      std::cout << '(' << (answer.w*answer.h)
            << ',' << (answer.j+1) << ',' << (answer.i+1) << ')'
            << std::endl;
    }
    
    0 讨论(0)
  • 2021-02-09 14:09

    This is an order Rows*Columns Solution

    It works by

    • starting at the bottom of the array, and determining how many items below each number match it in a column. This is done in O(MN) time (very trivially)
    • Then it goes top to bottom & left to right and sees if any given number matches the number to the left. If so, it keeps track of how the heights relate to each other to track the possible rectangle shapes

    Here is a working python implementation. Apologies since I'm not sure how to get the syntax highlighting working

    # this program finds the largest area in an array where all the elements have the same value
    # It solves in O(rows * columns) time  using  O(rows*columns) space using dynamic programming
    
    
    
    
    def max_area_subarray(array):
    
        rows = len(array)
        if (rows == 0):
            return [[]]
        columns = len(array[0])
    
    
        # initialize a blank new array
        # this will hold max elements of the same value in a column
        new_array = []
        for i in range(0,rows-1):
            new_array.append([0] * columns)
    
        # start with the bottom row, these all of 1 element of the same type 
        # below them, including themselves
        new_array.append([1] * columns)
    
        # go from the second to bottom row up, finding how many contiguous
        # elements of the same type there are
        for i in range(rows-2,-1,-1):
            for j in range(columns-1,-1,-1):
                if ( array[i][j] == array[i+1][j]):
                    new_array[i][j] = new_array[i+1][j]+1
                else:
                    new_array[i][j] = 1
    
    
        # go left to right and match up the max areas
        max_area = 0
        top = 0
        bottom = 0
        left = 0
        right = 0
        for i in range(0,rows):
            running_height =[[0,0,0]]
            for j in range(0,columns):
    
                matched = False
                if (j > 0):  # if this isn't the leftmost column
                    if (array[i][j] == array[i][j-1]):
                        # this matches the array to the left
                        # keep track of if this is a longer column, shorter column, or same as 
                        # the one on the left
                        matched = True
    
                        while( new_array[i][j] < running_height[-1][0]):
                            # this is less than the one on the left, pop that running 
                            # height from the list, and add it's columns to the smaller
                            # running height below it
                            if (running_height[-1][1] > max_area):
                                max_area = running_height[-1][1]
                                top = i
                                right = j-1
                                bottom = i + running_height[-1][0]-1
                                left = j - running_height[-1][2]
    
                            previous_column = running_height.pop()
                            num_columns = previous_column[2]
    
                            if (len(running_height) > 0):
                                running_height[-1][1] += running_height[-1][0] * (num_columns)
                                running_height[-1][2] += num_columns
    
                            else:
                                # for instance, if we have heights 2,2,1
                                # this will trigger on the 1 after we pop the 2 out, and save the current
                                # height of 1,  the running area of 3, and running columsn of 3
                                running_height.append([new_array[i][j],new_array[i][j]*(num_columns),num_columns])
    
    
                        if (new_array[i][j] > running_height[-1][0]):
                            # longer then the one on the left
                            # append this height and area
                            running_height.append([new_array[i][j],new_array[i][j],1])
                        elif (new_array[i][j] == running_height[-1][0]):   
                            # same as the one on the left, add this area to the one on the left
                            running_height[-1][1] += new_array[i][j]
                            running_height[-1][2] += 1
    
    
    
                if (matched == False or j == columns -1):
                    while(running_height):
                        # unwind the maximums & see if this is the new max area
                        if (running_height[-1][1] > max_area):
                            max_area = running_height[-1][1]
                            top = i
                            right = j
                            bottom = i + running_height[-1][0]-1
                            left = j - running_height[-1][2]+1
    
                            # this wasn't a match, so move everything one bay to the left
                            if (matched== False):
                                right = right-1
                                left = left-1
    
    
                        previous_column = running_height.pop()
                        num_columns = previous_column[2]
                        if (len(running_height) > 0):
                            running_height[-1][1] += running_height[-1][0] * num_columns
                            running_height[-1][2] += num_columns
    
                if (matched == False):
                    # this is either the left column, or we don't match to the column to the left, so reset
                    running_height = [[new_array[i][j],new_array[i][j],1]]
                    if (running_height[-1][1] > max_area):
                        max_area = running_height[-1][1]
                        top = i
                        right = j
                        bottom = i + running_height[-1][0]-1
                        left = j - running_height[-1][2]+1
    
    
        max_array = []
        for i in range(top,bottom+1):
            max_array.append(array[i][left:right+1])
    
    
        return max_array
    
    
    
    numbers = [[6,4,1,9],[5,2,2,7],[2,2,2,1],[2,3,1,5]]
    
    for row in numbers:
        print row
    
    print
    print
    
    max_array =  max_area_subarray(numbers)    
    
    
    max_area = len(max_array) * len(max_array[0])
    print 'max area is ',max_area
    print 
    for row in max_array:
        print row
    
    0 讨论(0)
提交回复
热议问题