Referring to this great sliding window implementation in python: https://github.com/keepitsimple/ocrtest/blob/master/sliding_window.py#blob_contributors_box, my question is
To update @ali_m's answer, since `scipy.misc.lena()` is no longer available in SciPy > 0.17, here is an example using the RGB image `scipy.misc.face()`, with a slight modification to the sliding window source code provided in the OP.
import numpy as np
from scipy.misc import ascent, face
from matplotlib import pyplot as plt
from numpy.lib.stride_tricks import as_strided as ast
def get_win_pixel_coords(grid_pos, win_shape, shift_size=None):
    '''
    Return the pixel bounds of one window of a sliding-window grid.

    Parameters
        grid_pos   - (row, col) index of the window within the grid
        win_shape  - (rows, cols) size of each window in pixels
        shift_size - (row_step, col_step) distance between the origins of
                     neighbouring windows. Defaults to win_shape, i.e.
                     non-overlapping windows.
    Returns
        (top, bottom, left, right), usable as image[top:bottom, left:right]
    '''
    if shift_size is None:
        shift_size = win_shape
    gr, gc = grid_pos
    sr, sc = shift_size
    wr, wc = win_shape
    # The window origin is the grid index times the step; the far edge is one
    # window length further on.
    top, left = gr * sr, gc * sc
    bottom, right = top + wr, left + wc
    return top, bottom, left, right
def norm_shape(shape):
    '''
    Normalize numpy array shapes so they're always expressed as a tuple,
    even for one-dimensional shapes.

    Parameters
        shape - an int, or a tuple of ints
    Returns
        a shape tuple
    Raises
        TypeError if shape is neither convertible with int() nor iterable
    '''
    # Scalar case: anything int() accepts becomes a one-element tuple.
    try:
        return (int(shape),)
    except TypeError:
        # shape was not a number
        pass

    # Sequence case: anything iterable is converted element-for-element.
    try:
        return tuple(shape)
    except TypeError:
        # shape was not iterable
        pass

    raise TypeError('shape must be an int, or a tuple of ints')
def sliding_window(a, ws, ss=None, flatten=True):
    '''
    Return a sliding window over a in any number of dimensions.

    Parameters
        a       - an n-dimensional numpy array
        ws      - an int (a is 1D) or tuple (a is 2D or greater) giving the
                  size of the window in each dimension
        ss      - an int or tuple giving the step of the window in each
                  dimension. Defaults to ws, so windows do not overlap.
        flatten - if True, the grid dimensions are collapsed into a single
                  leading axis; otherwise there is one grid axis per
                  dimension of a.
    Returns
        flatten=True:  (windows, newshape), where windows has shape
                       (number_of_windows,) + window shape and newshape is
                       the unflattened shape (grid shape + window shape).
        flatten=False: the strided array of shape newshape on its own.
    Raises
        ValueError if a.shape, ws and ss differ in length, if ws exceeds a
        in any dimension, or if ss is smaller than 1 in any dimension.

    With flatten=False the result is an as_strided view onto a's memory, so
    it should be treated as read-only.
    '''
    if ss is None:
        # ss was not provided. the windows will not overlap in any direction.
        ss = ws

    # convert ws, ss, and a.shape to numpy arrays for element-wise arithmetic
    ws = np.array(norm_shape(ws))
    ss = np.array(norm_shape(ss))
    shap = np.array(a.shape)

    # ensure that ws, ss, and a.shape all have the same number of dimensions
    ls = [len(shap), len(ws), len(ss)]
    if len(set(ls)) != 1:
        raise ValueError(
            'a.shape, ws and ss must all have the same length. They were %s' % str(ls))

    # ensure that ws is smaller than a in every dimension
    if np.any(ws > shap):
        raise ValueError(
            'ws cannot be larger than a in any dimension. '
            'a.shape was %s and ws was %s' % (str(a.shape), str(ws)))

    # a zero step would divide by zero below, and a negative step is meaningless
    if np.any(ss < 1):
        raise ValueError(
            'ss must be at least 1 in every dimension. ss was %s' % str(ss))

    # how many slices will there be in each dimension?
    grid = tuple(int(n) for n in ((shap - ws) // ss) + 1)
    window = tuple(int(n) for n in ws)
    # the shape of the strided array is the number of slices in each dimension
    # followed by the shape of the window; plain ints keep it printable and
    # directly usable as a shape
    newshape = grid + window
    # stepping one window along an axis moves ss elements along that axis;
    # stepping inside a window uses the array's own strides
    newstrides = tuple(int(s) for s in np.array(a.strides) * ss) + a.strides
    a = ast(a, shape=newshape, strides=newstrides)
    if not flatten:
        return a

    # Collapse the grid axes so the result is a flat list of windows, i.e. it
    # has exactly one more dimension than the window itself.
    dim = (int(np.prod(grid)),) + window
    return a.reshape(dim), newshape
Adding the return variable `newshape` to `sliding_window()` makes it possible to pass `flatten=True` and still know the layout of the grid created by the sliding window function. In my application a flattened vector of computational windows is desirable because it is a convenient point at which to scale the computations applied to each computational window.
If a 96x96 window (i.e. `tile` x `tile`) is applied with 50% overlap in both directions to an image with shape `(768, 1024, 3)`, the input image can first be padded so that each spatial dimension is an exact multiple of the window size, leaving no remainder, before the sliding window is created.
# NOTE: scipy.misc.face is deprecated/removed in recent SciPy releases; there
# the same image is available as scipy.datasets.face().
img = face()
nxo, nyo, nzo = img.shape
tile = 96
# Mirror the image to the right and downwards so that the padding is filled
# with real (reflected) pixel values.
mirrored_row = np.hstack((img, np.fliplr(img)))
pad_img = np.vstack((mirrored_row, np.flipud(mirrored_row)))
# Crop so each spatial dimension is the next multiple of `tile`.
# (-n % tile) is the amount missing to reach that multiple, whereas
# n + (n % tile) is a multiple of `tile` only by coincidence
# (e.g. 1024 + 1024 % 96 = 1088, and 1088 % 96 = 32).
pad_img = pad_img[:nxo + (-nxo % tile), :nyo + (-nyo % tile), :]
# Full-size windows with a half-tile step give 50% overlap; the channel axis
# is covered by a single window.
win, ind = sliding_window(pad_img, (tile, tile, nzo), (tile // 2, tile // 2, nzo))
print(ind)
# (15, 21, 1, 96, 96, 3)
print(win.shape)
# (315, 96, 96, 3)
The grid of computational windows contains 15 rows and 21 columns, i.e. 315 computational windows. `grid_pos` can be determined from a window's index in the flattened vector of computational windows (i.e. `win`) together with `ind[0]` and `ind[1]`. If we were interested in the computational window at (zero-based) index 239:
# Convert the flat window index back to its (row, col) position in the grid.
grid_pos = np.unravel_index(239, (ind[0], ind[1]))
print(grid_pos)
#(11, 8)
Then the bounding coordinates for the computational window in the (padded) image can be found using:
# Pixel bounds of the selected window, then a check that slicing the padded
# image with them reproduces the window taken from the flattened stack.
window_bounds = get_win_pixel_coords(grid_pos, (96, 96), (48, 48))
t, b, l, r = window_bounds
patch = pad_img[t:b, l:r]
print((patch == win[239]).all())
#True