I have an array and need the maximum of the rolling difference for a dynamic window, i.e. one result per window size.
# Sample input used in the examples.
a = np.array([8, 18, 5, 15, 12])
print(a)
[ 8 18 5 15 12]
So first I create the matrix of differences of the array with itself:
# Pairwise differences via broadcasting: b[i, j] == a[j] - a[i].
b = a[np.newaxis, :] - a[:, np.newaxis]
print(b)
[[ 0 10 -3 7 4]
[-10 0 -13 -3 -6]
[ 3 13 0 10 7]
[ -7 3 -10 0 -3]
[ -4 6 -7 3 0]]
Then I replace the upper triangle of the matrix with 0:
# Keep the main diagonal and everything below it; zero out the rest.
c = np.tril(b, k=0)
print(c)
[[ 0 0 0 0 0]
[-10 0 0 0 0]
[ 3 13 0 0 0]
[ -7 3 -10 0 0]
[ -4 6 -7 3 0]]
Finally, I need the maximum value of each diagonal, which means:
max([0,0,0,0,0]) = 0
max([-10,13,-10,3]) = 13
max([3,3,-7]) = 3
max([-7,6]) = 6
max([-4]) = -4
So expected output is:
[0, 13, 3, 6, -4]
Is there a nice vectorized solution? Or is there another way to get the expected output?
Not sure exactly how efficient this is considering the advanced indexing involved, but this is one way to do that:
import numpy as np
a = np.array([8, 18, 5, 15, 12])
# Pairwise differences: b[i, j] == a[i] - a[j].
b = a[:, np.newaxis] - a
# Overwrite the main diagonal and everything below it with the smallest
# integer so those cells can never win a maximum (np.finfo for float data).
b[np.tril_indices(b.shape[0])] = np.iinfo(b.dtype).min
# Re-read b's buffer so that row k of `diags` walks the (k + 1)-th upper
# diagonal of b; positions past the end of a diagonal fall on masked cells.
s = b.strides[1]
diags = np.ndarray(
    shape=(len(a) - 1, len(a) - 1),
    dtype=b.dtype,
    buffer=b,
    offset=s,
    strides=(s, (len(a) + 1) * s),
)
# Row-wise maxima, with the initial zero (window size 0) put in front.
c = np.r_[0, diags.max(axis=1)]
# [ 0 13  3  6 -4]
Another alternative, which may not be what you were looking for though, is just using Numba, for example like this:
import numpy as np
import numba as nb
def max_window_diffs_jdehesa(a):
    """Return the per-window maxima of ``a[i] - a[i + w]``.

    Args:
        a: One-dimensional array-like of integers or floats.

    Returns:
        Array with the same shape and dtype as ``a``. It is pre-filled with
        the smallest value of that dtype and then completed in place by
        ``_pwise_diffs``.
    """
    a = np.asarray(a)
    # The sentinel must come from the input's own dtype; the previous code
    # read ``b.dtype`` although no ``b`` exists in this function.
    if np.issubdtype(a.dtype, np.integer):
        dtinf = np.iinfo(a.dtype)
    else:
        dtinf = np.finfo(a.dtype)
    out = np.full_like(a, dtinf.min)
    _pwise_diffs(a, out)
    return out
@nb.njit(parallel=True)
def _pwise_diffs(a, out):
    """Fill ``out[w]`` with the maximum of ``a[i] - a[i + w]`` over all valid ``i``.

    Args:
        a: One-dimensional input array.
        out: Output array, indexed like ``a``. Entries for ``w >= 1`` must be
            pre-filled with a value no larger than any difference, because
            each one is only ever raised by ``max``. ``out[0]`` is set to 0.
    """
    # Nothing to write for an empty buffer; also avoids indexing out[0].
    if len(out) == 0:
        return
    out[0] = 0
    # nb.prange only runs in parallel inside an njit(parallel=True) function.
    # Each iteration writes a distinct out[w].
    for w in nb.prange(1, len(a)):
        for i in range(len(a) - w):
            out[w] = max(a[i] - a[i + w], out[w])
a = np.array([8, 18, 5, 15, 12])
# The call that produces the output shown below was missing.
print(max_window_diffs_jdehesa(a))
# [ 0 13  3  6 -4]
Comparing these methods to the original:
import numpy as np
import numba as nb
def max_window_diffs_orig(a):
    """Reference implementation: maximum of each lower diagonal of the difference matrix.

    Args:
        a: One-dimensional array-like of numbers.

    Returns:
        Array ``result`` where ``result[0]`` is 0 and ``result[w]`` is the
        maximum of ``a[i] - a[i + w]`` over all valid ``i``.
    """
    values = np.asarray(a)
    size = len(values)
    # pairwise[r, c] == values[c] - values[r]
    pairwise = values - values[:, None]
    result = np.zeros(size, pairwise.dtype)
    # Diagonals -1 .. -(size - 2) each hold at least two entries.
    for lag in range(1, size - 1):
        result[lag] = pairwise.diagonal(-lag).max()
    # The last diagonal is the single bottom-left corner element.
    result[-1] = pairwise[-1, 0]
    return result
def max_window_diffs_jdehesa_np(a):
    """Per-window maxima of ``a[i] - a[i + w]`` using a strided view of the diagonals.

    Args:
        a: One-dimensional array-like of integers or floats.

    Returns:
        Array whose first entry is 0 and whose entry ``w`` (``w >= 1``) is the
        maximum of ``a[i] - a[i + w]`` over all valid ``i``. Inputs with fewer
        than two elements return ``np.zeros(len(a))`` in the input's dtype.
    """
    a = np.asarray(a)
    n = len(a)
    if n < 2:
        # No pair of distinct positions exists, and the strided view below
        # would need a negative or zero-sized shape.
        return np.zeros(n, dtype=a.dtype)
    # b[i, j] == a[i] - a[j]; the wanted values sit on the upper diagonals.
    b = a[:, None] - a
    dtinf = np.iinfo(b.dtype) if np.issubdtype(b.dtype, np.integer) else np.finfo(b.dtype)
    # Mask the main diagonal and the lower triangle so they never win a maximum.
    b[np.tril_indices(n)] = dtinf.min
    s = b.strides[1]
    # Row k of `diags` walks the (k + 1)-th upper diagonal of b; positions past
    # the end of a diagonal fall on masked lower-triangle cells.
    diags = np.ndarray((n - 1, n - 1), b.dtype, b, offset=s, strides=(s, (n + 1) * s))
    return np.concatenate([[0], diags.max(1)])
def max_window_diffs_jdehesa_nb(a):
    """Per-window maxima of ``a[i] - a[i + w]``, computed by ``_pwise_diffs``.

    Args:
        a: One-dimensional array-like of integers or floats.

    Returns:
        Array with the same shape and dtype as ``a``. It is pre-filled with
        the smallest value of that dtype and then completed in place by
        ``_pwise_diffs``.
    """
    a = np.asarray(a)
    # The sentinel must come from the input's own dtype; the previous code
    # read ``b.dtype`` although no ``b`` exists in this function.
    if np.issubdtype(a.dtype, np.integer):
        dtinf = np.iinfo(a.dtype)
    else:
        dtinf = np.finfo(a.dtype)
    out = np.full_like(a, dtinf.min)
    _pwise_diffs(a, out)
    return out
@nb.njit(parallel=True)
def _pwise_diffs(a, out):
    """Fill ``out[w]`` with the maximum of ``a[i] - a[i + w]`` over all valid ``i``.

    Args:
        a: One-dimensional input array.
        out: Output array, indexed like ``a``. Entries for ``w >= 1`` must be
            pre-filled with a value no larger than any difference, because
            each one is only ever raised by ``max``. ``out[0]`` is set to 0.
    """
    # Nothing to write for an empty buffer; also avoids indexing out[0].
    if len(out) == 0:
        return
    out[0] = 0
    # nb.prange only runs in parallel inside an njit(parallel=True) function.
    # Each iteration writes a distinct out[w].
    for w in nb.prange(1, len(a)):
        for i in range(len(a) - w):
            out[w] = max(a[i] - a[i + w], out[w])
# Small input (100 elements): check both alternatives against the reference
# implementation, then time all three. The `%timeit` lines are IPython magics.
a = np.random.randint(0, 100, size=100)
r = max_window_diffs_orig(a)
print((max_window_diffs_jdehesa_np(a) == r).all())
# True
print((max_window_diffs_jdehesa_nb(a) == r).all())
# True
%timeit max_window_diffs_orig(a)
# 348 µs ± 986 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit max_window_diffs_jdehesa_np(a)
# 91.7 µs ± 1.3 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit max_window_diffs_jdehesa_nb(a)
# 19.7 µs ± 88.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# Larger input (10000 elements): timings only.
a = np.random.randint(0, 100, size=10000)
%timeit max_window_diffs_orig(a)
# 651 ms ± 26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit max_window_diffs_jdehesa_np(a)
# 1.61 s ± 6.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit max_window_diffs_jdehesa_nb(a)
# 22 ms ± 967 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
The NumPy version beats the original for small arrays but falls behind it for bigger ones. Numba, on the other hand, is the fastest in both cases.
Use ndarray.diagonal
# One maximum per lower diagonal of c, starting with the main diagonal.
v = [c.diagonal(-k).max() for k in range(len(b))]
print(v) # [0, 13, 3, 6, -4]
Here's a vectorized solution with strides
from skimage.util import view_as_windows
n = len(a)
# Pad a with n-1 trailing zeros so that p has 2n-1 entries.
z = np.zeros(n-1,dtype=a.dtype)
p = np.concatenate((a,z))
# presumably s[i] == p[i:i+n], i.e. all n sliding windows of length n -- confirm against the skimage docs
s = view_as_windows(p,n)
# mask[i, j] is True exactly where i + j >= n (the cells that would read the zero padding)
mask = np.tri(n,k=-1,dtype=bool)[:,::-1]
# Given the window layout assumed above, v[i, j] == a[j] - p[i + j]
v = s[0]-s
# Replace masked cells by a value below every entry of v, then take the maximum of each row
out = np.where(mask,v.min()-1,v).max(1)
With one-loop for memory-efficiency -
n = len(a)
# For each lag k, subtract the array shifted by k from its first n - k items
# and keep the largest difference.
out = [max(a[:n - k] - a[k:]) for k in range(n)]
Use np.max in place of max for better use of array memory.
You can abuse the fact that reshaping a non-square array of shape (N+1, N) to (N, N+1) makes its diagonals appear as columns:
from scipy.linalg import toeplitz
# A (4, 3) example in which every diagonal holds one repeated value.
a = toeplitz([1,2,3,4], [1,4,3])
# array([[1, 4, 3],
#        [2, 1, 4],
#        [3, 2, 1],
#        [4, 3, 2]])
# Reshaping (N+1, N) -> (N, N+1) lines the diagonals up as columns.
a.reshape(3, 4)
# array([[1, 4, 3, 2],
#        [1, 4, 3, 2],
#        [1, 4, 3, 2]])
Which you can then use like this (note that I've swapped the sign of the differences and filled the lower triangle with a small sentinel value instead of zero):
a = np.array([8, 18, 5, 15, 12])
# Pairwise differences: b[i, j] == a[i] - a[j].
b = a[:, None] - a
# Sentinel that can never win a maximum. It is taken from the dtype instead of
# a hard-coded -10000, which would be wrong for differences below -10000.
# For floats use -inf; NaN would propagate through max().
smallv = np.iinfo(b.dtype).min if np.issubdtype(b.dtype, np.integer) else -np.inf
# Mask everything strictly below the main diagonal.
b[np.tril_indices(len(b), -1)] = smallv
# Append one sentinel row so the (N+1, N) -> (N, N+1) reshape is possible.
d = np.vstack((b, np.full(len(b), smallv, dtype=b.dtype)))
# After the reshape, column k holds the k-th upper diagonal padded with
# sentinels; the last column is sentinels only and is dropped.
d.reshape(len(d) - 1, -1).max(0)[:-1]
# array([ 0, 13,  3,  6, -4])