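I was answering a question about the pandas `interpolate` method. The OP wanted to interpolate only where the number of consecutive `np.nan`s was one. The `limit` parameter of `interpolate` does not do that on its own: it caps how many consecutive NaNs are filled, but it still fills the first ones of a longer run.
I created this generalized solution: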
import pandas as pd
import numpy as np
from numpy.lib.stride_tricks import as_strided as strided
def mask_knans(a, x):
    """Return a boolean mask that is False on every run of at least ``x`` NaNs.

    Parameters
    ----------
    a : array-like
        Values to inspect; converted with ``np.asarray``. Intended for
        one-dimensional numeric input (anything ``np.isnan`` accepts).
    x : int
        Minimum number of consecutive NaNs that a run must contain to be
        masked out. Must be at least 1.

    Returns
    -------
    numpy.ndarray
        Boolean array of length ``len(a)``. An element is ``False`` when it
        belongs to a run of ``x`` or more consecutive NaNs, ``True``
        otherwise.

    Raises
    ------
    ValueError
        If ``x`` is smaller than 1.
    """
    if x < 1:
        raise ValueError("x must be a positive integer")

    a = np.asarray(a)
    k = a.shape[0]

    # prepare the mask and fill it with True
    # (plain `bool` instead of `np.bool8`, which NumPy 2.0 removed)
    m = np.ones(k, dtype=bool)

    # a run of x NaNs cannot fit in fewer than x elements; returning early
    # also keeps a negative shape from ever reaching `as_strided`
    if x > k:
        return m

    n = np.isnan(a)

    # stride n into a number of columns equal to the required number of
    # np.nan's to mask; row j is the window n[j:j + x], so `.all(axis=1)`
    # is essentially a rolling all operation on isnull.
    # The last window starts at k - x, so the view stays inside n and no
    # padding is needed; it is read-only because its rows overlap in memory.
    s = n.strides[0]
    windows = strided(n, (k + 1 - x, x), (s, s), writeable=False)

    # np.where finds the indices where we successfully start
    # x consecutive np.nan's
    starts = np.where(windows.all(axis=1))[0]

    # broadcasting against `np.arange(x)` adds the subsequent indices where
    # the remaining x - 1 np.nan's are; duplicates from overlapping windows
    # are harmless because every one of them is assigned the same value
    m[(starts[:, None] + np.arange(x)).ravel()] = False
    return m
The same function without the step-by-step comments:
import pandas as pd
import numpy as np
from numpy.lib.stride_tricks import as_strided as strided
def mask_knans(a, x):
    """Mask out every run of at least ``x`` consecutive NaNs in ``a``.

    ``a`` is converted with ``np.asarray`` and is expected to be
    one-dimensional; ``x`` is the minimum run length and must be >= 1
    (``ValueError`` otherwise). Returns a boolean array of length ``len(a)``
    that is ``False`` on elements belonging to such a run and ``True``
    everywhere else.
    """
    if x < 1:
        raise ValueError("x must be a positive integer")
    a = np.asarray(a)
    k = a.shape[0]
    # `bool` replaces `np.bool8`, which no longer exists in NumPy 2.0
    m = np.ones(k, dtype=bool)
    if x > k:
        # no window of x elements fits, so nothing can be masked
        return m
    n = np.isnan(a)
    s = n.strides[0]
    # row j of the read-only view is n[j:j + x]
    windows = strided(n, (k + 1 - x, x), (s, s), writeable=False)
    starts = np.where(windows.all(axis=1))[0]
    m[(starts[:, None] + np.arange(x)).ravel()] = False
    return m
Demo:
mask_knans(a, 2)
[ True False False False True True True True False False True True]
mask_knans(a, 3)
[ True False False False True True True True True True True True]