change gaps in numpy array according to gap size change gaps in numpy array according to gap size numpy numpy

change gaps in numpy array according to gap size


You can use `binary_closing`, a morphological operation from image processing that is available in SciPy:

import numpy as np
from scipy.ndimage import binary_closing


def remove_small_nnz(a, W):
    """Zero out short runs of nonzero values in a 1-D array, in place.

    The zero positions of ``a`` form a boolean mask.  A morphological
    closing of that mask with a flat structuring element of length ``W``
    fills the gaps between zeros that the element cannot fit into; the
    nonzero entries sitting in those filled gaps are then set to 0.

    Parameters
    ----------
    a : numpy.ndarray
        Array to clean.  It is modified in place.
    W : int
        Length of the all-ones structuring element.  In the sample run that
        accompanies this snippet, interior nonzero runs shorter than ``W``
        are removed (``W=4`` removes a run of 3 but keeps a run of 4).

    Returns
    -------
    numpy.ndarray
        The same array object ``a`` that was passed in, after modification.

    Notes
    -----
    ``binary_closing`` is called with its default border handling.
    NOTE(review): in the sample run the nonzero runs touching either end of
    the array are left untouched; confirm this is the behaviour you want
    for runs at the edges.
    """
    kernel = np.ones(W, dtype=int)
    is_zero = a == 0
    # Closing = dilation followed by erosion: zero-regions separated by a
    # narrow nonzero gap get merged into one region.
    closed = binary_closing(is_zero, kernel)
    # Entries that were nonzero but ended up inside a merged zero-region
    # are exactly the short runs to discard.
    a[~is_zero & closed] = 0
    return a

Sample run -

In [97]: a
Out[97]: array([1, 3, 1, 0, 0, 1, 8, 3, 0, 8, 2, 4, 7, 0, 0, 4, 1])

In [98]: remove_small_nnz(a, W=3)
Out[98]: array([1, 3, 1, 0, 0, 1, 8, 3, 0, 8, 2, 4, 7, 0, 0, 4, 1])

In [99]: remove_small_nnz(a, W=4)
Out[99]: array([1, 3, 1, 0, 0, 0, 0, 0, 0, 8, 2, 4, 7, 0, 0, 4, 1])

In [100]: remove_small_nnz(a, W=5)
Out[100]: array([1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1])


Since you're only looking for nonzeros, you can cast the array to boolean and look for spots where there is a run of at least as many consecutive `True` values as you require.

import numpy as np


def orig(fhr, minseq):
    """Reference implementation: zero out short positive runs, in place.

    Finds the indices where a run of positive values starts, then walks
    consecutive pairs of run starts.  If the span between two starts holds
    fewer than ``minseq`` positive values, the whole span is set to 0.

    The first detected start (index 0) is discarded and the last run has no
    following start to pair with, so neither a run beginning at index 0 nor
    the final run is ever tested.

    Parameters
    ----------
    fhr : numpy.ndarray
        1-D numeric array.  It is modified in place.
    minseq : int
        Minimum number of positive values a span must contain to be kept.

    Returns
    -------
    numpy.ndarray
        The same array object ``fhr``, after modification.
    """
    p = np.where(fhr > 0, 1, 0).astype(int)
    # +1 in the diff marks a 0 -> positive transition; the leading 1 is a
    # placeholder for index 0 and is dropped again by the [1:] below.
    s = np.r_[1, np.diff(p)]
    sind = np.where(s == 1)[0][1:]
    for s1, e1 in zip(sind[:-1], sind[1:]):
        if np.count_nonzero(fhr[s1:e1] > 0) < minseq:
            fhr[s1:e1] = 0
    return fhr


def update(fhr, minseq):
    """Mostly vectorised variant of ``orig`` that returns a new array.

    Parameters
    ----------
    fhr : numpy.ndarray
        1-D numeric array.  It is not modified.
    minseq : int
        Required run length.  Must be positive and smaller than
        ``len(fhr)``; otherwise the stacking or the indexing below fails.

    Returns
    -------
    numpy.ndarray
        ``fhr`` multiplied element-wise by a boolean keep-mask.
    """
    # convert the sequence to boolean
    nonzero = fhr.astype(bool)
    # stack the boolean array with lagged copies of itself, so that row j
    # holds nonzero[j], nonzero[j + 1], ..., nonzero[j + minseq - 1]
    seqs = np.stack([nonzero[i:-minseq + i] for i in range(minseq)], axis=1)
    # find the spots where the sequence is long enough
    # (builtin bool: the np.bool alias was removed in NumPy 1.24)
    inseq = np.r_[np.zeros(minseq, bool), seqs.all(axis=1)]
    # the start and end of the series are assumed to be included in the result
    inseq[minseq] = True
    inseq[-1] = True

    # make sure that the full sequence is included.
    # There may be a way to vectorize this further
    for ind in np.where(inseq)[0]:
        inseq[ind - minseq:ind] = True

    # Apply the inseq array as a mask
    return inseq * fhr


fhr = np.array([1, 3, 1, 0, 0, 1, 8, 3, 0, 8, 2, 4, 7, 0, 0, 4, 1])
minseq = 4
# orig() mutates its argument, so give it a copy; otherwise update() would be
# checked against data that orig() has already cleaned.
print(np.all(orig(fhr.copy(), minseq) == update(fhr, minseq)))
# True