Resize with averaging or rebin a NumPy 2D array (tag: numpy)

resize with averaging or rebin a numpy 2d array


Here's an example based on the answer you've linked (for clarity):

>>> import numpy as np
>>> a = np.arange(24).reshape((4,6))
>>> a
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])
>>> a.reshape((2,a.shape[0]//2,3,-1)).mean(axis=3).mean(1)
array([[  3.5,   5.5,   7.5],
       [ 15.5,  17.5,  19.5]])

As a function:

def rebin(a, shape):
    """Downsample a 2-D array to ``shape`` by averaging equal-sized blocks.

    ``a`` is viewed as a grid of ``shape[0]`` x ``shape[1]`` blocks, each
    ``a.shape[0] // shape[0]`` rows by ``a.shape[1] // shape[1]`` columns,
    and every block is replaced by its mean.

    Parameters
    ----------
    a : array with a 2-element ``shape`` and ``reshape``/``mean`` methods
        (e.g. a 2-D ``numpy.ndarray``).
    shape : indexable of two ints
        Target ``(rows, columns)``.

    Returns
    -------
    The block-averaged array of shape ``(shape[0], shape[1])``.

    Notes
    -----
    The block sizes come from floor division, so when a target length does
    not divide the source length the ``reshape`` call fails.
    """
    out_rows, out_cols = shape[0], shape[1]
    rows_per_block = a.shape[0] // out_rows
    cols_per_block = a.shape[1] // out_cols
    # Split each axis into (block index, offset inside the block).
    blocks = a.reshape((out_rows, rows_per_block, out_cols, cols_per_block))
    # Average the in-block column offsets first, then the in-block row offsets.
    averaged_over_cols = blocks.mean(axis=-1)
    return averaged_over_cols.mean(axis=1)


J.F. Sebastian has a great answer for 2D binning. Here is a version of his "rebin" function that works for N dimensions:

def bin_ndarray(ndarray, new_shape, operation='sum'):
    """
    Bin an ndarray along every axis down to ``new_shape`` by summing or
    averaging equal-sized blocks.

    The number of output dimensions must match the number of input
    dimensions, and every new axis length must be positive and divide the
    corresponding old axis length.

    Parameters
    ----------
    ndarray : array exposing ``ndim``, ``shape``, ``reshape`` and the
        reduction methods ``sum``/``mean`` (e.g. ``numpy.ndarray``).
    new_shape : sequence of int
        Target length for each axis.
    operation : str
        ``'sum'`` or ``'mean'`` (case-insensitive).

    Returns
    -------
    The binned array, of shape ``new_shape``.

    Raises
    ------
    ValueError
        If ``operation`` is not supported, if the number of dimensions
        differs, or if a new axis length is not a positive divisor of the
        old one.

    Example
    -------
    >>> m = np.arange(0,100,1).reshape((10,10))
    >>> n = bin_ndarray(m, new_shape=(5,5), operation='sum')
    >>> print(n)
    [[ 22  30  38  46  54]
     [102 110 118 126 134]
     [182 190 198 206 214]
     [262 270 278 286 294]
     [342 350 358 366 374]]
    """
    operation = operation.lower()
    if operation not in ('sum', 'mean'):
        raise ValueError("Operation not supported.")
    if ndarray.ndim != len(new_shape):
        raise ValueError("Shape mismatch: {} -> {}".format(ndarray.shape,
                                                           new_shape))
    # Check divisibility up front: otherwise the floor division below
    # silently truncates and the failure only shows up as an opaque reshape
    # error (or a ZeroDivisionError for a zero-length target axis).
    for new_len, old_len in zip(new_shape, ndarray.shape):
        if new_len <= 0 or old_len % new_len != 0:
            raise ValueError(
                "Shape mismatch: new axis lengths must be positive and "
                "divide the old ones: {} -> {}".format(ndarray.shape,
                                                       new_shape))
    # Each axis of length c becomes two axes (d, c // d): the bin index
    # followed by the position inside the bin.
    compression_pairs = [(d, c // d)
                         for d, c in zip(new_shape, ndarray.shape)]
    flattened = [length for pair in compression_pairs for length in pair]
    binned = ndarray.reshape(flattened)
    # Reduce the "inside the bin" axes from last to first. After i
    # reductions the next one to remove sits at position -(i + 1).
    for i in range(len(new_shape)):
        reduce_axis = getattr(binned, operation)
        binned = reduce_axis(-1 * (i + 1))
    return binned


Here's a way of doing what you ask using matrix multiplication that doesn't require the new array dimensions to divide the old.

First we generate a row compressor matrix and a column compressor matrix (I'm sure there's a cleaner way of doing this, maybe even using numpy operations alone):

def get_row_compressor(old_dimension, new_dimension):
    """Build the averaging matrix that resamples an axis to a new length.

    The old axis is treated as ``old_dimension`` unit-width cells and is cut
    into ``new_dimension`` equal-width bins. Entry ``[i, j]`` is the fraction
    of bin ``i`` that is covered by old cell ``j``, so every row sums to 1
    and pre-multiplying a matrix by the result averages its rows into the
    new bins. The new length does not have to divide the old one.

    Parameters
    ----------
    old_dimension : int
        Length of the axis being resampled.
    new_dimension : int
        Desired length of that axis.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(new_dimension, old_dimension)``.

    Raises
    ------
    ValueError
        If either dimension is negative.
    ZeroDivisionError
        If ``new_dimension`` is zero.
    """
    if old_dimension < 0 or new_dimension < 0:
        raise ValueError("dimensions must be non-negative: {} -> {}".format(
            old_dimension, new_dimension))
    bin_size = float(old_dimension) / new_dimension
    # Bin boundaries measured in old cells. linspace pins both end points
    # exactly, avoiding the drift of repeatedly adding bin_size.
    bin_edges = np.linspace(0.0, float(old_dimension), new_dimension + 1)
    bin_start = bin_edges[:-1, np.newaxis]
    bin_stop = bin_edges[1:, np.newaxis]
    cell_start = np.arange(old_dimension, dtype=float)[np.newaxis, :]
    cell_stop = cell_start + 1.0
    # Length of the intersection of [bin_start, bin_stop) with
    # [cell_start, cell_stop); disjoint intervals come out negative and are
    # clamped to zero.
    overlap = np.minimum(bin_stop, cell_stop) - np.maximum(bin_start,
                                                           cell_start)
    overlap = np.maximum(overlap, 0.0)
    # Normalise so that each row is a weighted average.
    return overlap / bin_size


def get_column_compressor(old_dimension, new_dimension):
    """Return the transpose of ``get_row_compressor`` for post-multiplying.

    The result has shape ``(old_dimension, new_dimension)``; multiplying a
    matrix by it on the right averages its columns into the new bins.
    """
    return get_row_compressor(old_dimension, new_dimension).transpose()

... so, for instance, get_row_compressor(5, 3) gives you:

[[ 0.6  0.4  0.   0.   0. ]
 [ 0.   0.2  0.6  0.2  0. ]
 [ 0.   0.   0.   0.4  0.6]]

and get_column_compressor(3, 2) gives you:

[[ 0.66666667  0.        ]
 [ 0.33333333  0.33333333]
 [ 0.          0.66666667]]

Then simply premultiply by the row compressor and postmultiply by the column compressor to get the compressed matrix:

def compress_and_average(array, new_shape):
    """Resample a 2-D array to ``new_shape`` by weighted averaging.

    The array is pre-multiplied by the row compressor for its first axis and
    post-multiplied by the column compressor for its second axis.

    Parameters
    ----------
    array : 2-D array
        Must expose a two-element ``shape``.
    new_shape : indexable of two ints
        Target ``(rows, columns)``.

    Returns
    -------
    numpy.matrix
        The resampled values, of shape ``(new_shape[0], new_shape[1])``.
    """
    # Note: new shape should be smaller in both dimensions than old shape
    # np.asmatrix is used because the np.mat alias no longer exists in
    # NumPy 2.0; it is the same function, so the result is still an
    # np.matrix and ``*`` is still matrix multiplication.
    return np.asmatrix(get_row_compressor(array.shape[0], new_shape[0])) * \
           np.asmatrix(array) * \
           np.asmatrix(get_column_compressor(array.shape[1], new_shape[1]))

Using this technique,

# Example: shrink a 3x5 matrix to 2x3 with weighted averaging.
demo_matrix = np.array([
    [50, 7, 2, 0, 1],
    [0, 0, 2, 8, 4],
    [4, 1, 1, 0, 0],
])
compress_and_average(demo_matrix, (2, 3))

yields:

[[ 21.86666667   2.66666667   2.26666667]
 [  1.86666667   1.46666667   1.86666667]]