resize with averaging or rebin a numpy 2d array
Here's an example based on the answer you've linked (for clarity):
>>> import numpy as np
>>> a = np.arange(24).reshape((4,6))
>>> a
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])
>>> a.reshape((2,a.shape[0]//2,3,-1)).mean(axis=3).mean(1)
array([[  3.5,   5.5,   7.5],
       [ 15.5,  17.5,  19.5]])
As a function:
def rebin(a, shape):
    """Shrink a 2-D array to ``shape`` by averaging rectangular blocks.

    The array is viewed as a grid of ``shape[0]`` x ``shape[1]`` blocks and
    every block is replaced by its mean.

    Parameters
    ----------
    a : array with a ``shape`` attribute and a ``reshape`` method
        Two-dimensional input.
    shape : sequence of two ints
        Target ``(rows, columns)``. Each input dimension is expected to be
        an integer multiple of the corresponding target dimension, because
        the block size is computed with floor division.

    Returns
    -------
    Array of shape ``(shape[0], shape[1])`` holding the block means.
    """
    out_rows, out_cols = shape[0], shape[1]
    row_factor = a.shape[0] // out_rows
    col_factor = a.shape[1] // out_cols
    # Axes 1 and 3 index the position inside a block; axes 0 and 2 index
    # the block itself.
    blocks = a.reshape((out_rows, row_factor, out_cols, col_factor))
    averaged_over_columns = blocks.mean(axis=-1)
    return averaged_over_columns.mean(axis=1)
J.F. Sebastian has a great answer for 2D binning. Here is a version of his "rebin" function that works for N dimensions:
def bin_ndarray(ndarray, new_shape, operation='sum'):
    """
    Bin an ndarray along every axis to ``new_shape`` by summing or averaging.

    The number of output dimensions must match the number of input
    dimensions, and every new axis length must evenly divide the old one.

    Parameters
    ----------
    ndarray : numpy.ndarray
        Input array of any dimensionality.
    new_shape : sequence of int
        Target shape, one positive entry per input axis.
    operation : {'sum', 'mean'}, optional
        Reduction applied inside each bin (case-insensitive).

    Returns
    -------
    numpy.ndarray
        Array of shape ``new_shape``.

    Raises
    ------
    ValueError
        If ``operation`` is unsupported, the number of dimensions differs,
        or an entry of ``new_shape`` is non-positive or does not divide the
        corresponding input axis length.

    Example
    -------
    >>> m = np.arange(0,100,1).reshape((10,10))
    >>> n = bin_ndarray(m, new_shape=(5,5), operation='sum')
    >>> print(n)

    [[ 22  30  38  46  54]
     [102 110 118 126 134]
     [182 190 198 206 214]
     [262 270 278 286 294]
     [342 350 358 366 374]]

    """
    operation = operation.lower()
    if operation not in ('sum', 'mean'):
        raise ValueError("Operation not supported.")
    if ndarray.ndim != len(new_shape):
        raise ValueError("Shape mismatch: {} -> {}".format(ndarray.shape,
                                                           new_shape))
    # Check divisibility up front so the caller gets a meaningful message
    # instead of an opaque reshape failure or a ZeroDivisionError.
    for new_len, old_len in zip(new_shape, ndarray.shape):
        if new_len <= 0 or old_len % new_len != 0:
            raise ValueError(
                "New shape {} must evenly divide old shape {}".format(
                    tuple(new_shape), ndarray.shape))

    # Split every axis of length c into (d, c // d): d bins of c // d items.
    compression_pairs = [(d, c // d) for d, c in zip(new_shape, ndarray.shape)]
    flattened = [length for pair in compression_pairs for length in pair]
    binned = ndarray.reshape(flattened)
    # Each reduction drops one within-bin axis, so the next one to reduce
    # sits one position further from the end: -1, -2, ..., -ndim.
    for axis in range(-1, -len(new_shape) - 1, -1):
        binned = getattr(binned, operation)(axis)
    return binned
Here's a way of doing what you ask using matrix multiplication that doesn't require the new array dimensions to divide the old.
First we generate a row compressor matrix and a column compressor matrix (I'm sure there's a cleaner way of doing this, maybe even using numpy operations alone):
def get_row_compressor(old_dimension, new_dimension):
    """Build the averaging matrix that resamples one axis.

    The axis is treated as ``old_dimension`` unit-width cells covering
    ``[0, old_dimension)``, split into ``new_dimension`` equal-width bins.
    Entry ``[i, j]`` is the fraction of bin ``i`` that is covered by cell
    ``j``, so every row sums to 1 and left-multiplying by the result
    averages the old rows into the new ones.

    Parameters
    ----------
    old_dimension : int
        Length of the axis before resampling.
    new_dimension : int
        Length of the axis after resampling; must be positive. It does not
        have to divide ``old_dimension``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(new_dimension, old_dimension)``.

    Raises
    ------
    ValueError
        If ``new_dimension`` is not positive.
    """
    if new_dimension <= 0:
        raise ValueError("new_dimension must be a positive integer")

    bin_size = float(old_dimension) / new_dimension
    # linspace places every bin edge directly rather than by repeated
    # addition, so rounding error does not accumulate along the axis.
    bin_edges = np.linspace(0.0, old_dimension, new_dimension + 1)
    bin_starts = bin_edges[:-1, np.newaxis]
    bin_stops = bin_edges[1:, np.newaxis]
    cell_starts = np.arange(old_dimension)

    # Length of the intersection of bin [start, stop) with cell [j, j + 1);
    # a negative value means the two intervals do not overlap at all.
    overlap = (np.minimum(bin_stops, cell_starts + 1)
               - np.maximum(bin_starts, cell_starts))
    return np.clip(overlap, 0.0, None) / bin_size


def get_column_compressor(old_dimension, new_dimension):
    """Return the transpose of ``get_row_compressor`` for the same sizes.

    The result has shape ``(old_dimension, new_dimension)`` and is meant to
    be applied by right-multiplication, averaging columns instead of rows.
    """
    return get_row_compressor(old_dimension, new_dimension).transpose()
... so, for instance, get_row_compressor(5, 3)
gives you:
[[ 0.6 0.4 0. 0. 0. ] [ 0. 0.2 0.6 0.2 0. ] [ 0. 0. 0. 0.4 0.6]]
and get_column_compressor(3, 2)
gives you:
[[ 0.66666667 0. ] [ 0.33333333 0.33333333] [ 0. 0.66666667]]
Then simply premultiply by the row compressor and postmultiply by the column compressor to get the compressed matrix:
def compress_and_average(array, new_shape):
    """Resize a 2-D array to ``new_shape`` by weighted averaging.

    The input is premultiplied by a row compressor and postmultiplied by a
    column compressor, so the new dimensions need not divide the old ones.

    Parameters
    ----------
    array : numpy.ndarray
        Two-dimensional input.
    new_shape : sequence of two ints
        Target ``(rows, columns)``.

    Returns
    -------
    numpy.matrix
        Matrix of shape ``(new_shape[0], new_shape[1])``.
    """
    # Note: new shape should be smaller in both dimensions than old shape
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0;
    # the operands stay np.matrix so that * is matrix multiplication.
    row_compressor = np.asmatrix(
        get_row_compressor(array.shape[0], new_shape[0]))
    data = np.asmatrix(array)
    column_compressor = np.asmatrix(
        get_column_compressor(array.shape[1], new_shape[1]))
    return row_compressor * data * column_compressor
Using this technique,
# Example: resize a 3x5 array to shape (2, 3).
compress_and_average(np.array([[50, 7, 2, 0, 1], [0, 0, 2, 8, 4], [4, 1, 1, 0, 0]]), (2, 3))
yields:
[[ 21.86666667 2.66666667 2.26666667] [ 1.86666667 1.46666667 1.86666667]]