bounding box of numpy array bounding box of numpy array arrays arrays

bounding box of numpy array


You can roughly halve the execution time by using np.any to reduce the rows and columns that contain non-zero values to 1D vectors, rather than finding the indices of all non-zero values using np.where:

def bbox1(img):    a = np.where(img != 0)    bbox = np.min(a[0]), np.max(a[0]), np.min(a[1]), np.max(a[1])    return bboxdef bbox2(img):    rows = np.any(img, axis=1)    cols = np.any(img, axis=0)    rmin, rmax = np.where(rows)[0][[0, -1]]    cmin, cmax = np.where(cols)[0][[0, -1]]    return rmin, rmax, cmin, cmax

Some benchmarks:

%timeit bbox1(img2)10000 loops, best of 3: 63.5 µs per loop%timeit bbox2(img2)10000 loops, best of 3: 37.1 µs per loop

Extending this approach to the 3D case just involves performing the reduction along each pair of axes:

def bbox2_3D(img):    r = np.any(img, axis=(1, 2))    c = np.any(img, axis=(0, 2))    z = np.any(img, axis=(0, 1))    rmin, rmax = np.where(r)[0][[0, -1]]    cmin, cmax = np.where(c)[0][[0, -1]]    zmin, zmax = np.where(z)[0][[0, -1]]    return rmin, rmax, cmin, cmax, zmin, zmax

It's easy to generalize this to N dimensions by using itertools.combinations to iterate over each unique combination of axes to perform the reduction over:

import itertoolsdef bbox2_ND(img):    N = img.ndim    out = []    for ax in itertools.combinations(reversed(range(N)), N - 1):        nonzero = np.any(img, axis=ax)        out.extend(np.where(nonzero)[0][[0, -1]])    return tuple(out)

If you know the coordinates of the corners of the original bounding box, the angle of rotation, and the centre of rotation, you could get the coordinates of the transformed bounding box corners directly by computing the corresponding affine transformation matrix and dotting it with the input coordinates:

def bbox_rotate(bbox_in, angle, centre):    rmin, rmax, cmin, cmax = bbox_in    # bounding box corners in homogeneous coordinates    xyz_in = np.array(([[cmin, cmin, cmax, cmax],                        [rmin, rmax, rmin, rmax],                        [   1,    1,    1,    1]]))    # translate centre to origin    cr, cc = centre    cent2ori = np.eye(3)    cent2ori[:2, 2] = -cr, -cc    # rotate about the origin    theta = np.deg2rad(angle)    rmat = np.eye(3)    rmat[:2, :2] = np.array([[ np.cos(theta),-np.sin(theta)],                             [ np.sin(theta), np.cos(theta)]])    # translate from origin back to centre    ori2cent = np.eye(3)    ori2cent[:2, 2] = cr, cc    # combine transformations (rightmost matrix is applied first)    xyz_out = ori2cent.dot(rmat).dot(cent2ori).dot(xyz_in)    r, c = xyz_out[:2]    rmin = int(r.min())    rmax = int(r.max())    cmin = int(c.min())    cmax = int(c.max())    return rmin, rmax, cmin, cmax

This works out to be very slightly faster than using np.any for your small example array:

%timeit bbox_rotate([25, 75, 25, 75], 45, (50, 50))10000 loops, best of 3: 33 µs per loop

However, since the speed of this method is independent of the size of the input array, it can be quite a lot faster for larger arrays.

Extending the transformation approach to 3D is slightly more complicated, in that the rotation now has three different components (one about the x-axis, one about the y-axis and one about the z-axis), but the basic method is the same:

def bbox_rotate_3d(bbox_in, angle_x, angle_y, angle_z, centre):    rmin, rmax, cmin, cmax, zmin, zmax = bbox_in    # bounding box corners in homogeneous coordinates    xyzu_in = np.array(([[cmin, cmin, cmin, cmin, cmax, cmax, cmax, cmax],                         [rmin, rmin, rmax, rmax, rmin, rmin, rmax, rmax],                         [zmin, zmax, zmin, zmax, zmin, zmax, zmin, zmax],                         [   1,    1,    1,    1,    1,    1,    1,    1]]))    # translate centre to origin    cr, cc, cz = centre    cent2ori = np.eye(4)    cent2ori[:3, 3] = -cr, -cc -cz    # rotation about the x-axis    theta = np.deg2rad(angle_x)    rmat_x = np.eye(4)    rmat_x[1:3, 1:3] = np.array([[ np.cos(theta),-np.sin(theta)],                                 [ np.sin(theta), np.cos(theta)]])    # rotation about the y-axis    theta = np.deg2rad(angle_y)    rmat_y = np.eye(4)    rmat_y[[0, 0, 2, 2], [0, 2, 0, 2]] = (        np.cos(theta), np.sin(theta), -np.sin(theta), np.cos(theta))    # rotation about the z-axis    theta = np.deg2rad(angle_z)    rmat_z = np.eye(4)    rmat_z[:2, :2] = np.array([[ np.cos(theta),-np.sin(theta)],                               [ np.sin(theta), np.cos(theta)]])    # translate from origin back to centre    ori2cent = np.eye(4)    ori2cent[:3, 3] = cr, cc, cz    # combine transformations (rightmost matrix is applied first)    tform = ori2cent.dot(rmat_z).dot(rmat_y).dot(rmat_x).dot(cent2ori)    xyzu_out = tform.dot(xyzu_in)    r, c, z = xyzu_out[:3]    rmin = int(r.min())    rmax = int(r.max())    cmin = int(c.min())    cmax = int(c.max())    zmin = int(z.min())    zmax = int(z.max())    return rmin, rmax, cmin, cmax, zmin, zmax

I've essentially just modified the function above using the rotation matrix expressions from here - I haven't had time to write a test-case yet, so use with caution.


Here is an algorithm to calculate the bounding box for N dimensional arrays,

def get_bounding_box(x):    """ Calculates the bounding box of a ndarray"""    mask = x == 0    bbox = []    all_axis = np.arange(x.ndim)    for kdim in all_axis:        nk_dim = np.delete(all_axis, kdim)        mask_i = mask.all(axis=tuple(nk_dim))        dmask_i = np.diff(mask_i)        idx_i = np.nonzero(dmask_i)[0]        if len(idx_i) != 2:            raise ValueError('Algorithm failed, {} does not have 2 elements!'.format(idx_i))        bbox.append(slice(idx_i[0]+1, idx_i[1]+1))    return bbox

which can be used with 2D, 3D, etc arrays as follows,

In [1]: print((img2!=0).astype(int))   ...: bbox = get_bounding_box(img2)   ...: print((img2[bbox]!=0).astype(int))   ...: [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0] [0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0] [0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0] [0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0] [0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0] [0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0] [0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0] [0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0] [0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0] [0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]][[0 0 0 0 0 0 1 1 0 0 0 0 0 0] [0 0 0 0 0 1 1 1 1 0 0 0 0 0] [0 0 0 0 1 1 1 1 1 1 0 0 0 0] [0 0 0 1 1 1 1 1 1 1 1 0 0 0] [0 0 1 1 1 1 1 1 1 1 1 1 0 0] [0 1 1 1 1 1 1 1 1 1 1 1 1 0] [1 1 1 1 1 1 1 1 1 1 1 1 1 1] [1 1 1 1 1 1 1 1 1 1 1 1 1 1] [0 1 1 1 1 1 1 1 1 1 1 1 1 0] [0 0 1 1 1 1 1 1 1 1 1 1 0 0] [0 0 0 1 1 1 1 1 1 1 1 0 0 0] [0 0 0 0 1 1 1 1 1 1 0 0 0 0] [0 0 0 0 0 1 1 1 1 0 0 0 0 0] [0 0 0 0 0 0 1 1 0 0 0 0 0 0]]

Although replacing the np.diff and np.nonzero calls by one np.where might be better.


I was able to squeeze out a little more performance by replacing np.where with np.argmax and working on a boolean mask.

def bbox(img):    img = (img > 0)    rows = np.any(img, axis=1)    cols = np.any(img, axis=0)    rmin, rmax = np.argmax(rows), img.shape[0] - 1 - np.argmax(np.flipud(rows))    cmin, cmax = np.argmax(cols), img.shape[1] - 1 - np.argmax(np.flipud(cols))    return rmin, rmax, cmin, cmax

This was about 10µs faster for me than the bbox2 solution above on the same benchmark. There should also be a way to just use the result of argmax to find the non-zero rows and columns, avoiding the extra search done by using np.any, but this may require some tricky indexing that I wasn't able to get working efficiently with simple vectorized code.