Where are the gains using numba coming from for pure numpy code? Where are the gains using numba coming from for pure numpy code? numpy numpy

Where are the gains using numba coming from for pure numpy code?


TL:DR The random and looping get accelerated, but the matrix multiply doesn't except for small matrix size. At small matrix/loop size, there seems to be significant speedups that are probably related to python overhead. At large N, the matrix multiply begins to dominate and the jit less helpful

Function definitions, using a square matrix for simplicity.

from IPython.display import displayimport numpy as npfrom numba import jitimport pandas as pd#Dimensions of MatricesN = 1000def py_rand(i, j):    a = np.random.rand(i, j)jit_rand = jit(nopython=True)(py_rand)def py_matmul(a, b):    c = np.dot(a, b)jit_matmul = jit(nopython=True)(py_matmul)def py_loop(N, val):    count = 0    for i in range(N):        count += val     jit_loop = jit(nopython=True)(py_loop)      def pure_python(N,i,j):    for n in range(N):        a = np.random.rand(i,j)        b = np.random.rand(i,j)        c = np.dot(a,a)jit_func = jit(nopython=True)(pure_python)

Timing:

df = pd.DataFrame(columns=['Func', 'jit', 'N', 'Time'])def meantime(f, *args, **kwargs):    t = %timeit -oq -n5 f(*args, **kwargs)    return t.averagefor N in [10, 100, 1000, 2000]:    a = np.random.randn(N, N)    b = np.random.randn(N, N)    df = df.append({'Func': 'jit_rand', 'N': N, 'Time': meantime(jit_rand, N, N)}, ignore_index=True)    df = df.append({'Func': 'py_rand', 'N': N, 'Time': meantime(py_rand, N, N)}, ignore_index=True)    df = df.append({'Func': 'jit_matmul', 'N': N, 'Time': meantime(jit_matmul, a, b)}, ignore_index=True)    df = df.append({'Func': 'py_matmul', 'N': N, 'Time': meantime(py_matmul, a, b)}, ignore_index=True)    df = df.append({'Func': 'jit_loop', 'N': N, 'Time': meantime(jit_loop, N, 2.0)}, ignore_index=True)    df = df.append({'Func': 'py_loop', 'N': N, 'Time': meantime(py_loop, N, 2.0)}, ignore_index=True)    df = df.append({'Func': 'jit_func', 'N': N, 'Time': meantime(jit_func, 5, N, N)}, ignore_index=True)    df = df.append({'Func': 'py_func', 'N': N, 'Time': meantime(pure_python, 5, N, N)}, ignore_index=True)df['jit'] = df['Func'].str.contains('jit')df['Func'] = df['Func'].apply(lambda s: s.split('_')[1])df.set_index('Func')display(df)

result:

    Func    jit     N   Time0   rand    True    10  1.030686e-061   rand    False   10  1.115149e-052   matmul  True    10  2.250371e-063   matmul  False   10  2.199343e-064   loop    True    10  2.706000e-075   loop    False   10  7.274286e-076   func    True    10  1.217046e-057   func    False   10  2.495837e-058   rand    True    100 5.199217e-059   rand    False   100 8.149794e-0510  matmul  True    100 7.848071e-0511  matmul  False   100 2.130794e-0512  loop    True    100 2.728571e-0713  loop    False   100 3.003743e-0614  func    True    100 6.739634e-0415  func    False   100 1.146594e-0316  rand    True    1000    5.644258e-0317  rand    False   1000    8.012790e-0318  matmul  True    1000    1.476098e-0219  matmul  False   1000    1.613211e-0220  loop    True    1000    2.846572e-0721  loop    False   1000    3.539849e-0522  func    True    1000    1.256926e-0123  func    False   1000    1.581177e-0124  rand    True    2000    2.061612e-0225  rand    False   2000    3.204709e-0226  matmul  True    2000    9.866484e-0227  matmul  False   2000    1.007234e-0128  loop    True    2000    3.011143e-0729  loop    False   2000    7.477454e-0530  func    True    2000    1.033560e+0031  func    False   2000    1.199969e+00

It looks like numba is optimizing away the loop, so I'm not gonna bother including it in the compare

plot:

def jit_speedup(d):    py_time = d[d['jit'] == False]['Time'].mean()    jit_time = d[d['jit'] == True]['Time'].mean()    return py_time / jit_time import seaborn as snsresult = df.groupby(['Func', 'N']).apply(jit_speedup).reset_index().rename(columns={0: 'Jit Speedup'})result = result[result['Func'] != 'loop']sns.factorplot(data=result, x='N', y='Jit Speedup', hue='Func')

enter image description here

So for the loop being 5 repetitions, the jit speeds things up quite solidly until the matrix multiply becomes expensive enough to make the other overhead insignificant in comparison.