Where are the gains using numba coming from for pure numpy code?
TL:DR The random and looping get accelerated, but the matrix multiply doesn't except for small matrix size. At small matrix/loop size, there seems to be significant speedups that are probably related to python overhead. At large N, the matrix multiply begins to dominate and the jit less helpful
Function definitions, using a square matrix for simplicity.
from IPython.display import displayimport numpy as npfrom numba import jitimport pandas as pd#Dimensions of MatricesN = 1000def py_rand(i, j): a = np.random.rand(i, j)jit_rand = jit(nopython=True)(py_rand)def py_matmul(a, b): c = np.dot(a, b)jit_matmul = jit(nopython=True)(py_matmul)def py_loop(N, val): count = 0 for i in range(N): count += val jit_loop = jit(nopython=True)(py_loop) def pure_python(N,i,j): for n in range(N): a = np.random.rand(i,j) b = np.random.rand(i,j) c = np.dot(a,a)jit_func = jit(nopython=True)(pure_python)
Timing:
df = pd.DataFrame(columns=['Func', 'jit', 'N', 'Time'])def meantime(f, *args, **kwargs): t = %timeit -oq -n5 f(*args, **kwargs) return t.averagefor N in [10, 100, 1000, 2000]: a = np.random.randn(N, N) b = np.random.randn(N, N) df = df.append({'Func': 'jit_rand', 'N': N, 'Time': meantime(jit_rand, N, N)}, ignore_index=True) df = df.append({'Func': 'py_rand', 'N': N, 'Time': meantime(py_rand, N, N)}, ignore_index=True) df = df.append({'Func': 'jit_matmul', 'N': N, 'Time': meantime(jit_matmul, a, b)}, ignore_index=True) df = df.append({'Func': 'py_matmul', 'N': N, 'Time': meantime(py_matmul, a, b)}, ignore_index=True) df = df.append({'Func': 'jit_loop', 'N': N, 'Time': meantime(jit_loop, N, 2.0)}, ignore_index=True) df = df.append({'Func': 'py_loop', 'N': N, 'Time': meantime(py_loop, N, 2.0)}, ignore_index=True) df = df.append({'Func': 'jit_func', 'N': N, 'Time': meantime(jit_func, 5, N, N)}, ignore_index=True) df = df.append({'Func': 'py_func', 'N': N, 'Time': meantime(pure_python, 5, N, N)}, ignore_index=True)df['jit'] = df['Func'].str.contains('jit')df['Func'] = df['Func'].apply(lambda s: s.split('_')[1])df.set_index('Func')display(df)
result:
Func jit N Time0 rand True 10 1.030686e-061 rand False 10 1.115149e-052 matmul True 10 2.250371e-063 matmul False 10 2.199343e-064 loop True 10 2.706000e-075 loop False 10 7.274286e-076 func True 10 1.217046e-057 func False 10 2.495837e-058 rand True 100 5.199217e-059 rand False 100 8.149794e-0510 matmul True 100 7.848071e-0511 matmul False 100 2.130794e-0512 loop True 100 2.728571e-0713 loop False 100 3.003743e-0614 func True 100 6.739634e-0415 func False 100 1.146594e-0316 rand True 1000 5.644258e-0317 rand False 1000 8.012790e-0318 matmul True 1000 1.476098e-0219 matmul False 1000 1.613211e-0220 loop True 1000 2.846572e-0721 loop False 1000 3.539849e-0522 func True 1000 1.256926e-0123 func False 1000 1.581177e-0124 rand True 2000 2.061612e-0225 rand False 2000 3.204709e-0226 matmul True 2000 9.866484e-0227 matmul False 2000 1.007234e-0128 loop True 2000 3.011143e-0729 loop False 2000 7.477454e-0530 func True 2000 1.033560e+0031 func False 2000 1.199969e+00
It looks like numba is optimizing away the loop, so I'm not gonna bother including it in the compare
plot:
def jit_speedup(d): py_time = d[d['jit'] == False]['Time'].mean() jit_time = d[d['jit'] == True]['Time'].mean() return py_time / jit_time import seaborn as snsresult = df.groupby(['Func', 'N']).apply(jit_speedup).reset_index().rename(columns={0: 'Jit Speedup'})result = result[result['Func'] != 'loop']sns.factorplot(data=result, x='N', y='Jit Speedup', hue='Func')
So for the loop being 5 repetitions, the jit speeds things up quite solidly until the matrix multiply becomes expensive enough to make the other overhead insignificant in comparison.