Is there a faster version of numpy.random.shuffle? [numpy]

Is there a faster version of numpy.random.shuffle?


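Indexing the columns with a random permutation (see `timeline_sample_faster` below), instead of shuffling in place, will likely give a nice speed boost: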

from timeit import Timer

import numpy as np

arr = np.random.sample((50, 5000))


def timeline_sample(series, num):
    """Yield ``num`` column-shuffled versions of ``series`` (baseline).

    One copy of ``series`` is made up front. Every iteration shuffles the
    columns of that copy in place and yields the *same* array object, so a
    yielded array is overwritten by the next iteration; copy it if it has
    to outlive the loop step.
    """
    shuffled = series.copy()
    for _ in range(num):
        # np.random.shuffle only permutes along the first axis, so the
        # transposed view is passed to reorder the columns of `shuffled`.
        np.random.shuffle(shuffled.T)
        yield shuffled


def timeline_sample_fast(series, num):
    """Yield ``num`` column-shuffled versions of ``series``.

    Same contract as ``timeline_sample``, but the transpose is copied once
    so that the shuffle runs directly on the first axis of that copy. The
    yielded value is a transposed view of the one working array and is
    overwritten by the next iteration.
    """
    shuffled = series.T.copy()
    for _ in range(num):
        np.random.shuffle(shuffled)
        yield shuffled.T


def timeline_sample_faster(series, num):
    """Yield ``num`` column-shuffled versions of ``series``.

    Instead of shuffling in place, the columns are picked with a random
    permutation of the column indices. ``series`` itself is never modified
    and every yielded array is a new object produced by the indexing.
    """
    # Take the column count from the argument, not from the module-level
    # `arr`: otherwise any input with a different width breaks.
    length = series.shape[1]
    for _ in range(num):
        yield series[:, np.random.permutation(length)]


def consume(iterable):
    """Exhaust ``iterable``, calling ``np.sum`` on each item (result unused)."""
    for s in iterable:
        np.sum(s)


if __name__ == "__main__":
    # Best of 10 repeats, 10 calls per repeat. Results are printed so they
    # are visible when this is run as a script rather than pasted in a REPL.
    print(min(Timer(lambda: consume(timeline_sample(arr, 1))).repeat(10, 10)))
    print(min(Timer(lambda: consume(timeline_sample_fast(arr, 1))).repeat(10, 10)))
    print(min(Timer(lambda: consume(timeline_sample_faster(arr, 1))).repeat(10, 10)))
    # Timings reported by the original author:
    #>>> 0.2585161680035526
    #>>> 0.2416607110062614
    #>>> 0.04835709399776533

Forcing each yielded sample to be contiguous (with `np.ascontiguousarray`) does increase the time, but not by a ton:

def consume(iterable):
    """Exhaust *iterable*, summing each item after ``np.ascontiguousarray``.

    The sum is computed only to force work on the contiguous array; its
    value is thrown away.
    """
    for sample in iterable:
        contiguous = np.ascontiguousarray(sample)
        np.sum(contiguous)


# Each statement: minimum over 10 repeats of 10 calls.
min(Timer(lambda: consume(timeline_sample(arr, 1))).repeat(repeat=10, number=10))
min(Timer(lambda: consume(timeline_sample_fast(arr, 1))).repeat(repeat=10, number=10))
min(Timer(lambda: consume(timeline_sample_faster(arr, 1))).repeat(repeat=10, number=10))
#>>> 0.2632228760048747
#>>> 0.25778737501241267
#>>> 0.07451769898761995


Randomizing rows will be cheaper; the code below is equivalent in functionality but about 3 times faster on my machine.

def timeline_sample_fast(series, num):   random = series.T.copy()   for i in range(num):       np.random.shuffle(random)       yield random.Tarr = np.random.sample((600, 50))%%timeit                         for s in timeline_sample(arr, 100):    np.sum(s)10 loops, best of 3: 55.5 ms per loop%%timeitfor s in timeline_sample_fast(arr, 100):   np.sum(s)10 loops, best of 3: 18.6 ms per loop