Is there a faster version of numpy.random.shuffle?
It's likely that this will give a nice speed boost:
from timeit import Timer
import numpy as np

arr = np.random.sample((50, 5000))


def timeline_sample(series, num):
    """Yield ``num`` samples of ``series`` with its columns shuffled in place.

    Shuffling ``random.T`` permutes the columns of ``random``; every yield
    returns the same buffer, re-shuffled on each iteration.
    """
    random = series.copy()
    for _ in range(num):
        np.random.shuffle(random.T)
        yield random


def timeline_sample_fast(series, num):
    """Like ``timeline_sample`` but shuffles rows of a transposed copy.

    Row shuffles move contiguous memory, which is cheaper; the yielded
    ``random.T`` is a (non-contiguous) view with the columns permuted.
    """
    random = series.T.copy()
    for _ in range(num):
        np.random.shuffle(random)
        yield random.T


def timeline_sample_faster(series, num):
    """Yield ``num`` fresh arrays with the columns of ``series`` permuted.

    Uses fancy indexing with a random permutation instead of an in-place
    shuffle; each yield allocates a new array.
    """
    # BUG FIX: the original read the global ``arr`` here instead of the
    # ``series`` parameter, silently breaking the function for any other
    # input (wrong length, or NameError if ``arr`` is absent).
    length = series.shape[1]
    for _ in range(num):
        yield series[:, np.random.permutation(length)]


def consume(iterable):
    """Drain the generator, summing each sample so the work is not skipped."""
    for s in iterable:
        np.sum(s)


if __name__ == "__main__":
    # Guarded so importing this module does not run the benchmarks.
    print(min(Timer(lambda: consume(timeline_sample(arr, 1))).repeat(10, 10)))
    print(min(Timer(lambda: consume(timeline_sample_fast(arr, 1))).repeat(10, 10)))
    print(min(Timer(lambda: consume(timeline_sample_faster(arr, 1))).repeat(10, 10)))
    # >>> 0.2585161680035526
    # >>> 0.2416607110062614
    # >>> 0.04835709399776533
Forcing it to be contiguous does increase the time, but not by a ton:
def consume(iterable):
    """Exhaust the iterable, forcing each sample contiguous before summing."""
    for sample in iterable:
        np.sum(np.ascontiguousarray(sample))


# Timings with the cost of making each yielded sample contiguous included.
min(Timer(lambda: consume(timeline_sample(arr, 1))).repeat(10, 10))
min(Timer(lambda: consume(timeline_sample_fast(arr, 1))).repeat(10, 10))
min(Timer(lambda: consume(timeline_sample_faster(arr, 1))).repeat(10, 10))
# >>> 0.2632228760048747
# >>> 0.25778737501241267
# >>> 0.07451769898761995
Randomizing rows is cheaper than randomizing columns: the code below is equivalent in functionality, but is about 3 times faster on my machine.
def timeline_sample_fast(series, num):
    """Yield ``num`` column-permuted samples of ``series``.

    Works on a transposed copy so each shuffle moves whole contiguous rows,
    which is cheaper than shuffling columns directly; the yielded
    ``random.T`` is a view with the columns of ``series`` permuted.
    """
    random = series.T.copy()
    for _ in range(num):
        np.random.shuffle(random)
        yield random.T


arr = np.random.sample((600, 50))

# FIX: the original pasted an IPython session (``%%timeit`` magics and their
# printed results) straight into the source, which is not valid Python.
# The transcript is preserved here as comments:
#
# %%timeit
# for s in timeline_sample(arr, 100): np.sum(s)
# 10 loops, best of 3: 55.5 ms per loop
#
# %%timeit
# for s in timeline_sample_fast(arr, 100): np.sum(s)
# 10 loops, best of 3: 18.6 ms per loop