Iterating through a scipy.sparse vector (or matrix) [python]

Iterating through a scipy.sparse vector (or matrix)


Edit: bbtrb's method (using coo_matrix) is much faster than my original suggestion, using nonzero. Sven Marnach's suggestion to use itertools.izip also improves the speed. Current fastest is using_tocoo_izip:

import scipy.sparse
import random
import itertools

# ``itertools.izip`` exists only on Python 2.  On Python 3 the builtin ``zip``
# is already lazy, so fall back to it there instead of raising AttributeError.
_izip = getattr(itertools, "izip", zip)


def using_nonzero(x):
    """Visit every non-zero entry of ``x`` via ``x.nonzero()`` and indexing.

    Each value is fetched with a separate ``x[row, col]`` lookup.  The loop
    body only builds a ``((row, col), value)`` tuple and discards it; the
    function exists to be timed and returns ``None``.
    """
    rows, cols = x.nonzero()
    for row, col in zip(rows, cols):
        ((row, col), x[row, col])  # benchmark payload, result intentionally unused


def using_coo(x):
    """Visit every stored entry of ``x`` after ``scipy.sparse.coo_matrix(x)``.

    Iterates the parallel ``row``/``col``/``data`` arrays of the COO copy.
    The loop body builds an ``(i, j, v)`` tuple and discards it; returns
    ``None``.
    """
    cx = scipy.sparse.coo_matrix(x)
    for i, j, v in zip(cx.row, cx.col, cx.data):
        (i, j, v)  # benchmark payload, result intentionally unused


def using_tocoo(x):
    """Visit every stored entry of ``x`` after converting with ``x.tocoo()``.

    Same loop as ``using_coo``; only the conversion call differs.  Returns
    ``None``.
    """
    cx = x.tocoo()
    for i, j, v in zip(cx.row, cx.col, cx.data):
        (i, j, v)  # benchmark payload, result intentionally unused


def using_tocoo_izip(x):
    """Visit every stored entry of ``x`` via ``x.tocoo()`` and a lazy zip.

    Uses ``itertools.izip`` where it exists (Python 2) and the builtin
    ``zip`` otherwise, so the function runs on both major versions.
    Returns ``None``.
    """
    cx = x.tocoo()
    for i, j, v in _izip(cx.row, cx.col, cx.data):
        (i, j, v)  # benchmark payload, result intentionally unused


# Benchmark fixture: an N x N LIL matrix with up to N random non-zero entries
# (fewer when the same random position is drawn more than once).
N = 200
x = scipy.sparse.lil_matrix((N, N))
# ``range`` instead of the Python-2-only ``xrange``; N is small, so the list
# that Python 2 builds here is negligible.
for _ in range(N):
    x[random.randint(0, N - 1), random.randint(0, N - 1)] = random.randint(1, 100)

yields these timeit results:

% python -mtimeit -s'import test' 'test.using_tocoo_izip(test.x)'
1000 loops, best of 3: 670 usec per loop
% python -mtimeit -s'import test' 'test.using_tocoo(test.x)'
1000 loops, best of 3: 706 usec per loop
% python -mtimeit -s'import test' 'test.using_coo(test.x)'
1000 loops, best of 3: 802 usec per loop
% python -mtimeit -s'import test' 'test.using_nonzero(test.x)'
100 loops, best of 3: 5.25 msec per loop


The fastest way should be to convert to a coo_matrix and iterate over its row, col and data arrays:

# Convert to COO so the entries are exposed as three parallel arrays.
cx = scipy.sparse.coo_matrix(x)
# One (row, column, value) triple per stored entry.
for i, j, v in zip(cx.row, cx.col, cx.data):
    # print() with a single argument behaves the same on Python 2 and 3;
    # the bare ``print "..."`` statement is a SyntaxError on Python 3.
    print("(%d, %d), %s" % (i, j, v))


To loop over several of the sparse matrix formats in scipy.sparse, I would use this small wrapper function (note that on Python 2 you are encouraged to use xrange and izip for better performance on large matrices):

# Wildcard import kept as in the original: other code in this module may rely
# on the names it brings in.  TODO(review): narrow it once that is confirmed.
from scipy.sparse import *  # noqa: F401,F403
from scipy.sparse import issparse


def iter_spmatrix(matrix):
    """Iterate over the stored elements of a 2-D scipy.sparse container.

    Every item yielded is a ``(row, column, value)`` tuple.

    The storage format is read from ``matrix.format``, so the ``*_array``
    classes are handled the same way as the ``*_matrix`` classes.  The
    supported formats are ``coo``, ``csc``, ``csr`` and ``lil``.

    Parameters
    ----------
    matrix : scipy.sparse matrix or array
        The sparse container whose stored elements are iterated.

    Yields
    ------
    tuple
        ``(row, column, value)`` for each stored element, in storage order:
        as stored for ``coo``, column by column for ``csc``, row by row for
        ``csr`` and ``lil``.

    Raises
    ------
    NotImplementedError
        If ``matrix`` is not a scipy.sparse object, is not 2-D, or uses a
        format other than the four listed above.  Because this is a
        generator, the error is raised on the first iteration step, not at
        call time.
    """
    fmt = None
    # Objects without ``ndim`` are treated as 2-D; anything that reports a
    # different dimensionality is rejected because the loops below index
    # ``shape[0]`` / ``shape[1]``.
    if issparse(matrix) and getattr(matrix, "ndim", 2) == 2:
        fmt = matrix.format

    if fmt == "coo":
        for r, c, m in zip(matrix.row, matrix.col, matrix.data):
            yield r, c, m
    elif fmt == "csc":
        indptr = matrix.indptr
        for c in range(matrix.shape[1]):
            # indptr[c]:indptr[c + 1] delimits column c inside indices/data.
            start, stop = indptr[c], indptr[c + 1]
            for r, m in zip(matrix.indices[start:stop], matrix.data[start:stop]):
                yield r, c, m
    elif fmt == "csr":
        indptr = matrix.indptr
        for r in range(matrix.shape[0]):
            # indptr[r]:indptr[r + 1] delimits row r inside indices/data.
            start, stop = indptr[r], indptr[r + 1]
            for c, m in zip(matrix.indices[start:stop], matrix.data[start:stop]):
                yield r, c, m
    elif fmt == "lil":
        # rows[r] holds the column indices of row r, data[r] the matching values.
        for r, (cols, vals) in enumerate(zip(matrix.rows, matrix.data)):
            for c, d in zip(cols, vals):
                yield r, c, d
    else:
        raise NotImplementedError("The iterator for this sparse matrix has not been implemented")