Numpy and 16-bit PGM Numpy and 16-bit PGM python python

Numpy and 16-bit PGM


import re

import numpy


def read_pgm(filename, byteorder='>'):
    """Return image data from a raw (binary, "P5") PGM file as a numpy array.

    Format specification: http://netpbm.sourceforge.net/doc/pgm.html

    Parameters
    ----------
    filename : str
        Path of the PGM file to read.
    byteorder : str
        Byte-order prefix used for 16-bit samples: '>' (big-endian, the
        default) or '<' (little-endian).  Ignored when maxval < 256.

    Returns
    -------
    numpy.ndarray
        Array of shape (height, width); dtype is 'u1' when maxval < 256,
        otherwise ``byteorder + 'u2'``.  The array is a view on the bytes
        read from the file (``numpy.frombuffer``).

    Raises
    ------
    ValueError
        If the file does not start with a raw PGM header, or if numpy finds
        fewer than width * height samples after the header.
    """
    with open(filename, 'rb') as f:
        raw = f.read()

    # Raw byte literals: \s and \d must reach the regex engine untouched.
    # In a non-raw literal they are invalid string escapes, which recent
    # Python versions warn about.
    match = re.search(
        br"(^P5\s(?:\s*#.*[\r\n])*"
        br"(\d+)\s(?:\s*#.*[\r\n])*"
        br"(\d+)\s(?:\s*#.*[\r\n])*"
        br"(\d+)\s(?:\s*#.*[\r\n]\s)*)", raw)
    if match is None:
        raise ValueError("Not a raw PGM file: '%s'" % filename)

    # Group 1 is the whole header; its length is the offset of the first
    # sample in the file.
    header, width, height, maxval = match.groups()
    width = int(width)
    height = int(height)
    sample_type = 'u1' if int(maxval) < 256 else byteorder + 'u2'

    return numpy.frombuffer(raw,
                            dtype=sample_type,
                            count=width * height,
                            offset=len(header)
                            ).reshape((height, width))


if __name__ == "__main__":
    from matplotlib import pyplot
    image = read_pgm("foo.pgm", byteorder='<')
    pyplot.imshow(image, pyplot.cm.gray)
    pyplot.show()


I'm not terribly familiar with the PGM format, but generally speaking you'd just use numpy.fromfile. fromfile will start at whatever position the file pointer you pass to it is at, so you can simply seek (or read) to the end of the header, and then use fromfile to read the rest in.

You'll need to use infile.readline() instead of next(infile).

import numpy as np

# Read a binary PGM whose entire header ("P5 width height maxval") is on the
# first line, then load the remaining bytes as 16-bit samples.
#
# The file is opened in binary mode: the payload is raw bytes, and text mode
# would try to decode it (Python 3) or translate newlines (Windows).
with open('foo.pgm', 'rb') as infile:
    # readline() leaves the file position right after the header, which is
    # where np.fromfile starts reading.
    header = infile.readline()
    width, height, maxval = [int(item) for item in header.split()[1:]]
    # np.uint16 uses the machine's native byte order.
    # NOTE(review): the PGM specification stores 16-bit samples big-endian;
    # confirm which order this particular file uses.
    image = np.fromfile(infile, dtype=np.uint16).reshape((height, width))

On a side note, the "foo.pgm" file you pointed to in your comment appears to specify the wrong number of rows in the header.

If you're going to be reading in a lot of files that potentially have that problem, you can just pad the array with zeros or truncate it, like this.

import numpy as np

# Read a binary PGM whose entire header ("P5 width height maxval") is on the
# first line, tolerating files whose payload does not match the size that the
# header announces: short data is zero-padded, surplus data is dropped.
#
# Binary mode, because the payload is raw bytes rather than text.
with open('foo.pgm', 'rb') as infile:
    # readline() rather than next(infile): iterating a file may read ahead
    # past the header, so np.fromfile would not start at the first sample.
    header = infile.readline()
    width, height, maxval = [int(item) for item in header.split()[1:]]
    # np.uint16 uses the machine's native byte order.
    # NOTE(review): the PGM specification stores 16-bit samples big-endian;
    # confirm which order this particular file uses.
    image = np.fromfile(infile, dtype=np.uint16)

if image.size < width * height:
    # Fewer samples than announced: fill the missing tail with zeros.
    pad = np.zeros(width * height - image.size, dtype=np.uint16)
    image = np.hstack([image, pad])
if image.size > width * height:
    # More samples than announced: keep only the first width * height.
    image = image[:width * height]
image = image.reshape((height, width))


Indeed, the 'string' after the header is binary data in your file. I solved that below (found the following: ndarray: [2047 2047 2047 ..., 540 539 539]) but there is another problem: the file is not long enough; it contains only 289872 numbers instead of 640*480...

I am terribly sorry for my exaggeration in making a class for it...

import numpy as np


class PGM(object):
    """A PGM image whose whole header sits on the first line of the file.

    The first line must read ``<type> <width> <height> <maxval>``; everything
    after it is pixel data, either ASCII numbers (type ``P2``) or raw binary
    samples (any other type, normally ``P5``).

    Parameters
    ----------
    filepath : str
        Path of the PGM file.
    byteorder : str
        numpy byte-order prefix for 16-bit binary samples.  The default '='
        (native) keeps the behaviour of reading with a plain 16-bit dtype;
        pass '>' for files that follow the PGM specification (big-endian)
        or '<' for little-endian files.

    Raises
    ------
    ValueError
        If the header line does not have exactly four fields, if a field or
        an ASCII sample is not an integer, or if a sample exceeds maxval.
    """

    def __init__(self, filepath, byteorder='='):
        # Binary mode: the payload may be raw bytes.  Reading the remainder
        # with a single read() matters, because a binary payload can contain
        # newline bytes and must not be split into "lines".
        with open(filepath, 'rb') as f:
            # suppose all header info in first line:
            info = f.readline().split()
            payload = f.read()
        if len(info) != 4:
            raise ValueError(
                "expected 'type width height maxval' on the first line")

        # Magic number of the file, as text (e.g. 'P5').
        self.type = info[0].decode('ascii')
        # Image dimensions and the largest sample value the header announces.
        self.width, self.height, self.maxval = [int(v) for v in info[1:]]
        size = self.width * self.height

        # Samples are unsigned; a signed dtype would turn large values
        # negative.
        if self.maxval > 255:
            dt = np.dtype(byteorder + 'u2')
        else:
            dt = np.dtype('u1')

        if self.type == 'P2':
            # ASCII samples separated by arbitrary whitespace.
            flat = np.array([int(tok) for tok in payload.split()], dtype=dt)
        else:
            # Raw samples.  Ignore a trailing partial sample, and copy so the
            # array is writable.
            count = len(payload) // dt.itemsize
            flat = np.frombuffer(payload, dtype=dt, count=count).copy()

        if flat.size < size:
            # The file is shorter than the header announces (this is the case
            # for the 'db.tt/phaR587 (foo.pgm)' sample): pad with zeros of the
            # same dtype so the samples are not upcast to float.
            filler = np.zeros(size - flat.size, dtype=dt)
            flat = np.concatenate((flat, filler))

        # The file stores rows of `width` samples.  Transposing gives an
        # array of shape (width, height), indexed as data[x, y].
        self.data = flat[:size].reshape((self.height, self.width)).T

        if self.data.size and self.data.max() > self.maxval:
            raise ValueError("pixel data exceeds maxval %d" % self.maxval)

        # Cache for the PIL image built on first access.
        self._img = None

    def get_img(self):
        """Return the pixel data as a PIL image, building it on first use."""
        if self._img is None:
            # only executed once
            try:
                from PIL import Image
            except ImportError:
                import Image  # legacy PIL module layout

            size = (self.width, self.height)
            if self.data.dtype.itemsize == 1:
                mode, pixel_type = 'L', 'u1'
            else:
                # 'I;16' is PIL's little-endian unsigned 16-bit mode.
                mode, pixel_type = 'I;16', '<u2'
            # PIL expects rows of `width` samples, so undo the transpose and
            # hand over the bytes in C order.
            raster = np.ascontiguousarray(self.data.T, dtype=pixel_type)
            self._img = Image.frombuffer(
                mode, size, raster.tobytes(), 'raw', mode, 0, 1)
        return self._img

    Image = property(get_img)


if __name__ == "__main__":
    mypgm = PGM('foo.pgm')
    mypgm.Image

edit: great Idea from Joe Kington to fill image with zeros!