How can I take a screenshot/image of a website using Python?

Here is a simple solution using WebKit: http://webscraping.com/blog/Webpage-screenshots-with-webkit/

import sys
import time

from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *


class Screenshot(QWebView):
    """Load a web page in a QtWebKit view and save a rendering of it."""

    def __init__(self):
        # The QApplication is created before the QWebView initialiser runs;
        # Qt widgets need an application object to exist first.
        self.app = QApplication(sys.argv)
        QWebView.__init__(self)
        self._loaded = False
        self.loadFinished.connect(self._loadFinished)

    def capture(self, url, output_file):
        """Load *url*, render the whole page and save it to *output_file*.

        The viewport is resized to the size of the page contents so the
        image covers the full page rather than only the visible window.
        """
        self.load(QUrl(url))
        self.wait_load()

        # Set the viewport to the full web page size.
        frame = self.page().mainFrame()
        self.page().setViewportSize(frame.contentsSize())

        # Paint the frame into an off-screen image.
        image = QImage(self.page().viewportSize(), QImage.Format_ARGB32)
        painter = QPainter(image)
        frame.render(painter)
        painter.end()

        # Single-argument call form prints the same text on Python 2 and 3.
        print('saving %s' % output_file)
        image.save(output_file)

    def wait_load(self, delay=0):
        """Process application events until the page has finished loading.

        *delay* is the number of seconds to sleep between event-loop passes.
        The loaded flag is reset afterwards so the instance can be reused
        for another capture.
        """
        while not self._loaded:
            self.app.processEvents()
            time.sleep(delay)
        self._loaded = False

    def _loadFinished(self, result):
        """Slot for ``loadFinished``; *result* is not inspected."""
        self._loaded = True


s = Screenshot()
s.capture('http://webscraping.com', 'website.png')
s.capture('http://webscraping.com/blog', 'blog.png')

python screenshot webpage backend

Here is my solution, put together with help from various sources. It captures the full web page, optionally crops the capture, and can also generate a thumbnail from the cropped image. The requirements are as follows:

Requirements:

Install NodeJS
Using Node's package manager install phantomjs: npm -g install phantomjs
Install selenium (in your virtualenv, if you are using that)
Install ImageMagick
Add phantomjs to system path (on windows)

import os
from subprocess import Popen, PIPE


def abspath(*p):
    """Join the given path components and return the absolute path."""
    return os.path.abspath(os.path.join(*p))


ROOT = abspath(os.path.dirname(__file__))


def execute_command(command):
    """Run *command* and raise if it prints anything or exits non-zero.

    *command* may be an argument list (run directly, without a shell) or,
    for backward compatibility, a single string (run through the shell).
    Prefer the list form: it is safe for paths containing spaces or shell
    metacharacters.

    Raises:
        Exception: if the command writes non-whitespace output to stdout,
            or finishes with a non-zero exit status.
    """
    use_shell = not isinstance(command, (list, tuple))
    process = Popen(command, shell=use_shell, stdout=PIPE)
    # communicate() drains stdout and waits for the child to exit, so the
    # exit status is available and no unreaped process is left behind.
    result, _ = process.communicate()
    if len(result) > 0 and not result.isspace():
        raise Exception(result)
    if process.returncode != 0:
        raise Exception(
            'command exited with status %s: %r' % (process.returncode, command)
        )


def do_screen_capturing(url, screen_path, width, height):
    """Open *url* in PhantomJS and save a screenshot to *screen_path*.

    The window is resized to *width* x *height* only when both are truthy.
    """
    # Imported here so the ImageMagick helpers and argument validation stay
    # usable when selenium is not installed.
    from selenium import webdriver

    print("Capturing screen..")
    # The driver saves its service log file in the current directory.
    # To store the log elsewhere, initialize the driver as
    # webdriver.PhantomJS(service_log_path='/var/log/phantomjs/ghostdriver.log')
    driver = webdriver.PhantomJS()
    try:
        driver.set_script_timeout(30)
        if width and height:
            driver.set_window_size(width, height)
        driver.get(url)
        driver.save_screenshot(screen_path)
    finally:
        # Always shut the browser process down, even if the capture failed.
        driver.quit()


def do_crop(params):
    """Crop the captured image with ImageMagick's ``convert``.

    *params* must provide ``screen_path`` (input), ``crop_path`` (output),
    ``width`` and ``height``; the crop is anchored at the top-left corner.
    """
    print("Croping captured image..")
    command = [
        'convert',
        params['screen_path'],
        '-crop', '%sx%s+0+0' % (params['width'], params['height']),
        params['crop_path'],
    ]
    execute_command(command)


def do_thumbnail(params):
    """Generate a thumbnail from the cropped image with ``convert``.

    *params* must provide ``crop_path`` (input), ``thumbnail_path``
    (output), ``width`` and ``height``.
    """
    print("Generating thumbnail from croped captured image..")
    command = [
        'convert',
        params['crop_path'],
        '-filter', 'Lanczos',
        '-thumbnail', '%sx%s' % (params['width'], params['height']),
        params['thumbnail_path'],
    ]
    execute_command(command)


def get_screen_shot(**kwargs):
    """Capture a web page and optionally crop it and build a thumbnail.

    Keyword arguments:
        url: page to capture (required).
        width, height: screen size to capture (default 1024 x 768).
        filename: file name of the capture, e.g. ``screen.png``.
        path: directory to store the images in (default: ``ROOT``).
        crop: crop the captured screen (default False).
        crop_width, crop_height: crop size (default: width, height).
        crop_replace: the cropped image overwrites the original capture.
        thumbnail: generate a thumbnail; requires ``crop=True``.
        thumbnail_width, thumbnail_height: thumbnail size
            (default: width, height).
        thumbnail_replace: the thumbnail overwrites the cropped image.

    Returns:
        A ``(screen_path, crop_path, thumbnail_path)`` tuple. Paths for
        steps that were skipped or that replace their input are equal to
        the path of the preceding image.

    Raises:
        KeyError: if ``url`` is missing.
        ValueError: if ``thumbnail`` is requested without ``crop``.
    """
    url = kwargs['url']
    width = int(kwargs.get('width', 1024))
    height = int(kwargs.get('height', 768))
    filename = kwargs.get('filename', 'screen.png')
    path = kwargs.get('path', ROOT)
    crop = kwargs.get('crop', False)
    crop_width = int(kwargs.get('crop_width', width))
    crop_height = int(kwargs.get('crop_height', height))
    crop_replace = kwargs.get('crop_replace', False)
    thumbnail = kwargs.get('thumbnail', False)
    thumbnail_width = int(kwargs.get('thumbnail_width', width))
    thumbnail_height = int(kwargs.get('thumbnail_height', height))
    thumbnail_replace = kwargs.get('thumbnail_replace', False)

    screen_path = abspath(path, filename)
    crop_path = thumbnail_path = screen_path

    # Validate before launching the browser so bad arguments fail fast.
    if thumbnail and not crop:
        raise ValueError(
            'Thumnail generation requires crop image, set crop=True'
        )

    do_screen_capturing(url, screen_path, width, height)

    if crop:
        if not crop_replace:
            crop_path = abspath(path, 'crop_' + filename)
        params = {
            'width': crop_width, 'height': crop_height,
            'crop_path': crop_path, 'screen_path': screen_path,
        }
        do_crop(params)

        if thumbnail:
            if not thumbnail_replace:
                thumbnail_path = abspath(path, 'thumbnail_' + filename)
            params = {
                'width': thumbnail_width, 'height': thumbnail_height,
                'thumbnail_path': thumbnail_path, 'crop_path': crop_path,
            }
            do_thumbnail(params)

    return screen_path, crop_path, thumbnail_path


if __name__ == '__main__':
    # Requirements:
    #   Install NodeJS
    #   Using Node's package manager install phantomjs: npm -g install phantomjs
    #   install selenium (in your virtualenv, if you are using that)
    #   install imageMagick
    #   add phantomjs to system path (on windows)
    url = 'http://stackoverflow.com/questions/1197172/how-can-i-take-a-screenshot-image-of-a-website-using-python'
    screen_path, crop_path, thumbnail_path = get_screen_shot(
        url=url, filename='sof.png',
        crop=True, crop_replace=False,
        thumbnail=True, thumbnail_replace=False,
        thumbnail_width=200, thumbnail_height=150,
    )

These are the generated images:

python screenshot webpage backend

You can do this using Selenium:

from selenium import webdriver

# Name (or path) of the ChromeDriver executable handed to the driver.
DRIVER = 'chromedriver'

driver = webdriver.Chrome(DRIVER)
try:
    driver.get('https://www.spotify.com')
    # The return value of save_screenshot is kept in `screenshot`.
    screenshot = driver.save_screenshot('my_screenshot.png')
finally:
    # Close the browser even if loading the page or saving the image fails.
    driver.quit()

https://sites.google.com/a/chromium.org/chromedriver/getting-started

CodeHunter

How can I take a screenshot/image of a website using Python?

Recent Posts

How can I color dots in a xy scatterplot according to column value?

How to update a claim in ASP.NET Identity?

What does {0} mean when initializing an object?

Accessing members of items in a JSONArray with Java

How to log SQL statements in Spring Boot?

Powershell Get-WebSite name parameter is ignored

How to detect scroll to bottom of html element

Java synchronized method

How to test controllers with CodeIgniter?

Detect Visual Composer

Matplotlib: Specify format of floats for tick labels

Rails join a list of strings with commas and "and" before the last