How to download google image search results in Python



Use the Google Custom Search API for what you want to achieve. See @i08in's answer to "Python - Download Images from google Image search?": it has a great description, script samples and library references.
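
A minimal sketch of that approach with the Custom Search JSON API, assuming you have created an API key and a Programmable Search Engine ID with image search enabled (the API_KEY/CX values and the search_images helper below are placeholders for illustration, not part of the linked answer):

import requests

API_KEY = "YOUR_API_KEY"            # placeholder: your Google API key
CX = "YOUR_SEARCH_ENGINE_ID"        # placeholder: your Programmable Search Engine ID

def search_images(query, count=10):
    # the API returns at most 10 results per request
    resp = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={
            "key": API_KEY,
            "cx": CX,
            "q": query,
            "searchType": "image",
            "num": min(count, 10),
        },
    )
    resp.raise_for_status()
    return [item["link"] for item in resp.json().get("items", [])]

for i, url in enumerate(search_images("sad human faces")):
    # download each result; skip failures instead of aborting the whole run
    try:
        img = requests.get(url, timeout=10)
        img.raise_for_status()
        with open("image_%d.jpg" % i, "wb") as f:
            f.write(img.content)
    except requests.RequestException as exc:
        print("skipped %s: %s" % (url, exc))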


To download any number of images from Google image search using Selenium (the script below is written for Python 2, hence urllib2, xrange and the print statements):

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import json
import urllib2
import sys
import time

# adding path to geckodriver to the OS environment variable
# assuming that it is stored at the same path as this script
os.environ["PATH"] += os.pathsep + os.getcwd()
download_path = "dataset/"


def main():
    searchtext = sys.argv[1]  # the search query
    num_requested = int(sys.argv[2])  # number of images to download
    number_of_scrolls = num_requested / 400 + 1
    # number_of_scrolls * 400 images will be opened in the browser

    if not os.path.exists(download_path + searchtext.replace(" ", "_")):
        os.makedirs(download_path + searchtext.replace(" ", "_"))

    url = "https://www.google.co.in/search?q=" + searchtext + "&source=lnms&tbm=isch"
    driver = webdriver.Firefox()
    driver.get(url)

    headers = {}
    headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
    extensions = {"jpg", "jpeg", "png", "gif"}
    img_count = 0
    downloaded_img_count = 0

    for _ in xrange(number_of_scrolls):
        for __ in xrange(10):
            # multiple scrolls needed to show all 400 images
            driver.execute_script("window.scrollBy(0, 1000000)")
            time.sleep(0.2)
        # to load next 400 images
        time.sleep(0.5)
        try:
            driver.find_element_by_xpath("//input[@value='Show more results']").click()
        except Exception as e:
            print "Less images found:", e
            break

    # imges = driver.find_elements_by_xpath('//div[@class="rg_meta"]')  # not working anymore
    imges = driver.find_elements_by_xpath('//div[contains(@class,"rg_meta")]')
    print "Total images:", len(imges), "\n"

    for img in imges:
        img_count += 1
        img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
        img_type = json.loads(img.get_attribute('innerHTML'))["ity"]
        print "Downloading image", img_count, ": ", img_url
        try:
            if img_type not in extensions:
                img_type = "jpg"
            req = urllib2.Request(img_url, headers=headers)
            raw_img = urllib2.urlopen(req).read()
            f = open(download_path + searchtext.replace(" ", "_") + "/" + str(downloaded_img_count) + "." + img_type, "wb")
            f.write(raw_img)
            f.close()  # note the parentheses: without them the file is never closed
            downloaded_img_count += 1
        except Exception as e:
            print "Download failed:", e
        finally:
            print
        if downloaded_img_count >= num_requested:
            break

    print "Total downloaded: ", downloaded_img_count, "/", img_count
    driver.quit()


if __name__ == "__main__":
    main()

Full code is here.
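
For example, to grab 100 images for the query "sad human faces" (assuming the script is saved as download_images.py, geckodriver sits in the same directory, and it is run under Python 2):

python download_images.py "sad human faces" 100

The images are saved under dataset/sad_human_faces/.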


Make sure you install the icrawler library first:

pip install icrawler
from icrawler.builtin import GoogleImageCrawler

google_Crawler = GoogleImageCrawler(storage={'root_dir': r'write the name of the directory you want to save to here'})
google_Crawler.crawl(keyword='sad human faces', max_num=800)
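
If the download feels slow, icrawler also lets you tune the crawler's worker threads when constructing it; a small sketch, assuming the same query as above (the thread counts are just illustrative values, not recommendations from the library):

from icrawler.builtin import GoogleImageCrawler

google_Crawler = GoogleImageCrawler(
    feeder_threads=1,
    parser_threads=2,
    downloader_threads=4,  # more downloader threads usually helps most, since downloading dominates
    storage={'root_dir': r'write the name of the directory you want to save to here'})
google_Crawler.crawl(keyword='sad human faces', max_num=800)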