
Download Images from a list of URLs


  • Create a folder on your machine.

  • Place your text file of image URLs in the folder.

  • cd to that folder.

  • Use wget -i images.txt (a Python equivalent is sketched after this list).

  • You will find all your downloaded files in the folder.
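
If you would rather do the same thing from Python instead of wget, a minimal sketch is below; the file name images.txt and the fallback output name are assumptions.

    import os
    import requests

    # Read one URL per line from images.txt (the same file wget -i would use).
    with open('images.txt') as f:
        urls = [line.strip() for line in f if line.strip()]

    for url in urls:
        # Derive a file name from the URL path, ignoring any query string.
        file_name = os.path.basename(url.split('?')[0]) or 'image.jpg'
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()            # treat 4xx/5xx responses as errors
            with open(file_name, 'wb') as out:
                out.write(response.content)
        except requests.RequestException as e:
            print('Failed to download ' + url + ': ' + str(e))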


This still needs to be made into a function with error handling (a sketch of that refactor follows the snippet), but it works for repeatedly downloading images for image classification projects:

    import requests
    import pandas as pd

    urls = pd.read_csv('cat_urls.csv')   # save the url list as a dataframe
    rows = []
    for index, i in urls.iterrows():
        rows.append(i[-1])

    counter = 0
    for i in rows:
        file_name = 'cat' + str(counter) + '.jpg'
        print(file_name)
        response = requests.get(i)
        file = open(file_name, "wb")
        file.write(response.content)
        file.close()
        counter += 1
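
A possible refactor into a reusable function with basic error handling; the CSV path, the assumption that the URL sits in the last column, and the 'cat' file-name prefix are carried over from the snippet above.

    import requests
    import pandas as pd

    def download_images_from_csv(csv_path, prefix='cat', timeout=10):
        """Download every image whose URL is in the last column of csv_path."""
        urls = pd.read_csv(csv_path)
        errors = 0
        for counter, row in enumerate(urls.itertuples(index=False)):
            url = row[-1]                          # URL assumed to be the last column
            file_name = prefix + str(counter) + '.jpg'
            try:
                response = requests.get(url, timeout=timeout)
                response.raise_for_status()        # treat 4xx/5xx responses as errors
                with open(file_name, 'wb') as f:
                    f.write(response.content)
            except (requests.RequestException, OSError) as e:
                errors += 1
                print('Skipping ' + str(url) + ': ' + str(e))
        return errors

    # Example usage (hypothetical file name):
    # failed = download_images_from_csv('cat_urls.csv')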


    import os
    import time
    import sys
    import ssl
    import urllib
    from progressbar import ProgressBar

    def get_raw_html(url):
        version = (3, 0)
        curr_version = sys.version_info
        if curr_version >= version:     # If the current version of Python is 3.0 or above
            import urllib.request       # urllib library for extracting web pages
            try:
                headers = {}
                headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
                request = urllib.request.Request(url, headers=headers)
                resp = urllib.request.urlopen(request)
                respData = str(resp.read())
                return respData
            except Exception as e:
                print(str(e))
        else:                           # If the current version of Python is 2.x
            import urllib2
            try:
                headers = {}
                headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
                request = urllib2.Request(url, headers=headers)
                try:
                    response = urllib2.urlopen(request)
                except urllib2.URLError:    # Handle SSL certificate failures
                    context = ssl._create_unverified_context()
                    response = urllib2.urlopen(request, context=context)
                raw_html = response.read()
                return raw_html
            except Exception:
                return "Page Not found"

    def next_link(s):
        start_line = s.find('rg_di')
        if start_line == -1:    # If no links are found then give an error!
            end_quote = 0
            link = "no_links"
            return link, end_quote
        else:
            start_line = s.find('"class="rg_meta"')
            start_content = s.find('"ou"', start_line + 1)
            end_content = s.find(',"ow"', start_content + 1)
            content_raw = str(s[start_content + 6:end_content - 1])
            return content_raw, end_content

    def all_links(page):
        links = []
        while True:
            link, end_content = next_link(page)
            if link == "no_links":
                break
            else:
                links.append(link)      # Append all the links to the list named 'links'
                # time.sleep(0.1)       # A timer could be used to slow down the requests for image downloads
                page = page[end_content:]
        return links

    def download_images(links, search_keyword):
        choice = input("Do you want to save the links? [y]/[n]: ")
        if choice == 'y' or choice == 'Y':
            # Write all the links into a text file.
            f = open('links.txt', 'a')      # Open the text file called links.txt
            for link in links:
                f.write(str(link))
                f.write("\n")
            f.close()                       # Close the file
        num = input("Enter number of images to download (max 100): ")
        counter = 1
        errors = 0
        search_keyword = search_keyword.replace("%20", "_")
        directory = search_keyword + '/'
        if not os.path.isdir(directory):
            os.makedirs(directory)
        pbar = ProgressBar()
        for link in pbar(links):
            if counter <= int(num):
                file_extension = link.split(".")[-1]
                filename = directory + str(counter) + "." + file_extension
                # print("Downloading image: " + str(counter) + '/' + str(num))
                try:
                    urllib.request.urlretrieve(link, filename)
                except urllib.error.HTTPError as e:
                    errors += 1
                    # print("\nHTTPError on Image " + str(counter))
                except urllib.error.URLError as e:
                    errors += 1
                    # print("\nURLError on Image " + str(counter))
                except IOError:
                    errors += 1
                    # print("\nIOError on Image " + str(counter))
            counter += 1
        return errors

    def search():
        version = (3, 0)
        curr_version = sys.version_info
        if curr_version >= version:     # If the current version of Python is 3.0 or above
            import urllib.request       # urllib library for extracting web pages
        else:
            import urllib2              # If the current version of Python is 2.x
        search_keyword = input("Enter the search query: ")
        # Download image links
        links = []
        search_keyword = search_keyword.replace(" ", "%20")
        url = 'https://www.google.com/search?q=' + search_keyword + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
        raw_html = get_raw_html(url)
        links = links + all_links(raw_html)
        print("Total Image Links = " + str(len(links)))
        print("\n")
        errors = download_images(links, search_keyword)
        print("Download Complete.\n" + str(errors) + " errors while downloading.")

    search()