Selenium Threads: how to run multi-threaded browser with proxy ( python) Selenium Threads: how to run multi-threaded browser with proxy ( python) selenium selenium

Selenium Threads: how to run multi-threaded browser with proxy ( python)


(I personally think the problem is that when you start the program, each new thread goes through the text file from the beginning, because you aren't deleting the entries that were already used.)

I came across the same problem when I was doing the same thing you are doing now. I know you would rather have help with your own code, but I am in a hurry to test it and still want to help you ;), so here is code that works for me... There is even a task killer for Chrome (you just have to edit it for Firefox).

If I were you, I would start the threads after opening the file, because it looks like you are opening the same file and reading from the first line every time a thread starts.

import os
import threading
import time

from selenium import webdriver

# Number of browsers started before the script pauses and kills Chrome.
MAX_PROXIES_PER_BATCH = 10

links = [
    # Link you want to go to
]


def funk(xxx, website):
    """Open ``website`` in a Chrome instance that uses proxy ``xxx``.

    Args:
        xxx: Proxy address passed to Chrome's ``--proxy-server`` switch.
        website: URL to visit.

    Any exception raised while working with the page is reported with a
    short message; the driver is always shut down afterwards.
    """
    link = website
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--proxy-server=%s' % str(xxx))
    # chromedriver is expected to sit next to this script.
    chromedriver = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'chromedriver'
    )
    chrome = webdriver.Chrome(chromedriver, chrome_options=chrome_options)
    try:
        # Do stuff -- placeholder: load the page, then add your own steps.
        chrome.get(link)
    except Exception:
        print('exception')
    finally:
        # quit() ends the whole driver session; close() would only close
        # the current window and leave the driver process behind.
        chrome.quit()


# Initialised up front so the first comparison below does not raise
# NameError; the counter carries over from one link to the next.
number_of_used_proxies = 0

for link in links:
    # The file is opened once per link and read sequentially here, in the
    # main thread, so worker threads never re-read it from the first line.
    with open('proxies.txt') as f:
        for line in f:
            proxy = line.strip()  # drop the trailing newline
            if not proxy:
                continue

            if number_of_used_proxies >= MAX_PROXIES_PER_BATCH:
                # Give the running batch time to finish, then clean up
                # any browser that is still alive.
                time.sleep(100)
                # macOS command; on Windows use instead:
                #   os.system("taskkill /f /im chrome.exe")
                os.system("killall 'Google Chrome'")
                time.sleep(10)
                number_of_used_proxies = 0

            print(proxy)
            try:
                # Start the browser for this proxy 40 seconds from now.
                threading.Timer(40, funk, [proxy, link]).start()
            except Exception as e:
                print(e)
            time.sleep(1)
            number_of_used_proxies += 1

Hope it helps :)


Dominik Lašo captured it correctly - each thread processes the file from the beginning. Here's probably what it should look like:

import random
import threading
import time

from selenium import webdriver


def e(ip, port):
    """Open a Firefox window that goes through the SOCKS proxy ``ip:port``.

    Args:
        ip: Proxy host, as read from the proxy list.
        port: Proxy port as an integer.

    On success the browser stays open for a random 40-70 seconds. On any
    error a short message is printed. The driver is quit in both cases,
    provided it was actually created.
    """
    profile = webdriver.FirefoxProfile()
    profile.set_preference("network.proxy.type", 1)
    # Use the function's own parameters (the original referenced the
    # undefined names IP and PORT, which raised NameError).
    profile.set_preference("network.proxy.socks", ip)
    profile.set_preference("network.proxy.socks_port", port)

    driver = None
    try:
        driver = webdriver.Firefox(firefox_profile=profile)
        driver.get("http://www.whatsmyip.org/")
    except Exception:
        print("Proxy Connection Error")
    else:
        time.sleep(random.randint(40, 70))
    finally:
        # driver is still None when webdriver.Firefox() itself failed.
        if driver is not None:
            driver.quit()


my_threads = []

# Read the proxy list once, in the main thread, and hand each proxy to
# its own worker thread.
with open("sock2.txt", "r") as fd:
    for line in fd:
        line = line.strip()
        if not line:
            continue
        prox = line.split(":")
        ip = prox[0]
        port = int(prox[1])
        print('-> {}:{}'.format(ip, port))
        t = threading.Thread(target=e, args=(ip, port))
        t.start()
        my_threads.append(t)

# Wait for every browser thread to finish.
for t in my_threads:
    t.join()


vantuong: Here's how you can solve the problem with ThreadPoolExecutor.

Reference: https://docs.python.org/3/library/concurrent.futures.html

import concurrent.futures
import random
import time

from selenium import webdriver

# Upper bound on the number of browsers running at the same time.
MAX_WORKERS = 5


def get_proxys(data_file):
    """Read ``host:port`` lines from ``data_file``.

    Blank lines are skipped.

    Args:
        data_file: Path of the text file holding one proxy per line.

    Returns:
        A list of ``(ip, port)`` tuples, with ``port`` converted to int.
    """
    proxys = []
    with open(data_file, "r") as fd:
        for line in fd:
            line = line.strip()
            if not line:
                continue
            prox = line.split(":")
            ip = prox[0]
            port = int(prox[1])
            proxys.append((ip, port))
    return proxys


def e(ip, port):
    """Open a Firefox window that goes through the SOCKS proxy ``ip:port``.

    Args:
        ip: Proxy host.
        port: Proxy port as an integer.

    On success the browser stays open for a random 40-70 seconds. On any
    error a short message is printed. The driver is quit in both cases,
    provided it was actually created.
    """
    profile = webdriver.FirefoxProfile()
    profile.set_preference("network.proxy.type", 1)
    # Use the function's own parameters (the original referenced the
    # undefined names IP and PORT, which raised NameError).
    profile.set_preference("network.proxy.socks", ip)
    profile.set_preference("network.proxy.socks_port", port)

    driver = None
    try:
        driver = webdriver.Firefox(firefox_profile=profile)
        driver.get("http://www.whatsmyip.org/")
    except Exception:
        print("Proxy Connection Error")
    else:
        time.sleep(random.randint(40, 70))
    finally:
        # driver is still None when webdriver.Firefox() itself failed.
        if driver is not None:
            driver.quit()


with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    proxys = get_proxys('sock2.txt')
    # Map each submitted future back to the proxy it is working on.
    tasks = {executor.submit(e, proxy[0], proxy[1]): proxy for proxy in proxys}
    for task in concurrent.futures.as_completed(tasks):
        proxy = tasks[task]
        try:
            data = task.result()
        except Exception as exc:
            print('{} generated an exception: {}'.format(proxy, exc))
        else:
            # NOTE: e() handles connection errors itself, so a proxy that
            # printed "Proxy Connection Error" is still reported here.
            print('{} completed successfully'.format(proxy))

Fun exercise: Try playing around with different values of MAX_WORKERS.