From df15a8237185e9419f85a102cc90d746822b1d10 Mon Sep 17 00:00:00 2001 From: emamaker Date: Sat, 5 Feb 2022 09:43:26 +0100 Subject: [PATCH] Revert "go back to single-threaded (because of chromedriver)" This reverts commit 56530c96c729e90d166a2d6dcf074dd890c3d5ba. --- .gitignore | 6 +++--- browser_manager.py | 33 +++++++++++++++++++-------------- page_viewer.py | 22 +++++++++++++++++----- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 24d83a9..3490fbd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ __pycache__ -chromedriver -tor-data-dir* -tor \ No newline at end of file +*-dir* +tor +chromedriver \ No newline at end of file diff --git a/browser_manager.py b/browser_manager.py index dde2cfd..a3f2031 100644 --- a/browser_manager.py +++ b/browser_manager.py @@ -1,6 +1,5 @@ import os import re -import stem import shutil import socket import requests @@ -9,28 +8,32 @@ import undetected_chromedriver as uc import threading -data_directory = "tor-data-dir" - def find_free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: s.bind(('', 0)) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) return s.getsockname()[1] +def get_tor_data_dir(): + return './tor-data-dir-'+(str(threading.get_native_id())) + +def get_chrome_data_dir(): + return './chrome-data-dir-'+(str(threading.get_native_id())) + def create_tor_proxy(socks_port, control_port): + import stem.process as process global data_directory + TOR_PATH = os.environ['TOR_PATH'] - data_directory += (str(threading.get_native_id())) - try: - tor_process = stem.process.launch_tor_with_config( + tor_process = process.launch_tor_with_config( config = { 'SocksPort': str(socks_port), 'ControlPort' : str(control_port), 'MaxCircuitDirtiness' : '300', - 'DataDirectory' : data_directory + 'DataDirectory' : get_tor_data_dir() }, init_msg_handler = lambda line: print(line) if re.search('Bootstrapped', line) else False, tor_cmd = TOR_PATH @@ -44,15 +47,16 @@ def create_tor_proxy(socks_port, control_port): return tor_process def start_browser(use_tor=False, headless=False): - options = uc.ChromeOptions() if headless: options.add_argument('--disable-gpu') + options.add_argument('--headless') options.add_argument('--no-first-run') options.add_argument('--password-store=basic') options.add_argument('--start-maximized') + # options.add_argument('--user-data-dir='+get_chrome_data_dir()) tor_process = None @@ -67,21 +71,22 @@ def start_browser(use_tor=False, headless=False): else: proxies = [] + driver=uc.Chrome(options=options) + ip = requests.get("http://httpbin.org/ip", proxies=proxies).json()["origin"] print (f'IP is {ip}') - - driver = uc.Chrome(options=options) - return driver, tor_process + return driver, tor_process def close_browser(driver, tor_process): if tor_process: tor_process.kill() - shutil.rmtree(data_directory) + shutil.rmtree(get_tor_data_dir()) + + shutil.rmtree(get_chrome_data_dir()) try: driver.close() except: - print("[INFO] Undetected chromedriver threw the usual exception while closing, exiting") - + print("[INFO] Undetected chromedriver threw the usual exception while closing, exiting") \ No newline at end of file diff --git a/page_viewer.py b/page_viewer.py index 3951820..2b0fb69 100644 --- a/page_viewer.py +++ b/page_viewer.py @@ -1,9 +1,7 @@ # Use undetected-chromedriver together with stem to use the tor network -import undetected_chromedriver as uc -from tbselenium.utils import launch_tbb_tor_with_stem - import time import random +import os from selenium.webdriver.support.ui import Select import selenium.webdriver as webdriver @@ -108,11 +106,25 @@ def click(driver, by, desc, timeout): def launch_and_visit(use_tor, page_url, headless=False): driver, tor_process = browser_manager.start_browser(use_tor=use_tor, headless=headless) - visit_page(driver, page_url) + + try: + visit_page(driver, page_url) + except Exception as e: + print("Unknown error, exiting. Error log:", e) time.sleep(5) browser_manager.close_browser(driver, tor_process) if __name__ == "__main__": - launch_and_visit(use_tor=True, page_url='https://giangillorossi.altervista.org', headless=False) \ No newline at end of file + while True: + threads = [] + + for i in range(0, 5): + t1 = threading.Thread(target=launch_and_visit, args=(True, 'https://giangillorossi.altervista.org', True)) + t1.start() + + threads.append(t1) + + for t in threads: + t.join()