Revert "go back to single-threaded (because of chromedriver)"

This reverts commit 56530c96c7.
master
emamaker 2022-02-05 09:43:26 +01:00
parent 3664838580
commit df15a82371
3 changed files with 39 additions and 22 deletions

4
.gitignore vendored
View File

@ -1,4 +1,4 @@
__pycache__ __pycache__
chromedriver *-dir*
tor-data-dir*
tor tor
chromedriver

View File

@ -1,6 +1,5 @@
import os import os
import re import re
import stem
import shutil import shutil
import socket import socket
import requests import requests
@ -9,28 +8,32 @@ import undetected_chromedriver as uc
import threading import threading
data_directory = "tor-data-dir"
def find_free_port(): def find_free_port():
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(('', 0)) s.bind(('', 0))
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
return s.getsockname()[1] return s.getsockname()[1]
def get_tor_data_dir():
return './tor-data-dir-'+(str(threading.get_native_id()))
def get_chrome_data_dir():
return './chrome-data-dir-'+(str(threading.get_native_id()))
def create_tor_proxy(socks_port, control_port): def create_tor_proxy(socks_port, control_port):
import stem.process as process
global data_directory global data_directory
TOR_PATH = os.environ['TOR_PATH'] TOR_PATH = os.environ['TOR_PATH']
data_directory += (str(threading.get_native_id()))
try: try:
tor_process = stem.process.launch_tor_with_config( tor_process = process.launch_tor_with_config(
config = { config = {
'SocksPort': str(socks_port), 'SocksPort': str(socks_port),
'ControlPort' : str(control_port), 'ControlPort' : str(control_port),
'MaxCircuitDirtiness' : '300', 'MaxCircuitDirtiness' : '300',
'DataDirectory' : data_directory 'DataDirectory' : get_tor_data_dir()
}, },
init_msg_handler = lambda line: print(line) if re.search('Bootstrapped', line) else False, init_msg_handler = lambda line: print(line) if re.search('Bootstrapped', line) else False,
tor_cmd = TOR_PATH tor_cmd = TOR_PATH
@ -44,15 +47,16 @@ def create_tor_proxy(socks_port, control_port):
return tor_process return tor_process
def start_browser(use_tor=False, headless=False): def start_browser(use_tor=False, headless=False):
options = uc.ChromeOptions() options = uc.ChromeOptions()
if headless: if headless:
options.add_argument('--disable-gpu') options.add_argument('--disable-gpu')
options.add_argument('--headless')
options.add_argument('--no-first-run') options.add_argument('--no-first-run')
options.add_argument('--password-store=basic') options.add_argument('--password-store=basic')
options.add_argument('--start-maximized') options.add_argument('--start-maximized')
# options.add_argument('--user-data-dir='+get_chrome_data_dir())
tor_process = None tor_process = None
@ -67,21 +71,22 @@ def start_browser(use_tor=False, headless=False):
else: else:
proxies = [] proxies = []
driver=uc.Chrome(options=options)
ip = requests.get("http://httpbin.org/ip", proxies=proxies).json()["origin"] ip = requests.get("http://httpbin.org/ip", proxies=proxies).json()["origin"]
print (f'IP is {ip}') print (f'IP is {ip}')
driver = uc.Chrome(options=options)
return driver, tor_process return driver, tor_process
def close_browser(driver, tor_process): def close_browser(driver, tor_process):
if tor_process: if tor_process:
tor_process.kill() tor_process.kill()
shutil.rmtree(data_directory) shutil.rmtree(get_tor_data_dir())
shutil.rmtree(get_chrome_data_dir())
try: try:
driver.close() driver.close()
except: except:
print("[INFO] Undetected chromedriver threw the usual exception while closing, exiting") print("[INFO] Undetected chromedriver threw the usual exception while closing, exiting")

View File

@ -1,9 +1,7 @@
# Use undetected-chromedriver together with stem to use the tor network # Use undetected-chromedriver together with stem to use the tor network
import undetected_chromedriver as uc
from tbselenium.utils import launch_tbb_tor_with_stem
import time import time
import random import random
import os
from selenium.webdriver.support.ui import Select from selenium.webdriver.support.ui import Select
import selenium.webdriver as webdriver import selenium.webdriver as webdriver
@ -108,11 +106,25 @@ def click(driver, by, desc, timeout):
def launch_and_visit(use_tor, page_url, headless=False): def launch_and_visit(use_tor, page_url, headless=False):
driver, tor_process = browser_manager.start_browser(use_tor=use_tor, headless=headless) driver, tor_process = browser_manager.start_browser(use_tor=use_tor, headless=headless)
try:
visit_page(driver, page_url) visit_page(driver, page_url)
except Exception as e:
print("Unknown error, exiting. Error log:", e)
time.sleep(5) time.sleep(5)
browser_manager.close_browser(driver, tor_process) browser_manager.close_browser(driver, tor_process)
if __name__ == "__main__": if __name__ == "__main__":
launch_and_visit(use_tor=True, page_url='https://giangillorossi.altervista.org', headless=False) while True:
threads = []
for i in range(0, 5):
t1 = threading.Thread(target=launch_and_visit, args=(True, 'https://giangillorossi.altervista.org', True))
t1.start()
threads.append(t1)
for t in threads:
t.join()