Revert "go back to single-threaded (because of chromedriver)"

This reverts commit 56530c96c7.
master
emamaker 2022-02-05 09:43:26 +01:00
parent 3664838580
commit df15a82371
3 changed files with 39 additions and 22 deletions

4
.gitignore vendored
View File

@ -1,4 +1,4 @@
__pycache__
chromedriver
tor-data-dir*
*-dir*
tor
chromedriver

View File

@ -1,6 +1,5 @@
import os
import re
import stem
import shutil
import socket
import requests
@ -9,28 +8,32 @@ import undetected_chromedriver as uc
import threading
data_directory = "tor-data-dir"
def find_free_port():
    """Ask the operating system for an unused TCP port and return its number.

    A temporary IPv4 TCP socket is bound to port 0, which lets the OS choose
    the port. The socket is closed before the function returns, so the port
    is no longer reserved once the caller receives the number.

    Returns:
        int: the port number the OS assigned to the temporary socket.
    """
    # Socket objects are context managers, so the socket is closed on exit
    # without needing a contextlib.closing() wrapper.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('', 0))
        # Statement order is kept exactly as in the original: the option is
        # set after bind().
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        return s.getsockname()[1]
def get_tor_data_dir():
    """Return the relative path of the Tor data directory for the calling thread.

    The path is './tor-data-dir-' followed by the native id of the thread
    that calls this function, so different threads get different paths.

    Returns:
        str: the per-thread Tor data directory path.
    """
    return f'./tor-data-dir-{threading.get_native_id()}'
def get_chrome_data_dir():
    """Return the relative path of the Chrome data directory for the calling thread.

    The path is './chrome-data-dir-' followed by the native id of the thread
    that calls this function, so different threads get different paths.

    Returns:
        str: the per-thread Chrome data directory path.
    """
    return f'./chrome-data-dir-{threading.get_native_id()}'
def create_tor_proxy(socks_port, control_port):
import stem.process as process
global data_directory
TOR_PATH = os.environ['TOR_PATH']
data_directory += (str(threading.get_native_id()))
try:
tor_process = stem.process.launch_tor_with_config(
tor_process = process.launch_tor_with_config(
config = {
'SocksPort': str(socks_port),
'ControlPort' : str(control_port),
'MaxCircuitDirtiness' : '300',
'DataDirectory' : data_directory
'DataDirectory' : get_tor_data_dir()
},
init_msg_handler = lambda line: print(line) if re.search('Bootstrapped', line) else False,
tor_cmd = TOR_PATH
@ -44,15 +47,16 @@ def create_tor_proxy(socks_port, control_port):
return tor_process
def start_browser(use_tor=False, headless=False):
options = uc.ChromeOptions()
if headless:
options.add_argument('--disable-gpu')
options.add_argument('--headless')
options.add_argument('--no-first-run')
options.add_argument('--password-store=basic')
options.add_argument('--start-maximized')
# options.add_argument('--user-data-dir='+get_chrome_data_dir())
tor_process = None
@ -67,21 +71,22 @@ def start_browser(use_tor=False, headless=False):
else:
proxies = []
driver=uc.Chrome(options=options)
ip = requests.get("http://httpbin.org/ip", proxies=proxies).json()["origin"]
print (f'IP is {ip}')
driver = uc.Chrome(options=options)
return driver, tor_process
# Tear down one browser session: kill the Tor process when one was passed in,
# delete data directories, then close the driver.
# NOTE(review): this is a diff rendering with indentation and +/- markers
# stripped, so which statements are nested under `if tor_process:` cannot be
# confirmed from this view.
def close_browser(driver, tor_process):
if tor_process:
tor_process.kill()
# NOTE(review): presumably the pre-change (removed) side of the diff,
# superseded by the two rmtree calls that follow — confirm.
shutil.rmtree(data_directory)
shutil.rmtree(get_tor_data_dir())
# NOTE(review): the '--user-data-dir=' option in start_browser is commented
# out, so this directory may not exist when rmtree runs — verify.
shutil.rmtree(get_chrome_data_dir())
try:
driver.close()
# Bare except: any exception raised by driver.close() is swallowed and only
# the message below is printed.
except:
print("[INFO] Undetected chromedriver threw the usual exception while closing, exiting")

View File

@ -1,9 +1,7 @@
# Use undetected-chromedriver together with stem to use the tor network
import undetected_chromedriver as uc
from tbselenium.utils import launch_tbb_tor_with_stem
import time
import random
import os
from selenium.webdriver.support.ui import Select
import selenium.webdriver as webdriver
@ -108,11 +106,25 @@ def click(driver, by, desc, timeout):
# Start a browser through browser_manager, visit page_url, wait five seconds,
# then close the browser (and the Tor process returned by start_browser).
# NOTE(review): this is a diff rendering with indentation and +/- markers
# stripped; the nesting of the statements after the `except` cannot be
# confirmed from this view.
def launch_and_visit(use_tor, page_url, headless=False):
driver, tor_process = browser_manager.start_browser(use_tor=use_tor, headless=headless)
# NOTE(review): presumably the pre-change (removed) side of the diff; the
# same call is repeated inside the try block below — confirm.
visit_page(driver, page_url)
try:
visit_page(driver, page_url)
# Any Exception from visit_page is printed rather than propagated.
except Exception as e:
print("Unknown error, exiting. Error log:", e)
time.sleep(5)
browser_manager.close_browser(driver, tor_process)
# Script entry point.
# NOTE(review): this is a diff rendering with indentation and +/- markers
# stripped; the single call directly below is presumably the pre-change
# (removed) side and the `while True:` loop the post-change side — confirm.
if __name__ == "__main__":
launch_and_visit(use_tor=True, page_url='https://giangillorossi.altervista.org', headless=False)
# Loops without an exit condition in the visible code.
while True:
threads = []
# Start five threads; each runs launch_and_visit with use_tor=True and
# headless=True against the same URL.
for i in range(0, 5):
t1 = threading.Thread(target=launch_and_visit, args=(True, 'https://giangillorossi.altervista.org', True))
t1.start()
threads.append(t1)
# Wait for every started thread to finish.
# NOTE(review): presumably this join loop sits inside the `while` so a new
# batch starts only after the previous one ends — nesting not visible here.
for t in threads:
t.join()