# Use undetected-chromedriver together with stem to use the tor network
"""Drive a browser to visit a page and click links or iframes found on it.

The browser (and, optionally, a Tor process) is created and torn down by the
project-local ``browser_manager`` module; this module only contains the page
interaction logic and a small thread supervisor used when run as a script.
"""

import os
import random
import threading
import time

import requests
import selenium.common.exceptions as sexceptions
import selenium.webdriver as webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

import browser_manager

# Links whose href contains any of these substrings are never clicked.
outside_website_scopes = [
    "blog.altervista.org",
    "en.altervista.org",
    "it.altervista.org",
    "pinterest",
    "facebook.com",
    "instagram.com",
    "iubenda.com",
    "twitter.com",
    "#",
]
ACCEPT_COOKIES_BTN_SEL = ".iubenda-cs-accept-btn"
# Upper bound on the number of click attempts performed per visit.
MAX_REDIRECTION = 15
LOOK_FOR_ADS = True


def visit_page(driver, url):
    """Load *url*, accept the cookie banner, then click around the page.

    Up to ``MAX_REDIRECTION`` passes are made. On each pass either a random
    in-scope link is clicked or, when ``LOOK_FOR_ADS`` is true and the random
    draw falls in the remaining 35%, a displayed iframe whose id contains
    ``"ad"`` is tried; the visit ends early as soon as such an iframe click
    succeeds. The driver is quit when the loop finishes.

    Args:
        driver: Selenium WebDriver instance to drive.
        url: Address of the page to open.

    Raises:
        selenium.common.exceptions.TimeoutException: if the cookie button
            does not become clickable within 10 seconds.
    """
    print(f"Visiting: {url}")
    driver.get(url)
    # Give the page time to finish loading before interacting with it.
    time.sleep(3)
    click(driver, By.CSS_SELECTOR, ACCEPT_COOKIES_BTN_SEL, 10)
    time.sleep(4)
    for _ in range(MAX_REDIRECTION):
        time.sleep(6)
        # Short-circuit keeps random.random() from being called at all when
        # LOOK_FOR_ADS is disabled, exactly as the original condition did.
        if not LOOK_FOR_ADS or random.random() < 0.65:
            # Click a random in-scope link; the outcome is ignored.
            try_to_click_elements(driver, get_possibile_webelements(driver))
        else:
            # Try the displayed iframes whose id contains "ad". If there are
            # none, or none of them can be clicked, this pass is skipped.
            if try_to_click_elements(driver, get_clickable_ads(driver)):
                time.sleep(4)
                break
    driver.quit()


def try_to_click_elements(driver, clickable_elements):
    """Click one randomly chosen element, retrying with others on failure.

    Elements that fail to click are removed from *clickable_elements* (the
    list is mutated in place) and another one is picked at random.

    Args:
        driver: Unused; kept for interface compatibility.
        clickable_elements: Mutable list of elements exposing ``click()``.

    Returns:
        True as soon as one click succeeds, False when the list is empty or
        every element failed.
    """
    # Check for emptiness *before* drawing an index: random.randint(0, -1)
    # raises ValueError, which previously aborted the whole visit.
    while clickable_elements:
        index = random.randint(0, len(clickable_elements) - 1)
        element = clickable_elements[index]
        try:
            print("trying to click element", element)
            element.click()
            return True
        except Exception:
            # Best effort: any click failure just discards this candidate.
            # Exception (not a bare except) lets KeyboardInterrupt through.
            print(element, "was not clickable, deleting and trying again")
            clickable_elements.pop(index)
    print("All the elements weren't clickable!")
    return False


def get_clickable_ads(driver):
    """Return the displayed iframes whose ``id`` attribute contains "ad".

    Args:
        driver: Selenium WebDriver instance to query.

    Returns:
        List of matching iframe elements (possibly empty).
    """
    id_ok_ads = []
    for frame in driver.find_elements_by_xpath('.//iframe'):
        # Guard against a missing id so the substring test cannot fail on
        # None. "google_ad" ids are already covered by the "ad" test.
        frame_id = frame.get_attribute('id') or ""
        if 'ad' in frame_id and frame.is_displayed():
            id_ok_ads.append(frame)
    print(id_ok_ads)
    return id_ok_ads


def get_possibile_webelements(driver):
    """Return the clickable ``<a>`` elements that stay inside the site.

    An anchor is kept when it has an ``href``, passes a short
    ``element_to_be_clickable`` wait, and its ``href`` contains none of the
    substrings in ``outside_website_scopes``.

    Args:
        driver: Selenium WebDriver instance to query.

    Returns:
        List of anchor elements (possibly empty).
    """
    clickable_elements = []
    for el in driver.find_elements_by_xpath('.//a'):
        href = el.get_attribute('href')
        if href is None:
            continue
        try:
            WebDriverWait(driver, 0.25).until(EC.element_to_be_clickable(el))
        except Exception:
            # Not clickable within the wait (or the check itself failed).
            continue
        if not any(scope in href for scope in outside_website_scopes):
            clickable_elements.append(el)
    return clickable_elements


def click(driver, by, desc, timeout):
    """Wait up to *timeout* seconds for an element to be clickable, then click.

    Args:
        driver: Selenium WebDriver instance.
        by: Locator strategy (e.g. ``By.CSS_SELECTOR``).
        desc: Locator value matching *by*.
        timeout: Maximum wait in seconds.

    Raises:
        selenium.common.exceptions.TimeoutException: if the element does not
            become clickable in time.
    """
    WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((by, desc))
    ).click()


def launch_and_visit(use_tor, page_url, headless=False):
    """Start a browser, run ``visit_page`` on *page_url*, then close it.

    Any ``Exception`` raised during the visit is printed rather than
    propagated. The browser is closed in all cases.

    Args:
        use_tor: Forwarded to ``browser_manager.start_browser``.
        page_url: Address of the page to visit.
        headless: Forwarded to ``browser_manager.start_browser``.
    """
    driver, tor_process = browser_manager.start_browser(
        use_tor=use_tor, headless=headless
    )
    try:
        visit_page(driver, page_url)
    except Exception as e:
        print("Unknown error, exiting. Error log:", e)
    finally:
        # Cleanup lives in finally so the browser is released even when the
        # visit is interrupted by something that is not an Exception.
        time.sleep(5)
        browser_manager.close_browser(driver, tor_process)


def main():
    """Keep up to ``NO_PROCESSES`` visiting threads running, forever."""
    NO_PROCESSES = 10
    threads = []
    while True:
        if len(threads) < NO_PROCESSES:
            print(f"[BOT] Starting thread n.{len(threads)}/{NO_PROCESSES}")
            t1 = threading.Thread(
                target=launch_and_visit,
                args=(False, 'https://giangillorossi.altervista.org', False),
            )
            t1.start()
            threads.append(t1)
        # Drop finished threads. The index only advances when nothing was
        # removed, because pop() shifts the following entries down.
        i = 0
        while i < len(threads):
            if not threads[i].is_alive():
                print(f"[BOT] Thread n.{i} has stopped, removing from list")
                threads.pop(i)
            else:
                i += 1
        # Pause between passes so the supervisor does not spin a CPU core.
        time.sleep(0.5)


if __name__ == "__main__":
    main()