'''Website User Simulator

Simulate the behaviour of a user visiting the website
* Start from a given page
* Fetch all redirection links present on the page within the defined website scope
* Choose a random one and redirect to that
* Repeat until out of the website (link to another website) or maximum number of times exceeded

This requires the Tor Daemon to be installed, so you want to look into installing that.
Most Linux Distros have that in their repos
(NOTE: nothing in this file starts or configures Tor itself.)

This can also be used to simulate YT views:
* When visiting the webpage click on the play button
* Wait at least 30 seconds
* Youtube should count this as a view
'''

from selenium.webdriver.support.ui import Select
import selenium.webdriver as webdriver
import selenium.common.exceptions as sexceptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from time import sleep
import random
import subprocess
import sys

# A link whose href contains any of these substrings is considered to lead
# outside of the website and is never followed.
outside_website_scopes = ["blog.altervista.org", "it.altervista.org", "pinterest", "facebook.com", "instagram.com", "iubenda.com", "twitter.com", "#"]

ACCEPT_COOKIES_BTN_SEL = ".iubenda-cs-accept-btn"

# Maximum number of navigation attempts performed by visit().
MAX_REDIRECTION = 5
# When True, visit() may click an iframe (assumed to be an ad) instead of a link.
LOOK_FOR_ADS = True
# With LOOK_FOR_ADS enabled, probability of following an internal link
# rather than clicking an ad on each step.
FOLLOW_LINK_PROBABILITY = 0.15


def _is_inside_scope(href):
    '''Return True when href contains none of the out-of-scope markers.'''
    return not any(marker in href for marker in outside_website_scopes)


def _internal_links(anchors):
    '''Return the enabled, in-scope elements of anchors without duplicates.'''
    in_scope = [
        anchor for anchor in anchors
        if anchor.is_enabled() and _is_inside_scope(anchor.get_attribute('href'))
    ]
    # dict.fromkeys drops duplicates while keeping the original order
    return list(dict.fromkeys(in_scope))


def _take_one_step(browser):
    '''Perform one navigation step on the current page.

    Returns True when the simulation is over (an ad was clicked, or ads are
    disabled and the page has no link at all), False when visit() should go
    on with the next step.
    '''
    # Fetch all the elements with links present on the page
    anchors = [
        anchor for anchor in browser.find_elements(By.XPATH, './/a')
        if anchor.get_attribute('href') is not None
    ]
    print(anchors)

    if LOOK_FOR_ADS:
        # Fetch all the ads present on the page. Not all iframes are ads,
        # but that's a good way to get them
        ads = browser.find_elements(By.TAG_NAME, "iframe")
        if random.random() >= FOLLOW_LINK_PROBABILITY:
            if not ads:
                # Nothing to click: let the caller retry on its next step
                print("No ad found on this page")
                return False
            # visit ad: get a random iframe and click it
            random.choice(ads).click()
            sleep(5)
            print("I'm out of the website, bye!")
            return True
    elif not anchors:
        return True

    candidates = _internal_links(anchors)
    if not candidates:
        # Nothing to click: let the caller retry on its next step
        print("No in-scope link found on this page")
        return False

    target = random.choice(candidates)
    print("Moving toward", target.get_attribute('href'))
    target.click()
    return False


def visit(browser, url, redirs):
    '''Open url and wander around the website like a user would.

    browser: the Selenium WebDriver instance to drive.
    url: the page the simulation starts from.
    redirs: unused, kept so existing callers keep working.

    At most MAX_REDIRECTION steps are attempted. The browser is closed
    through done() as soon as a step ends the simulation; if all the steps
    are used up the browser is left open.
    '''
    # go to the page
    print(f"Visiting: {url}")
    browser.get(url)

    # Sleep a little bit to wait for all of the page to be loaded (especially ads)
    # and to simulate a user reading.
    # Keep in mind the profile is preset to accept cookies on this website
    sleep(3)

    click_noexit(browser, By.CSS_SELECTOR, ACCEPT_COOKIES_BTN_SEL, 10)

    for _ in range(MAX_REDIRECTION):
        try:
            sleep(4)
            finished = _take_one_step(browser)
        except sexceptions.WebDriverException as e:
            # Best effort: a failed step is reported and the next one is tried
            print("Error, closing", e)
            continue

        if finished:
            # Closed outside of the try block so that a failure while
            # quitting is not mistaken for a failed navigation step
            done(browser)
            return


def main():
    '''Start Chrome and run the simulation from the URL given as first argument.'''
    # Validate the command line before a browser window gets opened
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <start-url>")

    browser = webdriver.Chrome(executable_path="/home/emamaker/Documents/Projects/GShellAutomator/chromedriver")
    launch_browser(browser, sys.argv[1], False)
    print("Script executed!")


def launch_browser(browser, url, yt):
    '''Run the YouTube simulation when yt is true, the website one otherwise.'''
    if yt:
        # NOTE(review): visit_yt is not defined anywhere in this file, so this
        # branch raises NameError until it gets implemented or imported.
        visit_yt(browser, url)
    else:
        visit(browser, url, 0)


def done(browser):
    '''Close the current window and quit the WebDriver session.'''
    browser.close()
    browser.quit()


def click_exit(browser, by, desc, timeout):
    '''Click the element located by (by, desc); quit the browser on failure.

    Waits up to timeout seconds for the element to become clickable.
    '''
    try:
        WebDriverWait(browser, timeout).until(EC.element_to_be_clickable((by, desc))).click()
    except sexceptions.WebDriverException:
        done(browser)


def click_noexit(browser, by, desc, timeout):
    '''Click the element located by (by, desc); keep going on failure.

    Waits up to timeout seconds for the element to become clickable. Unlike
    click_exit() the browser is left open when the element never shows up
    or cannot be clicked, so this is suited to optional elements.
    '''
    try:
        WebDriverWait(browser, timeout).until(EC.element_to_be_clickable((by, desc))).click()
    except sexceptions.WebDriverException:
        print(f"Could not click {desc}, moving on")


if __name__ == '__main__':
    main()