132 lines
4.6 KiB
Python
132 lines
4.6 KiB
Python
'''Website User Simulator

Simulate the behaviour of a user visiting the website:
* Start from a given page
* Fetch all redirection links present on the page within the defined website scope
* Choose a random one and redirect to that
* Repeat until out of the website (link to another website) or until the maximum number of redirections is exceeded

This requires the Tor daemon to be installed, so you will want to look into installing that. Most Linux distros have it in their repos.

This can also be used to simulate YT views:
* When visiting the webpage, click on the play button
* Wait at least 30 seconds
* YouTube should count this as a view
'''
|
|
|
|
from selenium.webdriver.support.ui import Select
|
|
import selenium.webdriver as webdriver
|
|
import selenium.common.exceptions as sexceptions
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from selenium.webdriver.common.by import By
|
|
|
|
from time import sleep
|
|
import random
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
# Substrings that mark a link as out of scope: visit() skips every anchor
# whose href contains one of these fragments.
outside_website_scopes = [
    "blog.altervista.org",
    "it.altervista.org",
    "pinterest",
    "facebook.com",
    "instagram.com",
    "iubenda.com",
    "twitter.com",
    "#",
]

# CSS selector visit() passes to click_noexit() right after loading the page.
ACCEPT_COOKIES_BTN_SEL = ".iubenda-cs-accept-btn"

# Upper bound on the number of iterations of the link-following loop in visit().
MAX_REDIRECTION = 5

# When true, visit() also collects the page's iframes and may click one
# instead of following a link.
LOOK_FOR_ADS = True
|
|
|
|
def visit(browser, url, redirs):
    """Open ``url`` and follow random in-scope links, or click an iframe.

    After loading the page and trying to click the cookie-accept button,
    the function runs up to ``MAX_REDIRECTION`` iterations. Each iteration
    collects the enabled ``<a>`` elements whose ``href`` contains none of
    the fragments in ``outside_website_scopes``. It then either clicks one
    of them at random or, when ``LOOK_FOR_ADS`` is set, clicks a random
    ``<iframe>`` and stops.

    Any exception raised inside an iteration is printed and the loop moves
    on to the next iteration.

    Args:
        browser: Selenium WebDriver instance used for navigation.
        url: Address of the first page to load.
        redirs: Unused; kept so existing callers keep working.

    Returns:
        None.
    """
    # go to the page
    print(f"Visiting: {url}")
    browser.get(url)

    # Sleep a little bit to wait for all of the page to be loaded (especially ads) and to simulate a user reading. Keep in mind the profile is preset to accept cookies on this website
    sleep(3)
    click_noexit(browser, By.CSS_SELECTOR, ACCEPT_COOKIES_BTN_SEL, 10)

    for _ in range(MAX_REDIRECTION):
        try:
            sleep(4)

            # Fetch all the elements with links present on the page. The href
            # is read once per element and reused for filtering and logging.
            links = []
            for anchor in browser.find_elements(By.XPATH, './/a'):
                href = anchor.get_attribute('href')
                if href is not None:
                    links.append((anchor, href))
            all_redirect_elements = [anchor for anchor, _ in links]
            print(all_redirect_elements)

            ads = []
            if LOOK_FOR_ADS:
                # Fetch all the ads present on the page. Not all iframes are ads, but that's a good way to get them
                ads = browser.find_elements(By.TAG_NAME, "iframe")
            elif not all_redirect_elements:
                # Nothing to follow: close the browser and stop, instead of
                # looping on a browser that has already been shut down.
                done(browser)
                return

            # Keep enabled links that stay inside the website. Using the
            # element as dict key removes duplicates and keeps page order.
            allowed = {
                anchor: href
                for anchor, href in links
                if anchor.is_enabled()
                and not any(scope in href for scope in outside_website_scopes)
            }

            # Explicit guard: picking from an empty list used to surface as an
            # opaque ValueError swallowed by the handler below.
            if not allowed:
                print("No in-scope links found on this page")
                continue

            target, target_href = random.choice(list(allowed.items()))

            # With ads enabled, a link is followed with probability 0.15;
            # otherwise an iframe is clicked and the visit ends.
            if LOOK_FOR_ADS and random.random() >= 0.15:
                if not ads:
                    print("No iframes found on this page")
                    continue

                # visit ad: get a random iframe and click it
                random.choice(ads).click()

                sleep(5)
                print("I'm out of the website, bye!")
                # NOTE(review): the browser is left open on this path; call
                # done(browser) before breaking if it should be closed here.
                break

            print("Moving toward", target_href)
            target.click()
        except Exception as e:
            # Best-effort loop: report the problem and try the next iteration.
            print("Error, closing", e)
|
|
|
|
def main():
    """Start Chrome and run a simulated visit to the URL given on the command line.

    The start URL is read from the first command-line argument. The
    argument is validated before Chrome is launched, so a missing URL ends
    the script with a usage message instead of an ``IndexError`` raised
    after a browser window has already been opened.

    Raises:
        SystemExit: If no URL argument was supplied.
    """
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <start-url>")

    browser = webdriver.Chrome(executable_path="/home/emamaker/Documents/Projects/GShellAutomator/chromedriver")
    launch_browser(browser, sys.argv[1], False)
    print("Script executed!")
|
|
|
|
def launch_browser(browser, url, yt):
    """Hand the browsing session to the matching visit routine.

    A truthy ``yt`` passes ``browser`` and ``url`` to ``visit_yt``; otherwise
    ``visit`` is called with ``0`` as its third argument (``redirs``).

    NOTE(review): ``visit_yt`` is not defined in the part of the file seen
    here - confirm it exists before calling this with a truthy ``yt``.
    """
    if not yt:
        visit(browser, url, 0)
        return
    visit_yt(browser, url)
|
|
|
|
|
|
def done(browser):
    """Shut the browser down: close the current window, then quit the driver.

    ``quit()`` runs even when ``close()`` raises (for example because the
    window is already gone), so the driver is not left running. An
    exception raised by ``close()`` still propagates to the caller.

    Args:
        browser: Object exposing ``close()`` and ``quit()``, such as a
            Selenium WebDriver.
    """
    try:
        browser.close()
    finally:
        browser.quit()
|
|
|
|
def click_exit(browser, by, desc, timeout):
    """Click the element located by ``(by, desc)``; close the browser on failure.

    Waits up to ``timeout`` seconds for the element to become clickable and
    clicks it. If Selenium reports an error during the wait or the click,
    the browser is shut down through ``done``.

    Only Selenium errors are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.

    Args:
        browser: Selenium WebDriver instance.
        by: Locator strategy (a ``By`` constant).
        desc: Locator value matching ``by``.
        timeout: Maximum number of seconds to wait for the element.

    Returns:
        True if the element was clicked, False if the browser was closed
        because the click failed.
    """
    try:
        WebDriverWait(browser, timeout).until(EC.element_to_be_clickable((by, desc))).click()
    except sexceptions.WebDriverException:
        done(browser)
        return False
    return True
|
|
|
|
def click_noexit(browser, by, desc, timeout):
    """Click the element located by ``(by, desc)`` if possible; never close the browser.

    Waits up to ``timeout`` seconds for the element to become clickable and
    clicks it. A Selenium error during the wait or the click is treated as
    "element not available" and the browser is left open, so the caller
    can carry on with the session.

    Args:
        browser: Selenium WebDriver instance.
        by: Locator strategy (a ``By`` constant).
        desc: Locator value matching ``by``.
        timeout: Maximum number of seconds to wait for the element.

    Returns:
        True if the element was clicked, False otherwise.
    """
    try:
        WebDriverWait(browser, timeout).until(EC.element_to_be_clickable((by, desc))).click()
    except sexceptions.WebDriverException:
        # Optional click: the element is missing or not clickable; keep going.
        return False
    return True
|
|
|
|
# Run the simulator only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|