2022-01-30 17:09:24 +01:00
# Use undetected-chromedriver together with stem to use the tor network
import time
import random
2022-02-05 09:43:26 +01:00
import os
2022-01-30 17:09:24 +01:00
from selenium . webdriver . support . ui import Select
import selenium . webdriver as webdriver
import selenium . common . exceptions as sexceptions
from selenium . webdriver . support . ui import WebDriverWait
from selenium . webdriver . support import expected_conditions as EC
from selenium . webdriver . common . by import By
import requests
import browser_manager
import threading
# Substrings that disqualify a link: get_possibile_webelements() drops every
# <a> whose href contains one of these. Entries must not carry surrounding
# whitespace, otherwise they can never be found inside an href.
# "#" rejects any href that carries a fragment.
outside_website_scopes = [
    "blog.altervista.org",
    "en.altervista.org",
    "it.altervista.org",
    "pinterest",
    "facebook.com",
    "instagram.com",
    "iubenda.com",
    "twitter.com",
    "#",
]
# CSS selector of the cookie-banner "accept" button clicked by visit_page().
ACCEPT_COOKIES_BTN_SEL = " .iubenda-cs-accept-btn "
# Maximum number of click rounds visit_page() performs during one visit.
MAX_REDIRECTION = 15
# When True, visit_page() occasionally tries to click an ad iframe instead of
# a regular link.
LOOK_FOR_ADS = True
def visit_page(driver, url):
    """Open *url* and simulate a visitor clicking around the site.

    After loading the page and (if present) accepting the cookie banner, up
    to ``MAX_REDIRECTION`` click rounds are performed. Each round either
    clicks a random in-scope link or, when ``LOOK_FOR_ADS`` is set and with
    roughly 35% probability, tries to click an ad iframe. The visit ends
    early after the first successful ad click.

    Args:
        driver: Selenium WebDriver instance used for the visit.
        url: Address of the page to open.

    The driver is always quit before returning, including when an exception
    escapes, so a failed visit does not leave a browser running.
    """
    try:
        # go to the page
        print(f" Visiting: { url } ")
        driver.get(url)
        # Sleep a little to let the whole page load (especially ads) and to
        # simulate a user reading.
        time.sleep(3)
        try:
            click(driver, By.CSS_SELECTOR, ACCEPT_COOKIES_BTN_SEL, 10)
        except sexceptions.TimeoutException:
            # The banner never became clickable (e.g. the profile already
            # accepts cookies for this site); that must not abort the visit.
            print("Cookie banner not found, continuing without accepting it")
        time.sleep(4)

        for _ in range(MAX_REDIRECTION):
            time.sleep(6)
            if not LOOK_FOR_ADS or random.random() < 0.65:
                # click a random in-scope link
                try_to_click_elements(driver, get_possibile_webelements(driver))
            elif try_to_click_elements(driver, get_clickable_ads(driver)):
                # get_clickable_ads() returns the displayed iframes whose id
                # looks like an ad slot; when none of them can be clicked the
                # round is simply skipped. A successful ad click ends the visit.
                time.sleep(4)
                break
    finally:
        driver.quit()
def try_to_click_elements(driver, clickable_elements):
    """Click one randomly chosen element out of *clickable_elements*.

    Elements are picked at random; one whose ``click()`` raises is removed
    from the list and another is tried, until a click succeeds or the list is
    exhausted.

    Args:
        driver: Unused; kept for interface compatibility with callers.
        clickable_elements: Mutable list of objects exposing ``click()``.
            It is modified in place: every element that failed to click is
            popped from it.

    Returns:
        True as soon as one click succeeds, False when no element could be
        clicked (including when the list is empty to begin with).
    """
    while clickable_elements:
        index = random.randrange(len(clickable_elements))
        candidate = clickable_elements[index]
        print(" trying to click element ", candidate)
        try:
            candidate.click()
        except Exception:
            # Only ordinary errors mean "not clickable"; KeyboardInterrupt and
            # SystemExit must still be able to stop the bot.
            print(candidate, " was not clickable, deleting and trying again ")
            clickable_elements.pop(index)
        else:
            return True

    print(" All the elements weren ' t clickable! ")
    return False
def get_clickable_ads(driver):
    """Return the displayed ``<iframe>`` elements whose id looks like an ad slot.

    An iframe qualifies when its ``id`` attribute contains the substring
    ``"ad"`` (which also covers ids containing ``"google_ad"``) and the
    element is currently displayed.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.

    Returns:
        List of matching iframe elements; empty when there is none.
    """
    id_ok_ads = []
    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which
    # newer Selenium releases no longer provide.
    for frame in driver.find_elements(By.XPATH, ' .//iframe '):
        # Read the id once per frame and guard against a missing attribute.
        frame_id = frame.get_attribute('id') or ''
        if 'ad' in frame_id and frame.is_displayed():
            id_ok_ads.append(frame)

    print(f" Ads that can be clicked { id_ok_ads } ")
    return id_ok_ads
def get_possibile_webelements(driver):
    """Collect the links on the current page that are worth clicking.

    A link (``<a>`` element) is kept when it has an ``href``, becomes
    clickable within 0.25 seconds, and its ``href`` contains none of the
    substrings listed in ``outside_website_scopes``.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.

    Returns:
        List of the link elements that passed all three checks.
    """
    clickable_elements = []
    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which
    # newer Selenium releases no longer provide.
    for el in driver.find_elements(By.XPATH, ' .//a '):
        # Fetch the href once and reuse it for the scope check below.
        href = el.get_attribute('href')
        if href is None:
            continue

        try:
            WebDriverWait(driver, 0.25).until(EC.element_to_be_clickable(el))
        except Exception:
            # Not clickable in time (or the element went away): skip it.
            continue

        # Scope entries are stripped so stray whitespace in the configured
        # list cannot silently disable the filter.
        if any(scope.strip() in href for scope in outside_website_scopes):
            continue

        clickable_elements.append(el)
    return clickable_elements
def click(driver, by, desc, timeout):
    """Wait for an element to become clickable, then click it.

    Args:
        driver: Selenium WebDriver to search in.
        by: Locator strategy (e.g. ``By.CSS_SELECTOR``).
        desc: Locator value matching the strategy given in *by*.
        timeout: Maximum number of seconds to wait for the element.

    Any exception raised by the wait (Selenium raises ``TimeoutException``
    when the timeout expires) or by the click itself propagates to the caller.
    """
    waiter = WebDriverWait(driver, timeout)
    locator = (by, desc)
    target = waiter.until(EC.element_to_be_clickable(locator))
    target.click()
2022-01-31 22:26:53 +01:00
def launch_and_visit(use_tor, page_url, headless=False):
    """Start a browser, run one visit of *page_url*, then shut everything down.

    Args:
        use_tor: Forwarded to ``browser_manager.start_browser`` as ``use_tor``.
        page_url: Address handed to ``visit_page``.
        headless: Forwarded to ``browser_manager.start_browser`` as ``headless``.

    Errors raised by the visit are logged and swallowed so that a failing
    visit only ends this worker. The driver and tor process obtained from
    ``start_browser`` are always passed to ``browser_manager.close_browser``,
    even when something other than an ordinary ``Exception`` interrupts the
    visit.
    """
    driver, tor_process = browser_manager.start_browser(use_tor=use_tor, headless=headless)
    try:
        try:
            visit_page(driver, page_url)
        except Exception as e:
            print(" Unknown error, exiting. Error log: ", e)
        time.sleep(5)
    finally:
        # Runs on every exit path so the browser/tor pair is never leaked.
        browser_manager.close_browser(driver, tor_process)
2022-01-30 17:09:24 +01:00
if __name__ == "__main__":
    # Supervisor loop: keep NO_PROCESSES worker threads alive forever, each
    # one running a single launch_and_visit() and being replaced when it ends.
    NO_PROCESSES = 10
    # Seconds to pause between two supervision passes; without it the loop
    # spins a CPU core at 100% while the workers are busy.
    POLL_INTERVAL = 0.5
    threads = []

    while True:
        # Top the pool back up to NO_PROCESSES workers.
        while len(threads) < NO_PROCESSES:
            print(f" [BOT] Starting thread n. { len(threads) + 1 } / { NO_PROCESSES } ")
            t1 = threading.Thread(
                target=launch_and_visit,
                args=(True, 'https://giangillorossi.altervista.org', True),
            )
            t1.start()
            threads.append(t1)

        # Drop finished workers. The index only advances when nothing was
        # removed, because pop() shifts the following entries down.
        i = 0
        while i < len(threads):
            if not threads[i].is_alive():
                print(f" [BOT] Thread n. { i + 1 } has stopped, removing from list ")
                threads.pop(i)
            else:
                i += 1

        time.sleep(POLL_INTERVAL)
# for i in range( 0, 5):
# t1 = threading.Thread(target=launch_and_visit, args=(True, 'https://giangillorossi.altervista.org', True))
# t1.start()
# threads.append(t1)
# for t in threads:
# t.join()