From 4adba7d0174a0faba38a8d2c0763b21e7249176f Mon Sep 17 00:00:00 2001 From: scossa Date: Mon, 27 Nov 2023 20:18:40 +0100 Subject: [PATCH] update --- scrapeje.py | 81 +++++++++++++++++++++++++++++------------------------ setup.sh | 12 +++++--- 2 files changed, 52 insertions(+), 41 deletions(-) diff --git a/scrapeje.py b/scrapeje.py index 39e2427..fdf2c53 100644 --- a/scrapeje.py +++ b/scrapeje.py @@ -11,6 +11,20 @@ import shutil restaurant_url='' +driver='' +page = '' +nrist='' +restaurant_address='' +tel='' +result='' +soup = '' +nome=[] +desc=[] +npezzi=[] +prezzo=[] +prezzoN=[] + + def inputurl(): global restaurant_url #INPUT @@ -21,15 +35,30 @@ def inputurl(): #restaurant_url = 'https://www.justeat.it/restaurants-pizzeriadelrondone-bologna/menu' -nome=[] -desc=[] -npezzi=[] -prezzo=[] -prezzoN=[] -nrist='' -restaurant_address='' -tel='' -result='' +def scraper(): + global driver + global page + global restaurant_url + driver = webdriver.Chrome() + driver.get(restaurant_url) + + wait = WebDriverWait(driver, 16) + wait.until(EC.presence_of_element_located((By.CLASS_NAME, "c-menuItems-price"))) + + page = driver.page_source + + with open('JEmenu.html', 'w') as f: + f.write(page) + + +def parser(): + global soup + with open('JEmenu.html', 'r') as f: + page = f.read() + + soup = BeautifulSoup(page, "html.parser") + + def stora_tutto(): global nome global desc @@ -105,31 +134,6 @@ def stora_tutto(): continue -driver='' -page = '' -def scraper(): - global driver - global page - global restaurant_url - driver = webdriver.Chrome() - driver.get(restaurant_url) - - wait = WebDriverWait(driver, 16) - wait.until(EC.presence_of_element_located((By.CLASS_NAME, "c-menuItems-price"))) - - page = driver.page_source - - with open('JEmenu.html', 'w') as f: - f.write(page) - -soup = '' -def parser(): - global soup - with open('JEmenu.html', 'r') as f: - page = f.read() - - soup = BeautifulSoup(page, "html.parser") - def parserdarubrica(): global parser global stora_tutto @@ -146,6 +150,7 @@ def parserdarubrica(): print ("numero inesistente") continue + def stampa_liste(): print("\n") for x in range(len(nome)): @@ -155,6 +160,7 @@ def stampa_liste(): print(npezzi[x]) print(prezzo[x]) + def stampa_info(): print("-" * (len(desc)) + "\n") print(nrist) @@ -173,6 +179,7 @@ def stampa_info(): doppione = (result[i+1]) print("\n" + "-" * (len(desc))+ "\n") + def genera_prezzoN(): #Genera la lista prezzoN[] che è un clone di "prezzo[] ma con i valori float anzichè string" prezzoN = prezzo.copy() @@ -186,7 +193,6 @@ def genera_prezzoN(): prezzoN[i] = 99999 - def salvainrubrica(): maxn=0 def trova_nuovo_numero(): @@ -247,11 +253,13 @@ def salvainrubrica(): rubrica.write(nristmax + '\n') rubrica.write(nrist + '\n') rubrica.write(restaurant_url + '\n') + print ("\nIl ristorante",nrist +"+è stato salvato in rubrica") else: #PULISCE os.remove("JEmenu.html") os.remove("rubrica.txt") + ############################################################################################# ############################################################################################# ############################################################################################# @@ -287,7 +295,6 @@ genera_prezzoN() salvainrubrica() - # #PER DEBUG # for x in range(len(nome)): # print(prezzoN[x]) @@ -307,6 +314,7 @@ salvainrubrica() # print("\n" + "-" * 25 + "\n") +#DA RIFARE # # Genera codice HTML # html = "" # for x in range(len(nome)): @@ -336,4 +344,3 @@ salvainrubrica() - diff --git a/setup.sh b/setup.sh index 5d37b36..2b85ca8 100755 --- a/setup.sh +++ b/setup.sh @@ -7,12 +7,16 @@ echo pip3 install requests pip3 install BeautifulSoup4 -pip3 install colorama pip3 install selenium -#NON PIU UTILIZZATI -#pip3 install cloudscraper -#pip3 install json +#SCRAPEJE UTILIZZA I MODULI PREINSTALLATI: +# import re +# import os +# import shutil +#NON PIU UTILIZZATI: +# pip3 install colorama +# pip3 install cloudscraper +# pip3 install json echo "Installo chromium (se non già installato) necessario per lo scrape" echo