This commit is contained in:
2023-11-26 21:28:34 +01:00
parent 277f12e4ea
commit 7551806620

View File

@@ -1,14 +1,12 @@
import re
import os
from bs4 import BeautifulSoup
#IMPORT MODULES
#selenium: scraper | bs4: parser | re: regex | os: interaction with the OS
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
#import requests
#import cloudscraper
#import json
from bs4 import BeautifulSoup
import re
import os
#Initialize lists
nome=[]
@@ -16,29 +14,28 @@ desc=[]
npezzi=[]
prezzo=[]
prezzoN=[]
scripto=[]
#INPUT
#takes the URL of the restaurant's Just Eat page as input
print ("\nesempio: https://www.justeat.it/restaurants-pizzeria-la-garganica-bologna/menu")
restaurant_url = input('INSERISCI IL LINK DELLA PAGINA DEL RISTORANTE: ')
# print ("\nesempio: https://www.justeat.it/restaurants-pizzeria-la-garganica-bologna/menu")
# restaurant_url = input('INSERISCI IL LINK DELLA PAGINA DEL RISTORANTE: ')
#FOR DEBUGGING
#restaurant_url = 'https://www.justeat.it/restaurants-pizzeria-del-mercato-bologna/menu'
# restaurant_url = 'https://www.justeat.it/restaurants-pizzeria-del-mercato-bologna/menu'
#SCRAPE
driver = webdriver.Chrome()
driver.get(restaurant_url)
# #SCRAPE
# driver = webdriver.Chrome()
# driver.get(restaurant_url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CLASS_NAME, "c-menuItems-price--offline")))
# wait = WebDriverWait(driver, 10)
# wait.until(EC.presence_of_element_located((By.CLASS_NAME, "c-menuItems-price--offline")))
page = driver.page_source
# page = driver.page_source
with open('JEmenu.html', 'w') as f:
f.write(page)
# with open('JEmenu.html', 'w') as f:
# f.write(page)
#PARSER
@@ -195,6 +192,7 @@ for i in range(len(prezzo)):
# with open("pagina.html", "w") as file:
# file.write(html)
#Clean up
#TODO: offer to save to the address book
os.remove("JEmenu.html")
#os.remove("JEmenu.html")