From 1919eeb88bfee7854f6abc8eb4c777c49e9f6854 Mon Sep 17 00:00:00 2001 From: scossa Date: Thu, 1 Dec 2022 15:19:56 +0100 Subject: [PATCH] Update 'ScrapeJE.py' --- ScrapeJE.py | 102 +++++++++++++++++++++++++--------------------------- 1 file changed, 49 insertions(+), 53 deletions(-) diff --git a/ScrapeJE.py b/ScrapeJE.py index e935e5e..174e025 100644 --- a/ScrapeJE.py +++ b/ScrapeJE.py @@ -1,21 +1,28 @@ import requests from bs4 import BeautifulSoup -import cloudscraper +#import cloudscraper import json nome=[] desc=[] npezzi=[] prezzo=[] +scripto=[] + #prende l url della pagina justeat del ristorante in input -#linkJE = input('link della pagina justeat del ristorante: ') #decommenta per input manuale +url = "https://www.justeat.it/restaurants-pizzeria-la-garganica-bologna/menu" +#url = input('link della pagina justeat del ristorante: ') + +#scrape html scavalcando cloudflare +# scraper = cloudscraper.create_scraper(browser={'browser': 'firefox','platform': 'windows','mobile': False}) +# page = scraper.get(url).content #usa input manuale +# page = scraper.get("https://www.justeat.it/restaurants-saporedialeppo/menu").content #usa input automatico -#scrape html -scraper = cloudscraper.create_scraper(browser={'browser': 'firefox','platform': 'windows','mobile': False}) -#page = scraper.get(linkJE).content #usa input manuale -page = scraper.get("https://www.justeat.it/restaurants-saporedialeppo/menu").content #usa input automatico #crea il file html +headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} +page = requests.get(url, headers=headers).content + with open('JEmenu.html', 'wb') as f: f.write(page) @@ -25,22 +32,26 @@ with open('JEmenu.html', 'rb') as f: #parser soup = BeautifulSoup(page, "html.parser") -menu = soup.find(attrs={"data-test-id": "menu-item"}) - #Stora nome ristorante -nrist=soup.title.text[8:-32] -menu.find(attrs={"allergenPhoneNumber": "menu-item-name"}) +nrist = soup.title.text[8:-32] +#Stora telefono del ristorante +for i in soup.findAll('script'): + scripto.append(i) -## -#Stora il numero di telfono del ristorante -#info-> alla riga 870 dell html, all interno di uno