"""Draft scraper for a JustEat restaurant menu.

For every menu item the script collects four parallel values:

* ``nome``   - product name
* ``desc``   - description
* ``desc2``  - secondary description (e.g. "1 pezzo", "2 pezzi", "3 pezzi",
  type of falafel)
* ``prezzo`` - price

It downloads the menu page, stores a snapshot of the raw HTML on disk,
extracts the values above, prints one record per item and finally prints the
length of each list so that misaligned results are easy to spot.

Origin: commit bffe88c5a502232d4e5fd9077635f6d94334aa1a by scossa
(Tue, 27 Sep 2022 18:47:25 +0200), "Bozza scrape JustEat menu",
which created BozzaScrapeJE.py.
"""

from itertools import zip_longest

import requests
from bs4 import BeautifulSoup
import cloudscraper

MENU_URL = "https://www.justeat.it/restaurants-saporedialeppo/menu"
SNAPSHOT_PATH = "aleppo.html"
REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection blocks forever

# Index of the line, inside each element's text, that carries the value.
# NOTE(review): these offsets mirror the page layout at the time the script
# was written - confirm against the current markup if results look empty.
NAME_LINE = 2
DESCRIPTION_LINE = 1
PRICE_LINE = 1


def _line_at(text: str, index: int) -> str:
    """Return line ``index`` of ``text`` without leading whitespace.

    An empty string is returned when ``text`` has fewer lines than expected,
    so one oddly formatted element cannot abort the run or shift the
    parallel lists out of alignment.
    """
    lines = text.splitlines()
    if index < len(lines):
        return lines[index].lstrip()
    return ""


def _split_description(text: str) -> tuple:
    """Classify a description element as ``(desc, desc2)``.

    Texts mentioning "pezz" (pezzo / pezzi) go to the secondary slot.
    "pezza" is excluded because one dish contains "sPEZZAtino", which would
    otherwise be mistaken for a piece count. Exactly one of the two returned
    strings is filled; the other is "".
    """
    line = _line_at(text, DESCRIPTION_LINE)
    # `in` instead of `find(...) > 0`: a match at position 0 must count too.
    if "pezz" in text and "pezza" not in text:
        return "", line
    return line, ""


# Download the HTML (cloudscraper is used to get past the anti-bot page).
scraper = cloudscraper.create_scraper(
    browser={'browser': 'firefox', 'platform': 'windows', 'mobile': False}
)
response = scraper.get(MENU_URL, timeout=REQUEST_TIMEOUT)
# Fail loudly instead of parsing an error / challenge page as if it were a menu.
response.raise_for_status()
page = response.content

# Keep a snapshot of the raw page on disk for offline inspection.
with open(SNAPSHOT_PATH, 'wb') as f:
    f.write(page)

soup = BeautifulSoup(page, "html.parser")

# Fill "nome" with the product names.
nome = [
    _line_at(tag.text, NAME_LINE)
    for tag in soup.find_all(attrs={"data-test-id": "menu-item-name"})
]

# Fill "desc" / "desc2" with the product descriptions; both lists always
# grow together so they stay index-aligned.
desc = []
desc2 = []
for tag in soup.find_all(attrs={"data-test-id": "menu-item-description"}):
    primary, secondary = _split_description(tag.text)
    desc.append(primary)
    desc2.append(secondary)

# Fill "prezzo" with the product prices.
prezzo = [
    _line_at(tag.text, PRICE_LINE)
    for tag in soup.find_all(class_="c-menuItems-price notranslate")
]

# Print one record per item. zip_longest pads missing values with "" so a
# length mismatch between the lists is shown rather than raising IndexError.
for name, description, extra, price in zip_longest(
    nome, desc, desc2, prezzo, fillvalue=""
):
    print(name)
    print(description)
    print(extra)
    print(price, "\n")

# List sizes: all four should be equal when the scrape is consistent.
print(len(nome))
print(len(desc))
print(len(desc2))
print(len(prezzo))