2022-09-27 18:47:25 +02:00
from itertools import zip_longest

import cloudscraper
import requests
from bs4 import BeautifulSoup
# Parallel result lists: the print loop further down reads all four with the
# same index, so position i of every list describes the same product card.
nome = []    # product names
desc = []    # product descriptions (None when a card has no description)
npezzi = []  # piece-count text (None when a card does not mention pieces)
prezzo = []  # price text
2022-10-01 03:28:54 +02:00
# Ask for the URL of the restaurant's Just Eat page.
linkJE = input(' link della pagina justeat del ristorante: ').strip()

# Download the page through cloudscraper, using a desktop Firefox-on-Windows
# browser profile.
scraper = cloudscraper.create_scraper(
    browser={'browser': 'firefox', 'platform': 'windows', 'mobile': False}
)
response = scraper.get(linkJE)
# Fail on an HTTP error instead of parsing an error page as if it were a menu.
response.raise_for_status()
page = response.content

# Keep a copy of the raw HTML on disk.
# NOTE(review): the file name is reproduced exactly as it appears in the
# source, including the surrounding spaces -- confirm whether they are wanted.
with open(' aleppo.html ', 'wb') as f:
    f.write(page)

# Parse the downloaded bytes directly: they are exactly what was just written,
# so reading the file back would yield the same content.
soup = BeautifulSoup(page, "html.parser")
2022-09-29 19:07:26 +02:00
# First product card on the page, or None when the page has none.
menu = soup.find(attrs={"data-test-id": "menu-item"})

# Store the restaurant name, cut out of the page <title>.
# NOTE(review): the fixed offsets (drop the first 8 and the last 32
# characters) presumably match the text Just Eat puts around the name in the
# title -- confirm against a live page.
title_tag = soup.title
nrist = title_tag.text[8:-32] if title_tag is not None else ""
2022-09-30 23:27:10 +02:00
2022-09-27 18:47:25 +02:00
2022-10-01 03:28:54 +02:00
##
# Store the restaurant's phone number (not implemented yet).
# Info: around line 870 of the HTML, inside a <script>, the number appears as
# "allergenPhoneNumber":"3389529446" (example taken from scraping aleppo).
#tel=
##
# Walk the product cards and add exactly one entry per card to each of the
# four lists, so that they stay index-aligned.

# Index of the line, within a card's text, from which the piece count is read.
# NOTE(review): presumably tied to the current Just Eat card layout -- confirm.
_PIECES_LINE = 7


def _first_line(tag):
    """Return the first line of *tag*'s text, or None.

    Leading whitespace (including blank lines) is skipped before the first
    line is taken. None is returned when *tag* is None or when its text
    contains nothing but whitespace.
    """
    if tag is None:
        return None
    lines = tag.text.lstrip().splitlines()
    return lines[0] if lines else None


for menu in soup.find_all(attrs={"data-test-id": "menu-item"}):
    # Fill the "nome" list.
    nome.append(
        _first_line(menu.find(attrs={"data-test-id": "menu-item-name"}))
    )

    # Fill the "desc" list (None when the card has no description).
    desc.append(_first_line(menu.find("p", class_="c-menuItems-description")))

    # Fill the "prezzo" list.
    prezzo.append(
        _first_line(menu.find("p", class_="c-menuItems-price notranslate"))
    )

    # Fill the "npezzi" list: only cards whose text mentions "pezzo"/"pezzi"
    # get a piece count; every other card gets None.
    card_text = menu.text
    card_lines = card_text.splitlines()
    mentions_pieces = "pezzo" in card_text or "pezzi" in card_text
    if mentions_pieces and len(card_lines) > _PIECES_LINE:
        npezzi.append(card_lines[_PIECES_LINE].lstrip())
    else:
        npezzi.append(None)
2022-09-29 19:07:26 +02:00
# Print the collected entries, one product per group of lines.
# zip_longest runs until the longest list is exhausted and fills the gaps with
# None, so lists of unequal length no longer raise IndexError.
for name, description, pieces, price in zip_longest(
    nome, desc, npezzi, prezzo
):
    print(" \n ")
    print(name)
    print(description)
    print(pieces)
    print(price)
2022-09-29 19:07:26 +02:00
2022-10-01 03:28:54 +02:00
# Print the restaurant name followed by the length of every list
# (the phone number is not printed yet).
print(" \n ")
print(nrist)
#print(tel)
# NOTE: prices must be handled as strings because values such as "da 1,00 €"
# also occur; they need converting to numbers before doing any arithmetic.
for label, entries in (
    (" lista nome ", nome),
    (" lista desc: ", desc),
    (" lista npezzi: ", npezzi),
    (" lista prezzi: ", prezzo),
):
    print(label, len(entries))
2022-10-01 03:09:17 +02:00
2022-10-01 03:28:54 +02:00
#
# To do: create a JSON file formatted for jawanndenn.
# To do: automate the insertion into jawanndenn.
# jawanndenn offers this through a command-line argument:
# "jawanndenn --loaddata FILE.json" --> Load a JSON export of the database from FILE.json, then quit.
#