update

2023-11-25 20:38:36 +01:00
parent a6eeb26534
commit 2a3fddea3d
2 changed files with 46 additions and 70 deletions
--- a/scrapeje.py
+++ b/scrapeje.py
@@ -1,14 +1,15 @@
 import re
-#import requests
 from bs4 import BeautifulSoup
-#import cloudscraper
-#import json
 from selenium import webdriver
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.by import By
+#import requests
+#import cloudscraper
+#import json


+#Inizializzo liste 
 nome=[]
 desc=[]
 npezzi=[]
@@ -17,43 +18,23 @@ prezzof=[]
 scripto=[]


+#INPUT
 #prende l url della pagina justeat del ristorante in input
-#url = "https://www.justeat.it/restaurants-pizzeria-girasole-bologna/menu" 
-#url = input('link della pagina justeat del ristorante: ')
-#input ('inserisci il link della pagina justeat del ristorante: ')
+#restaurant_url = input('link della pagina justeat del ristorante: ')

-# print("\n")
-# url = print(Fore.WHITE + Style.DIM + "es https://www.justeat.it/NOME_RISTORANTE/menu" + Style.RESET_ALL)
-# url = input("inserisci il link della pagina justeat del ristorante: ")
-
-
-#scrape html scavalcando cloudflare
-# scraper = cloudscraper.create_scraper(browser={'browser': 'firefox','platform': 'windows','mobile': False})
-# page = scraper.get(url).content #usa input manuale
-# page = scraper.get("https://www.justeat.it/restaurants-saporedialeppo/menu").content #usa input automatico
-
-#crea il file html
-# headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
-# page = requests.get(url, headers=headers).content
-
-
-restaurant_url = "https://www.justeat.it/restaurants-saporedialeppo/menu"
-restaurant_url = 'https://www.justeat.it/restaurants-gelateria-ice-cream-casalecchio-di-reno-40033/menu'
+#PER DEBUG
 restaurant_url = 'https://www.justeat.it/restaurants-pizzeria-la-garganica-bologna/menu'
-#restaurant_url = 'https://www.justeat.it/restaurants-viavaipizzaekebab/menu'
+
+
+#INIZIALIZZO PARSER
 driver = webdriver.Chrome()
 driver.get(restaurant_url)

 wait = WebDriverWait(driver, 10)
 wait.until(EC.presence_of_element_located((By.CLASS_NAME, "c-menuItems-price--offline")))

-
-
 page = driver.page_source

-
-
-
 with open('JEmenu.html', 'w') as f:
    f.write(page)

@@ -61,40 +42,28 @@ with open('JEmenu.html', 'w') as f:
 with open('JEmenu.html', 'r') as f:
    page = f.read()

-#parser
+#PARSER
 soup = BeautifulSoup(page, "html.parser")

+
 #Stora nome ristorante
 nrist = soup.title.text[8:-32]

-
-
-
-
-# #Stora telefono del ristorante
-
-# Define the regex pattern
+#Stora telefono del ristorante
 pattern = re.compile(r'allergenPhoneNumber')
-
-# Find all script tags that match the regex pattern
-script_tags = soup.find_all('script', text=pattern)
-
-# Define the regex pattern
+script_tags = soup.find_all('script', string=pattern)
 pattern = re.compile(r'"allergenPhoneNumber":"(\d+)"')
-
-# Search for the phone number in the given string
 tel = re.search(pattern, script_tags[0].next)
 if tel:
    tel = tel.group(1)

-
 #Stato ristorante
 restaurant_is_open = menu = soup.find(attrs={"data-js-test":"order-status-wrapper"}).text
 restaurant_is_open = restaurant_is_open.replace('\n', ' ')
-
 regex = r" {4,}"
 result = re.split(regex, restaurant_is_open)

+#indirizzo ristorante
 restaurant_address = soup.find(attrs={"data-js-test":"header-restaurantAddress"}).text

 #cicla le schede prodotto
@@ -110,7 +79,6 @@ for menu in soup.find_all(attrs={"data-test-id": "menu-item"}):
        else:
            continue

-
    #riempie la lista "desc" 
    att=menu.find("p", class_="c-menuItems-description")
    if att != None:
@@ -119,10 +87,10 @@ for menu in soup.find_all(attrs={"data-test-id": "menu-item"}):
    else:
        desc.append(None)

-    #riempie la lista "prezzo" 
-    for att in menu.find(attrs={"data-js-test": "menu-item-price"}):
-        prezzo.append(att.lstrip().splitlines()[0])

+    #riempie la lista "prezzo" 
+    #for att in menu.find(attrs={"data-js-test": "menu-item-price"}):
+    #    prezzo.append(att.lstrip().splitlines()[0])

    
    #riempie la lista "npezzi" 
@@ -134,23 +102,25 @@ for menu in soup.find_all(attrs={"data-test-id": "menu-item"}):
            npezzi.append(None)
    continue

+
 # #stampa liste
 for x in range(len(nome)):
     print("\n")
     print(nome[x])
     print(desc[x])
     print(npezzi[x])
-     print(prezzo[x])
+    # print(prezzo[x])
     regex = r"(?:da\s+)?([\d.]+)"

-     match = re.search(regex, prezzo[x])
-     if match:
-        numero = match.group(1)
-        print(numero)
-        prezzof.append(int(numero))
-     else:
-        prezzof.append(0)
-        print(prezzof[x])
+   #  match = re.search(regex, prezzo[x])
+   #    if match:
+   #     numero = match.group(1)
+   #     print(numero)
+   #     prezzof.append(int(numero))
+   #  else:
+   #     prezzof.append(0)
+   #     print(prezzof[x])
+

 #stampa lunghezza liste 
 print("\n")
@@ -171,19 +141,17 @@ print("telefono:",tel)
 print("lista nome:",len(nome))
 print("lista desc:",len(desc))
 print("lista npezzi:",len(npezzi))
-print("lista prezzi:",len(prezzo)) #sono stringhe ovvero ci sono anche prezzi come "da 1,00 €" (servirà formattarla in double per poter fare i conti)
-
-
+#print("lista prezzi:",len(prezzo)) #sono stringhe ovvero ci sono anche prezzi come "da 1,00 €" (servirà formattarla in double per poter fare i conti)


 # Generazione del codice HTML
 html = "<html><body>"
 for x in range(len(nome)):
    html += "<h2>" + nome[x] + "</h2>"
-    # html += "<p>" + desc[x] + "</p>"
+#    html += "<p>" + desc[x] + "</p>"
    html += "<p>N. pezzi disponibili: " + str(npezzi[x]) + "</p>"
-    html += "<p>Prezzo: €" + str(prezzo[x]) + "</p>"
-    html += "<button onclick=\"aggiungiProdotto('" + nome[x] + "', '" + str(prezzof[x]) + "')\">+1</button>"
+#    html += "<p>Prezzo: €" + str(prezzo[x]) + "</p>"
+#    html += "<button onclick=\"aggiungiProdotto('" + nome[x] + "', '" + str(prezzof[x]) + "')\">+1</button>"
    html += "<br><br>"

 html += "<br><hr><h2>Prodotti aggiunti</h2>"
@@ -197,6 +165,7 @@ html += "}"
 html += "</script>"
 html += "</body></html>"

+
 # Salvataggio su file
 with open("pagina.html", "w") as file:
    file.write(html)
--- a/setup.sh
+++ b/setup.sh
@@ -1,8 +1,15 @@
-#!/bin/sh
+#!/bin/bash
+python -m venv .venv
+source .venv/bin/activate

-#pip3 install jawanndenn
-pip3 install requests
-pip3 install BeautifulSoup4
+#pip3 install requests
+#pip3 install BeautifulSoup4
+#pip3 install colorama
+#pip3 install selenium
+
+#NON PIU UTILIZZATI
 #pip3 install cloudscraper
 #pip3 install json
-pip3 install colorama
+
+#echo "USA LO SCRIPT ALL'INTERNO DEL VENV"
+#echo "PER USCIRE SCRIVI 'deactivate'"