diff --git a/chaineTV.py b/chaineTV.py index 49090c3..da0ddd1 100644 --- a/chaineTV.py +++ b/chaineTV.py @@ -5,6 +5,8 @@ import os import unicodedata from bs4 import BeautifulSoup import webbrowser +import urllib.request + class Labbelink (Label): @@ -18,7 +20,6 @@ class Labbelink (Label): class Interface: - def __init__(self): self.datafilepath=os.path.dirname(os.path.realpath(__file__))+"/chaine.json" self.data = load_jsonfile(self.datafilepath) @@ -110,7 +111,7 @@ def RepresentsInt(s): return False def parsechaine(file): - import urllib.request + URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal' liste_chaine = {} response = urllib.request.urlopen(URL) @@ -142,6 +143,29 @@ def geturlprogrammetv(strsearch): return "https://www.programme-tv.net/rechercher?q="+strsearch +def parse_emmission(URL): + try: + response = urllib.request.urlopen(URL) + except urllib.error.URLError: + return False + html = response.read() + parse=BeautifulSoup(html,"html.parser") + link=parse.select_one(".prog_name") + response = urllib.request.urlopen(("https://www.programme-tv.net"+link['href'])) + html = response.read() + parse=BeautifulSoup(html,"html.parser") + divcasting=parse.select_one(".descriptif") + casting=divcasting.find_all(href=re.compile("biographie")) + i=0 + for actor in casting: + casting[i]=actor.text + i+=1 + divsynopsis=parse.select_one(".episode-synopsis") + img=divsynopsis.find_next('img')['data-src'] + synopsis=divsynopsis.select_one(".d-b").text + + return {'title':link['title'],'href':("https://www.programme-tv.net"+link['href']),'casting':casting,'synopsis':synopsis,'img':img} + def cli(num): datafilepath=os.path.dirname(os.path.realpath(__file__))+"/chaine.json" @@ -152,8 +176,17 @@ def cli(num): except KeyError: print("numero de chaine inconnue") return - print (geturlprogrammetv(data[num])) + emision=parse_emmission(geturlprogrammetv(data[num])) + if emision: + print("emmision ce soir: "+emision["title"]) + if len(emision['casting']) > 0: + print("réalisateur: "+emision['casting'][0]) + print("acteur: "+str(emision['casting'][1:])) + print("synopsys: " +emision['synopsis']) + print("lien: "+emision['href']) + else: + print("pas de connection internet impossible de determiner l'émission du soir") if len(sys.argv) > 1: for i in sys.argv[1:]: diff --git a/parserHTML.py b/parserHTML.py index 986f43c..c867032 100644 --- a/parserHTML.py +++ b/parserHTML.py @@ -4,6 +4,7 @@ from bs4 import BeautifulSoup import urllib.request import json +import re def RepresentsInt(s): try: @@ -47,7 +48,20 @@ def parse_emmission(URL): html = response.read() parse=BeautifulSoup(html,"html.parser") link=parse.select_one(".prog_name") - return {'title':link['title'],'href':("https://www.programme-tv.net"+link['href'])} + response = urllib.request.urlopen(("https://www.programme-tv.net"+link['href'])) + html = response.read() + parse=BeautifulSoup(html,"html.parser") + divcasting=parse.select_one(".descriptif") + casting=divcasting.find_all(href=re.compile("biographie")) + i=0 + for actor in casting: + casting[i]=actor.text + i+=1 + divsynopsis=parse.select_one(".episode-synopsis") + img=divsynopsis.find_next('img')['data-src'] + synopsis=divsynopsis.select_one(".d-b").text + + return {'title':link['title'],'href':("https://www.programme-tv.net"+link['href']),'casting':casting,'synopsis':synopsis,'img':img} print(parse_emmission("https://www.programme-tv.net/rechercher?q=France+3"))