#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Scrape the Canal channel list from Wikipedia and programme details
from programme-tv.net.

Running the script prints the first programme found for the hard-coded
search query; `parsechaine()` can be used to regenerate `chaine.json`.
"""

from bs4 import BeautifulSoup
import urllib.request
import json
import re


def RepresentsInt(s):
    """Return True if *s* converts cleanly to int.

    Also returns False for None (TypeError), which `Tag.string` can yield.
    """
    try:
        int(s)
        return True
    except (ValueError, TypeError):
        return False


def parsechaine():
    """Scrape the Wikipedia page listing Canal channels and write a
    {channel_number: channel_name} mapping to `chaine.json`.
    """
    URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal'
    liste_chaine = {}
    # Close the HTTP response deterministically (was leaked before).
    with urllib.request.urlopen(URL) as response:
        html = response.read()
    parse = BeautifulSoup(html, "html.parser")
    for table in parse.find_all('table'):
        # Only the channel tables carry these exact wikitable class lists.
        if table.get("class") in (['wikitable'], ['wikitable', 'sortable']):
            for tr in table.find_all('tr'):
                firstTD = tr.find()
                num = firstTD.string
                if not RepresentsInt(num):
                    continue  # header row or non-numeric first cell
                second = firstTD.find_next()
                if RepresentsInt(second.string):
                    # Some rows repeat the number in a second cell; the
                    # channel name is then one cell further along.
                    liste_chaine[num] = second.find_next().string
                else:
                    liste_chaine[num] = second.string
    print(json.dumps(liste_chaine, indent=4))
    with open('chaine.json', 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps accented French names readable
        # in the UTF-8 file instead of \uXXXX escapes.
        json.dump(liste_chaine, f, indent=4, ensure_ascii=False)


def load_jsonfile(file):
    """Load and return the JSON content of *file* (UTF-8)."""
    with open(file, 'r', encoding='utf-8') as f:
        return json.load(f)


def parse_emmission(URL):
    """Fetch the first programme matching the search page *URL* and
    return a dict with its title, page href, casting list, synopsis
    and image URL.

    NOTE(review): relies on programme-tv.net's CSS classes
    (.prog_name, .descriptif, .episode-synopsis, .d-b) — fragile if
    the site's markup changes.
    """
    with urllib.request.urlopen(URL) as response:
        html = response.read()
    parse = BeautifulSoup(html, "html.parser")
    link = parse.select_one(".prog_name")
    href = "https://www.programme-tv.net" + link['href']
    # Follow the first search hit to the programme's own page.
    with urllib.request.urlopen(href) as response:
        html = response.read()
    parse = BeautifulSoup(html, "html.parser")
    divcasting = parse.select_one(".descriptif")
    # Actor links are the anchors pointing at a biography page.
    casting = [actor.text
               for actor in divcasting.find_all(href=re.compile("biographie"))]
    divsynopsis = parse.select_one(".episode-synopsis")
    img = divsynopsis.find_next('img')['data-src']
    synopsis = divsynopsis.select_one(".d-b").text
    return {'title': link['title'],
            'href': href,
            'casting': casting,
            'synopsis': synopsis,
            'img': img}


if __name__ == "__main__":
    # Guarded so importing this module no longer triggers a network call.
    print(parse_emmission("https://www.programme-tv.net/rechercher?q=France+3"))
    # parsechaine()
    # data = load_jsonfile('chaine.json')
    # print(data["0"])