import re
import urllib.error
import urllib.request
from datetime import datetime, timedelta
from time import sleep

from bs4 import BeautifulSoup

#debug
#import pprint


class Emmission(object):
    """Scrape programme information from the programme-tv.net listing page.

    The listing page is downloaded once and kept in ``self.html``; it is
    considered stale 30 seconds after a successful download and is then
    re-fetched on the next call to :meth:`parse_emmission`.
    """

    # Listing page that is searched for the channel name.
    REFERENCE_URL = "https://www.programme-tv.net/programme/canal-5/"
    # How long a successfully downloaded listing page is reused.
    CACHE_TTL = timedelta(seconds=30)
    # Value returned by parse_emmission when no programme link can be located.
    NOT_FOUND = "can't find channel"

    # True while a reload of the listing page is in progress; other callers
    # poll this flag instead of starting a second download.
    loading = False

    def __init__(self):
        # Define the cache attributes up front so that a failed first
        # download leaves the object usable: ``html`` stays None and the
        # already-expired ``timeexp`` makes the next lookup retry the load.
        self.html = None
        self.timeexp = datetime.utcnow()
        self._LoadreferencePage()

    def _LoadreferencePage(self):
        """Download and parse the listing page, refreshing the expiry time.

        On ``urllib.error.URLError`` the method returns ``None`` and leaves
        the previously cached page (if any) and its expiry untouched.
        """
        try:
            response = urllib.request.urlopen(self.REFERENCE_URL)
        except urllib.error.URLError:
            return None
        print("load")
        # Close the HTTP response as soon as the body has been read.
        with response:
            payload = response.read()
        self.html = BeautifulSoup(payload, "html.parser")
        self.timeexp = datetime.utcnow() + self.CACHE_TTL

    def _refresh_if_expired(self):
        """Reload the listing page if it is stale, or wait for a reload in progress."""
        if datetime.utcnow() > self.timeexp and not self.loading:
            self.loading = True
            try:
                self._LoadreferencePage()
            finally:
                # Always clear the flag, otherwise an unexpected error during
                # the reload would leave every later caller polling forever.
                self.loading = False
        else:
            while self.loading:
                sleep(0.1)

    def _find_channel_text(self, name):
        """Return the first text node of the listing page containing *name*, or None."""
        return self.html.find(text=re.compile(re.escape(name)))

    @staticmethod
    def _find_programme_link(text_node):
        """Return the ``<a>`` tag reached from *text_node*, or None.

        The walk is: grandparent of the text node, then its second following
        sibling, then the first ``<a>`` inside that sibling. Any missing step
        yields None instead of raising.
        """
        parent = text_node.parent
        container = parent.parent if parent is not None else None
        for _ in range(2):
            if container is None:
                return None
            container = container.find_next_sibling()
        if container is None:
            return None
        return container.find("a")

    def parse_emmission(self, strsearch):
        """Look up the programme linked to the channel named *strsearch*.

        ``É`` in *strsearch* is replaced by ``E`` before searching; if the
        name is not found, the search is retried with all spaces removed.

        Returns a dict with the keys ``title``, ``href``, ``casting`` (list of
        link texts whose href matches "biographie", or None when the page has
        no ``.descriptif`` element), ``synopsis`` (leading whitespace removed,
        ``""`` when absent) and ``img`` (``data-src`` of the image following
        the synopsis, or None).  Returns the string ``"can't find channel"``
        when the listing page is unavailable or no programme link is found.

        Errors raised while downloading the programme page itself propagate
        to the caller.
        """
        self._refresh_if_expired()
        if self.html is None:
            # The listing page has never been downloaded successfully.
            return self.NOT_FOUND

        strsearch = strsearch.replace('É', 'E')
        linkchaine = self._find_channel_text(strsearch)
        if linkchaine is None:
            strsearch = strsearch.replace(" ", "")
            linkchaine = self._find_channel_text(strsearch)
        if linkchaine is None:
            return self.NOT_FOUND

        link = self._find_programme_link(linkchaine)
        href = link.get('href') if link is not None else None
        if not href:
            return self.NOT_FOUND

        with urllib.request.urlopen(href) as response:
            parse = BeautifulSoup(response.read(), "html.parser")

        divcasting = parse.select_one(".descriptif")
        if divcasting is not None:
            casting = [
                actor.text
                for actor in divcasting.find_all(href=re.compile("biographie"))
            ]
        else:
            casting = None

        img = None
        synopsis = ""
        divsynopsis = parse.select_one(".episode-synopsis")
        if divsynopsis is not None:
            img_tag = divsynopsis.find_next('img')
            if img_tag is not None:
                img = img_tag.get('data-src')
            text_tag = divsynopsis.select_one(".d-b")
            if text_tag is not None:
                synopsis = text_tag.text

        return {
            'title': link.get('title', ''),
            'href': href,
            'casting': casting,
            'synopsis': remove_first_space(synopsis),
            'img': img,
        }


def remove_first_space(string):
    """Return *string* with its leading whitespace characters removed."""
    return string.lstrip()