import re
import urllib.error
import urllib.request

from bs4 import BeautifulSoup
# debug
# import pprint

# Listing page that is searched for the requested programme name.
_LISTING_URL = "https://www.programme-tv.net/programme/canal-5/"


def _fetch_soup(url):
    """Download *url* and return it parsed, or None if the request fails.

    The response is used as a context manager so the underlying connection
    is always closed, even when reading raises.
    """
    try:
        with urllib.request.urlopen(url) as response:
            html = response.read()
    except urllib.error.URLError:
        return None
    return BeautifulSoup(html, "html.parser")


def parse_emmission(strsearch: str):
    """Look up a programme on the listing page and scrape its detail page.

    The listing page is searched for a text node containing ``strsearch``
    (after replacing 'É' with 'E'); the first link found in the element that
    follows the match's grandparent is then fetched and scraped.

    Args:
        strsearch: Text to search for in the listing page. It is matched
            literally (regex metacharacters are escaped).

    Returns:
        * ``False`` if either HTTP request fails with ``URLError``.
        * The string ``"can't find channel"`` if the text, or the link
          expected next to it, is not present in the listing page.
        * Otherwise a dict with the keys ``'title'``, ``'href'``,
          ``'casting'`` (list of str, empty if the casting block is absent),
          ``'synopsis'`` (str with leading whitespace removed, ``''`` if
          absent) and ``'img'`` (value of the image's ``data-src``
          attribute, or ``None`` if absent).
    """
    listing = _fetch_soup(_LISTING_URL)
    if listing is None:
        return False

    strsearch = strsearch.replace('É', 'E')
    linkchaine = listing.find(string=re.compile(re.escape(strsearch)))
    if linkchaine is None:
        return "can't find channel"

    # Walk text node -> grandparent -> next sibling -> first <a>. Any of the
    # intermediate steps can come back empty if the page layout differs, so
    # each one is checked instead of letting an AttributeError escape.
    container = linkchaine.parent.parent
    sibling = container.find_next_sibling() if container is not None else None
    link = sibling.find("a") if sibling is not None else None
    if link is None:
        return "can't find channel"

    href = link['href']
    detail = _fetch_soup(href)
    if detail is None:
        # Same failure value as for the listing request.
        return False

    divcasting = detail.select_one(".descriptif")
    casting = []
    if divcasting is not None:
        casting = [
            actor.text
            for actor in divcasting.find_all(href=re.compile("biographie"))
        ]

    img = None
    synopsis = ''
    divsynopsis = detail.select_one(".episode-synopsis")
    if divsynopsis is not None:
        img_tag = divsynopsis.find_next('img')
        if img_tag is not None:
            img = img_tag.get('data-src')
        synopsis_tag = divsynopsis.select_one(".d-b")
        if synopsis_tag is not None:
            synopsis = synopsis_tag.text

    return {
        'title': link['title'],
        'href': href,
        'casting': casting,
        'synopsis': remove_first_space(synopsis),
        'img': img,
    }


def remove_first_space(string: str) -> str:
    """Return *string* with all leading whitespace characters removed.

    >>> remove_first_space("  \\n hello world ")
    'hello world '
    """
    # str.lstrip() with no argument strips exactly the characters for which
    # str.isspace() is true, which is what the manual loop used to count.
    return string.lstrip()