chainetv_web/backend/chainetv/emission.py

56 lines
1.5 KiB
Python
Raw Normal View History

2019-04-25 15:27:49 +00:00
from bs4 import BeautifulSoup
import urllib.request
import re
#debug
#import pprint
def _fetch_soup(url):
    """Download *url* and return its parsed HTML, or None on a network error."""
    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url) as response:
            html = response.read()
    except urllib.error.URLError:
        return None
    return BeautifulSoup(html, "html.parser")


def parse_emmission(strsearch):
    """Look up the programme whose listing text matches *strsearch*.

    The programme-tv.net "canal-5" listing page is fetched and searched for a
    text node containing *strsearch*; the first link found in the following
    sibling of that node's grandparent is then fetched to read the cast,
    synopsis and image of the programme.

    Returns:
        False if either page cannot be downloaded (URLError),
        the string "can't find channel" if no matching entry/link is found,
        otherwise a dict with the keys 'title', 'href', 'casting' (list of
        names or None), 'synopsis' (leading whitespace removed) and 'img'
        (value of the image's data-src attribute or None).
    """
    URL = "https://www.programme-tv.net/programme/canal-5/"

    listing = _fetch_soup(URL)
    if listing is None:
        return False

    # NOTE(review): presumably the site writes names without the accented
    # capital E - confirm against the live page.
    strsearch = strsearch.replace('É', 'E')
    linkchaine = listing.find(text=re.compile(re.escape(strsearch)))
    if linkchaine is None:
        return "can't find channel"

    # The programme link lives in the element following the matched text's
    # grandparent; treat a missing sibling/link like a missing channel instead
    # of crashing with AttributeError.
    sibling = linkchaine.parent.parent.find_next_sibling()
    link = sibling.find("a") if sibling is not None else None
    if link is None:
        return "can't find channel"
    href = link['href']

    # Same failure contract as the first download: False on a network error.
    detail = _fetch_soup(href)
    if detail is None:
        return False

    divcasting = detail.select_one(".descriptif")
    if divcasting:
        # Cast members are the links pointing at a "biographie" page.
        casting = [
            actor.text
            for actor in divcasting.find_all(href=re.compile("biographie"))
        ]
    else:
        casting = None

    img = None
    synopsis = ""
    divsynopsis = detail.select_one(".episode-synopsis")
    if divsynopsis:
        img_tag = divsynopsis.find_next('img')
        if img_tag is not None:
            img = img_tag.get('data-src')
        text_node = divsynopsis.select_one(".d-b")
        if text_node is not None:
            synopsis = text_node.text

    return {
        'title': link['title'],
        'href': href,
        'casting': casting,
        'synopsis': remove_first_space(synopsis),
        'img': img,
    }
def remove_first_space(string):
    """Return *string* with every leading whitespace character removed.

    Whitespace inside or at the end of the string is left untouched; an empty
    or all-whitespace string yields "".
    """
    # str.lstrip() with no argument strips exactly the characters for which
    # str.isspace() is true, which is what the manual counting loop did.
    return string.lstrip()