64 lines
1.9 KiB
Python
64 lines
1.9 KiB
Python
import re
import urllib.error
import urllib.request
from datetime import datetime, timedelta

from bs4 import BeautifulSoup
|
|
|
|
#debug
|
|
#import pprint
|
|
|
|
class Emmission(object):
    """Scrape programme details from a cached TV-listing page.

    The listing page at ``REFERENCE_URL`` is downloaded once, parsed with
    BeautifulSoup and reused until ``CACHE_LIFETIME`` has elapsed.
    ``parse_emmission`` searches that page for a piece of text, follows the
    link found next to it and extracts casting, synopsis and image data from
    the linked page.
    """

    # Listing page that is searched for the requested text.
    REFERENCE_URL = "https://www.programme-tv.net/programme/canal-5/"
    # How long a downloaded listing page is reused before being re-fetched.
    CACHE_LIFETIME = timedelta(minutes=5)
    # Value returned by parse_emmission when no usable link is found.
    NOT_FOUND = "can't find channel"

    def __init__(self):
        """Create the scraper and try to download the listing page once."""
        # Set the cache attributes up front so that a failed first download
        # leaves the instance usable: parse_emmission sees an expired, empty
        # cache and retries instead of raising AttributeError.
        self.html = None
        self.timeexp = datetime.min
        self._LoadreferencePage()

    def _LoadreferencePage(self):
        """Download and parse the listing page, refreshing the cache.

        On success ``self.html`` holds the parsed page and ``self.timeexp``
        is set ``CACHE_LIFETIME`` into the future.  If the download raises
        ``urllib.error.URLError`` nothing is modified, so a previously
        cached page (if any) stays in place and the expiry stays in the
        past, which makes the next ``parse_emmission`` call retry.

        Returns:
            None in every case.
        """
        try:
            # The context manager closes the HTTP response even on error.
            with urllib.request.urlopen(self.REFERENCE_URL) as response:
                payload = response.read()
        except urllib.error.URLError:
            return None
        print("load")
        self.html = BeautifulSoup(payload, "html.parser")
        self.timeexp = datetime.utcnow() + self.CACHE_LIFETIME

    def parse_emmission(self, strsearch):
        """Look up ``strsearch`` on the listing page and scrape its details.

        Args:
            strsearch: Text to search for in the listing page.  Every 'É'
                is replaced by 'E' before searching.

        Returns:
            The string ``"can't find channel"`` when the listing page is not
            available, the text is not found, or no link with an ``href``
            sits next to it.  Otherwise a dict with the keys:

            * ``'title'``: the link's ``title`` attribute ('' if absent),
            * ``'href'``: the link target that was fetched,
            * ``'casting'``: list of link texts whose ``href`` contains
              "biographie" inside the ``.descriptif`` element, or None when
              that element is missing,
            * ``'synopsis'``: text of ``.d-b`` inside ``.episode-synopsis``
              with leading whitespace removed ('' if missing),
            * ``'img'``: ``data-src`` of the first ``<img>`` following
              ``.episode-synopsis``, or None if missing.

        Raises:
            urllib.error.URLError: if the linked detail page cannot be
                fetched.
        """
        if datetime.utcnow() > self.timeexp:
            self._LoadreferencePage()
        if self.html is None:
            # The listing page has never been downloaded successfully.
            return self.NOT_FOUND

        link = self._find_programme_link(strsearch.replace('É', 'E'))
        href = link.get('href') if link is not None else None
        if not href:
            return self.NOT_FOUND

        with urllib.request.urlopen(href) as response:
            detail = BeautifulSoup(response.read(), "html.parser")

        synopsis, img = self._extract_synopsis(detail)
        return {
            'title': link.get('title', ''),
            'href': href,
            'casting': self._extract_casting(detail),
            'synopsis': remove_first_space(synopsis),
            'img': img,
        }

    def _find_programme_link(self, strsearch):
        """Return the first ``<a>`` next to the text match, or None.

        The link is looked up in the next sibling of the matched text's
        grandparent element; every step of that walk may come up empty on
        unexpected markup, in which case None is returned.
        """
        match = self.html.find(text=re.compile(re.escape(strsearch)))
        if match is None:
            return None
        container = getattr(match.parent, "parent", None)
        if container is None:
            return None
        sibling = container.find_next_sibling()
        if sibling is None:
            return None
        return sibling.find("a")

    @staticmethod
    def _extract_casting(page):
        """Return the texts of the "biographie" links, or None without ``.descriptif``."""
        block = page.select_one(".descriptif")
        if block is None:
            return None
        return [
            actor.text
            for actor in block.find_all(href=re.compile("biographie"))
        ]

    @staticmethod
    def _extract_synopsis(page):
        """Return ``(synopsis, img)`` taken from ``.episode-synopsis``.

        Missing pieces fall back to ``""`` for the synopsis and None for the
        image instead of raising.
        """
        block = page.select_one(".episode-synopsis")
        if block is None:
            return "", None
        image = block.find_next('img')
        img = image.get('data-src') if image is not None else None
        body = block.select_one(".d-b")
        synopsis = body.text if body is not None else ""
        return synopsis, img
|
|
|
|
|
|
def remove_first_space (string):
    """Return ``string`` with its leading whitespace characters removed.

    Characters are dropped from the front for as long as ``isspace()`` is
    true for them; everything from the first non-whitespace character on is
    returned unchanged.  A string made only of whitespace yields ``""``.
    """
    # Index of the first non-whitespace character, or the full length when
    # there is none, so that the slice below comes out empty.
    start = next(
        (index for index, char in enumerate(string) if not char.isspace()),
        len(string),
    )
    return string[start:]
|
|
|