chainetv_web/backend/chainetv/emission.py

80 lines
2.6 KiB
Python
Raw Normal View History

2019-04-25 15:27:49 +00:00
from bs4 import BeautifulSoup
import urllib.request
import re
2019-05-09 17:12:41 +00:00
from datetime import datetime, timedelta
from time import sleep
2019-04-25 15:27:49 +00:00
#debug
#import pprint
2019-05-09 17:12:41 +00:00
class Emmission(object):
    """Look up the programme shown next to a channel name on programme-tv.net.

    The reference page is downloaded once, kept as a parsed BeautifulSoup
    tree in ``self.html`` and downloaded again once ``self.timeexp`` has
    passed.
    """

    # Reference page that is downloaded and searched for channel names.
    REFERENCE_URL = "https://www.programme-tv.net/programme/canal-5/"
    # Number of seconds a downloaded reference page is considered fresh.
    CACHE_SECONDS = 30
    # Value returned by parse_emmission() when no programme can be resolved.
    NOT_FOUND = "can't find channel"
    # True while a caller is re-downloading the reference page; other callers
    # poll this flag and wait instead of starting a second download.
    loading = False

    def __init__(self):
        # Start with an empty, already-expired cache so that a failed first
        # download still leaves the instance in a usable state (the next
        # parse_emmission() call simply retries).
        self.html = None
        self.timeexp = datetime.utcnow()
        self._LoadreferencePage()

    def _LoadreferencePage(self):
        """Download and parse the reference page, refreshing the cache.

        On ``urllib.error.URLError`` the previous ``self.html`` and
        ``self.timeexp`` are left untouched and ``None`` is returned, so the
        download is attempted again on the next lookup.
        """
        try:
            # The context manager closes the HTTP response once it is read.
            with urllib.request.urlopen(self.REFERENCE_URL) as response:
                page = response.read()
        except urllib.error.URLError:
            return None
        print("load")
        self.html = BeautifulSoup(page, "html.parser")
        self.timeexp = datetime.utcnow() + timedelta(seconds=self.CACHE_SECONDS)

    def parse_emmission(self, strsearch):
        """Return details of the programme listed for channel ``strsearch``.

        Args:
            strsearch: Channel name to search for in the reference page. It is
                matched literally, first as given (after replacing 'É' with
                'E' and trimming surrounding whitespace), then with its
                spaces removed.

        Returns:
            A dict with the keys ``'title'``, ``'href'``, ``'casting'`` (list
            of names, or ``None`` when the detail page has no ``.peopleList``
            element), ``'synopsis'`` and ``'img'``; or the string
            ``"can't find channel"`` when the reference page is unavailable
            or the channel / its programme link cannot be located.

        Raises:
            urllib.error.URLError: If the programme detail page cannot be
                downloaded.
        """
        if datetime.utcnow() > self.timeexp and not self.loading:
            self.loading = True
            try:
                self._LoadreferencePage()
            finally:
                # Always release the flag, otherwise a failing reload would
                # leave every other caller waiting in the loop below forever.
                self.loading = False
        else:
            while self.loading:
                sleep(0.1)

        # Work on a local reference so a concurrent reload cannot swap the
        # tree halfway through this lookup.
        html = self.html
        if html is None:
            # The reference page has never been downloaded successfully.
            return self.NOT_FOUND

        strsearch = strsearch.replace('É', 'E')
        strsearch = strsearch.strip()
        print(strsearch)
        chaine_element = html.find(string=re.compile(re.escape(strsearch)))
        if chaine_element is None:
            # Second attempt: same name without any spaces.
            strsearch = strsearch.replace(" ", "")
            chaine_element = html.find(string=re.compile(re.escape(strsearch)))
            if chaine_element is None:
                return self.NOT_FOUND

        # The programme markup is the next sibling of the channel name's
        # great-grandparent element.
        emission_element = chaine_element.parent.parent.parent.find_next_sibling()
        print(emission_element)
        if emission_element is None:
            return self.NOT_FOUND
        link = emission_element.find("a")
        if link is None:
            return self.NOT_FOUND
        href = link['href']

        # Prefer the 'data-src' attribute and fall back to 'src'.
        img = None
        img_tag = emission_element.find_next('img')
        if img_tag is not None:
            img = img_tag.get('data-src')
            if img is None:
                img = img_tag.get('src')

        with urllib.request.urlopen(href) as response:
            parse = BeautifulSoup(response.read(), "html.parser")

        divcasting = parse.select_one(".peopleList")
        if divcasting:
            casting = [
                actor['title']
                for actor in divcasting.find_all(href=re.compile(r"/biographie.*"))
            ]
        else:
            casting = None

        divsynopsis = parse.select_one(".synopsis")
        if divsynopsis:
            synopsis = divsynopsis.text
        else:
            # NOTE(review): the image is discarded together with the synopsis;
            # kept as in the original implementation - confirm this is intended.
            img = None
            synopsis = ""
        return {'title':link['title'],'href':href,'casting':casting,'synopsis':remove_first_space(synopsis),'img':img}
2019-04-25 15:27:49 +00:00
def remove_first_space (string):
    """Return ``string`` without its leading whitespace characters.

    Only the run of whitespace at the very start is dropped; whitespace
    inside or at the end of the text is kept.
    """
    return string.lstrip()