chainetv_web/backend/chainetv/emission.py

85 lines
2.6 KiB
Python
Raw Normal View History

2019-04-25 15:27:49 +00:00
from bs4 import BeautifulSoup
2022-01-30 20:46:21 +00:00
import requests
2019-04-25 15:27:49 +00:00
import re
2019-05-09 17:12:41 +00:00
from datetime import datetime, timedelta
from time import sleep
2019-04-25 15:27:49 +00:00
2022-01-30 20:46:21 +00:00
# debug
# import pprint
2019-04-25 15:27:49 +00:00
2019-05-09 17:12:41 +00:00
class Emmission(object):
    """Scrape the programme listed for a TV channel on programme-tv.net.

    A parsed copy of the listing page is kept on the instance (``self.html``)
    and downloaded again once ``self.timeexp`` has passed.
    """

    # True while a refresh of the cached listing page is in progress.
    loading = False

    # Listing page that is cached and searched for channel names.
    LISTING_URL = "https://www.programme-tv.net/programme/canal-5/"
    # Lifetime of the cached listing page, in seconds.
    CACHE_SECONDS = 30
    # Upper bound, in seconds, for each HTTP request so a stalled server
    # cannot block a refresh (and every caller waiting on it) forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self._LoadreferencePage()

    def _LoadreferencePage(self):
        """Download and parse the listing page, then reset its expiry time."""
        response = requests.get(self.LISTING_URL, timeout=self.REQUEST_TIMEOUT)
        print("load")
        self.html = BeautifulSoup(response.content, "html.parser")
        self.timeexp = datetime.utcnow() + timedelta(seconds=self.CACHE_SECONDS)

    def _refresh_if_expired(self):
        """Reload the cached page when it has expired, or wait for a reload."""
        # NOTE(review): this check-then-set on `loading` is not atomic;
        # presumably it is meant to guard concurrent callers - confirm whether
        # a real lock is needed.
        if (datetime.utcnow() > self.timeexp) and (self.loading is False):
            self.loading = True
            try:
                self._LoadreferencePage()
            finally:
                # Always clear the flag: if the download raised and the flag
                # stayed True, every later call would spin in the loop below.
                self.loading = False
        else:
            while self.loading:
                sleep(0.1)

    def _find_channel(self, name):
        """Return the first text node containing ``name``, or None.

        If the name is not found as given, a second attempt is made with all
        of its spaces removed.
        """
        node = self.html.find(string=re.compile(re.escape(name)))
        if node is None:
            compact = name.replace(" ", "")
            node = self.html.find(string=re.compile(re.escape(compact)))
        return node

    def parse_emmission(self, strsearch):
        """Return details of the programme listed next to a channel name.

        Args:
            strsearch: Channel name to search for in the cached listing page.
                "É" is replaced by "E" and surrounding whitespace is removed
                before searching.

        Returns:
            The string "can't find channel" when no text node matches.
            Otherwise a dict with the keys "title", "href", "casting" (list
            of ``title`` attributes of the biography links, or None when the
            detail page has no ``.peopleList``), "synopsis" (leading
            whitespace removed, "" when absent) and "img".
        """
        self._refresh_if_expired()

        strsearch = strsearch.replace("É", "E").strip()
        print(strsearch)
        channel_node = self._find_channel(strsearch)
        if channel_node is None:
            return "can't find channel"

        # The programme entry is the sibling following the channel text's
        # great-grandparent element.
        emission_element = channel_node.parent.parent.parent.find_next_sibling()
        print(emission_element)
        link = emission_element.find("a")
        href = link["href"]

        # Prefer the "data-src" attribute and fall back to "src".
        img_tag = emission_element.find_next("img")
        try:
            img = img_tag["data-src"]
        except KeyError:
            img = img_tag["src"]

        # The linked detail page carries the cast list and the synopsis.
        response = requests.get(href, timeout=self.REQUEST_TIMEOUT)
        detail_page = BeautifulSoup(response.content, "html.parser")

        cast_block = detail_page.select_one(".peopleList")
        if cast_block is not None:
            biography_links = cast_block.find_all(href=re.compile(r"\/biographie.*"))
            casting = [actor["title"] for actor in biography_links]
        else:
            casting = None

        synopsis_block = detail_page.select_one(".synopsis")
        if synopsis_block is not None:
            synopsis = synopsis_block.text
        else:
            # NOTE(review): the image is discarded whenever the detail page
            # has no synopsis; kept as in the original - confirm it is
            # intentional.
            img = None
            synopsis = ""

        return {
            "title": link["title"],
            "href": href,
            "casting": casting,
            "synopsis": remove_first_space(synopsis),
            "img": img,
        }
2019-04-25 15:27:49 +00:00
2022-01-30 20:46:21 +00:00
def remove_first_space(string):
    """Return ``string`` with its leading whitespace removed.

    Only the run of whitespace characters at the start is dropped; inner and
    trailing whitespace is left untouched.
    """
    # str.lstrip() with no argument strips exactly the leading characters
    # for which str.isspace() is true, which is what the manual loop counted.
    return string.lstrip()