85 lines
2.6 KiB
Python
85 lines
2.6 KiB
Python
from bs4 import BeautifulSoup
|
|
import requests
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
from time import sleep
|
|
|
|
# debug
|
|
# import pprint
|
|
|
|
|
|
class Emmission(object):
    """Look up a TV channel's programme entry on programme-tv.net.

    The listing page is downloaded when the object is created and kept in
    ``self.html``. ``parse_emmission`` downloads it again once the cached
    copy is older than ``CACHE_SECONDS``.
    """

    # True while ``parse_emmission`` is re-downloading the listing page;
    # other calls poll this flag and wait instead of starting a second
    # download.
    loading = False

    # Listing page searched for channel names.
    REFERENCE_URL = "https://www.programme-tv.net/programme/canal-5/"
    # Number of seconds a downloaded listing page is reused.
    CACHE_SECONDS = 30
    # Timeout (seconds) passed to every HTTP request so a stalled server
    # cannot block the caller indefinitely.
    REQUEST_TIMEOUT = 10
    # Value returned by ``parse_emmission`` when the lookup fails.
    NOT_FOUND = "can't find channel"

    def __init__(self):
        self._LoadreferencePage()

    def _LoadreferencePage(self):
        """Download the listing page into ``self.html`` and reset its expiry.

        Raises:
            requests.exceptions.RequestException: if the download fails or
                times out.
        """
        response = requests.get(self.REFERENCE_URL, timeout=self.REQUEST_TIMEOUT)
        print("load")
        self.html = BeautifulSoup(response.content, "html.parser")
        self.timeexp = datetime.utcnow() + timedelta(seconds=self.CACHE_SECONDS)

    def _find_channel_text(self, name):
        """Return the first text node of the listing page containing *name*, or None."""
        return self.html.find(string=re.compile(re.escape(name)))

    def parse_emmission(self, strsearch):
        """Return details of the programme entry that follows a channel name.

        Args:
            strsearch: Channel name to look for in the listing page. "É" is
                replaced by "E" and surrounding whitespace is stripped
                before searching; if nothing matches, the search is retried
                with all spaces removed.

        Returns:
            The string "can't find channel" when the channel name, the
            element following it, or its link cannot be located. Otherwise
            a dict with the keys:
              - "title": ``title`` attribute of the entry's first link
              - "href": ``href`` attribute of that link
              - "casting": ``title`` attributes of the links inside the
                ``.peopleList`` element of the linked page whose ``href``
                matches ``/biographie``, or None without such an element
              - "synopsis": text of the ``.synopsis`` element of the linked
                page with leading whitespace removed, or "" when absent
              - "img": ``data-src`` (falling back to ``src``) of the next
                ``<img>`` after the entry, or None

        Raises:
            requests.exceptions.RequestException: if a page download fails
                or times out.
        """
        if (datetime.utcnow() > self.timeexp) and (self.loading is False):
            self.loading = True
            try:
                self._LoadreferencePage()
            finally:
                # Always clear the flag: if the download raised and the flag
                # stayed set, every later call would wait forever below.
                self.loading = False
        else:
            # Another call is refreshing the page; wait until it is done.
            while self.loading:
                sleep(0.1)

        strsearch = strsearch.replace("É", "E").strip()
        print(strsearch)

        chaineElement = self._find_channel_text(strsearch)
        if chaineElement is None:
            # Retry with the spaces removed from the channel name.
            chaineElement = self._find_channel_text(strsearch.replace(" ", ""))
        if chaineElement is None:
            return self.NOT_FOUND

        # The programme entry is the next sibling of the channel name's
        # third ancestor.
        emissionElement = chaineElement.parent.parent.parent.find_next_sibling()
        print(emissionElement)
        link = emissionElement.find("a") if emissionElement is not None else None
        if link is None:
            # No entry or no link after the channel name: report it with the
            # existing failure value instead of raising AttributeError.
            return self.NOT_FOUND
        href = link["href"]

        # Prefer "data-src" and fall back to "src"; None when there is no
        # <img> at all or it carries neither attribute.
        img_tag = emissionElement.find_next("img")
        img = None
        if img_tag is not None:
            img = img_tag.get("data-src", img_tag.get("src"))

        response = requests.get(href, timeout=self.REQUEST_TIMEOUT)
        parse = BeautifulSoup(response.content, "html.parser")

        divcasting = parse.select_one(".peopleList")
        if divcasting:
            biography_links = divcasting.find_all(href=re.compile(r"/biographie.*"))
            casting = [actor["title"] for actor in biography_links]
        else:
            casting = None

        divsynopsis = parse.select_one(".synopsis")
        if divsynopsis:
            synopsis = divsynopsis.text
        else:
            # NOTE(review): the image is discarded whenever the page has no
            # synopsis; kept as in the original -- confirm this is intended.
            img = None
            synopsis = ""

        return {
            "title": link["title"],
            "href": href,
            "casting": casting,
            "synopsis": remove_first_space(synopsis),
            "img": img,
        }
|
|
|
|
|
|
def remove_first_space(string):
    """Return *string* with all of its leading whitespace removed.

    Despite the name, every leading whitespace character is dropped, not
    only the first one. Whitespace inside or at the end of the string is
    kept.

    Args:
        string: The text to trim.

    Returns:
        The text starting at its first non-whitespace character, or "" when
        the text is empty or consists only of whitespace.
    """
    # str.lstrip() with no argument strips exactly the characters for which
    # str.isspace() is true, which is what the manual counting loop did.
    return string.lstrip()
|