switch to requests API for emission

This commit is contained in:
vincent 2022-01-30 21:46:21 +01:00
parent db7890ff4b
commit d62b16df39
2 changed files with 57 additions and 48 deletions

View File

@ -1,5 +1,5 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import urllib.request import requests
import re import re
from datetime import datetime, timedelta from datetime import datetime, timedelta
from time import sleep from time import sleep
@ -7,31 +7,30 @@ from time import sleep
# debug # debug
# import pprint # import pprint
class Emmission(object): class Emmission(object):
loading = False loading = False
def __init__(self): def __init__(self):
self._LoadreferencePage() self._LoadreferencePage()
def _LoadreferencePage(self): def _LoadreferencePage(self):
URL = "https://www.programme-tv.net/programme/canal-5/" URL = "https://www.programme-tv.net/programme/canal-5/"
try: response = requests.get(URL)
response = urllib.request.urlopen(URL)
except urllib.error.URLError:
return None
print("load") print("load")
self.html = BeautifulSoup(response.read(),"html.parser") self.html = BeautifulSoup(response.content, "html.parser")
self.timeexp = datetime.utcnow() + timedelta(seconds=30) self.timeexp = datetime.utcnow() + timedelta(seconds=30)
def parse_emmission(self, strsearch): def parse_emmission(self, strsearch):
if ((datetime.utcnow() > self.timeexp) and (self.loading == False)): if (datetime.utcnow() > self.timeexp) and (self.loading is False):
self.loading = True self.loading = True
self._LoadreferencePage() self._LoadreferencePage()
self.loading = False self.loading = False
else: else:
while(self.loading): while self.loading:
sleep(0.1) sleep(0.1)
pass pass
strsearch=strsearch.replace('É','E') strsearch = strsearch.replace("É", "E")
strsearch = strsearch.strip() strsearch = strsearch.strip()
print(strsearch) print(strsearch)
chaineElement = self.html.find(string=re.compile(re.escape(strsearch))) chaineElement = self.html.find(string=re.compile(re.escape(strsearch)))
@ -43,30 +42,36 @@ class Emmission(object):
emissionElement = chaineElement.parent.parent.parent.find_next_sibling() emissionElement = chaineElement.parent.parent.parent.find_next_sibling()
print(emissionElement) print(emissionElement)
link = emissionElement.find("a") link = emissionElement.find("a")
href = link['href'] href = link["href"]
try: try:
img=emissionElement.find_next('img')['data-src'] img = emissionElement.find_next("img")["data-src"]
except KeyError: except KeyError:
img=emissionElement.find_next('img')['src'] img = emissionElement.find_next("img")["src"]
response = urllib.request.urlopen(href) response = requests.get(href)
parse=BeautifulSoup(response.read(),"html.parser") parse = BeautifulSoup(response.content, "html.parser")
divcasting = parse.select_one(".peopleList") divcasting = parse.select_one(".peopleList")
if (divcasting): if divcasting:
casting = divcasting.find_all(href=re.compile("\/biographie.*")) casting = divcasting.find_all(href=re.compile("\/biographie.*"))
count = 0 count = 0
for actor in casting: for actor in casting:
casting[count]=actor['title'] casting[count] = actor["title"]
count += 1 count += 1
else: else:
casting = None casting = None
divsynopsis = parse.select_one(".synopsis") divsynopsis = parse.select_one(".synopsis")
if (divsynopsis): if divsynopsis:
synopsis = divsynopsis.text synopsis = divsynopsis.text
else: else:
img = None img = None
synopsis = "" synopsis = ""
return {'title':link['title'],'href':href,'casting':casting,'synopsis':remove_first_space(synopsis),'img':img} return {
"title": link["title"],
"href": href,
"casting": casting,
"synopsis": remove_first_space(synopsis),
"img": img,
}
def remove_first_space(string): def remove_first_space(string):

View File

@ -1,21 +1,25 @@
astroid==2.2.5 astroid==2.2.5
beautifulsoup4==4.9.3 beautifulsoup4==4.9.3
bs4==0.0.1 bs4==0.0.1
Click==7.0 certifi==2021.10.8
Flask==1.0.2 charset-normalizer==2.0.11
Flask-Cors==3.0.7 click==8.0.3
Flask==2.0.2
Flask-Cors==3.0.10
gunicorn==20.0.4 gunicorn==20.0.4
idna==3.3
isort==4.3.17 isort==4.3.17
itsdangerous==1.1.0 itsdangerous==2.0.1
Jinja2==2.10.1 Jinja2==3.0.3
lazy-object-proxy==1.3.1 lazy-object-proxy==1.3.1
MarkupSafe==1.1.1 MarkupSafe==2.0.1
mccabe==0.6.1 mccabe==0.6.1
PyJWT==1.7.1 PyJWT==1.7.1
pylint==2.3.1 pylint==2.3.1
requests==2.27.1
six==1.12.0 six==1.12.0
soupsieve==1.9.1 soupsieve==1.9.1
typed-ast==1.5.2 typed-ast==1.5.2
Werkzeug==0.16.0 urllib3==1.26.8
Werkzeug==2.0.2
wrapt==1.11.1 wrapt==1.11.1