From d62b16df39dd2c5828f5459c46b14f60a257cd10 Mon Sep 17 00:00:00 2001
From: vincent
Date: Sun, 30 Jan 2022 21:46:21 +0100
Subject: [PATCH] switch to requests API for emission

---
 backend/chainetv/emission.py | 85 +++++++++++++++++++-----------------
 backend/requirements.txt     | 20 +++++----
 2 files changed, 57 insertions(+), 48 deletions(-)

diff --git a/backend/chainetv/emission.py b/backend/chainetv/emission.py
index d79a56e..3ed35ee 100644
--- a/backend/chainetv/emission.py
+++ b/backend/chainetv/emission.py
@@ -1,79 +1,84 @@
 from bs4 import BeautifulSoup
-import urllib.request
+import requests
 import re
 from datetime import datetime, timedelta
 from time import sleep
 
-#debug
-#import pprint
+# debug
+# import pprint
+
 
 class Emmission(object):
     loading = False
+
     def __init__(self):
         self._LoadreferencePage()
 
     def _LoadreferencePage(self):
-        URL="https://www.programme-tv.net/programme/canal-5/"
-        try:
-            response = urllib.request.urlopen(URL)
-        except urllib.error.URLError:
-            return None
+        URL = "https://www.programme-tv.net/programme/canal-5/"
+        response = requests.get(URL)
         print("load")
-        self.html = BeautifulSoup(response.read(),"html.parser")
-        self.timeexp=datetime.utcnow() +timedelta(seconds=30)
+        self.html = BeautifulSoup(response.content, "html.parser")
+        self.timeexp = datetime.utcnow() + timedelta(seconds=30)
 
-    def parse_emmission(self,strsearch):
-        if ((datetime.utcnow() > self.timeexp) and (self.loading == False)):
+    def parse_emmission(self, strsearch):
+        if (datetime.utcnow() > self.timeexp) and (self.loading is False):
             self.loading = True
             self._LoadreferencePage()
             self.loading = False
         else:
-            while(self.loading):
+            while self.loading:
                 sleep(0.1)
                 pass
-        strsearch=strsearch.replace('É','E')
-        strsearch=strsearch.strip()
+        strsearch = strsearch.replace("É", "E")
+        strsearch = strsearch.strip()
         print(strsearch)
-        chaineElement=self.html.find(string=re.compile(re.escape(strsearch)))
+        chaineElement = self.html.find(string=re.compile(re.escape(strsearch)))
         if chaineElement == None:
-            strsearch=strsearch.replace(" ","")
-            chaineElement=self.html.find(string=re.compile(re.escape(strsearch)))
+            strsearch = strsearch.replace(" ", "")
+            chaineElement = self.html.find(string=re.compile(re.escape(strsearch)))
             if chaineElement == None:
                 return "can't find channel"
-        emissionElement=chaineElement.parent.parent.parent.find_next_sibling()
+        emissionElement = chaineElement.parent.parent.parent.find_next_sibling()
         print(emissionElement)
         link = emissionElement.find("a")
-        href = link['href']
+        href = link["href"]
         try:
-            img=emissionElement.find_next('img')['data-src']
+            img = emissionElement.find_next("img")["data-src"]
         except KeyError:
-            img=emissionElement.find_next('img')['src']
-        response = urllib.request.urlopen(href)
-        parse=BeautifulSoup(response.read(),"html.parser")
-        divcasting=parse.select_one(".peopleList")
-        if (divcasting):
-            casting=divcasting.find_all(href=re.compile("\/biographie.*"))
-            count=0
+            img = emissionElement.find_next("img")["src"]
+        response = requests.get(href)
+        parse = BeautifulSoup(response.content, "html.parser")
+        divcasting = parse.select_one(".peopleList")
+        if divcasting:
+            casting = divcasting.find_all(href=re.compile("\/biographie.*"))
+            count = 0
             for actor in casting:
-                casting[count]=actor['title']
-                count+=1
+                casting[count] = actor["title"]
+                count += 1
         else:
-            casting= None
-        divsynopsis=parse.select_one(".synopsis")
-        if (divsynopsis):
-            synopsis=divsynopsis.text
+            casting = None
+        divsynopsis = parse.select_one(".synopsis")
+        if divsynopsis:
+            synopsis = divsynopsis.text
         else:
-            img=None
-            synopsis=""
+            img = None
+            synopsis = ""
 
-        return {'title':link['title'],'href':href,'casting':casting,'synopsis':remove_first_space(synopsis),'img':img}
+        return {
+            "title": link["title"],
+            "href": href,
+            "casting": casting,
+            "synopsis": remove_first_space(synopsis),
+            "img": img,
+        }
 
-def remove_first_space (string):
-    space_number=0
+def remove_first_space(string):
+    space_number = 0
     for char in string:
         if char.isspace():
-            space_number+=1
+            space_number += 1
         else:
             break
     return string[space_number:]
diff --git a/backend/requirements.txt b/backend/requirements.txt
index d648332..382e298 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -1,21 +1,25 @@
 astroid==2.2.5
 beautifulsoup4==4.9.3
 bs4==0.0.1
-Click==7.0
-Flask==1.0.2
-Flask-Cors==3.0.7
+certifi==2021.10.8
+charset-normalizer==2.0.11
+click==8.0.3
+Flask==2.0.2
+Flask-Cors==3.0.10
 gunicorn==20.0.4
+idna==3.3
 isort==4.3.17
-itsdangerous==1.1.0
-Jinja2==2.10.1
+itsdangerous==2.0.1
+Jinja2==3.0.3
 lazy-object-proxy==1.3.1
-MarkupSafe==1.1.1
+MarkupSafe==2.0.1
 mccabe==0.6.1
 PyJWT==1.7.1
 pylint==2.3.1
+requests==2.27.1
 six==1.12.0
 soupsieve==1.9.1
 typed-ast==1.5.2
-Werkzeug==0.16.0
+urllib3==1.26.8
+Werkzeug==2.0.2
 wrapt==1.11.1
-