switch to requests API for emission

This commit is contained in:
vincent 2022-01-30 21:46:21 +01:00
parent db7890ff4b
commit d62b16df39
2 changed files with 57 additions and 48 deletions

View File

@ -1,79 +1,84 @@
from bs4 import BeautifulSoup
import urllib.request
import requests
import re
from datetime import datetime, timedelta
from time import sleep
#debug
#import pprint
# debug
# import pprint
class Emmission(object):
    """Scrape the current programme of a TV channel from programme-tv.net.

    The reference listing page is downloaded once at construction time and
    kept in ``self.html``; it is downloaded again when a lookup happens
    after ``self.timeexp`` (30 seconds after the last download).
    """

    # True while the reference page is being re-downloaded.
    loading = False
    # Seconds to wait for the remote server before giving up on a request.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self._LoadreferencePage()

    def _LoadreferencePage(self):
        """Download and parse the reference page and reset its expiry time.

        Raises:
            requests.RequestException: if the page cannot be fetched.
        """
        URL = "https://www.programme-tv.net/programme/canal-5/"
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(URL, timeout=self.REQUEST_TIMEOUT)
        print("load")
        self.html = BeautifulSoup(response.content, "html.parser")
        self.timeexp = datetime.utcnow() + timedelta(seconds=30)

    def parse_emmission(self, strsearch):
        """Look up the programme listed next to the channel ``strsearch``.

        Args:
            strsearch: channel name to search for in the reference page.
                "É" is replaced by "E" and surrounding whitespace is
                stripped before searching; if nothing matches, the search
                is retried with all spaces removed.

        Returns:
            The string ``"can't find channel"`` when the channel (or the
            programme link next to it) cannot be located, otherwise a dict
            with the keys ``title``, ``href``, ``casting`` (list of names
            or ``None``), ``synopsis`` and ``img``.

        Raises:
            requests.RequestException: if a page download fails.
        """
        if datetime.utcnow() > self.timeexp and not self.loading:
            self.loading = True
            try:
                self._LoadreferencePage()
            finally:
                # Always clear the flag: if the reload fails, callers
                # waiting below must not spin forever.
                self.loading = False
        else:
            # NOTE(review): presumably another thread is reloading the
            # page; wait until it is done -- confirm against the caller.
            while self.loading:
                sleep(0.1)

        strsearch = strsearch.replace("É", "E").strip()
        print(strsearch)
        chaineElement = self.html.find(string=re.compile(re.escape(strsearch)))
        if chaineElement is None:
            # Second attempt: same name with every space removed.
            strsearch = strsearch.replace(" ", "")
            chaineElement = self.html.find(
                string=re.compile(re.escape(strsearch))
            )
            if chaineElement is None:
                return "can't find channel"

        emissionElement = chaineElement.parent.parent.parent.find_next_sibling()
        print(emissionElement)
        if emissionElement is None:
            return "can't find channel"
        link = emissionElement.find("a")
        if link is None:
            return "can't find channel"
        href = link["href"]

        # Prefer the "data-src" attribute, fall back to "src"; a missing
        # <img> tag yields None instead of raising TypeError.
        image = emissionElement.find_next("img")
        if image is None:
            img = None
        elif image.has_attr("data-src"):
            img = image["data-src"]
        else:
            img = image.get("src")

        response = requests.get(href, timeout=self.REQUEST_TIMEOUT)
        parse = BeautifulSoup(response.content, "html.parser")

        divcasting = parse.select_one(".peopleList")
        if divcasting:
            biography_links = divcasting.find_all(
                href=re.compile(r"/biographie.*")
            )
            casting = [actor["title"] for actor in biography_links]
        else:
            casting = None

        divsynopsis = parse.select_one(".synopsis")
        if divsynopsis:
            synopsis = divsynopsis.text
        else:
            # NOTE(review): the image is discarded when the detail page has
            # no synopsis; kept from the original -- confirm it is intended.
            img = None
            synopsis = ""

        return {
            "title": link["title"],
            "href": href,
            "casting": casting,
            "synopsis": remove_first_space(synopsis),
            "img": img,
        }
def remove_first_space(string):
    """Return ``string`` without its leading whitespace characters.

    Args:
        string: the text to clean up.

    Returns:
        ``string`` with every leading character for which
        ``str.isspace()`` is true removed; the rest is left untouched.
    """
    # str.lstrip() with no argument strips exactly the leading whitespace
    # that the former character-by-character loop counted.
    return string.lstrip()

View File

@ -1,21 +1,25 @@
astroid==2.2.5
beautifulsoup4==4.9.3
bs4==0.0.1
Click==7.0
Flask==1.0.2
Flask-Cors==3.0.7
certifi==2021.10.8
charset-normalizer==2.0.11
click==8.0.3
Flask==2.0.2
Flask-Cors==3.0.10
gunicorn==20.0.4
idna==3.3
isort==4.3.17
itsdangerous==1.1.0
Jinja2==2.10.1
itsdangerous==2.0.1
Jinja2==3.0.3
lazy-object-proxy==1.3.1
MarkupSafe==1.1.1
MarkupSafe==2.0.1
mccabe==0.6.1
PyJWT==1.7.1
pylint==2.3.1
requests==2.27.1
six==1.12.0
soupsieve==1.9.1
typed-ast==1.5.2
Werkzeug==0.16.0
urllib3==1.26.8
Werkzeug==2.0.2
wrapt==1.11.1