optimize parsing
This commit is contained in:
parent
db3001be7e
commit
2991eb2f59
@ -1,11 +1,13 @@
|
|||||||
from flask import Blueprint, jsonify, request,make_response,redirect,url_for,render_template,current_app
|
from flask import Blueprint, jsonify, request,make_response,redirect,url_for,render_template,current_app
|
||||||
from .Jsonfile import JSONfile
|
from .Jsonfile import JSONfile
|
||||||
from . import emission
|
from .emission import Emmission
|
||||||
import jwt
|
import jwt
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from .user import User
|
from .user import User
|
||||||
|
|
||||||
data= JSONfile("chaine.json")
|
data= JSONfile("chaine.json")
|
||||||
|
emmission= Emmission()
|
||||||
|
|
||||||
def token_required(f):
|
def token_required(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
@ -68,7 +70,8 @@ def get_emmission(num):
|
|||||||
if (chaine == "numero de chaine inconnue"):
|
if (chaine == "numero de chaine inconnue"):
|
||||||
return make_response("",204)
|
return make_response("",204)
|
||||||
else:
|
else:
|
||||||
return jsonify(emission.parse_emmission(chaine))
|
|
||||||
|
return jsonify(emmission.parse_emmission(chaine))
|
||||||
|
|
||||||
#@api.route('/register/', methods=('POST',))
|
#@api.route('/register/', methods=('POST',))
|
||||||
#def register():
|
#def register():
|
||||||
|
@ -1,46 +1,55 @@
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import re
|
import re
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
#debug
|
#debug
|
||||||
#import pprint
|
#import pprint
|
||||||
|
|
||||||
def parse_emmission(strsearch):
|
class Emmission(object):
|
||||||
URL="https://www.programme-tv.net/programme/canal-5/"
|
|
||||||
try:
|
|
||||||
response = urllib.request.urlopen(URL)
|
|
||||||
except urllib.error.URLError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
html = response.read()
|
def __init__(self):
|
||||||
parse=BeautifulSoup(html,"html.parser")
|
self._LoadreferencePage()
|
||||||
strsearch=strsearch.replace('É','E')
|
|
||||||
linkchaine=parse.find(text=re.compile(re.escape(strsearch)))
|
|
||||||
if linkchaine == None:
|
|
||||||
return "can't find channel"
|
|
||||||
link=linkchaine.parent.parent.find_next_sibling().find("a")
|
|
||||||
href=link['href']
|
|
||||||
response = urllib.request.urlopen(href)
|
|
||||||
html = response.read()
|
|
||||||
parse=BeautifulSoup(html,"html.parser")
|
|
||||||
divcasting=parse.select_one(".descriptif")
|
|
||||||
if (divcasting):
|
|
||||||
casting=divcasting.find_all(href=re.compile("biographie"))
|
|
||||||
count=0
|
|
||||||
for actor in casting:
|
|
||||||
casting[count]=actor.text
|
|
||||||
count+=1
|
|
||||||
else:
|
|
||||||
casting= None
|
|
||||||
divsynopsis=parse.select_one(".episode-synopsis")
|
|
||||||
if (divsynopsis):
|
|
||||||
img=divsynopsis.find_next('img')['data-src']
|
|
||||||
synopsis=divsynopsis.select_one(".d-b").text
|
|
||||||
else:
|
|
||||||
img=None
|
|
||||||
synopsis=""
|
|
||||||
|
|
||||||
return {'title':link['title'],'href':href,'casting':casting,'synopsis':remove_first_space(synopsis),'img':img}
|
def _LoadreferencePage(self):
    """Download and parse the reference TV-listing page, caching it on the instance.

    On success, stores the parsed page in ``self.html`` and the load time
    (naive UTC) in ``self.timestamp``.

    Returns:
        None. When the page cannot be opened (``urllib.error.URLError``) the
        method returns early and leaves ``self.html`` / ``self.timestamp``
        untouched, so any previously cached page stays in place.
    """
    URL = "https://www.programme-tv.net/programme/canal-5/"
    try:
        response = urllib.request.urlopen(URL)
    except urllib.error.URLError:
        return None
    print("load")
    # Close the HTTP response as soon as the body has been read.
    with response:
        markup = response.read()
    self.html = BeautifulSoup(markup, "html.parser")
    # Naive UTC timestamp; compared against datetime.utcnow() to expire the cache.
    self.timestamp = datetime.utcnow()
|
||||||
|
|
||||||
|
def parse_emmission(self, strsearch):
    """Scrape the programme listed for a channel on the cached reference page.

    The reference listing held in ``self.html`` is reloaded through
    ``self._LoadreferencePage()`` when it was never loaded or when it is
    more than five minutes old.

    Args:
        strsearch: Channel name to look for in the listing. Every 'É' is
            replaced by 'E' before searching.

    Returns:
        * ``False`` when no reference listing is available (the page could
          not be loaded).
        * The string ``"can't find channel"`` when the name is not found
          in the listing.
        * Otherwise a dict with the keys ``title``, ``href``, ``casting``
          (list of names, or None), ``synopsis`` and ``img`` (or None).
    """
    # Compare the load time with *now*; comparing the timestamp with itself
    # plus five minutes could never be true, so the cache never expired.
    loaded_at = getattr(self, "timestamp", None)
    if loaded_at is None or datetime.utcnow() > loaded_at + timedelta(minutes=5):
        self._LoadreferencePage()

    # The loader leaves the attribute unset when the very first download fails.
    listing = getattr(self, "html", None)
    if listing is None:
        return False

    strsearch = strsearch.replace('É', 'E')
    linkchaine = listing.find(text=re.compile(re.escape(strsearch)))
    if linkchaine is None:
        return "can't find channel"

    # The programme link sits in the sibling following the channel-name container.
    link = linkchaine.parent.parent.find_next_sibling().find("a")
    href = link['href']
    with urllib.request.urlopen(href) as response:
        parse = BeautifulSoup(response.read(), "html.parser")

    divcasting = parse.select_one(".descriptif")
    if divcasting:
        # Keep only the visible names of the links pointing at biography pages.
        casting = [
            actor.text
            for actor in divcasting.find_all(href=re.compile("biographie"))
        ]
    else:
        casting = None

    divsynopsis = parse.select_one(".episode-synopsis")
    if divsynopsis:
        img = divsynopsis.find_next('img')['data-src']
        synopsis = divsynopsis.select_one(".d-b").text
    else:
        img = None
        synopsis = ""

    return {
        'title': link['title'],
        'href': href,
        'casting': casting,
        'synopsis': remove_first_space(synopsis),
        'img': img,
    }
|
||||||
|
|
||||||
|
|
||||||
def remove_first_space (string):
|
def remove_first_space (string):
|
||||||
|
Loading…
Reference in New Issue
Block a user