From 2991eb2f597377109c3ce9cc5230ca9e72de583a Mon Sep 17 00:00:00 2001 From: vincent Date: Thu, 9 May 2019 19:12:41 +0200 Subject: [PATCH] optimize parsing --- backend/chainetv/api.py | 7 +++- backend/chainetv/emission.py | 81 ++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/backend/chainetv/api.py b/backend/chainetv/api.py index 4253d98..aeb12c0 100644 --- a/backend/chainetv/api.py +++ b/backend/chainetv/api.py @@ -1,11 +1,13 @@ from flask import Blueprint, jsonify, request,make_response,redirect,url_for,render_template,current_app from .Jsonfile import JSONfile -from . import emission +from .emission import Emmission import jwt from functools import wraps from datetime import datetime, timedelta from .user import User + data= JSONfile("chaine.json") +emmission= Emmission() def token_required(f): @wraps(f) @@ -68,7 +70,8 @@ def get_emmission(num): if (chaine == "numero de chaine inconnue"): return make_response("",204) else: - return jsonify(emission.parse_emmission(chaine)) + + return jsonify(emmission.parse_emmission(chaine)) #@api.route('/register/', methods=('POST',)) #def register(): diff --git a/backend/chainetv/emission.py b/backend/chainetv/emission.py index 8068e78..e94bd3e 100644 --- a/backend/chainetv/emission.py +++ b/backend/chainetv/emission.py @@ -1,46 +1,55 @@ from bs4 import BeautifulSoup import urllib.request import re +from datetime import datetime, timedelta #debug #import pprint -def parse_emmission(strsearch): - URL="https://www.programme-tv.net/programme/canal-5/" - try: - response = urllib.request.urlopen(URL) - except urllib.error.URLError: - return False - - html = response.read() - parse=BeautifulSoup(html,"html.parser") - strsearch=strsearch.replace('É','E') - linkchaine=parse.find(text=re.compile(re.escape(strsearch))) - if linkchaine == None: - return "can't find channel" - link=linkchaine.parent.parent.find_next_sibling().find("a") - href=link['href'] - response = 
class Emmission(object):
    """Scrape programme details for a TV channel from programme-tv.net.

    The reference listing page is downloaded and parsed once, then kept on
    the instance (``self.html``) together with the time it was fetched
    (``self.timestamp``).  The cached copy is re-downloaded once it is older
    than ``CACHE_TTL``.
    """

    # Listing page that is searched for the channel name.
    REFERENCE_URL = "https://www.programme-tv.net/programme/canal-5/"
    # Maximum age of the cached listing page before it is fetched again.
    CACHE_TTL = timedelta(minutes=5)

    def __init__(self):
        # Define the cache attributes up front so a failed first download
        # leaves the instance in an explicit "nothing cached" state instead
        # of missing attributes (which previously led to AttributeError).
        self.html = None
        self.timestamp = None
        self._LoadreferencePage()

    def _is_stale(self):
        """Return True when there is no cached page or it exceeded CACHE_TTL."""
        if self.html is None or self.timestamp is None:
            return True
        # Compare against the current time; comparing the timestamp with
        # itself plus the TTL can never be true, so the cache never expired.
        return datetime.utcnow() - self.timestamp > self.CACHE_TTL

    def _LoadreferencePage(self):
        """Download and parse the reference page, updating the cache.

        On ``urllib.error.URLError`` the previous cache (if any) is left
        untouched and ``None`` is returned.
        """
        try:
            # The context manager closes the HTTP response once it is read.
            with urllib.request.urlopen(self.REFERENCE_URL) as response:
                page = response.read()
        except urllib.error.URLError:
            return None
        self.html = BeautifulSoup(page, "html.parser")
        self.timestamp = datetime.utcnow()

    def parse_emmission(self, strsearch):
        """Return details of the programme listed next to channel *strsearch*.

        Returns:
            dict with keys ``title``, ``href``, ``casting``, ``synopsis`` and
            ``img`` on success;
            the string ``"can't find channel"`` when the channel name is not
            present on the reference page;
            ``False`` when the reference page could not be downloaded and no
            cached copy exists.

        Raises:
            urllib.error.URLError: if the programme detail page cannot be
                fetched (not handled here, as before).
        """
        if self._is_stale():
            self._LoadreferencePage()
        if self.html is None:
            # Download failed and there is no older copy to fall back on.
            return False

        # Channel names are matched without the accented capital E.
        strsearch = strsearch.replace('É', 'E')
        linkchaine = self.html.find(text=re.compile(re.escape(strsearch)))
        if linkchaine is None:
            return "can't find channel"

        # The programme link sits in the element following the channel
        # name's grandparent node.
        link = linkchaine.parent.parent.find_next_sibling().find("a")
        href = link['href']
        with urllib.request.urlopen(href) as response:
            parse = BeautifulSoup(response.read(), "html.parser")

        divcasting = parse.select_one(".descriptif")
        if divcasting:
            # Keep only the visible text of each biography link.
            casting = [
                actor.text
                for actor in divcasting.find_all(href=re.compile("biographie"))
            ]
        else:
            casting = None

        divsynopsis = parse.select_one(".episode-synopsis")
        if divsynopsis:
            img = divsynopsis.find_next('img')['data-src']
            synopsis = divsynopsis.select_one(".d-b").text
        else:
            img = None
            synopsis = ""

        return {
            'title': link['title'],
            'href': href,
            'casting': casting,
            'synopsis': remove_first_space(synopsis),
            'img': img,
        }