optimize parsing

2019-05-09 19:12:41 +02:00 · 2019-05-09 19:12:41 +02:00 · 2991eb2f59
commit 2991eb2f59
parent db3001be7e
2 changed files with 50 additions and 38 deletions
--- a/backend/chainetv/api.py
+++ b/backend/chainetv/api.py
@@ -1,11 +1,13 @@
 from flask import Blueprint, jsonify, request,make_response,redirect,url_for,render_template,current_app
 from .Jsonfile import JSONfile
-from . import emission
+from .emission import Emmission
 import jwt
 from functools import wraps  
 from datetime import datetime, timedelta
 from .user import User
+
 data= JSONfile("chaine.json")
+emmission= Emmission()

 def token_required(f):  
    @wraps(f)
@@ -68,7 +70,8 @@ def get_emmission(num):
    if (chaine == "numero de chaine inconnue"):
        return make_response("",204)
    else:    
-        return jsonify(emission.parse_emmission(chaine))
+
+        return jsonify(emmission.parse_emmission(chaine))

 #@api.route('/register/', methods=('POST',))
 #def register():  
--- a/backend/chainetv/emission.py
+++ b/backend/chainetv/emission.py
@@ -1,46 +1,55 @@
 from bs4 import BeautifulSoup
 import urllib.request
 import re
+from datetime import datetime, timedelta

 #debug
 #import pprint

-def parse_emmission(strsearch):
-    URL="https://www.programme-tv.net/programme/canal-5/"
-    try:
-        response = urllib.request.urlopen(URL)
-    except urllib.error.URLError:
-        return False
-    
-    html = response.read()
-    parse=BeautifulSoup(html,"html.parser")
-    strsearch=strsearch.replace('É','E')
-    linkchaine=parse.find(text=re.compile(re.escape(strsearch)))
-    if linkchaine == None:
-        return "can't find channel"
-    link=linkchaine.parent.parent.find_next_sibling().find("a")
-    href=link['href']
-    response = urllib.request.urlopen(href)
-    html = response.read()
-    parse=BeautifulSoup(html,"html.parser")
-    divcasting=parse.select_one(".descriptif")
-    if (divcasting):
-        casting=divcasting.find_all(href=re.compile("biographie"))
-        count=0
-        for actor in casting:
-            casting[count]=actor.text
-            count+=1
-    else:
-        casting= None
-    divsynopsis=parse.select_one(".episode-synopsis")
-    if (divsynopsis):
-        img=divsynopsis.find_next('img')['data-src']
-        synopsis=divsynopsis.select_one(".d-b").text
-    else:
-        img=None
-        synopsis=""
-        
-    return {'title':link['title'],'href':href,'casting':casting,'synopsis':remove_first_space(synopsis),'img':img}
+class Emmission(object):
+    """Programme lookup on programme-tv.net with a cached reference page."""
+
+    def __init__(self):
+        self.html = self.timestamp = None  # stay None if the first load fails
+        self._LoadreferencePage()
+
+    def _LoadreferencePage(self):
+        """Fetch and parse the listing page; on URLError keep the old copy."""
+        URL="https://www.programme-tv.net/programme/canal-5/"
+        try:
+            response = urllib.request.urlopen(URL)
+        except urllib.error.URLError:
+            return None
+        print("load")
+        self.html = BeautifulSoup(response.read(),"html.parser")
+        self.timestamp=datetime.utcnow()
+
+    def parse_emmission(self,strsearch):
+        """Return a dict (title, href, casting, synopsis, img) for a channel.
+
+        Returns False when no listing page is available and the string
+        "can't find channel" when strsearch is absent from the page.
+        """
+        # Compare against the current time: the cache expires after 5 minutes.
+        if self.timestamp is None or datetime.utcnow() > self.timestamp+timedelta(minutes=5):
+            self._LoadreferencePage()
+        if self.html is None:
+            return False
+        strsearch=strsearch.replace('É','E')
+        linkchaine=self.html.find(text=re.compile(re.escape(strsearch)))
+        if linkchaine is None:
+            return "can't find channel"
+        link = linkchaine.parent.parent.find_next_sibling().find("a")
+        href = link['href']
+        with urllib.request.urlopen(href) as response:  # close the socket after reading
+            parse=BeautifulSoup(response.read(),"html.parser")
+        divcasting=parse.select_one(".descriptif")
+        casting=[a.text for a in divcasting.find_all(href=re.compile("biographie"))] if divcasting else None
+        divsynopsis=parse.select_one(".episode-synopsis")
+        img=divsynopsis.find_next('img')['data-src'] if divsynopsis else None
+        synopsis=divsynopsis.select_one(".d-b").text if divsynopsis else ""
+
+        return {'title':link['title'],'href':href,'casting':casting,'synopsis':remove_first_space(synopsis),'img':img}


 def remove_first_space (string):