chainetv_web/backend/chainetv/emission.py

import re
import threading
import urllib.error
import urllib.request
from datetime import datetime, timedelta
from time import sleep

from bs4 import BeautifulSoup

#debug
#import pprint

class Emmission(object):
    """Look up programme details scraped from programme-tv.net.

    A listing page (``REFERENCE_URL``) is downloaded and kept in ``self.html``.
    It is reused for ``CACHE_SECONDS`` seconds and then downloaded again on the
    next call to :meth:`parse_emmission`.

    Attributes:
        html: Parsed listing page, or ``None`` while no download has succeeded.
        timeexp: Naive UTC ``datetime`` after which the listing page is stale.
        loading: ``True`` while a reload of the listing page is in progress.
    """

    # Listing page searched for the requested text.
    REFERENCE_URL = "https://www.programme-tv.net/programme/canal-5/"
    # Number of seconds a downloaded listing page is reused.
    CACHE_SECONDS = 30
    # Result returned when no programme link can be located.
    NOT_FOUND = "can't find channel"

    loading = False
    # Makes the "is it stale? then reload" decision atomic, so concurrent
    # callers cannot both start a download of the listing page.
    _reload_lock = threading.Lock()

    def __init__(self):
        # Defaults first: a failed initial download must not leave the
        # attributes undefined for later parse_emmission() calls.
        self.html = None
        self.timeexp = datetime.utcnow()
        self._LoadreferencePage()

    def _LoadreferencePage(self):
        """Download and parse the listing page.

        On ``urllib.error.URLError`` the previous page (if any) and its expiry
        time are left untouched, so the next lookup retries the download.
        """
        try:
            response = urllib.request.urlopen(self.REFERENCE_URL)
        except urllib.error.URLError:
            return None
        with response:
            content = response.read()
        print("load")
        self.html = BeautifulSoup(content, "html.parser")
        self.timeexp = datetime.utcnow() + timedelta(seconds=self.CACHE_SECONDS)

    def _refresh_if_expired(self):
        """Reload the listing page when it is missing or stale."""
        with self._reload_lock:
            if self.html is None or datetime.utcnow() > self.timeexp:
                self.loading = True
                try:
                    self._LoadreferencePage()
                finally:
                    # Always cleared, even if the reload raised.
                    self.loading = False

    def parse_emmission(self, strsearch):
        """Return details of the programme listed next to ``strsearch``.

        The listing page is searched for a text node containing ``strsearch``
        (with ``É`` replaced by ``E``); if nothing matches, the search is
        repeated with all spaces removed. The link found next to the match is
        then downloaded and scraped.

        Args:
            strsearch: Text to look for in the listing page.

        Returns:
            The string ``"can't find channel"`` when the listing page is
            unavailable, the text is not found, or no usable link sits next to
            it. Otherwise a dict with the keys ``title``, ``href``, ``casting``
            (list of names, or ``None`` when the page has no ``.descriptif``
            element), ``synopsis`` (leading whitespace removed, ``""`` when
            absent) and ``img`` (value of ``data-src``, or ``None``).

        Raises:
            urllib.error.URLError: If the programme page cannot be downloaded.
        """
        self._refresh_if_expired()
        # Work on a local reference so a concurrent reload cannot swap the
        # document halfway through this lookup.
        html = self.html
        if html is None:
            return self.NOT_FOUND

        strsearch = strsearch.replace('É', 'E')
        linkchaine = html.find(text=re.compile(re.escape(strsearch)))
        if linkchaine is None:
            strsearch = strsearch.replace(" ", "")
            linkchaine = html.find(text=re.compile(re.escape(strsearch)))
        if linkchaine is None:
            return self.NOT_FOUND

        try:
            link = linkchaine.parent.parent.find_next_sibling().find("a")
            href = link['href']
            title = link['title']
        except (AttributeError, TypeError, KeyError):
            # The markup around the match is not shaped as expected.
            return self.NOT_FOUND

        with urllib.request.urlopen(href) as response:
            parse = BeautifulSoup(response.read(), "html.parser")

        divcasting = parse.select_one(".descriptif")
        if divcasting is not None:
            casting = [
                actor.text
                for actor in divcasting.find_all(href=re.compile("biographie"))
            ]
        else:
            casting = None

        img = None
        synopsis = ""
        divsynopsis = parse.select_one(".episode-synopsis")
        if divsynopsis is not None:
            img_tag = divsynopsis.find_next('img')
            if img_tag is not None:
                img = img_tag.get('data-src')
            synopsis_tag = divsynopsis.select_one(".d-b")
            if synopsis_tag is not None:
                synopsis = synopsis_tag.text

        return {
            'title': title,
            'href': href,
            'casting': casting,
            'synopsis': remove_first_space(synopsis),
            'img': img,
        }


def remove_first_space (string):
    """Return ``string`` with its leading whitespace removed.

    Trailing and interior whitespace is kept as is.
    """
    # str.lstrip() with no argument strips exactly the characters for which
    # str.isspace() is true, which is what the manual loop used to count.
    return string.lstrip()
add emission parser to api 2019-04-25 15:27:49 +00:00			`from bs4 import BeautifulSoup`
			`import urllib.request`
			`import re`
optimize parsing 2019-05-09 17:12:41 +00:00			`from datetime import datetime, timedelta`
reference page ce chargé plusieur foi a cause des connection asynchrone 2019-05-11 13:25:04 +00:00			`from time import sleep`
add emission parser to api 2019-04-25 15:27:49 +00:00
			`#debug`
			`#import pprint`

optimize parsing 2019-05-09 17:12:41 +00:00			`class Emmission(object):`
reference page ce chargé plusieur foi a cause des connection asynchrone 2019-05-11 13:25:04 +00:00			`loading = False`
optimize parsing 2019-05-09 17:12:41 +00:00			`def __init__(self):`
			`self._LoadreferencePage()`

			`def _LoadreferencePage(self):`
			`URL="https://www.programme-tv.net/programme/canal-5/"`
			`try:`
			`response = urllib.request.urlopen(URL)`
			`except urllib.error.URLError:`
			`return None`
			`print("load")`
			`self.html = BeautifulSoup(response.read(),"html.parser")`
reference page ce chargé plusieur foi a cause des connection asynchrone 2019-05-11 13:25:04 +00:00			`self.timeexp=datetime.utcnow() +timedelta(seconds=30)`
optimize parsing 2019-05-09 17:12:41 +00:00
			`def parse_emmission(self,strsearch):`
reference page ce chargé plusieur foi a cause des connection asynchrone 2019-05-11 13:25:04 +00:00			`if ((datetime.utcnow() > self.timeexp) and (self.loading == False)):`
			`self.loading = True`
optimize parsing 2019-05-09 17:12:41 +00:00			`self._LoadreferencePage()`
reference page ce chargé plusieur foi a cause des connection asynchrone 2019-05-11 13:25:04 +00:00			`self.loading = False`
			`else:`
			`while(self.loading):`
			`sleep(0.1)`
			`pass`
optimize parsing 2019-05-09 17:12:41 +00:00			`strsearch=strsearch.replace('É','E')`
			`linkchaine=self.html.find(text=re.compile(re.escape(strsearch)))`
add remove space if no result 2019-05-14 19:28:29 +00:00			`if linkchaine == None:`
			`strsearch=strsearch.replace(" ","")`
			`linkchaine=self.html.find(text=re.compile(re.escape(strsearch)))`
optimize parsing 2019-05-09 17:12:41 +00:00			`if linkchaine == None:`
			`return "can't find channel"`
			`link = linkchaine.parent.parent.find_next_sibling().find("a")`
			`href = link['href']`
			`response = urllib.request.urlopen(href)`
			`parse=BeautifulSoup(response.read(),"html.parser")`
			`divcasting=parse.select_one(".descriptif")`
			`if (divcasting):`
			`casting=divcasting.find_all(href=re.compile("biographie"))`
			`count=0`
			`for actor in casting:`
			`casting[count]=actor.text`
			`count+=1`
			`else:`
			`casting= None`
			`divsynopsis=parse.select_one(".episode-synopsis")`
			`if (divsynopsis):`
			`img=divsynopsis.find_next('img')['data-src']`
			`synopsis=divsynopsis.select_one(".d-b").text`
			`else:`
			`img=None`
			`synopsis=""`

			`return {'title':link['title'],'href':href,'casting':casting,'synopsis':remove_first_space(synopsis),'img':img}`
add emission parser to api 2019-04-25 15:27:49 +00:00

			`def remove_first_space (string):`
			`space_number=0`
			`for char in string:`
			`if char.isspace():`
			`space_number+=1`
			`else:`
			`break`
			`return string[space_number:]`