# chainetv_web/backend/chainetv/emission.py

from bs4 import BeautifulSoup
import requests
import re
from datetime import datetime, timedelta
from time import sleep

# debug
# import pprint


class Emmission(object):
    """Look up a channel's programme on a cached programme-tv.net page.

    A reference listing page is downloaded when the instance is created and
    kept in ``self.html``. It is considered stale 30 seconds after each
    download (``self.timeexp``) and is re-fetched by the next lookup.
    """

    # Set to True on the instance while it is re-downloading the reference
    # page; other callers of parse_emmission() poll this flag and wait.
    loading = False

    # Seconds to wait on the remote server before a request is abandoned.
    # Without a timeout a stalled connection would block forever while
    # ``loading`` is True, freezing every waiting caller as well.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Download the reference page so the first lookup can be served."""
        self._LoadreferencePage()

    def _LoadreferencePage(self):
        """Fetch and parse the reference page and restart the expiry timer.

        Sets ``self.html`` to the parsed document and ``self.timeexp`` to
        30 seconds from now (naive UTC). Network errors raised by
        ``requests`` propagate to the caller.
        """
        URL = "https://www.programme-tv.net/programme/canal-5/"
        response = requests.get(URL, timeout=self.REQUEST_TIMEOUT)
        print("load")
        self.html = BeautifulSoup(response.content, "html.parser")
        self.timeexp = datetime.utcnow() + timedelta(seconds=30)

    def parse_emmission(self, strsearch):
        """Return details of the programme listed for a channel.

        Args:
            strsearch: Channel name to look for in the reference page. "É" is
                replaced by "E" and surrounding whitespace is stripped before
                searching; if nothing matches, the search is retried with all
                spaces removed.

        Returns:
            The string ``"can't find channel"`` when the channel text, the
            programme block following it, or the programme link cannot be
            located. Otherwise a dict with the keys ``title``, ``href``,
            ``casting`` (list of names, or None when the detail page has no
            ``.peopleList`` element), ``synopsis`` (leading whitespace
            removed, ``""`` when absent) and ``img`` (image URL or None).

        Raises:
            KeyError: If the programme link lacks an ``href`` or ``title``
                attribute, or a casting link lacks a ``title`` attribute.
            requests.RequestException: If a page download fails.
        """
        if (datetime.utcnow() > self.timeexp) and (self.loading is False):
            self.loading = True
            try:
                self._LoadreferencePage()
            finally:
                # Always release the flag: if the download raised and the flag
                # stayed True, every later caller would spin forever below.
                self.loading = False
        else:
            # Another call is refreshing the page; wait until it is done.
            while self.loading:
                sleep(0.1)

        strsearch = strsearch.replace("É", "E")
        strsearch = strsearch.strip()
        print(strsearch)

        # The channel name is user input, so escape it before using it as a
        # regular expression.
        chaineElement = self.html.find(string=re.compile(re.escape(strsearch)))
        if chaineElement is None:
            strsearch = strsearch.replace(" ", "")
            chaineElement = self.html.find(string=re.compile(re.escape(strsearch)))
        if chaineElement is None:
            return "can't find channel"

        # The programme block is the next sibling of the channel text's
        # great-grandparent. Any missing ancestor shows up as an
        # AttributeError on None, which is treated as "not found".
        try:
            emissionElement = chaineElement.parent.parent.parent.find_next_sibling()
        except AttributeError:
            emissionElement = None
        print(emissionElement)
        link = emissionElement.find("a") if emissionElement is not None else None
        if link is None:
            # Report an unexpected page layout through the existing failure
            # value instead of crashing on None.
            return "can't find channel"
        href = link["href"]

        # Prefer the "data-src" attribute and fall back to "src"; tolerate a
        # programme block that is not followed by any image.
        imgElement = emissionElement.find_next("img")
        if imgElement is None:
            img = None
        else:
            img = imgElement.get("data-src")
            if img is None:
                img = imgElement.get("src")

        response = requests.get(href, timeout=self.REQUEST_TIMEOUT)
        parse = BeautifulSoup(response.content, "html.parser")

        divcasting = parse.select_one(".peopleList")
        if divcasting:
            # Keep only the links that point at a biography page and collect
            # their "title" attributes.
            casting = [
                actor["title"]
                for actor in divcasting.find_all(href=re.compile(r"/biographie.*"))
            ]
        else:
            casting = None

        divsynopsis = parse.select_one(".synopsis")
        if divsynopsis:
            synopsis = divsynopsis.text
        else:
            # NOTE(review): the image is discarded whenever the synopsis is
            # missing; kept as in the original - confirm this is intended.
            img = None
            synopsis = ""

        return {
            "title": link["title"],
            "href": href,
            "casting": casting,
            "synopsis": remove_first_space(synopsis),
            "img": img,
        }


def remove_first_space(string):
    """Return *string* with all leading whitespace removed.

    Despite the name, every leading whitespace character (spaces, tabs,
    newlines, ...) is dropped, not only the first one. Interior and trailing
    whitespace is left untouched.
    """
    # str.lstrip() with no argument strips exactly the characters for which
    # str.isspace() is true, replacing the manual counting loop.
    return string.lstrip()