# chainetv_web/backend/emission.py

from bs4 import BeautifulSoup
import urllib.request
import re

#debug
#import pprint

def parse_emmission(strsearch):
    """Scrape details of the programme listed for a channel.

    Downloads the listing page at ``URL``, finds the first text node that
    contains *strsearch*, follows the first link found in the element that
    comes right after that node's grandparent, and extracts the details
    from the linked page.

    Args:
        strsearch: Text to look for on the listing page. ``É`` is replaced
            by ``E`` before searching, and the text is matched literally
            (regex metacharacters are escaped).

    Returns:
        ``False`` if either page cannot be fetched (``URLError``), the
        string ``"can't find channel"`` if no text on the listing page
        matches, otherwise a dict with the keys ``title``, ``href``,
        ``casting`` (list of names), ``synopsis`` and ``img``.

    Raises:
        AttributeError, TypeError, KeyError: if the pages do not contain
            the elements/attributes looked up below (no guard is applied
            to the page layout).
    """
    URL = "https://www.programme-tv.net/programme/canal-5/"
    try:
        # The context manager closes the HTTP response once it is read.
        with urllib.request.urlopen(URL) as response:
            html = response.read()
    except urllib.error.URLError:
        return False

    parse = BeautifulSoup(html, "html.parser")
    strsearch = strsearch.replace('É', 'E')
    # `string=` is the current name of the deprecated `text=` argument.
    linkchaine = parse.find(string=re.compile(re.escape(strsearch)))
    if linkchaine is None:
        return "can't find channel"

    link = linkchaine.parent.parent.find_next_sibling().find("a")
    href = link['href']
    try:
        # Same failure contract as the first request: report False rather
        # than letting the URLError escape to the caller.
        with urllib.request.urlopen(href) as response:
            html = response.read()
    except urllib.error.URLError:
        return False

    parse = BeautifulSoup(html, "html.parser")
    divcasting = parse.select_one(".descriptif")
    # Keep only the text of the links whose href mentions "biographie".
    casting = [
        actor.text
        for actor in divcasting.find_all(href=re.compile("biographie"))
    ]
    divsynopsis = parse.select_one(".episode-synopsis")
    # First <img> after the synopsis block; the URL is read from `data-src`.
    img = divsynopsis.find_next('img')['data-src']
    synopsis = divsynopsis.select_one(".d-b").text
    return {
        'title': link['title'],
        'href': href,
        'casting': casting,
        'synopsis': remove_first_space(synopsis),
        'img': img,
    }


def remove_first_space(string: str) -> str:
    """Return *string* with all leading whitespace removed.

    Trailing and inner whitespace is left untouched. A string made only of
    whitespace yields the empty string.
    """
    # str.lstrip() with no argument strips exactly the characters for which
    # str.isspace() is true, which is what the manual scan used to do.
    return string.lstrip()