# chainetv_web/backend/Jsonfile.py

import json
from bs4 import BeautifulSoup
import urllib.request
import os

class JSONfile:
    """Channel-number to channel-name table cached in a JSON file.

    The table is a dict whose keys are channel numbers stored as decimal
    strings (``"1"``, ``"2"``, ...) and whose values are channel names.
    It is loaded from a JSON file located next to this module; when that
    file is missing or cannot be decoded, the table is rebuilt by scraping
    the French Wikipedia page referenced in :meth:`parsechaine`.
    """

    def __init__(self, filename):
        """Load the table from *filename*, rebuilding it when unusable.

        Args:
            filename: Name of the JSON cache file, appended to the
                directory that contains this module.

        Raises:
            Whatever :meth:`parsechaine` raises (for example a network
            error) when the cache has to be rebuilt.
        """
        # The cache file lives in the same directory as this module.
        self.datafilepath = os.path.dirname(os.path.realpath(__file__)) + "/" + filename
        try:
            with open(self.datafilepath, 'r', encoding='utf-8') as f:
                self.data = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            # A missing cache and a truncated/corrupt one are handled the
            # same way: regenerate it instead of failing on every start.
            self.parsechaine()

    def get_chaine(self, number):
        """Return the channel name registered for *number*.

        Args:
            number: Channel number, as a string (``"7"``) or an int (``7``).

        Returns:
            The channel name, or the string ``"numero de chaine inconnue"``
            when the number is not in the table.
        """
        # Keys are always written as str(int(...)), so normalise the lookup
        # key to a string; otherwise an integer argument could never match.
        try:
            return self.data[str(number)]
        except KeyError:
            return "numero de chaine inconnue"

    def parsechaine(self):
        """Rebuild the table from Wikipedia and write it to the cache file.

        Downloads the page, walks every table whose ``class`` attribute is
        exactly ``wikitable`` or ``wikitable sortable``, and for each row
        whose first element holds an integer records the text of a
        following element as the channel name. The result is printed,
        stored in ``self.data`` and dumped to ``self.datafilepath``.

        Returns:
            The string ``"ok"``.
        """
        URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal'
        liste_chaine = {}

        # Use the response as a context manager so the connection is
        # released even if reading fails.
        with urllib.request.urlopen(URL) as response:
            html = response.read()

        parse = BeautifulSoup(html, "html.parser")
        wanted_classes = (['wikitable'], ['wikitable', 'sortable'])

        for table in parse.find_all('table'):
            if table.get("class") not in wanted_classes:
                continue

            for tr in table.find_all('tr'):
                first_cell = tr.find()
                if first_cell is None:
                    # Row without any child element: nothing to read.
                    continue

                num = first_cell.text
                if not RepresentsInt(num):
                    continue

                # The name is normally in the element following the number.
                # When that element itself holds an integer, the name is
                # taken from the element after it instead.
                candidate = first_cell.find_next()
                if candidate is not None and RepresentsInt(candidate.string):
                    candidate = candidate.find_next()
                if candidate is None:
                    # Ran off the end of the document; skip this row.
                    continue

                liste_chaine[str(int(num))] = candidate.text

        print(json.dumps(liste_chaine, indent=4))
        self.data = liste_chaine
        with open(self.datafilepath, 'w', encoding='utf-8') as f:
            json.dump(liste_chaine, f, indent=4)
        return "ok"

    def __repr__(self):
        """Return the string form of the underlying table."""
        return str(self.data)

def RepresentsInt(s):
    """Tell whether ``int(s)`` succeeds.

    Args:
        s: Any value; typically a string taken from parsed HTML, possibly
            ``None``.

    Returns:
        ``True`` when ``int(s)`` can be evaluated, ``False`` when it raises
        ``ValueError`` or ``TypeError``.
    """
    try:
        int(s)
    except (ValueError, TypeError):
        # ValueError: not a numeric literal; TypeError: unsupported type
        # such as None.
        return False
    else:
        return True