import json
import os
import urllib.request

from bs4 import BeautifulSoup

# Wikipedia page that is scraped to build the channel table.
_CHANNEL_LIST_URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal'

# Message returned by JSONfile.get_chaine for a number missing from the table.
_UNKNOWN_CHANNEL = "numero de chaine inconnue"

# ``class`` attribute values of the tables that are scanned for channel rows.
_CHANNEL_TABLE_CLASSES = (['wikitable'], ['wikitable', 'sortable'])


class JSONfile:
    """Channel-number to channel-name table backed by a JSON cache file.

    The cache file lives in the same directory as this module. When it does
    not exist yet, the table is scraped from Wikipedia and written to it.

    Attributes:
        datafilepath: Path of the JSON cache file.
        data: Mapping loaded from the cache or produced by ``parsechaine``;
            its keys are channel numbers as strings.
    """

    def __init__(self, filename):
        """Load the table from ``filename``, scraping it if the file is missing.

        Args:
            filename: Name of the JSON cache file, relative to the directory
                containing this module.
        """
        module_dir = os.path.dirname(os.path.realpath(__file__))
        self.datafilepath = module_dir + "/" + filename
        try:
            with open(self.datafilepath, 'r', encoding='utf-8') as f:
                self.data = json.load(f)
        except FileNotFoundError:
            # No cache yet: build it from the web page (also sets self.data).
            self.parsechaine()

    def get_chaine(self, number):
        """Return the name stored for channel ``number``.

        Args:
            number: Channel number, given either as an int or as a string.

        Returns:
            The stored channel name, or "numero de chaine inconnue" when the
            number is not in the table.
        """
        # Keys are always strings (JSON object keys / str(int(...)) in the
        # scraper), so normalise the argument to let integer numbers match.
        try:
            return self.data[str(number)]
        except KeyError:
            return _UNKNOWN_CHANNEL

    def parsechaine(self):
        """Scrape the channel table from Wikipedia and refresh the cache file.

        Every row of every matching table whose first tag holds an integer is
        recorded. The result is printed as JSON, stored in ``self.data`` and
        written to ``self.datafilepath``.

        Returns:
            The string "ok".
        """
        liste_chaine = {}
        # Context manager guarantees the HTTP response is closed.
        with urllib.request.urlopen(_CHANNEL_LIST_URL) as response:
            html = response.read()
        parse = BeautifulSoup(html, "html.parser")

        for table in parse.find_all('table'):
            if table.get("class") not in _CHANNEL_TABLE_CLASSES:
                continue
            for tr in table.find_all('tr'):
                first_tag = tr.find()
                if first_tag is None:
                    # Row without any tag: nothing to read.
                    continue
                num = first_tag.text
                if not RepresentsInt(num):
                    continue
                name_tag = first_tag.find_next()
                if name_tag is None:
                    continue
                if RepresentsInt(name_tag.string):
                    # The following tag is numeric as well, so the name is
                    # taken from the tag after it.
                    name_tag = name_tag.find_next()
                    if name_tag is None:
                        continue
                # str(int(...)) normalises the key (e.g. surrounding spaces).
                liste_chaine[str(int(num))] = name_tag.text

        print(json.dumps(liste_chaine, indent=4))
        self.data = liste_chaine
        with open(self.datafilepath, 'w', encoding='utf-8') as f:
            json.dump(liste_chaine, f, indent=4)
        return "ok"

    def __repr__(self):
        """Return the string form of the loaded table."""
        return str(self.data)


def RepresentsInt(s):
    """Return True when ``int(s)`` succeeds, False on ValueError or TypeError."""
    try:
        int(s)
    except (ValueError, TypeError):
        return False
    return True