chainetv_web/backend/chainetv/Jsonfile.py

58 lines
2.0 KiB
Python
Raw Permalink Normal View History

2019-04-24 11:57:08 +00:00
import json
from bs4 import BeautifulSoup
import urllib.request
import os
class JSONfile:
    """Channel-number -> channel-name mapping backed by a JSON file.

    The mapping is loaded from *filename* (stored next to this module).
    If the file does not exist yet, it is (re)built by scraping the
    Wikipedia list of Canal channels via parsechaine().
    """

    def __init__(self, filename):
        # Resolve the data file relative to this module, not the CWD.
        self.datafilepath = os.path.dirname(os.path.realpath(__file__)) + "/" + filename
        try:
            with open(self.datafilepath, 'r', encoding='utf-8') as f:
                self.data = json.load(f)
        except FileNotFoundError:
            # First run: build the mapping by scraping Wikipedia.
            self.parsechaine()

    def get_chaine(self, number):
        """Return the channel name for *number* (a string key of the
        mapping), or a French "unknown channel" message if absent."""
        try:
            return self.data[number]
        except KeyError:
            return "numero de chaine inconnue"

    def parsechaine(self):
        """Scrape the Canal channel list from Wikipedia into self.data
        and persist it to the JSON data file.

        Returns:
            "ok" on success.
        """
        URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal'
        liste_chaine = {}
        # Use a context manager so the HTTP connection is always closed
        # (the original leaked the response object).
        with urllib.request.urlopen(URL) as response:
            html = response.read()
        parse = BeautifulSoup(html, "html.parser")
        for item in parse.find_all('table'):
            if (item.get("class") == ['wikitable'] or item.get("class") == ['wikitable', 'sortable']):
                for tr in item.find_all('tr'):
                    firstTD = tr.find()
                    num = firstTD.text
                    if RepresentsInt(num):
                        # Some rows repeat the number in the second cell;
                        # the channel name is then in the third cell.
                        if RepresentsInt(firstTD.find_next().string):
                            liste_chaine[str(int(num))] = firstTD.find_next().find_next().text
                        else:
                            liste_chaine[str(int(num))] = firstTD.find_next().text
        self.data = liste_chaine
        with open(self.datafilepath, 'w', encoding='utf-8') as f:
            json.dump(liste_chaine, f, indent=4)
        return "ok"

    def __repr__(self):
        return str(self.data)
def RepresentsInt(s):
    """Return True if *s* can be converted to an int, False otherwise.

    Accepts anything int() accepts; non-numeric strings and
    non-convertible objects (including None) yield False.
    """
    try:
        int(s)
    except (ValueError, TypeError):
        return False
    return True