58 lines
2.0 KiB
Python
58 lines
2.0 KiB
Python
import json
|
|
from bs4 import BeautifulSoup
|
|
import urllib.request
|
|
import os
|
|
|
|
class JSONfile:
    """Channel-number -> channel-name table backed by a JSON file.

    The JSON file lives next to this module. When it does not exist yet, the
    table is built by scraping a Wikipedia page and then written to that file.
    """

    def __init__(self, filename):
        """Load the table from ``filename``, scraping it if the file is missing.

        Args:
            filename: Name of the JSON file, resolved relative to the
                directory containing this module.
        """
        module_dir = os.path.dirname(os.path.realpath(__file__))
        self.datafilepath = os.path.join(module_dir, filename)
        try:
            with open(self.datafilepath, 'r', encoding='utf-8') as f:
                self.data = json.load(f)
        except FileNotFoundError:
            # No file yet: build the table from the web page and save it.
            self.parsechaine()

    def get_chaine(self, number):
        """Return the channel name stored for ``number``.

        Args:
            number: Channel number. The key is tried as given first; an int
                or a numeric string is then retried as its canonical decimal
                string, so ``12``, ``"12"`` and ``"012"`` find the same entry.

        Returns:
            The stored name, or ``"numero de chaine inconnue"`` when there is
            no entry for ``number``.
        """
        try:
            return self.data[number]
        except KeyError:
            pass
        # parsechaine() stores keys as str(int(num)) and JSON object keys are
        # always strings, so normalise the lookup key the same way.
        if isinstance(number, (int, str)):
            try:
                return self.data[str(int(number))]
            except (KeyError, ValueError):
                pass
        return "numero de chaine inconnue"

    def parsechaine(self):
        """Scrape the channel list, store it in ``self.data`` and save it.

        Only tables whose class list is exactly ``['wikitable']`` or
        ``['wikitable', 'sortable']`` are read. For each row whose first
        element holds an integer, that integer is the channel number; the
        name is taken from the following element, or from the one after it
        when the following element is itself an integer.

        Returns:
            The string ``"ok"``.

        Raises:
            urllib.error.URLError: If the page cannot be fetched.
            OSError: If the JSON file cannot be written.
        """
        URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal'
        liste_chaine = {}
        # Use a context manager so the HTTP response is always closed.
        with urllib.request.urlopen(URL) as response:
            html = response.read()
        parse = BeautifulSoup(html, "html.parser")
        accepted_classes = (['wikitable'], ['wikitable', 'sortable'])
        for item in parse.find_all('table'):
            if item.get("class") not in accepted_classes:
                continue
            for tr in item.find_all('tr'):
                firstTD = tr.find()
                if firstTD is None:
                    # Row without any child tag: nothing to read.
                    continue
                num = firstTD.text
                if not RepresentsInt(num):
                    continue
                second = firstTD.find_next()
                if second is None:
                    continue
                if RepresentsInt(second.string):
                    # Second element is also a number: the name is one further.
                    third = second.find_next()
                    if third is None:
                        continue
                    liste_chaine[str(int(num))] = third.text
                else:
                    liste_chaine[str(int(num))] = second.text
        print(json.dumps(liste_chaine, indent=4))
        self.data = liste_chaine
        with open(self.datafilepath, 'w', encoding='utf-8') as f:
            json.dump(liste_chaine, f, indent=4)
        return "ok"

    def __repr__(self):
        """Return the string form of the loaded table."""
        return str(self.data)
|
|
|
|
def RepresentsInt(s):
    """Tell whether ``int(s)`` succeeds.

    Returns:
        True when ``int(s)`` does not raise, False when it raises
        ``ValueError`` or ``TypeError``.
    """
    try:
        int(s)
    except (ValueError, TypeError):
        return False
    else:
        return True