2018-01-22 18:11:29 +00:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import urllib.request
|
|
|
|
import json
|
|
|
|
|
|
|
|
def RepresentsInt(s):
    """Return True if ``int(s)`` succeeds, False otherwise.

    The function is a pure predicate: it never raises for values that
    ``int()`` rejects.

    Args:
        s: Any value; typically a string scraped from a page, or ``None``.

    Returns:
        bool: True when ``int(s)`` can be evaluated, False when it raises
        ``ValueError``, ``TypeError`` or ``OverflowError``.
    """
    try:
        int(s)
    except (ValueError, TypeError, OverflowError):
        # ValueError: text that is not an integer literal (also float NaN).
        # TypeError: None or another type int() does not accept.
        # OverflowError: float infinity, which int() cannot convert.
        return False
    return True
|
|
|
|
def parsechaine():
    """Scrape the channel table(s) from French Wikipedia into ``chaine.json``.

    Downloads the page at the hard-coded URL and parses it with
    BeautifulSoup's ``html.parser``. Every ``<tr>`` of each ``<table>`` whose
    ``class`` attribute is exactly ``['wikitable']`` or
    ``['wikitable', 'sortable']`` is inspected:

    * the ``.string`` of the first element returned by ``tr.find()`` is the
      key, and the row is kept only if that value passes ``RepresentsInt``;
    * the value is the ``.string`` of the next element, or of the element
      after it when the next element's ``.string`` is itself an integer.

    The resulting mapping is printed as indented JSON and written to
    ``chaine.json`` in the current working directory. Rows that do not
    contain the elements needed are skipped.

    Returns:
        None. The result is delivered through stdout and ``chaine.json``.
    """
    URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal'
    liste_chaine = {}

    # Read the body inside a context manager so the HTTP connection is
    # released even if read() fails.
    with urllib.request.urlopen(URL) as response:
        html = response.read()

    parse = BeautifulSoup(html, "html.parser")
    wanted_classes = (['wikitable'], ['wikitable', 'sortable'])

    for item in parse.find_all('table'):
        if item.get("class") not in wanted_classes:
            continue

        for tr in item.find_all('tr'):
            firstTD = tr.find()
            if firstTD is None:
                # Row without any element: nothing to read.
                continue

            num = firstTD.string
            if not RepresentsInt(num):
                continue

            name_cell = firstTD.find_next()
            if name_cell is None:
                continue

            if RepresentsInt(name_cell.string):
                # The element after the key is numeric as well, so the name
                # is taken from the element that follows it.
                name_cell = name_cell.find_next()
                if name_cell is None:
                    continue

            liste_chaine[num] = name_cell.string

    print(json.dumps(liste_chaine, indent=4))
    with open('chaine.json', 'w', encoding='utf-8') as f:
        json.dump(liste_chaine, f, indent=4)
|
|
|
|
|
|
|
|
def load_jsonfile(file):
    """Read the UTF-8 encoded JSON document at ``file`` and return it decoded.

    Args:
        file: Path of the JSON file to open for reading.

    Returns:
        The Python object produced by decoding the file's JSON content.
    """
    with open(file, mode='r', encoding='utf-8') as handle:
        content = handle.read()
    return json.loads(content)
|
|
|
|
|
2018-01-22 19:33:40 +00:00
|
|
|
# Rebuild chaine.json from the web page, then read it back and print the
# entry stored under the key "0".
parsechaine()

data = load_jsonfile('chaine.json')
print(data["0"])
|