update parser html / cli et interface non dépendante

This commit is contained in:
vincent 2018-08-16 21:16:54 +02:00
parent be9d434ee0
commit e9b3636df8

View File

@ -1,6 +1,4 @@
from tkinter import * from tkinter import *
from bs4 import BeautifulSoup
import urllib.request
import json import json
import sys import sys
import os import os
@ -86,6 +84,8 @@ def RepresentsInt(s):
def parsechaine(file): def parsechaine(file):
from bs4 import BeautifulSoup
import urllib.request
URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal' URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal'
liste_chaine = {} liste_chaine = {}
response = urllib.request.urlopen(URL) response = urllib.request.urlopen(URL)
@ -96,15 +96,16 @@ def parsechaine(file):
for tr in item.find_all('tr'): for tr in item.find_all('tr'):
firstTD = tr.find() firstTD = tr.find()
num = firstTD.string num = firstTD.text
#print(num)
if RepresentsInt(num): if RepresentsInt(num):
#print(num)
if RepresentsInt(firstTD.find_next().string): if RepresentsInt(firstTD.find_next().string):
#print(firstTD.find_next().find_next().string) #print(firstTD.find_next().find_next().text)
liste_chaine[num] = firstTD.find_next().find_next().string liste_chaine[int(num)] = firstTD.find_next().find_next().text
else: else:
#print(firstTD.find_next().string) #print(firstTD.find_next().string)
liste_chaine[num] = firstTD.find_next().string liste_chaine[int(num)] = firstTD.find_next().text
print(json.dumps(liste_chaine, indent=4)) print(json.dumps(liste_chaine, indent=4))
with open(file, 'w', encoding='utf-8') as f: with open(file, 'w', encoding='utf-8') as f:
json.dump(liste_chaine, f, indent=4) json.dump(liste_chaine, f, indent=4)
@ -120,18 +121,14 @@ def cli(num):
print("numero de chaine inconnue") print("numero de chaine inconnue")
interface = Interface()
if len(sys.argv) > 1:
arg = True
else:
interface.value.set("")
arg = False
if arg == True: if len(sys.argv) > 1:
for i in sys.argv[1:]: for i in sys.argv[1:]:
if i =="update": if i =="update":
parsechaine(os.path.dirname(os.path.realpath(__file__))+"/chaine.json") parsechaine(os.path.dirname(os.path.realpath(__file__))+"/chaine.json")
else: else:
cli(i) cli(i)
else: else:
interface = Interface()
interface.value.set("")
interface.mainloop() interface.mainloop()