From f10ca0b5428b87da8e98f648728f5493ae7ccbfc Mon Sep 17 00:00:00 2001
From: vincent
Date: Fri, 17 Aug 2018 20:31:23 +0200
Subject: [PATCH] first commit split python project

---
Review notes (this section is ignored by `git am`):
- The hunks below were amended during review, so the blob ids on the
  `index` lines no longer describe the resulting files.

 chaineTV.py   | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++
 parserHTML.py |  58 +++++++++++++++
 2 files changed, 250 insertions(+)
 create mode 100644 chaineTV.py
 create mode 100644 parserHTML.py

diff --git a/chaineTV.py b/chaineTV.py
new file mode 100644
index 0000000..9fa76f6
--- /dev/null
+++ b/chaineTV.py
@@ -0,0 +1,192 @@
+"""Look up a TV channel by number and build its programme-tv.net search URL.
+
+Without arguments a Tk window is opened; otherwise each argument is either
+the word ``update`` (refresh chaine.json) or a channel number to print.
+"""
+from tkinter import *
+import json
+import sys
+import os
+import unicodedata
+import urllib.parse
+
+# The channel table is cached as JSON next to this script.
+DATA_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "chaine.json")
+
+
+class Interface:
+    """Tk window mapping a typed channel number to its name and search URL."""
+
+    # Unused in this file; kept so the class attribute stays available.
+    import webbrowser
+
+    def __init__(self):
+        """Load the channel table, build the widgets and bind the shortcuts."""
+        self.datafilepath = DATA_FILE
+        self.data = load_jsonfile(self.datafilepath)
+        self.fenetre = Tk()
+        self.fenetre.title("recherche de chaine")
+        self.value = StringVar()
+        self.label = Label(self.fenetre, text="entrer numero de chaine")
+        self.entree = Entry(self.fenetre, textvariable=self.value, width=30)
+        self.frame = Frame(self.fenetre)
+        self.label2 = Label(self.fenetre, text="")
+        self.bouton_update_base = Button(
+            self.fenetre, text="update la base de chaine", command=self.click_update)
+        self.bouton = Button(self.frame, text="OK", command=self.click)
+        self.reset = Button(self.frame, text="reset", command=self.click_reset)
+        self.label.pack()
+        self.entree.pack()
+        self.entree.focus_set()
+        self.frame.pack()
+        self.bouton.pack(side=LEFT)
+        self.reset.pack(side=RIGHT)
+        self.label2.pack()
+        self.bouton_update_base.pack()
+        # NOTE(review): both event sequences were empty strings, which is not
+        # a valid Tk binding. <Return> follows from the handler name; <Escape>
+        # for the reset shortcut is an assumption -- confirm the intended key.
+        self.fenetre.bind("<Return>", self.enter)
+        self.fenetre.bind("<Escape>", self.eventreset)
+
+    def enter(self, evt):
+        """Keyboard shortcut for the OK button; the event object is ignored."""
+        self.click()
+
+    def eventreset(self, evt):
+        """Keyboard shortcut for the reset button; the event object is ignored."""
+        self.click_reset()
+
+    def mainloop(self):
+        """Run the Tk event loop of the window."""
+        self.fenetre.mainloop()
+
+    def click(self):
+        """Append the name and search URL of the typed channel number to the result label."""
+        numero = self.value.get()
+        print(numero)
+        self.value.set("")
+        try:
+            nom = self.data[numero]
+        except KeyError:
+            print("numero de chaine inconnue")
+            self.label2["text"] += "numero de chaine inconnue"+"\r"
+            return
+        print(nom)
+        # End the URL line too, so the next result does not run into it.
+        self.label2["text"] += nom+"\r"+geturlprogrammetv(nom)+"\r"
+
+    def click_reset(self):
+        """Clear the result label."""
+        print("reset")
+        # Only the label is cleared: the window keeps running in its event
+        # loop, so there is nothing to quit or restart here.
+        self.label2["text"] = ""
+
+    def click_update(self):
+        """Re-scrape the channel list, reload it and report completion."""
+        parsechaine(self.datafilepath)
+        self.data = load_jsonfile(self.datafilepath)
+        self.label2["text"] += "update chaine done"+"\r"
+
+
+def load_jsonfile(file):
+    """Return the parsed JSON content of ``file``.
+
+    When the file does not exist, parsechaine() creates it first.
+    """
+    try:
+        with open(file, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    except FileNotFoundError:
+        parsechaine(file)
+        with open(file, 'r', encoding='utf-8') as f:
+            return json.load(f)
+
+
+def RepresentsInt(s):
+    """Return True when int(s) succeeds, False on ValueError or TypeError."""
+    try:
+        int(s)
+    except (ValueError, TypeError):
+        return False
+    return True
+
+
+def parsechaine(file):
+    """Scrape the Canal channel list from Wikipedia and write it to ``file``.
+
+    The JSON object maps channel numbers to channel names read from the rows
+    of the page's ``wikitable`` tables.
+    """
+    # Imported here so that only a refresh needs bs4 to be installed.
+    from bs4 import BeautifulSoup
+    import urllib.request
+    URL = 'https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal'
+    liste_chaine = {}
+    # The context manager closes the HTTP response once the page is read.
+    with urllib.request.urlopen(URL) as response:
+        html = response.read()
+    parse = BeautifulSoup(html, "html.parser")
+    for item in parse.find_all('table'):
+        if item.get("class") not in (['wikitable'], ['wikitable', 'sortable']):
+            continue
+        for tr in item.find_all('tr'):
+            firstTD = tr.find()
+            if firstTD is None:
+                # A row without any child element has no number to read.
+                continue
+            num = firstTD.text
+            if not RepresentsInt(num):
+                continue
+            # If the element after the first cell is numeric too, the name
+            # is taken from the element after that one.
+            if RepresentsInt(firstTD.find_next().string):
+                liste_chaine[int(num)] = firstTD.find_next().find_next().text
+            else:
+                liste_chaine[int(num)] = firstTD.find_next().text
+    print(json.dumps(liste_chaine, indent=4))
+    with open(file, 'w', encoding='utf-8') as f:
+        json.dump(liste_chaine, f, indent=4)
+
+
+def geturlprogrammetv(strsearch):
+    """Return the programme-tv.net search URL for the channel name ``strsearch``.
+
+    Accents are dropped, surrounding whitespace is removed and the rest is
+    URL-encoded, so characters such as ``+`` or ``&`` survive in the query.
+    """
+    sans_accent = unicodedata.normalize('NFD', strsearch).encode('ascii', 'ignore')
+    recherche = urllib.parse.quote_plus(sans_accent.decode('ascii').strip())
+    return "https://www.programme-tv.net/rechercher?q="+recherche
+
+
+def cli(num):
+    """Print the name and search URL of channel number ``num`` (a string key)."""
+    data = load_jsonfile(DATA_FILE)
+    print(num)
+    try:
+        nom = data[num]
+    except KeyError:
+        print("numero de chaine inconnue")
+        return
+    print(nom)
+    print(geturlprogrammetv(nom))
+
+
+def main(argv):
+    """Handle each argument of ``argv``, or open the window when it is empty."""
+    if not argv:
+        interface = Interface()
+        interface.value.set("")
+        interface.mainloop()
+        return
+    for arg in argv:
+        if arg == "update":
+            parsechaine(DATA_FILE)
+        else:
+            cli(arg)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/parserHTML.py b/parserHTML.py
new file mode 100644
index 0000000..686927e
--- /dev/null
+++ b/parserHTML.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Stand-alone scraper that writes the Canal channel list to chaine.json."""
+
+from bs4 import BeautifulSoup
+import urllib.request
+import json
+
+
+def RepresentsInt(s):
+    """Return True when int(s) succeeds, False on ValueError or TypeError."""
+    try:
+        int(s)
+    except (ValueError, TypeError):
+        return False
+    return True
+
+
+def parsechaine():
+    """Scrape the channel tables from Wikipedia and dump them to chaine.json."""
+    URL='https://fr.wikipedia.org/wiki/Liste_des_cha%C3%AEnes_de_Canal'
+    liste_chaine={}
+    # The context manager closes the HTTP response once the page is read.
+    with urllib.request.urlopen(URL) as response:
+        html = response.read()
+    parse=BeautifulSoup(html,"html.parser")
+    for item in parse.find_all('table'):
+        if item.get("class") not in (['wikitable'], ['wikitable', 'sortable']):
+            continue
+        for tr in item.find_all('tr'):
+            firstTD=tr.find()
+            if firstTD is None:
+                # A row without any child element has no number to read.
+                continue
+            num=firstTD.string
+            if not RepresentsInt(num):
+                continue
+            # int() keys match chaineTV.py and drop stray whitespace.
+            if RepresentsInt(firstTD.find_next().string):
+                liste_chaine[int(num)]=firstTD.find_next().find_next().string
+            else:
+                liste_chaine[int(num)]=firstTD.find_next().string
+    print(json.dumps(liste_chaine, indent=4))
+    with open('chaine.json', 'w', encoding='utf-8') as f:
+        json.dump(liste_chaine, f, indent=4)
+
+
+def load_jsonfile(file):
+    """Return the parsed JSON content of ``file``."""
+    with open(file, 'r', encoding='utf-8') as f:
+        return json.load(f)
+
+
+if __name__ == "__main__":
+    parsechaine()
+    data=load_jsonfile('chaine.json')
+    # Channel 0 may be missing from the scraped tables: print None, not a traceback.
+    print(data.get("0"))