fix parsing issue

This commit is contained in:
vincent 2021-05-13 13:30:23 +02:00
parent f18dd28614
commit 4787a36d0e

View File

@ -33,15 +33,21 @@ class Emmission(object):
pass pass
strsearch=strsearch.replace('É','E') strsearch=strsearch.replace('É','E')
strsearch=strsearch.strip() strsearch=strsearch.strip()
linkchaine=self.html.find(title=re.compile(re.escape(strsearch))) print(strsearch)
if linkchaine == None: chaineElement=self.html.find(string=re.compile(re.escape(strsearch)))
if chaineElement == None:
strsearch=strsearch.replace(" ","") strsearch=strsearch.replace(" ","")
linkchaine=self.html.find(title=re.compile(re.escape(strsearch))) chaineElement=self.html.find(string=re.compile(re.escape(strsearch)))
if linkchaine == None: if chaineElement == None:
return "can't find channel" return "can't find channel"
link = linkchaine.parent.parent.find_next_sibling().find("a") emissionElement=chaineElement.parent.parent.parent.find_next_sibling()
print(emissionElement)
link = emissionElement.find("a")
href = link['href'] href = link['href']
img=linkchaine.parent.parent.find_next_sibling().find_next('img')['src'] try:
img=emissionElement.find_next('img')['data-src']
except KeyError:
img=emissionElement.find_next('img')['src']
response = urllib.request.urlopen(href) response = urllib.request.urlopen(href)
parse=BeautifulSoup(response.read(),"html.parser") parse=BeautifulSoup(response.read(),"html.parser")
divcasting=parse.select_one(".peopleList") divcasting=parse.select_one(".peopleList")
@ -53,7 +59,7 @@ class Emmission(object):
count+=1 count+=1
else: else:
casting= None casting= None
divsynopsis=parse.select_one(".synopsis-text") divsynopsis=parse.select_one(".synopsis")
if (divsynopsis): if (divsynopsis):
synopsis=divsynopsis.text synopsis=divsynopsis.text
else: else:
@ -71,4 +77,3 @@ def remove_first_space (string):
else: else:
break break
return string[space_number:] return string[space_number:]