#! /usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Scratch script for scraping event data from judoverbandsachsen.de.

The module defines small record classes (``shiai``, ``event``, ``wk``), a
parser for one calendar entry (``parseJvsEvent``) and a sequence of
top-level experiments.  Only the first experiment runs: it ends with
``exit()``, so the later sections are kept for reference but never execute.
"""

import re
import requests
from bs4 import BeautifulSoup
import datetime
import json
import jvsCal

# Collapses any run of whitespace (including newlines inside a tag's text).
_WHITESPACE_RE = re.compile(r"\s+")


def _parseTimespan(text, year):
    """Turn calendar date text into a list of ISO date strings.

    The text is split on "-" into one or two dates and each date on ".";
    the first field is used as the day and the second as the month, so the
    input is expected to look like ``DD.MM.`` or ``DD.MM. - DD.MM.``.

    Args:
        text: raw text of a ``<time>`` element.
        year: year to prepend, since it is not taken from the text.

    Returns:
        A list of ``YYYY-MM-DD`` strings, one per date found.

    Raises:
        IndexError: if a date part contains no "." separator.
    """
    normalized = _WHITESPACE_RE.sub(" ", text.strip())
    dateParts = [part.strip() for part in normalized.split("-")]
    return [
        "-".join([str(year), dayMonth[1], dayMonth[0]])
        for dayMonth in (part.split(".") for part in dateParts)
    ]


def _reportParseError(jvsEvent):
    """Print the markup of a calendar entry that could not be parsed cleanly."""
    print("Error parsing:")
    print(jvsEvent.prettify())


class shiai:
    """Record holding a title, a date and the age groups of one entry."""

    def __init__(self, titel, date, ageGroups):
        # The parameter keeps its original (German) spelling for keyword
        # callers; previously the undefined name ``title`` was assigned here,
        # which raised NameError on every construction.
        self.title = titel
        self.date = date
        self.ageGroups = ageGroups

    def toDict(self):
        """Return the attributes as a plain dict."""
        return {
            'title': self.title,
            'date': self.date,
            'ageGroups': self.ageGroups,
        }

    def toJson(self):
        """Return ``toDict()`` serialized with ``json.dumps``."""
        return json.dumps(self.toDict())


class event:
    '''
    An event consists of:
    - time span
      - start date (+ end date)
    - title (name)
    - place
    - URL
    '''

    def __init__(self, timespan, title, place=None, url=None):
        """Create an event.

        Args:
            timespan: sequence of one or two ISO date strings
                (``YYYY-MM-DD``); the first is the start date, the optional
                second one the end date.
            title: title of the event.
            place: optional place.
            url: optional URL.

        Raises:
            ValueError: if a date string is not a valid ISO date.
        """
        self.date = datetime.date.fromisoformat(timespan[0])
        self.endDate = None
        if len(timespan) >= 2:
            self.endDate = datetime.date.fromisoformat(timespan[1])
        self.title = title
        self.place = place
        self.url = url

    def toDict(self):
        """Return a dict of the set attributes; optional ones are omitted when None."""
        wkDict = {}
        wkDict["date"] = str(self.date)
        if self.endDate is not None:
            wkDict["endDate"] = str(self.endDate)
        wkDict["title"] = self.title
        if self.place is not None:
            wkDict["place"] = self.place
        if self.url is not None:
            wkDict["url"] = self.url
        return wkDict

    def toJson(self):
        """Return ``toDict()`` serialized with ``json.dumps``."""
        return json.dumps(self.toDict())


class wk(event):
    """An ``event`` that additionally carries ``akList``.

    NOTE(review): ``akList`` presumably holds age classes ("Altersklassen");
    it is filled from the ``<span>`` texts of a calendar entry - confirm.
    """

    def __init__(self, timespan, title, akList, place=None, url=None):
        super().__init__(timespan, title, place, url)
        self.akList = akList

    def toDict(self):
        """Return the ``event`` dict extended by the ``akList`` entry."""
        wkDict = super().toDict()
        wkDict["akList"] = self.akList
        return wkDict


def parseJvsEvent(jvsEvent, onlyIfWithAk=False, year=None):
    """Parse one calendar entry into a ``wk`` or ``event`` object.

    The entry is searched for elements with class ``col-2`` (a ``<time>``
    element with the dates and ``<span>`` elements), ``col-3`` (place) and
    ``col-4`` (a link providing title and URL).

    Args:
        jvsEvent: BeautifulSoup tag of a single calendar entry.
        onlyIfWithAk: if true, entries without any non-empty ``<span>`` text
            yield ``None`` instead of a plain ``event``.
        year: year used to build the ISO dates.  When omitted, a module-level
            ``year`` is used if one exists (this is how the function obtained
            the year before the parameter was added), else the current year.

    Returns:
        A ``wk`` when non-empty span texts were found, otherwise an ``event``
        (or ``None`` if ``onlyIfWithAk`` is set).  ``None`` is also returned,
        after printing the entry, when required parts are missing.

    Raises:
        ValueError: if the assembled date strings are not valid ISO dates.
    """
    if year is None:
        year = globals().get("year", datetime.date.today().year)

    timespan = None
    aks = None
    try:
        for cell in jvsEvent.find_all(class_="col-2"):
            timeTag = cell.find("time")
            if timeTag is not None:
                timespan = _parseTimespan(timeTag.text, year)
            if cell.find("span") is not None:
                spanTexts = [span.text.strip() for span in cell.find_all("span")]
                # An entry whose spans are all empty counts as "no aks".
                aks = [text for text in spanTexts if text != ""] or None

        place = jvsEvent.find(class_="col-3").text.strip() or None

        titleCell = jvsEvent.find(class_="col-4")
        link = titleCell.find("a")
        title = link.text.strip()
        url = link['href']
    except (AttributeError, KeyError, IndexError):
        # A missing element (find() returned None), a link without href or a
        # date without "." leaves the entry unusable.
        _reportParseError(jvsEvent)
        return None

    if timespan is None:
        # No <time> element was found, so no event can be constructed.
        _reportParseError(jvsEvent)
        return None

    if title != titleCell.text.strip():
        # The cell holds more text than the link; report it but keep the
        # entry, as the original best-effort handling did.
        _reportParseError(jvsEvent)

    if aks is not None:
        return wk(timespan, title, aks, place, url)
    if onlyIfWithAk:
        return None
    return event(timespan, title, place, url)


# --- Experiment 1: list upcoming entries with aks via jvsCal ----------------
events = jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True)
#events = jvsCal.parseJvsCal()
#print( jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True) )
wkUrls = [e['url'] for y in events for m in events[y] for e in events[y][m]]
print(wkUrls)
print(jvsCal.getWk(wkUrls))
# exit() raises SystemExit: nothing below this line runs unless it is removed.
exit()

# --- Experiment 2 (unreachable): fetch a single entry via jvsCal ------------
jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/"
testWk = jvsCal.wettkampf.from_url(jvsCalShiaiUrl)
print(testWk.to_json())
exit()

#with open("rkp.html", "w") as f:
#    f.write(BeautifulSoup(requests.get("https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/").content, "html.parser").prettify())
#exit()

# --- Experiment 3 (unreachable): parse a locally saved event page -----------
url = "https://judoverbandsachsen.de/kalender/?show=all"
jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/"
#jvsCalShiaiUrl = "file://rkp.html"
#jvsCalShiaiPage = requests.get(jvsCalShiaiUrl)
#jvsCalShiaiSoup = BeautifulSoup(jvsCalShiaiPage.content, "html.parser")
# BeautifulSoup reads the handle while constructing, so it can be closed
# right afterwards.
with open("rkp.html") as rkpFile:
    jvsCalShiaiSoup = BeautifulSoup(rkpFile, "html.parser")

year = "2020"
for e in jvsCalShiaiSoup.find_all(class_="event-single"):
    print(e.prettify())
    title = e.find("header").text.strip()
    articleTag = e.find("article")
    date = _parseTimespan(articleTag.find("time").text, year)
    # Exactly three <div> elements are expected; any other count raises
    # ValueError on unpacking.
    [announcementDiv, placeDiv, ageGroupsDiv] = articleTag.find_all("div")
    announcement = {}
    place = {}
    for dt, dd in zip(articleTag.find_all("dt"), articleTag.find_all("dd")):
        label = dt.text.strip()
        if label == "Ausschreibung:":
            announcement['url'] = dd.find("a")['href']
        if label == "Veranstalter:":
            announcement['organizer'] = dd.text.strip()
        if label == "Veranstaltungsort:":
            place['name'] = dd.text.strip()
        if label == "Veranstaltungsadresse:":
            place['address'] = _WHITESPACE_RE.sub(" ", dd.text.strip())
#        print(dt.text, dd.text)
    ageGroups = [ak.text.strip() for ak in ageGroupsDiv.find_all("span")]
    print(f"title: {title})")
    print(f"date: {date})")
    print(f"announcement: {announcement}")
    print(f"place: {place}")
    print(f"ageGroups: {ageGroups}")
exit()

# --- Experiment 4 (unreachable): scrape the full online calendar ------------
jvsCalPage = requests.get(url)
jvsCalSoup = BeautifulSoup(jvsCalPage.content, "html.parser")
#jvsCalEventList = jvsCalSoup.find(id="eventList")
#print(jvsCalEventList.prettify())
jvsCalEventListItems = jvsCalSoup.find(id="eventListItems")
#print(jvsCalEventListItems.prettify())
jvsCalEventMonts = jvsCalEventListItems.find_all("div", class_="month")

jvsWkList = []
for m in jvsCalEventMonts:
    if m.has_attr("data-month"):
        # data-month is sliced as four year characters followed by two
        # month characters.
        yearMonth = m.get("data-month")
        year = (yearMonth[0:4])
        month = yearMonth[4:6]
        print(f"Jahr: {year}, Monat: {month}")
        events = m.find_all(class_="posts")
        # The loop variable must not be called ``event``: that would rebind
        # the class of the same name that parseJvsEvent instantiates.
        for jvsEventTag in events:
            parsedEvent = parseJvsEvent(jvsEventTag, onlyIfWithAk=True, year=year)
            if parsedEvent is not None:
                jvsWkList.append(parsedEvent)
    else:
        print("no data-month")

for w in jvsWkList:
    print(w.toJson())