From 9ce46eb253b808d87e5e9b7b370634a92fe14710 Mon Sep 17 00:00:00 2001 From: marko Date: Sun, 17 Oct 2021 12:17:54 +0200 Subject: [PATCH] cleaned output jvsCal scraper --- wkOrg/src/wkScraper/jvsCal.py | 6 ++--- wkOrg/src/wkScraper/wkScraper-JvsCal.py | 35 +++++++++++++++---------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/wkOrg/src/wkScraper/jvsCal.py b/wkOrg/src/wkScraper/jvsCal.py index 1728116..b6ed790 100644 --- a/wkOrg/src/wkScraper/jvsCal.py +++ b/wkOrg/src/wkScraper/jvsCal.py @@ -1,11 +1,11 @@ #! /usr/bin/env python3 # -*- coding: UTF-8 -*- +import requests from bs4 import BeautifulSoup import datetime import json import re -import requests jvsCalUrl = "https://judoverbandsachsen.de/kalender/?show=all" @@ -118,8 +118,8 @@ class wettkampf: 'address': self.address, 'ageGroups': self.ageGroups } - def to_json(self): - return json.dumps(self.to_dict()) + def to_json(self, indent=2): + return json.dumps(self.to_dict(), indent=indent) @staticmethod def from_htmlString(wkString, year=None): ''' diff --git a/wkOrg/src/wkScraper/wkScraper-JvsCal.py b/wkOrg/src/wkScraper/wkScraper-JvsCal.py index f8b531d..76bfe5f 100755 --- a/wkOrg/src/wkScraper/wkScraper-JvsCal.py +++ b/wkOrg/src/wkScraper/wkScraper-JvsCal.py @@ -100,27 +100,34 @@ def parseJvsEvent(jvsEvent, onlyIfWithAk=False): return event(time, title, place, url) -events = jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True) -#events = jvsCal.parseJvsCal() +if __name__=="__main__": + events = jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True) + # print(f"{json.dumps(events, indent=2)}") + + for year in events: + for month in events[year]: + for event in events[year][month]: + print(f"{event}") + wk = jvsCal.wettkampf.from_url( event['url'] ) + print(f"{wk.to_json()}") + exit(-1) -#print( jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True) ) + print( [ e['url'] for y in events for m in events[y] for e in events[y][m]] ) -print( [ e['url'] for y in events for m in events[y] for e in events[y][m]] ) + print( jvsCal.getWk([ e['url'] for y in events for m in events[y] for e in events[y][m]]) ) -print( jvsCal.getWk([ e['url'] for y in events for m in events[y] for e in events[y][m]]) ) + exit() -exit() + jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/" -jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/" - -testWk = jvsCal.wettkampf.from_url( jvsCalShiaiUrl ) -print(testWk.to_json()) -exit() + testWk = jvsCal.wettkampf.from_url( jvsCalShiaiUrl ) + print(testWk.to_json()) + exit() -#with open("rkp.html", "w") as f: -# f.write(BeautifulSoup(requests.get("https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/").content, "html.parser").prettify()) -#exit() + #with open("rkp.html", "w") as f: + # f.write(BeautifulSoup(requests.get("https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/").content, "html.parser").prettify()) + #exit() url = "https://judoverbandsachsen.de/kalender/?show=all"