cleaned output jvsCal scraper

This commit is contained in:
marko
2021-10-17 12:17:54 +02:00
parent 96ac67503b
commit 9ce46eb253
2 changed files with 24 additions and 17 deletions

View File

@@ -1,11 +1,11 @@
#! /usr/bin/env python3
# -*- coding: UTF-8 -*-
import requests
from bs4 import BeautifulSoup
import datetime
import json
import re
import requests
jvsCalUrl = "https://judoverbandsachsen.de/kalender/?show=all"
@@ -118,8 +118,8 @@ class wettkampf:
'address': self.address,
'ageGroups': self.ageGroups
}
def to_json(self):
return json.dumps(self.to_dict())
def to_json(self, indent=2):
    '''
    Return this object serialized as a JSON string.

    The dictionary produced by self.to_dict() is handed to json.dumps.

    indent: forwarded unchanged to json.dumps as its indent argument
            (defaults to 2).
    '''
    return json.dumps(self.to_dict(), indent=indent)
@staticmethod
def from_htmlString(wkString, year=None):
'''

View File

@@ -100,27 +100,34 @@ def parseJvsEvent(jvsEvent, onlyIfWithAk=False):
return event(time, title, place, url)
events = jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True)
#events = jvsCal.parseJvsCal()
if __name__=="__main__":
events = jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True)
# print(f"{json.dumps(events, indent=2)}")
for year in events:
for month in events[year]:
for event in events[year][month]:
print(f"{event}")
wk = jvsCal.wettkampf.from_url( event['url'] )
print(f"{wk.to_json()}")
exit(-1)
#print( jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True) )
print( [ e['url'] for y in events for m in events[y] for e in events[y][m]] )
print( [ e['url'] for y in events for m in events[y] for e in events[y][m]] )
print( jvsCal.getWk([ e['url'] for y in events for m in events[y] for e in events[y][m]]) )
print( jvsCal.getWk([ e['url'] for y in events for m in events[y] for e in events[y][m]]) )
exit()
exit()
jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/"
jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/"
testWk = jvsCal.wettkampf.from_url( jvsCalShiaiUrl )
print(testWk.to_json())
exit()
testWk = jvsCal.wettkampf.from_url( jvsCalShiaiUrl )
print(testWk.to_json())
exit()
#with open("rkp.html", "w") as f:
# f.write(BeautifulSoup(requests.get("https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/").content, "html.parser").prettify())
#exit()
#with open("rkp.html", "w") as f:
# f.write(BeautifulSoup(requests.get("https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/").content, "html.parser").prettify())
#exit()
url = "https://judoverbandsachsen.de/kalender/?show=all"