cleaned output jvsCal scraper

This commit is contained in:
marko
2021-10-17 12:17:54 +02:00
parent 96ac67503b
commit 9ce46eb253
2 changed files with 24 additions and 17 deletions

View File

@@ -1,11 +1,11 @@
#! /usr/bin/env python3 #! /usr/bin/env python3
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import datetime import datetime
import json import json
import re import re
import requests
jvsCalUrl = "https://judoverbandsachsen.de/kalender/?show=all" jvsCalUrl = "https://judoverbandsachsen.de/kalender/?show=all"
@@ -118,8 +118,8 @@ class wettkampf:
'address': self.address, 'address': self.address,
'ageGroups': self.ageGroups 'ageGroups': self.ageGroups
} }
def to_json(self): def to_json(self, indent=2):
return json.dumps(self.to_dict()) return json.dumps(self.to_dict(), indent=indent)
@staticmethod @staticmethod
def from_htmlString(wkString, year=None): def from_htmlString(wkString, year=None):
''' '''

View File

@@ -100,27 +100,34 @@ def parseJvsEvent(jvsEvent, onlyIfWithAk=False):
return event(time, title, place, url) return event(time, title, place, url)
events = jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True) if __name__=="__main__":
#events = jvsCal.parseJvsCal() events = jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True)
# print(f"{json.dumps(events, indent=2)}")
#print( jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True) ) for year in events:
for month in events[year]:
for event in events[year][month]:
print(f"{event}")
wk = jvsCal.wettkampf.from_url( event['url'] )
print(f"{wk.to_json()}")
exit(-1)
print( [ e['url'] for y in events for m in events[y] for e in events[y][m]] ) print( [ e['url'] for y in events for m in events[y] for e in events[y][m]] )
print( jvsCal.getWk([ e['url'] for y in events for m in events[y] for e in events[y][m]]) ) print( jvsCal.getWk([ e['url'] for y in events for m in events[y] for e in events[y][m]]) )
exit() exit()
jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/" jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/"
testWk = jvsCal.wettkampf.from_url( jvsCalShiaiUrl ) testWk = jvsCal.wettkampf.from_url( jvsCalShiaiUrl )
print(testWk.to_json()) print(testWk.to_json())
exit() exit()
#with open("rkp.html", "w") as f: #with open("rkp.html", "w") as f:
# f.write(BeautifulSoup(requests.get("https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/").content, "html.parser").prettify()) # f.write(BeautifulSoup(requests.get("https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/").content, "html.parser").prettify())
#exit() #exit()
url = "https://judoverbandsachsen.de/kalender/?show=all" url = "https://judoverbandsachsen.de/kalender/?show=all"