From 7486a9e886ad08b73c3f5462c14427e92d1c35c5 Mon Sep 17 00:00:00 2001 From: marko Date: Sun, 11 Oct 2020 09:40:27 +0200 Subject: [PATCH 01/10] =?UTF-8?q?=20Zum=20Commit=20vorgemerkte=20=C3=84nde?= =?UTF-8?q?rungen:=20=09neue=20Datei:=20=20=20=20=20src/wkScraper/jvsCal.p?= =?UTF-8?q?y=20=09neue=20Datei:=20=20=20=20=20src/wkScraper/wkScraper-JvsC?= =?UTF-8?q?al.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wkOrg/src/wkScraper/jvsCal.py | 165 ++++++++++++++++++++ wkOrg/src/wkScraper/wkScraper-JvsCal.py | 197 ++++++++++++++++++++++++ 2 files changed, 362 insertions(+) create mode 100644 wkOrg/src/wkScraper/jvsCal.py create mode 100755 wkOrg/src/wkScraper/wkScraper-JvsCal.py diff --git a/wkOrg/src/wkScraper/jvsCal.py b/wkOrg/src/wkScraper/jvsCal.py new file mode 100644 index 0000000..1728116 --- /dev/null +++ b/wkOrg/src/wkScraper/jvsCal.py @@ -0,0 +1,165 @@ +#! /usr/bin/env python3 +# -*- coding: UTF-8 -*- + +from bs4 import BeautifulSoup +import datetime +import json +import re +import requests + +jvsCalUrl = "https://judoverbandsachsen.de/kalender/?show=all" + +def parseJvsCal(url=jvsCalUrl, minYear = 0, minMonth = 0, onlyWithAks=False): + ''' + Parse the calender page of the jvs + + returns dictionary of dictionaries of list cal[year][month] = listOfUrls + ''' + jvsCalPage = requests.get(url) + jvsCalSoup = BeautifulSoup(jvsCalPage.content, "html.parser") + jvsCalEventListItems = jvsCalSoup.find(id="eventListItems") + jvsCalEventMonts = jvsCalEventListItems.find_all("div", class_="month") + + jvsWkList = {} + + for m in jvsCalEventMonts: + if m.has_attr("data-month"): + yearMonth = m.get("data-month") + year = int(yearMonth[0:4]) + if year < minYear: + continue + if not year in jvsWkList: + jvsWkList[year] = {} + month = int(yearMonth[4:6]) + if year==minYear and month= 2: + self.endDate = datetime.date.fromisoformat( timespan[1] ) + self.title = title + self.place = place + self.url = url + return + def toDict(self): + wkDict = {} + wkDict["date"] = str(self.date) + if self.endDate is not None: + wkDict["endDate"] = str(self.endDate) + wkDict["title"] = self.title + if self.place is not None: + wkDict["place"] = self.place + if self.url is not None: + wkDict["url"] = self.url + return wkDict + def toJson(self): + return json.dumps(self.toDict()) + +class wk(event): + def __init__(self, timespan, title, akList, place=None, url=None): + super().__init__(timespan, title, place, url) + self.akList = akList + def toDict(self): + wkDict = super().toDict() + wkDict["akList"] = self.akList + return wkDict + +def parseJvsEvent(jvsEvent, onlyIfWithAk=False): + try: + aks = None + for e in jvsEvent.find_all(class_="col-2"): + if e.find("time"): + time = [ t.strip() for t in re.sub( "\s+", " ", e.find("time").text.strip() ).split("-")] + time = [ t.split(".") for t in time ] + time = [ "-".join( [year, t[1], t[0]] ) for t in time ] + if e.find("span"): + aks = [ ak.text.strip() for ak in e.find_all("span") ] + aks = [ ak for ak in aks if ak != "" ] + if len(aks) == 0: + aks=None + place = event.find(class_="col-3").text.strip() + if place == "": + place = None + title = event.find(class_="col-4").find("a").text.strip() + url = event.find(class_="col-4").find("a")['href'] + titleFull = event.find(class_="col-4").text.strip() + assert(title==titleFull) + except: + print("Error parsing:") + print(event.prettify()) + + if aks is not None: + return wk(time, title, aks, place, url) + else: + if onlyIfWithAk: + return None + else: + return event(time, title, place, url) + + +events = jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True) +#events = jvsCal.parseJvsCal() + +#print( jvsCal.parseJvsCal(minYear=datetime.date.today().year, minMonth=datetime.date.today().month, onlyWithAks=True) ) + +print( [ e['url'] for y in events for m in events[y] for e in events[y][m]] ) + +print( jvsCal.getWk([ e['url'] for y in events for m in events[y] for e in events[y][m]]) ) + +exit() + +jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/" + +testWk = jvsCal.wettkampf.from_url( jvsCalShiaiUrl ) +print(testWk.to_json()) +exit() + + +#with open("rkp.html", "w") as f: +# f.write(BeautifulSoup(requests.get("https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/").content, "html.parser").prettify()) +#exit() + + +url = "https://judoverbandsachsen.de/kalender/?show=all" + +jvsCalShiaiUrl = "https://judoverbandsachsen.de/events/23-raeucherkerzenpokal/" +#jvsCalShiaiUrl = "file://rkp.html" +#jvsCalShiaiPage = requests.get(jvsCalShiaiUrl) +#jvsCalShiaiSoup = BeautifulSoup(jvsCalShiaiPage.content, "html.parser") +jvsCalShiaiSoup = BeautifulSoup(open("rkp.html"), "html.parser") + +year = "2020" +for e in jvsCalShiaiSoup.find_all(class_="event-single"): + print(e.prettify()) + title = e.find("header").text.strip() + articleTag= e.find("article") + date = [ t.strip() for t in re.sub( "\s+", " ", articleTag.find("time").text.strip() ).split("-")] + date = [ t.split(".") for t in date ] + date = [ "-".join( [year, t[1], t[0]] ) for t in date ] + + [announcementDiv, placeDiv, ageGroupsDiv] = articleTag.find_all("div") + + announcement = {} + place = {} + for dt, dd in zip(articleTag.find_all("dt"), articleTag.find_all("dd")): + if dt.text.strip() == "Ausschreibung:": + announcement['url'] = dd.find("a")['href'] + if dt.text.strip() == "Veranstalter:": + announcement['organizer'] = dd.text.strip() + if dt.text.strip() == "Veranstaltungsort:": + place['name'] = dd.text.strip() + if dt.text.strip() == "Veranstaltungsadresse:": + place['address'] = re.sub("\s+", " ", dd.text.strip()) +# print(dt.text, dd.text) + + ageGroups = [ ak.text.strip() for ak in ageGroupsDiv.find_all("span") ] + +print(f"title: {title})") +print(f"date: {date})") +print(f"announcement: {announcement}") +print(f"place: {place}") +print(f"ageGroups: {ageGroups}") +exit() + + +jvsCalPage = requests.get(url) + +jvsCalSoup = BeautifulSoup(jvsCalPage.content, "html.parser") + +#jvsCalEventList = jvsCalSoup.find(id="eventList") +#print(jvsCalEventList.prettify()) + +jvsCalEventListItems = jvsCalSoup.find(id="eventListItems") +#print(jvsCalEventListItems.prettify()) + +jvsCalEventMonts = jvsCalEventListItems.find_all("div", class_="month") + +jvsWkList = [] + +for m in jvsCalEventMonts: + if m.has_attr("data-month"): + yearMonth = m.get("data-month") + year = (yearMonth[0:4]) + month = yearMonth[4:6] + print(f"Jahr: {year}, Monat: {month}") + events = m.find_all(class_="posts") + for event in events: + parsedEvent = parseJvsEvent(event, onlyIfWithAk=True) + if parsedEvent is not None: + jvsWkList.append(parsedEvent) + else: + print("no data-month") + +for w in jvsWkList: + print(w.toJson()) From 239ae8b68a29979d9cf9b50fe32b06817a345749 Mon Sep 17 00:00:00 2001 From: marko Date: Tue, 8 Jun 2021 20:13:50 +0200 Subject: [PATCH 02/10] =?UTF-8?q?=20Zum=20Commit=20vorgemerkte=20=C3=84nde?= =?UTF-8?q?rungen:=20=09ge=C3=A4ndert:=20=20=20=20=20=20=20index.php?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../phpLib/cwsvJudo/miscAssis.php | 28 +++++++++---------- homepage/wkParticipo/index.php | 5 ++++ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/homepage/redesign2018/markdownExperiment/phpLib/cwsvJudo/miscAssis.php b/homepage/redesign2018/markdownExperiment/phpLib/cwsvJudo/miscAssis.php index e89e127..c902b11 100644 --- a/homepage/redesign2018/markdownExperiment/phpLib/cwsvJudo/miscAssis.php +++ b/homepage/redesign2018/markdownExperiment/phpLib/cwsvJudo/miscAssis.php @@ -7,8 +7,8 @@ return (is_numeric($str) && $str > 0 && $str == round($str)); function getPdoDbConnection($hostname, $dbName, $user, $password){ try{ $dbConnection = new PDO( - 'mysql:host='.$hostname.';dbname='.$dbName, - $user, + 'mysql:host='.$hostname.';dbname='.$dbName, + $user, $password ); } @@ -23,8 +23,8 @@ global $cwsvJudoConfig; // Datenbankverbindung bereit stellen try{ $db_connection = new PDO( - 'mysql:host='.$cwsvJudoConfig["db"]["host"].';dbname='.$cwsvJudoConfig["db"]["name"],//.';charset=utf8', - $cwsvJudoConfig["db"]["user"], + 'mysql:host='.$cwsvJudoConfig["db"]["host"].';dbname='.$cwsvJudoConfig["db"]["name"],//.';charset=utf8', + $cwsvJudoConfig["db"]["user"], $cwsvJudoConfig["db"]["password"] ); } @@ -47,7 +47,7 @@ if (is_array($somePossibleEmptyStuff) || $somePossibleEmptyStuff instanceof Trav return null; } -/// Eine als String gegebene Liste kommagetrennter key=value Paare in +/// Eine als String gegebene Liste kommagetrennter key=value Paare in /// ein assoziatives Array überführen function getKeyValueArray($aKeyValueStringList){ $retKeyValueArray = array(); @@ -87,7 +87,7 @@ if( empty($optionsArray['outCharset']) ) $optionsArray['outCharset'] = "UTF-8"; ); $pdoStatementForQuerryingZitat->execute(); $retZitat = $pdoStatementForQuerryingZitat->fetchAll(PDO::FETCH_ASSOC); - + // Zeichensatzkonvertierung foreach($retZitat as &$entry){ array_walk( @@ -127,8 +127,8 @@ if( !file_exists($aJsonFileName) ) return null; if( !(is_array( $someLinkNames ) || is_object( $someLinkNames )) ) return null; return arrayKeyFilter( - json_decode( - file_get_contents($aJsonFileName), + json_decode( + file_get_contents($aJsonFileName), true ), $someLinkNames @@ -162,14 +162,14 @@ function toAscii($str, $replace=array(), $delimiter='-') { return $clean; } +/// Wrapper function for a query to the +/// $aDbConnection connection to a db function dbQuery($aDbConnection, $aQueryString, $aBindArray = array(), $someOptions = array( "dbCharset" => "ISO-8859-1", "outCharset" => "UTF-8" ) ){ -//echo("Doing ".$aQueryString); -var_dump($aBindArray); if( empty($someOptions['dbCharset']) ) $someOptions['dbCharset'] = "ISO-8859-1"; if( empty($someOptions['outCharset']) ) $someOptions['outCharset'] = "UTF-8"; -/// @toDo: Bisher wird nur die Rückgabe konvertiert. Eigentlich muss -/// doch auch die Eingabe konvertiert werden. Aber das jetzt -/// umzustellen wird schwer! Die User m Wettkampfplaner sind ja z.B. +/// @toDo: Bisher wird nur die Rückgabe konvertiert. Eigentlich muss +/// doch auch die Eingabe konvertiert werden. Aber das jetzt +/// umzustellen wird schwer! Die User m Wettkampfplaner sind ja z.B. /// als UTF8 in latin1(?) gespeichert. try{ $pdoStatement = $aDbConnection->prepare( $aQueryString ); @@ -187,7 +187,7 @@ if( empty($someOptions['outCharset']) ) $someOptions['outCharset'] = "UTF-8"; print "Error!: " . $db_error->getMessage() . "
"; return null; } - //var_dump($ret); + // Zeichensatzkonvertierung if( is_array($ret) ){ foreach($ret as &$entry){ diff --git a/homepage/wkParticipo/index.php b/homepage/wkParticipo/index.php index aad2387..44358a7 100644 --- a/homepage/wkParticipo/index.php +++ b/homepage/wkParticipo/index.php @@ -72,6 +72,11 @@ return $ret;